Merlijn van Deen has submitted this change and it was merged.
Change subject: use i18n messages; pep8 changes
......................................................................
use i18n messages; pep8 changes
Change-Id: I9ee37be3c9579030f3a71fca6b503211650032b9
---
M scripts/blockpageschecker.py
1 file changed, 108 insertions(+), 72 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/blockpageschecker.py b/scripts/blockpageschecker.py
index 920166f..affd2b7 100755
--- a/scripts/blockpageschecker.py
+++ b/scripts/blockpageschecker.py
@@ -57,7 +57,7 @@
# (C) Monobi a.k.a. Wikihermit, 2007
# (C) Filnik, 2007-2011
# (C) NicDumZ, 2008-2009
-# (C) Pywikipedia bot team, 2007-2010
+# (C) Pywikipedia bot team, 2007-2013
#
# Distributed under the terms of the MIT license.
#
@@ -67,6 +67,7 @@
import re
import webbrowser
import pywikibot
+from pywikibot import i18n
from pywikibot import pagegenerators
from pywikibot import config
@@ -97,12 +98,14 @@
ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^\}]*)\}\}'],
'ja': [ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)保護(?:性急|)(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
}
+
# Regex to get the semi-protection move template
templateSemiMoveProtection = {
'en': None,
'it': [r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
'ja': [ur'(?<!\<nowiki\>)\{\{(?:[Tt]emplate:|)移動半保護(?:[Ss]|)(?:\|.+|)\}\}(?!\<\/nowiki\>)\s*(?:\r\n|)*'],
}
+
# Regex to get the total-protection move template
templateTotalMoveProtection = {
'en': None,
@@ -111,16 +114,18 @@
}
# If you use only one template for all the type of protection, put it here.
-# You may use only one template or an unique template and some other "old" template that the
-# script should still check (as on it.wikipedia)
+# You may use only one template or an unique template and some other "old"
+# template that the script should still check (as on it.wikipedia)
templateUnique = {
'en': None,
'it': [r'\{\{(?:[Tt]emplate:|)[Pp]rotetta\}\}'],
}
-# Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move, 4 => template-unique
+# Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move,
+# 4 => template-unique
templateNoRegex = {
- 'it': ['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None, '{{Protetta}}'],
+ 'it': ['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None,
+ '{{Protetta}}'],
'fr': ['{{Semi-protection}}', '{{Protection}}', None, None, None],
'ja': [u'{{半保護}}', u'{{保護}}', u'{{移動半保護}}', u'{{移動保護}}', None],
}
@@ -129,24 +134,20 @@
categoryToCheck = {
'en': [u'Category:Protected'],
'ar': [u'تصنيف:محتويات محمية'],
- 'fr': [u'Category:Page semi-protégée', u'Category:Page protégée', u'Catégorie:Article protégé'],
- 'he': [u'קטגוריה:ויקיפדיה: דפים מוגנים', u'קטגוריה:ויקיפדיה: דפים מוגנים חלקית'],
- 'it': [u'Categoria:Pagine protette - scadute', u'Categoria:Pagine semiprotette', u'Categoria:Voci protette'],
- 'ja': [u'Category:編集保護中の記事', u'Category:編集半保護中の記事', u'Category:移動保護中の記事'],
- 'pt': [u'Category:!Páginas protegidas', u'Category:!Páginas semiprotegidas'],
- 'zh': [u'Category:被保护的页面', u'Category:被保護的模板', u'Category:暂时不能移动的页面', u'Category:被半保护的页面'],
+ 'fr': [u'Category:Page semi-protégée', u'Category:Page protégée',
+ u'Catégorie:Article protégé'],
+ 'he': [u'קטגוריה:ויקיפדיה: דפים מוגנים',
+ u'קטגוריה:ויקיפדיה: דפים מוגנים חלקית'],
+ 'it': [u'Categoria:Pagine protette - scadute',
+ u'Categoria:Pagine semiprotette', u'Categoria:Voci protette'],
+ 'ja': [u'Category:編集保護中の記事', u'Category:編集半保護中の記事',
+ u'Category:移動保護中の記事'],
+ 'pt': [u'Category:!Páginas protegidas',
+ u'Category:!Páginas semiprotegidas'],
+ 'zh': [u'Category:被保护的页面', u'Category:被保護的模板',
+ u'Category:暂时不能移动的页面', u'Category:被半保护的页面'],
}
-# Comment used when the Bot edits
-comment = {
- 'en': u'Bot: Deleting out-dated template',
- 'ar': u'بوت: حذف قالب قديم',
- 'fr': u'Robot: Mise à jour des bandeaux de protection',
- 'he': u'בוט: מסיר תבנית שעבר זמנה',
- 'it': u'Bot: Tolgo o sistemo template di avviso blocco',
- 'ja': u'ロボットによる: 保護テンプレート除去',
- 'pt': u'Bot: Retirando predefinição de proteção',
- 'zh': u'機器人: 移除過期的保護模板',
-}
+
# Check list to block the users that haven't set their preferences
project_inserted = ['en', 'fr', 'it', 'ja', 'pt', 'zh']
@@ -157,22 +158,22 @@
def understandBlock(text, TTP, TSP, TSMP, TTMP, TU):
""" Understand if the page is blocked and if it has the right template """
- if TTP is not None:
+ if TTP:
for catchRegex in TTP: # TTP = templateTotalProtection
resultCatch = re.findall(catchRegex, text)
if resultCatch:
return ('sysop-total', catchRegex)
- if TSP is not None:
+ if TSP:
for catchRegex in TSP:
resultCatch = re.findall(catchRegex, text)
if resultCatch:
return ('autoconfirmed-total', catchRegex)
- if TU is not None:
+ if TU:
for catchRegex in TU:
resultCatch = re.findall(catchRegex, text)
if resultCatch:
return ('unique', catchRegex)
- if TSMP is not None and TTMP is not None and TTP != TTMP and TSP != TSMP:
+ if TSMP and TTMP and TTP != TTMP and TSP != TSMP:
for catchRegex in TTMP:
resultCatch = re.findall(catchRegex, text)
if resultCatch:
@@ -204,7 +205,7 @@
def main():
""" Main Function """
# Loading the comments
- global categoryToCheck, comment, project_inserted
+ global categoryToCheck, project_inserted
# always, define a generator to understand if the user sets one,
# defining what's genFactory
always = False
@@ -238,7 +239,8 @@
genFactory.handleArg(arg)
if config.mylang not in project_inserted:
- pywikibot.output(u"Your project is not supported by this script.\nYou have to edit the script and add it!")
+ pywikibot.output(u"Your project is not supported by this script.\n"
+ u"You have to edit the script and add it!")
return
site = pywikibot.getSite()
site.login()
@@ -253,7 +255,7 @@
TU = pywikibot.translate(site, templateUnique)
category = pywikibot.translate(site, categoryToCheck)
- commentUsed = pywikibot.translate(site, comment)
+ commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
if not generator:
generator = genFactory.getCombinedGenerator()
if not generator:
@@ -299,11 +301,13 @@
try:
config.sysopnames[site.family.name][site.lang]
except:
- pywikibot.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ pywikibot.output(u"%s is sysop-protected: "
+ u"this account can't edit it! Skipping..."
+ % pagename)
continue
- # Understand, according to the template in the page, what should be the protection
- # and compare it with what there really is.
+ # Understand, according to the template in the page, what should be the
+ # protection and compare it with what there really is.
TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
# Only to see if the text is the same or not...
oldtext = text
@@ -313,11 +317,12 @@
if not editRestr:
# page is not edit-protected
# Deleting the template because the page doesn't need it.
- if TU is not None:
+ if TU:
replaceToPerform = u'|'.join(TTP + TSP + TU)
else:
replaceToPerform = u'|'.join(TTP + TSP)
- text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
+ text, changes = re.subn('<noinclude>(%s)</noinclude>'
+ % replaceToPerform, '', text)
if changes == 0:
text, changes = re.subn('(%s)' % replaceToPerform, '', text)
msg = u'The page is editable for all'
@@ -327,35 +332,40 @@
elif editRestr[0] == 'sysop':
# total edit protection
- if (TemplateInThePage[0] == 'sysop-total' and TTP is not None) or (TemplateInThePage[0] == 'unique' and TU is not None):
+ if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
+ (TemplateInThePage[0] == 'unique' and TU):
msg = 'The page is protected to the sysop'
if not moveBlockCheck:
msg += ', skipping...'
pywikibot.output(msg)
else:
- pywikibot.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...')
- if TU is not None:
+ pywikibot.output(u'The page is protected to the sysop, but the '
+ u'template seems not correct. Fixing...')
+ if TU:
text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
else:
text, changes = re.subn(TemplateInThePage[1], TNR[1], text)
- elif TSP is not None or TU is not None:
+ elif TSP or TU:
# implicitely editRestr[0] = 'autoconfirmed', edit-Semi-protection
- if TemplateInThePage[0] == 'autoconfirmed-total' or TemplateInThePage[0] == 'unique':
+ if TemplateInThePage[0] == 'autoconfirmed-total' or \
+ TemplateInThePage[0] == 'unique':
msg = 'The page is editable only for the autoconfirmed users'
if not moveBlockCheck:
msg += ', skipping...'
pywikibot.output(msg)
else:
- pywikibot.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...')
- if TU is not None:
+ pywikibot.output(u'The page is editable only for the '
+ u'autoconfirmed users, but the template '
+ u'seems not correct. Fixing...')
+ if TU:
text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
else:
text, changes = re.subn(TemplateInThePage[1], TNR[0], text)
if changes == 0:
# We tried to fix edit-protection templates, but it did not work.
- pywikibot.output('Warning : No edit-protection template could be found')
+ pywikibot.warning('No edit-protection template could be found')
if moveBlockCheck and changes > -1:
# checking move protection now
@@ -366,51 +376,72 @@
changes = -1
if not moveRestr:
- pywikibot.output(u'The page is movable for all, deleting the template...')
+ pywikibot.output(u'The page is movable for all, deleting the '
+ u'template...')
# Deleting the template because the page doesn't need it.
- if TU is not None:
+ if TU:
replaceToPerform = u'|'.join(TSMP + TTMP + TU)
else:
replaceToPerform = u'|'.join(TSMP + TTMP)
- text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
+ text, changes = re.subn('<noinclude>(%s)</noinclude>'
+ % replaceToPerform, '', text)
if changes == 0:
text, changes = re.subn('(%s)' % replaceToPerform, '', text)
elif moveRestr[0] == 'sysop':
# move-total-protection
- if (TemplateInThePage[0] == 'sysop-move' and TTMP is not None) or (TemplateInThePage[0] == 'unique' and TU is not None):
- pywikibot.output(u'The page is protected from moving to the sysop, skipping...')
- if TU is not None:
- text = oldtext # no changes needed, better to revert the old text.
+ if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
+ (TemplateInThePage[0] == 'unique' and TU):
+ pywikibot.output(u'The page is protected from moving to '
+ u'the sysop, skipping...')
+ if TU:
+ # no changes needed, better to revert the old text.
+ text = oldtext
else:
- pywikibot.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...')
- if TU is not None:
- text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ pywikibot.output(u'The page is protected from moving to '
+ u'the sysop, but the template seems not '
+ u'correct. Fixing...')
+ if TU:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4],
+ text)
else:
- text, changes = re.subn(TemplateInThePage[1], TNR[3], text)
+ text, changes = re.subn(TemplateInThePage[1], TNR[3],
+ text)
- elif TSMP is not None or TU is not None:
- # implicitely moveRestr[0] = 'autoconfirmed', move-semi-protection
- if TemplateInThePage[0] == 'autoconfirmed-move' or TemplateInThePage[0] == 'unique':
- pywikibot.output(u'The page is movable only for the autoconfirmed users, skipping...')
- if TU is not None:
- text = oldtext # no changes needed, better to revert the old text.
+ elif TSMP or TU:
+ # implicitely moveRestr[0] = 'autoconfirmed',
+ # move-semi-protection
+ if TemplateInThePage[0] == 'autoconfirmed-move' or \
+ TemplateInThePage[0] == 'unique':
+ pywikibot.output(u'The page is movable only for the '
+ u'autoconfirmed users, skipping...')
+ if TU:
+ # no changes needed, better to revert the old text.
+ text = oldtext
else:
- pywikibot.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...')
- if TU is not None:
- text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ pywikibot.output(u'The page is movable only for the '
+ u'autoconfirmed users, but the template '
+ u'seems not correct. Fixing...')
+ if TU:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4],
+ text)
else:
- text, changes = re.subn(TemplateInThePage[1], TNR[2], text)
+ text, changes = re.subn(TemplateInThePage[1], TNR[2],
+ text)
if changes == 0:
- # We tried to fix move-protection templates, but it did not work.
- pywikibot.output('Warning : No move-protection template could be found')
+ # We tried to fix move-protection templates, but it did not work
+ pywikibot.warning('No move-protection template could be found')
if oldtext != text:
# Ok, asking if the change has to be performed and do it if yes.
- pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
pywikibot.showDiff(oldtext, text)
if not always:
- choice = pywikibot.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+ choice = pywikibot.inputChoice(u'Do you want to accept these '
+ u'changes?',
+ ['Yes', 'No', 'All'],
+ ['y', 'N', 'a'], 'N')
if choice == 'a':
always = True
if always or choice == 'y':
@@ -421,8 +452,8 @@
pywikibot.output(u'Edit conflict! skip!')
break
except pywikibot.ServerError:
- # Sometimes there is this error that's quite annoying because
- # can block the whole process for nothing.
+ # Sometimes there is this error that's quite annoying
+ # because can block the whole process for nothing.
errorCount += 1
if errorCount < 5:
pywikibot.output(u'Server Error! Wait..')
@@ -432,19 +463,24 @@
# Prevent Infinite Loops
raise pywikibot.ServerError(u'Fifth Server Error!')
except pywikibot.SpamfilterError, e:
- pywikibot.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
+ pywikibot.output(u'Cannot change %s because of '
+ u'blacklist entry %s'
+ % (page.title(), e.url))
break
except pywikibot.PageNotSaved, error:
- pywikibot.output(u'Error putting page: %s' % (error.args,))
+ pywikibot.output(u'Error putting page: %s'
+ % (error.args,))
break
except pywikibot.LockedPage:
- pywikibot.output(u'The page is still protected. Skipping...')
+ pywikibot.output(u'The page is still protected. '
+ u'Skipping...')
break
else:
# Break only if the errors are one after the other
errorCount = 0
break
+
if __name__ == "__main__":
try:
main()
--
To view, visit https://gerrit.wikimedia.org/r/79582
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9ee37be3c9579030f3a71fca6b503211650032b9
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
Merlijn van Deen has submitted this change and it was merged.
Change subject: Clean up MySQLPageGenerator
......................................................................
Clean up MySQLPageGenerator
Since we get the database name directly from the site,
it probably doesn't match up with the replicated copy
that is being used (Toolserver or WMF labs).
Also try using oursql if available, it generally has
better unicode support.
Change-Id: Ied3e7b3441ce0b6af500ae9ac6c75c1ba92e13f1
---
M pywikibot/pagegenerators.py
1 file changed, 31 insertions(+), 5 deletions(-)
Approvals:
Xqt: Looks good to me, but someone else must approve
Merlijn van Deen: Looks good to me, approved
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index f25f827..8318048 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -1056,13 +1056,38 @@
if page.site == self.site:
yield page
-def MySQLPageGenerator(query, site = None):
- import MySQLdb as mysqldb
+
+def MySQLPageGenerator(query, site=None):
+ """
+ Requires oursql <http://pythonhosted.org/oursql/> or
+ MySQLdb <https://sourceforge.net/projects/mysql-python/>
+ Yields a list of pages based on a MySQL query. Each query
+ should provide the page namespace and page title. An example
+ query that yields all ns0 pages might look like:
+ SELECT
+ page_namespace,
+ page_title
+ FROM page
+ WHERE page_namespace = 0;
+ @param query: MySQL query to execute
+ @param site: Site object or raw database name
+ @type site: pywikibot.Site|str
+ @return: iterator of pywikibot.Page
+ """
+ try:
+ import oursql as mysqldb
+ except ImportError:
+ import MySQLdb as mysqldb
if site is None:
site = pywikibot.Site()
- conn = mysqldb.connect(config.db_hostname, db = site.dbName(),
- user = config.db_username,
- passwd = config.db_password)
+ if isinstance(site, pywikibot.site.Site):
+ # We want to let people to set a custom dbname
+ # since the master dbname might not be exactly
+ # equal to the name on the replicated site
+ site = site.dbName()
+ conn = mysqldb.connect(config.db_hostname, db=site,
+ user=config.db_username,
+ passwd=config.db_password)
cursor = conn.cursor()
pywikibot.output(u'Executing query:\n%s' % query)
query = query.encode(site.encoding())
@@ -1085,6 +1110,7 @@
page = pywikibot.Page(site, pageTitle)
yield page
+
def YearPageGenerator(start = 1, end = 2050, site = None):
if site is None:
site = pywikibot.Site()
--
To view, visit https://gerrit.wikimedia.org/r/79560
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ied3e7b3441ce0b6af500ae9ac6c75c1ba92e13f1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Make sure all variables passed to request are str
......................................................................
Make sure all variables passed to request are str
All parameters passed to request should be bytestrings (str). If any
of them is accidentally unicode, Python will try to convert all bytestrings
to unicode via str.decode(sys.getdefaultencoding()). This is problematic,
because non-ascii data will then raise a UnicodeDecodeError as shown in [1].
[1] http://lists.wikimedia.org/pipermail/pywikipedia-l/2013-August/008218.html
Change-Id: I1b66a31c8752dee1f950d425d532479f1b671926
---
M wikipedia.py
1 file changed, 4 insertions(+), 4 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/wikipedia.py b/wikipedia.py
index 976a310..4e3a543 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -6488,12 +6488,12 @@
address = address[:-1]
headers = {
- 'User-agent': useragent,
+ 'User-agent': str(useragent),
'Content-Length': str(len(data)),
- 'Content-type':contentType,
+ 'Content-type': str(contentType),
}
if cookies:
- headers['Cookie'] = cookies
+ headers['Cookie'] = str(cookies)
if compress:
headers['Accept-encoding'] = 'gzip'
@@ -6507,7 +6507,7 @@
retry_attempt = 0
while True:
try:
- request = urllib2.Request(url, data, headers)
+ request = urllib2.Request(str(url), str(data), headers)
f = MyURLopener.open(request)
# read & info can raise socket.error
--
To view, visit https://gerrit.wikimedia.org/r/80228
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1b66a31c8752dee1f950d425d532479f1b671926
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: hostname and paths should be str in generated family files
......................................................................
hostname and paths should be str in generated family files
All parameters passed to request should be bytestrings (str). If any
of them is accidentally unicode, Python will try to convert all bytestrings
to unicode via str.decode(sys.getdefaultencoding()). This is problematic,
because non-ascii data will then raise a UnicodeDecodeError as shown in [1].
The url will be converted to str on the postData level, but it is also good
to prevent it from being unicode in the first place.
See also: I1b66a31c8752dee1f950d425d532479f1b671926
[1] http://lists.wikimedia.org/pipermail/pywikipedia-l/2013-August/008218.html
Change-Id: I5eb13d8853b6ad08b48f40b5ad881616a2f4fd2a
---
M generate_family_file.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/generate_family_file.py b/generate_family_file.py
index 06464c9..aef4d11 100644
--- a/generate_family_file.py
+++ b/generate_family_file.py
@@ -171,7 +171,7 @@
""".lstrip() % {'url': self.base_url, 'name': self.name})
for w in self.wikis.itervalues():
- f.write(" '%(lang)s': u'%(hostname)s',\n" % {'lang': w.lang, 'hostname': urlparse(w.server).netloc})
+ f.write(" '%(lang)s': '%(hostname)s',\n" % {'lang': w.lang, 'hostname': urlparse(w.server).netloc})
f.write(" }\n\n")
@@ -182,7 +182,7 @@
f.write(" return {\n")
for w in self.wikis.itervalues():
- f.write(" '%(lang)s': u'%(path)s',\n" % {'lang': w.lang, 'path': w.scriptpath})
+ f.write(" '%(lang)s': '%(path)s',\n" % {'lang': w.lang, 'path': w.scriptpath})
f.write(" }[code]\n")
f.write("\n")
--
To view, visit https://gerrit.wikimedia.org/r/80229
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5eb13d8853b6ad08b48f40b5ad881616a2f4fd2a
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Start with a fresh list in html2unicode every time
......................................................................
Start with a fresh list in html2unicode every time
    def x(..., something=[]):
        something.extend([1,2,3])
means something becomes [1,2,3] on the first call, but
[1,2,3,1,2,3] on the *second* call. This meant html2unicode
got a longer list of replacements every time it was called.
This commit changes it to the standard
    def x(..., something=None):
        if something is None:
            something = []
which means it's always an empty list, instead of what's left from
the last call.
Change-Id: Ie490b575a8a0cc4b5d45bbb97c0606e0fd64d4f9
---
M wikipedia.py
1 file changed, 5 insertions(+), 2 deletions(-)
Approvals:
Ladsgroup: Looks good to me, approved
Malafaya: Checked; Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/wikipedia.py b/wikipedia.py
index 976a310..f304932 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -5657,13 +5657,16 @@
# Utility functions for parsing page titles
-def html2unicode(text, ignore = []):
+def html2unicode(text, ignore = None):
"""Return text, replacing HTML entities by equivalent unicode characters."""
+
+ if ignore is None:
+ ignore = []
# This regular expression will match any decimal and hexadecimal entity and
# also entities that might be named entities.
entityR = re.compile(
r'&(?:amp;)?(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));')
-
+
ignore.extend((38, # Ampersand (&)
39, # Bugzilla 24093
60, # Less than (<)
--
To view, visit https://gerrit.wikimedia.org/r/79811
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie490b575a8a0cc4b5d45bbb97c0606e0fd64d4f9
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: DrTrigon <dr.trigon(a)surfeu.ch>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Malafaya <malafaya(a)clix.pt>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Xqt has submitted this change and it was merged.
Change subject: Make generate_user_files.py work for other directories
......................................................................
Make generate_user_files.py work for other directories
Use os.path.abspath(__file__) instead of os.path.abspath(sys.argv[0])
Change-Id: I3172c29352f9e7bddc8de173e6057fd9172e167a
---
M generate_user_files.py
1 file changed, 1 insertion(+), 4 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/generate_user_files.py b/generate_user_files.py
index b8a0064..8f4f7dd 100644
--- a/generate_user_files.py
+++ b/generate_user_files.py
@@ -176,11 +176,8 @@
if choice in "SE":
break
- #
- # I don't like this solution. Temporary for me.
- #
# determine what directory this script (generate_user_files.py) lives in
- install = os.path.dirname(os.path.abspath(sys.argv[0]))
+ install = os.path.dirname(os.path.abspath(__file__))
# config2.py will be in the pywikibot/ directory
f = codecs.open(os.path.join(install, "pywikibot", "config2.py"),
"r", "utf-8")
--
To view, visit https://gerrit.wikimedia.org/r/79577
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3172c29352f9e7bddc8de173e6057fd9172e167a
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Xqt has submitted this change and it was merged.
Change subject: Change title whitelist to title blacklist
......................................................................
Change title whitelist to title blacklist
Titles with characters outside the BMP [1] (>\uFFFF) are now no longer
detected as illegal. See this thread: [2]
[1] https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
[2] http://thread.gmane.org/gmane.comp.python.pywikipediabot.general/13197/
This list of characters was generated by using the old re and by
enumerating characters:
import re
m = re.compile(u'''[^ %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\u0080-\uFFFF+]''')
for x in range(0,0x80):
    if m.match(unichr(x)):
        print "%x" % x,
0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 13 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 23 3c 3e 5b 5d 7b 7c 7d 7f
Change-Id: I02c26be9ad814ce11d9adf2f997d3d1e05764fd1
---
M pywikibot/page.py
1 file changed, 2 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index e51977c..58debb7 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -2853,8 +2853,8 @@
"""
illegal_titles_pattern = re.compile(
- # Matching titles will be held as illegal.
- u'''[^ %!\"$&'()*,\\-.\\/0-9:;=?@A-Z\\\\^_`a-z~\u0080-\uFFFF+]'''
+ # Matching titles will be held as illegal.
+ ur'''[\x00-\x1f\x23\x3c\x3e\x5b\x5d\x7b\x7c\x7d\x7f]'''
# URL percent encoding sequences interfere with the ability
# to round-trip titles -- you can't link to them consistently.
u'|%[0-9A-Fa-f]{2}'
--
To view, visit https://gerrit.wikimedia.org/r/78525
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I02c26be9ad814ce11d9adf2f997d3d1e05764fd1
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot