jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/446244 )
Change subject: [cleanup] Code cleanup
......................................................................
[cleanup] Code cleanup
- remove leading "u" prefix from strings
- use single quotes
- use str.format() instead of the modulo operator when the line is changed
- use "\n" instead of "\r\n"
detached from https://gerrit.wikimedia.org/r/#/c/pywikibot/core/+/444437/
Change-Id: Ibd4df68d45303f8be9b498823b7a17d50460fcec
---
M scripts/imageharvest.py
1 file changed, 27 insertions(+), 28 deletions(-)
Approvals:
Dalba: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/imageharvest.py b/scripts/imageharvest.py
index 104f898..6e2bd5f 100644
--- a/scripts/imageharvest.py
+++ b/scripts/imageharvest.py
@@ -17,7 +17,7 @@
-justshown Choose _only_ images shown on the page, not those linked
"""
#
-# (C) Pywikibot team, 2004-2017
+# (C) Pywikibot team, 2004-2018
#
# Distributed under the terms of the MIT license.
#
@@ -58,14 +58,14 @@
soup = BeautifulSoup(f.read())
if not shown:
- tagname = "a"
- elif shown == "just":
- tagname = "img"
+ tagname = 'a'
+ elif shown == 'just':
+ tagname = 'img'
else:
- tagname = ["a", "img"]
+ tagname = ['a', 'img']
for tag in soup.findAll(tagname):
- link = tag.get("src", tag.get("href", None))
+ link = tag.get('src', tag.get('href', None))
if link:
ext = os.path.splitext(link)[1].lower().strip('.')
if ext in fileformats:
@@ -79,27 +79,27 @@
image_url = ''
if url == '':
if image_url:
- url = pywikibot.input(u"What URL range should I check "
- u"(use $ for the part that is changeable)")
+ url = pywikibot.input('What URL range should I check '
+ '(use $ for the part that is changeable)')
else:
- url = pywikibot.input(u"From what URL should I get the images?")
+ url = pywikibot.input('From what URL should I get the images?')
if image_url:
minimum = 1
maximum = 99
answer = pywikibot.input(
- u"What is the first number to check (default: 1)")
+ 'What is the first number to check (default: 1)')
if answer:
minimum = int(answer)
answer = pywikibot.input(
- u"What is the last number to check (default: 99)")
+ 'What is the last number to check (default: 99)')
if answer:
maximum = int(answer)
if not desc:
basicdesc = pywikibot.input(
- u"What text should be added at the end of "
- u"the description of each image from this url?")
+ 'What text should be added at the end of '
+ 'the description of each image from this url?')
else:
basicdesc = desc
@@ -107,7 +107,7 @@
ilinks = []
i = minimum
while i <= maximum:
- ilinks += [url.replace("$", str(i))]
+ ilinks += [url.replace('$', str(i))]
i += 1
else:
ilinks = get_imagelinks(url)
@@ -115,20 +115,19 @@
for image in ilinks:
if pywikibot.input_yn('Include image %s?' % image, default=False,
automatic_quit=False):
- desc = pywikibot.input(u"Give the description of this image:")
+ desc = pywikibot.input('Give the description of this image:')
categories = []
while True:
- cat = pywikibot.input(u"Specify a category (or press enter to "
- u"end adding categories)")
+ cat = pywikibot.input('Specify a category (or press enter to '
+ 'end adding categories)')
if not cat.strip():
break
- if ":" in cat:
- categories.append(u"[[%s]]" % cat)
+ if ':' in cat:
+ categories.append('[[{}]]'.format(cat))
else:
- categories.append(u"[[%s:%s]]"
+ categories.append('[[%s:%s]]'
% (mysite.namespace(14), cat))
- desc += "\r\n\r\n" + basicdesc + "\r\n\r\n" + \
- "\r\n".join(categories)
+ desc += '\n\n' + basicdesc + '\n\n' + '\n'.join(categories)
UploadRobot(image, description=desc).run()
elif answer == 's':
break
@@ -138,19 +137,19 @@
"""Process command line arguments and invoke bot."""
global shown
global mysite
- url = u''
+ url = ''
image_url = False
shown = False
desc = []
for arg in pywikibot.handle_args():
- if arg == "-pattern":
+ if arg == '-pattern':
image_url = True
- elif arg == "-shown":
+ elif arg == '-shown':
shown = True
- elif arg == "-justshown":
- shown = "just"
- elif url == u'':
+ elif arg == '-justshown':
+ shown = 'just'
+ elif url == '':
url = arg
else:
desc += [arg]
--
To view, visit https://gerrit.wikimedia.org/r/446244
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibd4df68d45303f8be9b498823b7a17d50460fcec
Gerrit-Change-Number: 446244
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/446227 )
Change subject: [bugfix] Recognize more than one escape sequence
......................................................................
[bugfix] Recognize more than one escape sequence
Change-Id: I0dfe6eac74bf66ba3827138c003ac38959b4b88d
---
M scripts/maintenance/diff_checker.py
1 file changed, 1 insertion(+), 2 deletions(-)
Approvals:
Dalba: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/maintenance/diff_checker.py b/scripts/maintenance/diff_checker.py
index 081236e..59d12b8 100644
--- a/scripts/maintenance/diff_checker.py
+++ b/scripts/maintenance/diff_checker.py
@@ -78,8 +78,7 @@
elif quote == "'":
if (
'r' not in match.group('prefix')
- and r'\'' in string
- and not string.endswith(r'\'')
+ and string.count(r'\'') - int(string.endswith(r'\''))
and '"' not in string
):
print_error(
--
To view, visit https://gerrit.wikimedia.org/r/446227
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I0dfe6eac74bf66ba3827138c003ac38959b4b88d
Gerrit-Change-Number: 446227
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/445885 )
Change subject: Don't let WikidataBot crash on save related errors
......................................................................
Don't let WikidataBot crash on save related errors
Bug: T199642
Change-Id: I0c7a7f4fc91fff5ddb711f8fe378405938d7ca4e
---
M pywikibot/bot.py
1 file changed, 17 insertions(+), 8 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index dbb2193..1070245 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -1939,7 +1939,9 @@
u'property ID (e.g. P123) of it:'
% property_name).upper()
- def user_edit_entity(self, item, data=None, **kwargs):
+ def user_edit_entity(self, item, data=None,
+ ignore_save_related_errors=None,
+ ignore_server_errors=None, **kwargs):
"""
Edit entity with data provided, with user confirmation as required.
@@ -1947,20 +1949,24 @@
@type item: ItemPage
@param data: data to be saved, or None if the diff should be created
automatically
+ @param ignore_save_related_errors: Ignore save related errors and
+ automatically print a message. If None uses this instances default.
+ @type ignore_save_related_errors: bool or None
+ @param ignore_server_errors: Ignore server errors and automatically
+ print a message. If None uses this instances default.
+ @type ignore_server_errors: bool or None
@kwarg summary: revision comment, passed to ItemPage.editEntity
@type summary: str
@kwarg show_diff: show changes between oldtext and newtext (default:
True)
@type show_diff: bool
- @kwarg ignore_server_errors: if True, server errors will be reported
- and ignored (default: False)
- @type ignore_server_errors: bool
- @kwarg ignore_save_related_errors: if True, errors related to
- page save will be reported and ignored (default: False)
- @type ignore_save_related_errors: bool
@return: whether the item was saved successfully
@rtype: bool
"""
+ if ignore_save_related_errors is None:
+ ignore_save_related_errors = self.ignore_save_related_errors
+ if ignore_server_errors is None:
+ ignore_server_errors = self.ignore_server_errors
show_diff = kwargs.pop('show_diff', True)
if show_diff:
if data is None:
@@ -1975,7 +1981,10 @@
# TODO PageSaveRelatedErrors should be actually raised in editEntity
# (bug T86083)
- return self._save_page(item, item.editEntity, data, **kwargs)
+ return self._save_page(
+ item, item.editEntity, data,
+ ignore_save_related_errors=ignore_save_related_errors,
+ ignore_server_errors=ignore_server_errors, **kwargs)
def _add_source_callback(self, claim, source, **kwargs):
"""
--
To view, visit https://gerrit.wikimedia.org/r/445885
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I0c7a7f4fc91fff5ddb711f8fe378405938d7ca4e
Gerrit-Change-Number: 445885
Gerrit-PatchSet: 4
Gerrit-Owner: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/446224 )
Change subject: Fix some of the N806 naming errors in various scripts
......................................................................
Fix some of the N806 naming errors in various scripts
Also remove D102 from global ignore list of tox.ini and fix its only
occurrence in download_dump.py.
Change-Id: I4c6af21343c2c1568d952746d32433c4c961b9de
---
M pywikibot/config2.py
M scripts/capitalize_redirects.py
M scripts/maintenance/download_dump.py
M scripts/reflinks.py
M scripts/table2wiki.py
M tox.ini
6 files changed, 171 insertions(+), 171 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/config2.py b/pywikibot/config2.py
index bd66798..7f16263 100644
--- a/pywikibot/config2.py
+++ b/pywikibot/config2.py
@@ -309,10 +309,6 @@
if test_directory is not None:
test_directory = os.path.abspath(test_directory)
- DIRNAME_WIN = u"Pywikibot"
- DIRNAME_WIN_FBCK = u"pywikibot"
- DIRNAME_UNIX = u".pywikibot"
-
base_dir = ""
for arg in sys.argv[1:]:
if arg.startswith(str('-dir:')):
@@ -340,10 +336,10 @@
else:
raise WindowsError(u'Windows version %s not supported yet.'
% win_version)
- base_dir_cand.extend([[home] + sub_dir + [DIRNAME_WIN],
- [home] + sub_dir + [DIRNAME_WIN_FBCK]])
+ base_dir_cand.extend([[home] + sub_dir + ['Pywikibot'],
+ [home] + sub_dir + ['pywikibot']])
else:
- base_dir_cand.append([home, DIRNAME_UNIX])
+ base_dir_cand.append([home, '.pywikibot'])
for dir in base_dir_cand:
dir = os.path.join(*dir)
diff --git a/scripts/capitalize_redirects.py b/scripts/capitalize_redirects.py
index db3e653..dcc8442 100755
--- a/scripts/capitalize_redirects.py
+++ b/scripts/capitalize_redirects.py
@@ -98,7 +98,7 @@
options = {}
local_args = pywikibot.handle_args(args)
- genFactory = pagegenerators.GeneratorFactory()
+ gen_factory = pagegenerators.GeneratorFactory()
for arg in local_args:
if arg == '-always':
@@ -106,9 +106,9 @@
elif arg == '-titlecase':
options['titlecase'] = True
else:
- genFactory.handleArg(arg)
+ gen_factory.handleArg(arg)
- gen = genFactory.getCombinedGenerator(preload=True)
+ gen = gen_factory.getCombinedGenerator(preload=True)
if gen:
bot = CapitalizeBot(gen, **options)
bot.run()
diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py
index d574588..9844889 100644
--- a/scripts/maintenance/download_dump.py
+++ b/scripts/maintenance/download_dump.py
@@ -35,6 +35,7 @@
import os
def replace(src, dst):
+ """Rename a file or directory, overwriting the destination."""
try:
os.rename(src, dst)
except OSError:
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index 632bafe..0821d1c 100755
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -301,10 +301,10 @@
# keys are ref content
# values are [name, [list of full ref matches],
# quoted, need_to_change]
- foundRefs = {}
- foundRefNames = {}
+ found_refs = {}
+ found_ref_names = {}
# Replace key by [value, quoted]
- namedRepl = {}
+ named_repl = {}
for match in self.REFS.finditer(text):
content = match.group('content')
@@ -313,10 +313,10 @@
params = match.group('params')
group = self.GROUPS.match(params)
- if group not in foundRefs:
- foundRefs[group] = {}
+ if group not in found_refs:
+ found_refs[group] = {}
- groupdict = foundRefs[group]
+ groupdict = found_refs[group]
if content in groupdict:
v = groupdict[content]
v[1].append(match.group())
@@ -328,13 +328,13 @@
name = name.group('name')
if v[0]:
if v[0] != name:
- namedRepl[name] = [v[0], v[2]]
+ named_repl[name] = [v[0], v[2]]
else:
# First name associated with this content
if name == 'population':
pywikibot.output(content)
- if name not in foundRefNames:
+ if name not in found_ref_names:
# first time ever we meet this name
if name == 'population':
pywikibot.output("in")
@@ -344,13 +344,13 @@
# if has_key, means that this name is used
# with another content. We'll need to change it
v[3] = True
- foundRefNames[name] = 1
+ found_ref_names[name] = 1
groupdict[content] = v
id = 1
- while self.autogen + str(id) in foundRefNames:
+ while self.autogen + str(id) in found_ref_names:
id += 1
- for (g, d) in foundRefs.items():
+ for (g, d) in found_refs.items():
if g:
group = u"group=\"%s\" " % group
else:
@@ -379,7 +379,7 @@
end = end.replace(ref, unnamed)
text = header + end
- for (k, v) in namedRepl.items():
+ for (k, v) in named_repl.items():
# TODO : Support ref groups
name = v[0]
if v[1]:
@@ -498,7 +498,7 @@
def run(self):
"""Run the Bot."""
try:
- deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read()
+ dead_links = codecs.open(listof404pages, 'r', 'latin_1').read()
except IOError:
raise NotImplementedError(
'404-links.txt is required for reflinks.py\n'
@@ -543,8 +543,8 @@
ref.url, use_fake_user_agent=self._use_fake_user_agent)
# Try to get Content-Type from server
- contentType = f.response_headers.get('content-type')
- if contentType and not self.MIME.search(contentType):
+ content_type = f.response_headers.get('content-type')
+ if content_type and not self.MIME.search(content_type):
if ref.link.lower().endswith('.pdf') and \
not self.getOption('ignorepdf'):
# If file has a PDF suffix
@@ -595,7 +595,7 @@
# purposely removed
if f.status == 410 or \
(f.status == 404 and ('\t{}\t'.format(ref.url)
- in deadLinks)):
+ in dead_links)):
repl = ref.refDead()
new_text = new_text.replace(match.group(), repl)
continue
@@ -625,14 +625,14 @@
meta_content = self.META_CONTENT.search(linkedpagetext)
enc = []
s = None
- if contentType:
+ if content_type:
# use charset from http header
- s = self.CHARSET.search(contentType)
+ s = self.CHARSET.search(content_type)
if meta_content:
tag = meta_content.group()
# Prefer the contentType from the HTTP header :
- if not contentType:
- contentType = tag
+ if not content_type:
+ content_type = tag
if not s:
# use charset from html
s = self.CHARSET.search(str(tag))
@@ -651,10 +651,10 @@
enc.append(tmp)
else:
pywikibot.output(u'No charset found for %s' % ref.link)
- if not contentType:
+ if not content_type:
pywikibot.output('No content-type found for %s' % ref.link)
continue
- elif not self.MIME.search(contentType):
+ elif not self.MIME.search(content_type):
pywikibot.output(color_format(
'{lightyellow}WARNING{default} : media : {0} ',
ref.link))
@@ -769,14 +769,14 @@
@param args: command line arguments
@type args: list of unicode
"""
- xmlFilename = None
- xmlStart = None
+ xml_filename = None
+ xml_start = None
options = {}
generator = None
# Process global args and prepare generator args parser
local_args = pywikibot.handle_args(args)
- genFactory = pagegenerators.GeneratorFactory()
+ gen_factory = pagegenerators.GeneratorFactory()
for arg in local_args:
if arg.startswith('-summary:'):
@@ -789,28 +789,28 @@
options['limit'] = int(arg[7:])
elif arg.startswith('-xmlstart'):
if len(arg) == 9:
- xmlStart = pywikibot.input(
+ xml_start = pywikibot.input(
u'Please enter the dumped article to start with:')
else:
- xmlStart = arg[10:]
+ xml_start = arg[10:]
elif arg.startswith('-xml'):
if len(arg) == 4:
- xmlFilename = pywikibot.input(
+ xml_filename = pywikibot.input(
u'Please enter the XML dump\'s filename:')
else:
- xmlFilename = arg[5:]
+ xml_filename = arg[5:]
else:
- genFactory.handleArg(arg)
+ gen_factory.handleArg(arg)
- if xmlFilename:
- generator = XmlDumpPageGenerator(xmlFilename, xmlStart,
- genFactory.namespaces)
+ if xml_filename:
+ generator = XmlDumpPageGenerator(xml_filename, xml_start,
+ gen_factory.namespaces)
if not generator:
- generator = genFactory.getCombinedGenerator()
+ generator = gen_factory.getCombinedGenerator()
if not generator:
pywikibot.bot.suggest_help(missing_generator=True)
return False
- if not genFactory.nopreload:
+ if not gen_factory.nopreload:
generator = pagegenerators.PreloadingGenerator(generator)
generator = pagegenerators.RedirectFilterPageGenerator(generator)
bot = ReferencesRobot(generator, **options)
diff --git a/scripts/table2wiki.py b/scripts/table2wiki.py
index 9c6e576..06ff7fa 100644
--- a/scripts/table2wiki.py
+++ b/scripts/table2wiki.py
@@ -76,9 +76,8 @@
self.xmldump = xmlreader.XmlDump(xmlfilename)
def __iter__(self):
- tableTagR = re.compile('<table', re.IGNORECASE)
for entry in self.xmldump.parse():
- if tableTagR.search(entry.text):
+ if _table_start_regex.search(entry.text):
yield pywikibot.Page(pywikibot.Site(), entry.title)
@@ -117,13 +116,13 @@
# possible errors, before the user is asked if he wants to accept the
# changes.
warning_messages = []
- newTable = table
+ new_table = table
##################
# bring every <tag> into one single line.
num = 1
while num != 0:
- newTable, num = re.subn(r'([^\r\n]{1})(<[tT]{1}[dDhHrR]{1})',
- r'\1\r\n\2', newTable)
+ new_table, num = re.subn(
+ r'([^\r\n]{1})(<[tT]{1}[dDhHrR]{1})', r'\1\r\n\2', new_table)
##################
# every open-tag gets a new line.
@@ -131,58 +130,62 @@
##################
# Note that we added the ## characters in markActiveTables().
# <table> tag with attributes, with more text on the same line
- newTable = re.sub(
+ new_table = re.sub(
r'(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>'
r'(?P<more>[\w\W]*?)[\r\n ]*',
- r'\r\n{| \g<attr>\r\n\g<more>', newTable)
+ r'\r\n{| \g<attr>\r\n\g<more>', new_table)
# <table> tag without attributes, with more text on the same line
- newTable = re.sub(r'(?i)[\r\n]*?<##table##>(?P<more>[\w\W]*?)[\r\n ]*',
- r'\r\n{|\n\g<more>\r\n', newTable)
+ new_table = re.sub(
+ r'(?i)[\r\n]*?<##table##>(?P<more>[\w\W]*?)[\r\n ]*',
+ r'\r\n{|\n\g<more>\r\n', new_table)
# <table> tag with attributes, without more text on the same line
- newTable = re.sub(
+ new_table = re.sub(
r'(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>[\r\n ]*',
- r'\r\n{| \g<attr>\r\n', newTable)
+ r'\r\n{| \g<attr>\r\n', new_table)
# <table> tag without attributes, without more text on the same line
- newTable = re.sub(r'(?i)[\r\n]*?<##table##>[\r\n ]*',
- '\r\n{|\r\n', newTable)
+ new_table = re.sub(
+ r'(?i)[\r\n]*?<##table##>[\r\n ]*', '\r\n{|\r\n', new_table)
# end </table>
- newTable = re.sub(r'(?i)[\s]*<\/##table##>',
- '\r\n|}', newTable)
+ new_table = re.sub(
+ r'(?i)[\s]*<\/##table##>', '\r\n|}', new_table)
##################
# caption with attributes
- newTable = re.sub(
+ new_table = re.sub(
r'(?i)<caption (?P<attr>[\w\W]*?)>'
r'(?P<caption>[\w\W]*?)<\/caption>',
- r'\r\n|+\g<attr> | \g<caption>', newTable)
+ r'\r\n|+\g<attr> | \g<caption>', new_table)
# caption without attributes
- newTable = re.sub(r'(?i)<caption>(?P<caption>[\w\W]*?)<\/caption>',
- r'\r\n|+ \g<caption>', newTable)
+ new_table = re.sub(
+ r'(?i)<caption>(?P<caption>[\w\W]*?)<\/caption>',
+ r'\r\n|+ \g<caption>', new_table)
##################
# <th> often people don't write them within <tr>, be warned!
# <th> with attributes
- newTable = re.sub(
+ new_table = re.sub(
r"(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)<\/th>",
- r"\r\n!\g<attr> | \g<header>\r\n", newTable)
+ r'\r\n!\g<attr> | \g<header>\r\n', new_table)
# <th> without attributes
- newTable = re.sub(r"(?i)[\r\n]+<th>(?P<header>[\w\W]*?)<\/th>",
- r'\r\n! \g<header>\r\n', newTable)
+ new_table = re.sub(
+ r'(?i)[\r\n]+<th>(?P<header>[\w\W]*?)</th>',
+ r'\r\n! \g<header>\r\n', new_table)
# fail save. sometimes people forget </th>
# <th> without attributes, without closing </th>
- newTable, n = re.subn(r'(?i)[\r\n]+<th>(?P<header>[\w\W]*?)[\r\n]+',
- r'\r\n! \g<header>\r\n', newTable)
+ new_table, n = re.subn(
+ r'(?i)[\r\n]+<th>(?P<header>[\w\W]*?)[\r\n]+',
+ r'\r\n! \g<header>\r\n', new_table)
if n > 0:
warning_messages.append(
u'WARNING: found <th> without </th>. (%d occurences)\n' % n)
warnings += n
# <th> with attributes, without closing </th>
- newTable, n = re.subn(
+ new_table, n = re.subn(
r'(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)[\r\n]+',
- r'\n!\g<attr> | \g<header>\r\n', newTable)
+ r'\n!\g<attr> | \g<header>\r\n', new_table)
if n > 0:
warning_messages.append(
'WARNING: found <th ...> without </th>. (%d occurences\n)' % n)
@@ -190,38 +193,42 @@
##################
# <tr> with attributes
- newTable = re.sub("(?i)[\r\n]*<tr(?P<attr> [^>]*?)>[\r\n]*",
- r"\r\n|-\g<attr>\r\n", newTable)
+ new_table = re.sub(
+ '(?i)[\r\n]*<tr(?P<attr> [^>]*?)>[\r\n]*',
+ r'\r\n|-\g<attr>\r\n', new_table)
# <tr> without attributes
- newTable = re.sub("(?i)[\r\n]*<tr>[\r\n]*",
- r"\r\n|-\r\n", newTable)
+ new_table = re.sub(
+ '(?i)[\r\n]*<tr>[\r\n]*',
+ r'\r\n|-\r\n', new_table)
##################
# normal <td> without arguments
- newTable = re.sub(r'(?i)[\r\n]+<td>(?P<cell>[\w\W]*?)<\/td>',
- r'\r\n| \g<cell>\r\n', newTable)
+ new_table = re.sub(
+ r'(?i)[\r\n]+<td>(?P<cell>[\w\W]*?)<\/td>',
+ r'\r\n| \g<cell>\r\n', new_table)
##################
# normal <td> with arguments
- newTable = re.sub(
+ new_table = re.sub(
r'(?i)[\r\n]+<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)<\/td>',
- r'\r\n|\g<attr> | \g<cell>', newTable)
+ r'\r\n|\g<attr> | \g<cell>', new_table)
# WARNING: this sub might eat cells of bad HTML, but most likely it
# will correct errors
# TODO: some more docu please
- newTable, n = re.subn("(?i)[\r\n]+<td>(?P<cell>[^\r\n]*?)<td>",
- r"\r\n| \g<cell>\r\n", newTable)
+ new_table, n = re.subn(
+ '(?i)[\r\n]+<td>(?P<cell>[^\r\n]*?)<td>',
+ r'\r\n| \g<cell>\r\n', new_table)
if n > 0:
warning_messages.append(
u'<td> used where </td> was expected. (%d occurences)\n' % n)
warnings += n
# what is this for?
- newTable, n = re.subn(
+ new_table, n = re.subn(
r'[\r\n]+<(td|TD)([^>]+?)>([^\r\n]*?)</(td|TD)>',
- r'\r\n|\2 | \3\r\n', newTable)
+ r'\r\n|\2 | \3\r\n', new_table)
if n > 0:
warning_messages.append(
u"WARNING: (sorry, bot code unreadable (1). I don't know why "
@@ -229,25 +236,26 @@
# fail save. sometimes people forget </td>
# <td> without arguments, with missing </td>
- newTable, n = re.subn(r'(?i)<td>(?P<cell>[^<]*?)[\r\n]+',
- r'\r\n| \g<cell>\r\n', newTable)
+ new_table, n = re.subn(
+ r'(?i)<td>(?P<cell>[^<]*?)[\r\n]+',
+ r'\r\n| \g<cell>\r\n', new_table)
if n > 0:
warning_messages.append(u"NOTE: Found <td> without </td>. This "
u"shouldn't cause problems.\n")
# <td> with attributes, with missing </td>
- newTable, n = re.subn(
+ new_table, n = re.subn(
r'(?i)[\r\n]*<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)[\r\n]+',
- r'\r\n|\g<attr> | \g<cell>\r\n', newTable)
+ r'\r\n|\g<attr> | \g<cell>\r\n', new_table)
if n > 0:
warning_messages.append(u"NOTE: Found <td> without </td>. This "
u"shouldn't cause problems.\n")
##################
# Garbage collecting ;-)
- newTable = re.sub(r'(?i)<td>[\r\n]*<\/tr>', '', newTable)
+ new_table = re.sub(r'(?i)<td>[\r\n]*</tr>', '', new_table)
# delete closing tags
- newTable = re.sub(r'(?i)[\r\n]*<\/t[rdh]>', '', newTable)
+ new_table = re.sub(r'(?i)[\r\n]*</t[rdh]>', '', new_table)
##################
# OK, that's only theory but works most times.
@@ -263,8 +271,9 @@
##################
# most <th> come with '''title'''. Senseless in my eyes cuz
# <th> should be bold anyways.
- newTable = re.sub(r"[\r\n]+\!([^'\n\r]*)'''([^'\r\n]*)'''",
- r'\r\n!\1\2', newTable)
+ new_table = re.sub(
+ r"[\r\n]+\!([^'\n\r]*)'''([^'\r\n]*)'''",
+ r'\r\n!\1\2', new_table)
##################
# kills indention within tables. Be warned, it might seldom bring
@@ -273,30 +282,29 @@
if config.deIndentTables:
num = 1
while num != 0:
- newTable, num = re.subn(
+ new_table, num = re.subn(
r'(\{\|[\w\W]*?)\n[ \t]+([\w\W]*?\|\})',
- r'\1\r\n\2', newTable)
+ r'\1\r\n\2', new_table)
##################
# kills additional spaces after | or ! or {|
# This line was creating problems, so I commented it out --Daniel
# newTable = re.sub("[\r\n]+\|[\t ]+?[\r\n]+", "\r\n| ", newTable)
# kills trailing spaces and tabs
- newTable = re.sub(r'\r\n(.*)[\t\ ]+[\r\n]+',
- r'\r\n\1\r\n', newTable)
+ new_table = re.sub(
+ r'\r\n(.*)[\t ]+[\r\n]+', r'\r\n\1\r\n', new_table)
# kill extra new-lines
- newTable = re.sub(r'[\r\n]{4,}(\!|\|)',
- r'\r\n\1', newTable)
+ new_table = re.sub(r'[\r\n]{4,}[!|]', r'\r\n\1', new_table)
##################
# shortening if <table> had no arguments/parameters
- newTable = re.sub(r'[\r\n]+\{\|[\ ]+\| ', r'\r\n{| ', newTable)
+ new_table = re.sub(r'[\r\n]+{\| +\| ', r'\r\n{| ', new_table)
# shortening if <td> had no articles
- newTable = re.sub(r'[\r\n]+\|[\ ]+\| ', '\r\n| ', newTable)
+ new_table = re.sub(r'[\r\n]+\| +\| ', '\r\n| ', new_table)
# shortening if <th> had no articles
- newTable = re.sub(r'\n\|\+[\ ]+\|', '\n|+ ', newTable)
+ new_table = re.sub(r'\n\|\+ +\|', '\n|+ ', new_table)
# shortening of <caption> had no articles
- newTable = re.sub(r'[\r\n]+\![\ ]+\| ', '\r\n! ', newTable)
+ new_table = re.sub(r'[\r\n]+! +\| ', '\r\n! ', new_table)
##################
# proper attributes. attribute values need to be in quotation marks.
@@ -310,9 +318,9 @@
# We recognize it by searching for a string of non-whitespace
# characters
# - [^\s]+? - which is not embraced by quotation marks - [^"]
- newTable, num = re.subn(
+ new_table, num = re.subn(
r'([\r\n]+(?:\|-|\{\|)[^\r\n\|]+) *= *([^"\s>]+)',
- r'\1="\2"', newTable, 1)
+ r'\1="\2"', new_table, 1)
num = 1
while num != 0:
@@ -321,44 +329,44 @@
# change cell contents which accidentially contain an equal sign.
# Group 1 and 2 are anologously to the previous regular expression,
# group 3 are the remaining attribute key - value pairs.
- newTable, num = re.subn(
+ new_table, num = re.subn(
r'([\r\n]+(?:!|\|)[^\r\n\|]+) *= *([^"\s>]+)([^\|\r\n]*)\|',
- r'\1="\2"\3|', newTable, 1)
+ r'\1="\2"\3|', new_table, 1)
##################
# merge two short <td>s
num = 1
while num != 0:
- newTable, num = re.subn(
+ new_table, num = re.subn(
r'[\r\n]+(\|[^\|\-\}]{1}[^\n\r]{0,35})'
r'[\r\n]+(\|[^\|\-\}]{1}[^\r\n]{0,35})[\r\n]+',
- r'\r\n\1 |\2\r\n', newTable)
+ r'\r\n\1 |\2\r\n', new_table)
####
# add a new line if first is * or #
- newTable = re.sub(r'[\r\n]+\| ([*#]{1})',
- r'\r\n|\r\n\1', newTable)
+ new_table = re.sub(r'[\r\n]+\| ([*#]{1})', r'\r\n|\r\n\1', new_table)
##################
# strip <center> from <th>
- newTable = re.sub(r'([\r\n]+\![^\r\n]+?)<center>([\w\W]+?)<\/center>',
- r'\1 \2', newTable)
+ new_table = re.sub(
+ r'([\r\n]+![^\r\n]+?)<center>([\w\W]+?)</center>',
+ r'\1 \2', new_table)
# strip align="center" from <th> because the .css does it
# if there are no other attributes than align, we don't need
# that | either
- newTable = re.sub(r'([\r\n]+\! +)align\=\"center\" +\|',
- r'\1', newTable)
+ new_table = re.sub(
+ r'([\r\n]+! +)align=\"center\" +\|', r'\1', new_table)
# if there are other attributes, simply strip the align="center"
- newTable = re.sub(
- r'([\r\n]+\![^\r\n\|]+?)align\=\"center\"([^\n\r\|]+?\|)',
- r'\1 \2', newTable)
+ new_table = re.sub(
+ r'([\r\n]+![^\r\n|]+?)align=\"center\"([^\n\r|]+?\|)',
+ r'\1 \2', new_table)
##################
# kill additional spaces within arguments
num = 1
while num != 0:
- newTable, num = re.subn(
+ new_table, num = re.subn(
r'[\r\n]+(\||\!)([^|\r\n]*?)[ \t]{2,}([^\r\n]+?)',
- r'\r\n\1\2 \3', newTable)
+ r'\r\n\1\2 \3', new_table)
##################
# I hate those long lines because they make a wall of letters
@@ -368,11 +376,11 @@
while num != 0:
# TODO: how does this work? docu please.
# why are only äöüß used, but not other special characters?
- newTable, num = re.subn(
+ new_table, num = re.subn(
r'(\r\n[A-Z]{1}[^\n\r]{200,}?[a-zäöüß]\.)'
r'\ ([A-ZÄÖÜ]{1}[^\n\r]{200,})',
- r'\1\r\n\2', newTable)
- return newTable, warnings, warning_messages
+ r'\1\r\n\2', new_table)
+ return new_table, warnings, warning_messages
def markActiveTables(self, text):
"""
@@ -381,13 +389,10 @@
Mark all table start and end tags that are not disabled by nowiki tags,
comments etc. We will then later only work on these marked tags.
"""
- tableStartTagR = re.compile("<table", re.IGNORECASE)
- tableEndTagR = re.compile("</table>", re.IGNORECASE)
-
- text = pywikibot.replaceExcept(text, tableStartTagR, "<##table##",
+ text = pywikibot.replaceExcept(text, _table_start_regex, '<##table##',
exceptions=['comment', 'math',
'nowiki', 'pre', 'source'])
- text = pywikibot.replaceExcept(text, tableEndTagR, "</##table##>",
+ text = pywikibot.replaceExcept(text, _table_end_regex, '</##table##>',
exceptions=['comment', 'math',
'nowiki', 'pre', 'source'])
return text
@@ -399,38 +404,36 @@
Returns the table and the start and end position inside the text.
"""
# Note that we added the ## characters in markActiveTables().
- markedTableStartTagR = re.compile("<##table##", re.IGNORECASE)
- markedTableEndTagR = re.compile("</##table##>", re.IGNORECASE)
- m = markedTableStartTagR.search(text)
+ m = _marked_table_start_search(text)
if not m:
return None, 0, 0
else:
start = m.start()
offset = m.end()
- originalText = text
+ original_text = text
text = text[m.end():]
# depth level of table nesting
depth = 1
# i = start + 1
while depth > 0:
- nextStarting = markedTableStartTagR.search(text)
- nextEnding = markedTableEndTagR.search(text)
- if not nextEnding:
+ next_starting = _marked_table_start_search(text)
+ next_ending = _marked_table_end_search(text)
+ if not next_ending:
pywikibot.output(
'More opening than closing table tags. Skipping.')
return None, 0, 0
# if another table tag is opened before one is closed
- elif (nextStarting and
- nextStarting.start() < nextEnding.start()):
- offset += nextStarting.end()
- text = text[nextStarting.end():]
+ elif (next_starting and
+ next_starting.start() < next_ending.start()):
+ offset += next_starting.end()
+ text = text[next_starting.end():]
depth += 1
else:
- offset += nextEnding.end()
- text = text[nextEnding.end():]
+ offset += next_ending.end()
+ text = text[next_ending.end():]
depth -= 1
end = offset
- return originalText[start:end], start, end
+ return original_text[start:end], start, end
def convertAllHTMLTables(self, text):
"""
@@ -441,9 +444,9 @@
"""
text = self.markActiveTables(text)
- convertedTables = 0
- warningSum = 0
- warningMessages = u''
+ converted_tables = 0
+ warning_sum = 0
+ warning_messages = ''
while True:
table, start, end = self.findTable(text)
@@ -452,31 +455,29 @@
break
# convert the current table
- newTable, warningsThisTable, warnMsgsThisTable = self.convertTable(
- table)
- warningSum += warningsThisTable
- for msg in warnMsgsThisTable:
- warningMessages += 'In table %i: %s' % (convertedTables + 1,
- msg)
- text = text[:start] + newTable + text[end:]
- convertedTables += 1
+ new_table, table_warns_num, table_warns = self.convertTable(table)
+ warning_sum += table_warns_num
+ for msg in table_warns:
+ warning_messages += 'In table %i: %s' % (
+ converted_tables + 1, msg)
+ text = text[:start] + new_table + text[end:]
+ converted_tables += 1
- pywikibot.output(warningMessages)
- return text, convertedTables, warningSum
+ pywikibot.output(warning_messages)
+ return text, converted_tables, warning_sum
def treat_page(self):
"""Convert all HTML tables in text to wiki syntax and save it."""
text = self.current_page.text
- newText, convertedTables, warnings = self.convertAllHTMLTables(text)
+ new_text, converted_tables, warnings = self.convertAllHTMLTables(text)
# Check if there are any marked tags left
- markedTableTagR = re.compile("<##table##|</##table##>", re.IGNORECASE)
- if markedTableTagR.search(newText):
+ if re.search('<##table##|</##table##>', new_text, re.IGNORECASE):
pywikibot.error(
u'not all marked table start or end tags processed!')
return
- if convertedTables == 0:
+ if converted_tables == 0:
pywikibot.output(u"No changes were necessary.")
return
@@ -495,19 +496,25 @@
# get edit summary message
if warnings == 0:
- editSummaryMessage = i18n.twtranslate(
+ edit_summary = i18n.twtranslate(
self.site.code, 'table2wiki-no-warning')
else:
- editSummaryMessage = i18n.twntranslate(
+ edit_summary = i18n.twntranslate(
self.site.code,
'table2wiki-warnings',
{'count': warnings}
)
- self.put_current(newText, summary=editSummaryMessage,
+ self.put_current(new_text, summary=edit_summary,
show_diff=not (self.getOption('quiet') and
self.getOption('always')))
+_marked_table_start_search = re.compile('<##table##', re.IGNORECASE).search
+_marked_table_end_search = re.compile('</##table##>', re.IGNORECASE).search
+_table_start_regex = re.compile('<table', re.IGNORECASE)
+_table_end_regex = re.compile('</table>', re.IGNORECASE)
+
+
def main(*args):
"""
Process command line arguments and invoke bot.
@@ -525,7 +532,7 @@
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
- genFactory = pagegenerators.GeneratorFactory(positional_arg_name='page')
+ gen_factory = pagegenerators.GeneratorFactory(positional_arg_name='page')
for arg in local_args:
option, sep, value = arg.partition(':')
@@ -553,16 +560,16 @@
WHERE old_text LIKE '%<table%'
"""
arg = '-mysqlquery:' + query
- genFactory.handleArg(arg)
+ gen_factory.handleArg(arg)
if gen:
gen = pagegenerators.NamespaceFilterPageGenerator(
- gen, genFactory.namespaces)
+ gen, gen_factory.namespaces)
else:
- gen = genFactory.getCombinedGenerator()
+ gen = gen_factory.getCombinedGenerator()
if gen:
- if not genFactory.nopreload:
+ if not gen_factory.nopreload:
gen = pagegenerators.PreloadingGenerator(gen)
bot = Table2WikiRobot(generator=gen, **options)
bot.run()
diff --git a/tox.ini b/tox.ini
index a94e1f4..4f1c94e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -151,7 +151,7 @@
# D413: Missing blank line after last section
# D412: No blank lines allowed between a section header and its content
-ignore = D105,D211,FI10,FI12,FI13,FI15,FI16,FI17,FI5,H101,H236,H301,H404,H405,H903,N802,D401,D413,D103,D412,P101,P102,P103,W503
+ignore = D105,D211,FI10,FI12,FI13,FI15,FI16,FI17,FI5,H101,H236,H301,H404,H405,H903,N802,D401,D413,D412,P101,P102,P103,W503
exclude = .tox,.git,./*.egg,ez_setup.py,build,externals,user-config.py,./scripts/i18n/*,scripts/userscripts/*
min-version = 2.7
max_line_length = 100
@@ -164,7 +164,6 @@
pywikibot/__init__.py : D999, N806
pywikibot/comms/http.py : T001
pywikibot/compat/catlib.py : N803
- pywikibot/config2.py : N806
pywikibot/cosmetic_changes.py : N803, N806
pywikibot/data/api.py : N803, N806
pywikibot/date.py : E241, N803, N806
@@ -195,7 +194,6 @@
scripts/add_text.py : N803, N806
scripts/archive/featured.py : D102, D103
scripts/blockpageschecker.py : N803, N806
- scripts/capitalize_redirects.py : N806
scripts/casechecker.py : N803, N806
scripts/category.py : N803, N806
scripts/category_redirect.py : N803, N806
@@ -220,11 +218,9 @@
scripts/noreferences.py : N803, N806
scripts/nowcommons.py : N803, N806
scripts/redirect.py : N803, N806
- scripts/reflinks.py : N806
scripts/replace.py : N803, N806
scripts/script_wui.py : D102, N806
scripts/solve_disambiguation.py : N803, N806
- scripts/table2wiki.py : N806
scripts/unlink.py : N803
scripts/watchlist.py : N803
scripts/weblinkchecker.py : N803, N806
--
To view, visit https://gerrit.wikimedia.org/r/446224
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I4c6af21343c2c1568d952746d32433c4c961b9de
Gerrit-Change-Number: 446224
Gerrit-PatchSet: 1
Gerrit-Owner: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: Zoranzoki21 <zorandori4444(a)gmail.com>
Gerrit-Reviewer: jenkins-bot