jenkins-bot has submitted this change and it was merged.
Change subject: Split run method into __init__ and treat method
......................................................................
Split run method into __init__ and treat method
Change-Id: I18c29f25b272b2ccee6cfc7985f3997bb845a6db
---
M scripts/unusedfiles.py
1 file changed, 29 insertions(+), 21 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/unusedfiles.py b/scripts/unusedfiles.py
index 3024a2a..147e869 100755
--- a/scripts/unusedfiles.py
+++ b/scripts/unusedfiles.py
@@ -44,8 +44,6 @@
super(UnusedFilesBot, self).__init__(**kwargs)
self.site = site
- def run(self):
- """Start the bot."""
template_image = i18n.translate(self.site,
template_to_the_image)
template_user = i18n.translate(self.site,
@@ -54,27 +52,37 @@
if not all([template_image, template_user]):
raise pywikibot.Error(u'This script is not localized for %s site.'
% self.site)
+
+ self.template_image = template_image
+ self.template_user = template_user
+
generator = pagegenerators.UnusedFilesGenerator(site=self.site)
generator = pagegenerators.PreloadingGenerator(generator)
- for image in generator:
- if not image.exists():
- pywikibot.output("File '%s' does not exist (see bug T71133)."
- % image.title())
- continue
- # Use fileUrl() and fileIsShared() to confirm it is local media
- # rather than a local page with the same name as shared media.
- if (image.fileUrl() and not image.fileIsShared() and
- u'http://' not in image.text):
- if template_image in image.text:
- pywikibot.output(u"%s done already"
- % image.title(asLink=True))
- continue
- self.append_text(image, u"\n\n" + template_image)
- uploader = image.getFileVersionHistory().pop(0)['user']
- user = pywikibot.User(image.site, uploader)
- usertalkpage = user.getUserTalkPage()
- msg2uploader = template_user % {'title': image.title()}
- self.append_text(usertalkpage, msg2uploader)
+
+ self.generator = generator
+
+ def treat(self, image):
+ """Process one image page."""
+ if not image.exists():
+ pywikibot.output("File '%s' does not exist (see bug T71133)."
+ % image.title())
+ return
+ # Use fileUrl() and fileIsShared() to confirm it is local media
+ # rather than a local page with the same name as shared media.
+ if (image.fileUrl() and not image.fileIsShared() and
+ u'http://' not in image.text):
+ if self.template_image in image.text:
+ pywikibot.output(u"%s done already"
+ % image.title(asLink=True))
+ return
+
+ self.append_text(image, '\n\n' + self.template_image)
+
+ uploader = image.getFileVersionHistory().pop(0)['user']
+ user = pywikibot.User(image.site, uploader)
+ usertalkpage = user.getUserTalkPage()
+ msg2uploader = self.template_user % {'title': image.title()}
+ self.append_text(usertalkpage, msg2uploader)
def append_text(self, page, apptext):
"""Append apptext to the page."""
--
To view, visit https://gerrit.wikimedia.org/r/305869
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I18c29f25b272b2ccee6cfc7985f3997bb845a6db
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Add wikisourcetext.py: load text in Page ns
......................................................................
Add wikisourcetext.py: load text in Page ns
This bot applies to wikisource sites to upload text.
Text is uploaded to not-(yet)-existing pages in Page ns, for a specified
Index.
Text to be stored, if the page is not-existing, is preloaded from the
file used to create the Index page, making the upload feature independent
from the format of the file, as long as it is supported by the
MW ProofreadPage extension.
Change-Id: I42eff9ae6f0cc4170381dcb1ef04869252662493
---
A scripts/wikisourcetext.py
1 file changed, 179 insertions(+), 0 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/wikisourcetext.py b/scripts/wikisourcetext.py
new file mode 100644
index 0000000..fdad386
--- /dev/null
+++ b/scripts/wikisourcetext.py
@@ -0,0 +1,179 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+
+This bot applies to wikisource sites to upload text.
+
+Text is uploaded to not-(yet)-existing pages in Page ns, for a specified Index.
+Text to be stored, if the page is not-existing, is preloaded from the file used
+to create the Index page, making the upload feature independent from the format
+of the file, as long as it is supported by the MW ProofreadPage extension.
+
+The following parameters are supported:
+
+ -index:... name of the index page
+
+ -pages:<start>-<end>,...<start>-<end>,<start>-<end>
+ Page range to upload;
+ optional, start=1, end=djvu file number of images.
+ Page ranges can be specified as:
+ A-B -> pages A until B
+ A- -> pages A until number of images
+ A -> just page A
+ -B -> pages 1 until B
+
+ -showdiff show difference between old and new text before saving.
+
+ -summary: custom edit summary.
+ Use quotes if edit summary contains spaces.
+
+ -force overwrites existing text
+ optional, default False
+
+ -always don't bother asking to confirm any of the changes.
+
+"""
+#
+# (C) Pywikibot team, 2016
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import absolute_import, unicode_literals
+
+__version__ = '$Id$'
+
+import itertools
+
+import pywikibot
+
+from pywikibot import i18n
+
+from pywikibot.bot import SingleSiteBot
+from pywikibot.proofreadpage import IndexPage
+
+
+class UploadTextBot(SingleSiteBot):
+
+ """
+ A bot that uploads text-layer to Page:namespace.
+
+ Text is fetched via preload as on Wikisource wikis, text can be preloaded
+ even if a page does not exist, if an Index page is present.
+
+ Works only on sites with Proofread Page extension installed.
+ """
+
+ def __init__(self, generator, **kwargs):
+ """
+ Constructor.
+
+ @param generator: page generator
+ @type generator: generator
+ """
+ self.availableOptions.update({
+ 'force': False,
+ 'showdiff': False,
+ 'summary': 'Uploading text'
+ })
+ super(UploadTextBot, self).__init__(**kwargs)
+ self.generator = generator
+
+ # TODO: create i18n files
+ # Get edit summary message if it's empty.
+ if not self.getOption('summary'):
+ self.options['summary'] = i18n.twtranslate(
+ self.site, 'djvutext-creating')
+
+ def treat(self, page):
+ """Process one page."""
+ old_text = ''
+ new_text = page.text
+
+ summary = self.getOption('summary')
+ if page.exists() and not self.getOption('force'):
+ pywikibot.output('Page %s already exists, not adding!' % page)
+ else:
+ self.userPut(page, old_text, new_text,
+ summary=summary, minor=True, botflag=True,
+ show_diff=self.getOption('showdiff'))
+
+
+def main(*args):
+ """
+ Process command line arguments and invoke bot.
+
+ If args is an empty list, sys.argv is used.
+
+ @param args: command line arguments
+ @type args: list of unicode
+ """
+ index = None
+ pages = '1-'
+ options = {}
+
+ # Parse command line arguments.
+ local_args = pywikibot.handle_args(args)
+ for arg in local_args:
+ arg, sep, value = arg.partition(':')
+ if arg == '-index':
+ index = value
+ elif arg == '-pages':
+ pages = value
+ elif arg == '-showdiff':
+ options['showdiff'] = True
+ elif arg == '-summary':
+ options['summary'] = value
+ elif arg == '-force':
+ options['force'] = True
+ elif arg == '-always':
+ options['always'] = True
+ else:
+ pywikibot.output('Unknown argument %s' % arg)
+
+ # index is mandatory.
+ if not index:
+ pywikibot.bot.suggest_help(missing_parameters=['-index'])
+ return False
+
+ site = pywikibot.Site()
+ if not site.has_extension('ProofreadPage'):
+ pywikibot.error('Site %s must have ProofreadPage extension.' % site)
+ return False
+
+ index = IndexPage(site, index)
+
+ if not index.exists():
+ pywikibot.error("Page %s doesn't exist." % index)
+ return False
+
+ # Parse pages param.
+ # Create a list of (start, end) tuples.
+ pages = pages.split(',')
+ for interval in range(len(pages)):
+ start, sep, end = pages[interval].partition('-')
+ start = 1 if not start else int(start)
+ if not sep:
+ end = start
+ else:
+ end = int(end) if end else index.num_pages
+ pages[interval] = (start, end)
+
+ gen_list = []
+ for start, end in sorted(pages):
+ gen = index.page_gen(start=start, end=end,
+ filter_ql=[1], content=False)
+ gen_list.append(gen)
+
+ gen = itertools.chain(*gen_list)
+
+ pywikibot.output('\nUploading text to %s\n' % index.title(asLink=True))
+
+ bot = UploadTextBot(gen, site=index.site, **options)
+ bot.run()
+
+
+if __name__ == '__main__':
+ try:
+ main()
+ except Exception:
+ pywikibot.error('Fatal error:', exc_info=True)
--
To view, visit https://gerrit.wikimedia.org/r/283935
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I42eff9ae6f0cc4170381dcb1ef04869252662493
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Aschroet <arnd.schroeter(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Fix xml dump gen to use textlib correctly
......................................................................
Fix xml dump gen to use textlib correctly
Addressing the following issues:
* sending the correct parameters to replaceExcept in
the correct order (e.g. site)
* using each replacement's own exceptions so that the generator does not
yield pages where the replacement would not actually apply
Change-Id: I5262878d096dd0b25dc22d77337aa5e351e0cbda
---
M scripts/replace.py
1 file changed, 25 insertions(+), 16 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/replace.py b/scripts/replace.py
index 7e3f0d9..ac88936 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -189,6 +189,11 @@
exceptions[exceptionCategory] = patterns
+def _get_text_exceptions(exceptions):
+ """Get exceptions on text (inside exceptions)."""
+ return exceptions.get('inside-tags', []) + exceptions.get('inside', [])
+
+
class ReplacementBase(object):
"""The replacement instructions."""
@@ -292,6 +297,10 @@
"""Compile the search regex and exceptions."""
super(Replacement, self)._compile(use_regex, flags)
precompile_exceptions(self.exceptions, use_regex, flags)
+
+ def get_inside_exceptions(self):
+ """Get exceptions on text (inside exceptions)."""
+ return _get_text_exceptions(self.exceptions or {})
class ReplacementList(list):
@@ -425,17 +434,20 @@
if entry.title != self.xmlStart:
continue
self.skipping = False
- if not self.isTitleExcepted(entry.title) \
- and not self.isTextExcepted(entry.text):
- new_text = entry.text
- for replacement in self.replacements:
- # This doesn't do an actual replacement but just
- # checks if at least one does apply
- new_text = textlib.replaceExcept(
- new_text, replacement.old_regex, replacement.new,
- self.excsInside, self.site)
- if new_text != entry.text:
- yield pywikibot.Page(self.site, entry.title)
+ if self.isTitleExcepted(entry.title) \
+ or self.isTextExcepted(entry.text):
+ continue
+ new_text = entry.text
+ for replacement in self.replacements:
+ # This doesn't do an actual replacement but just
+ # checks if at least one does apply
+ new_text = textlib.replaceExcept(
+ new_text, replacement.old_regex, replacement.new,
+ self.excsInside + replacement.get_inside_exceptions(),
+ site=self.site)
+ if new_text != entry.text:
+ yield pywikibot.Page(self.site, entry.title)
+
except KeyboardInterrupt:
try:
if not self.skipping:
@@ -588,15 +600,12 @@
@rtype: unicode, set
"""
- def get_exceptions(exceptions):
- return exceptions.get('inside-tags', []) + exceptions.get('inside', [])
-
if page is None:
pywikibot.warn(
'You must pass the target page as the "page" parameter to '
'apply_replacements().', DeprecationWarning, stacklevel=2)
new_text = original_text
- exceptions = get_exceptions(self.exceptions)
+ exceptions = _get_text_exceptions(self.exceptions)
skipped_containers = set()
for replacement in self.replacements:
if self.sleep is not None:
@@ -622,7 +631,7 @@
old_text = new_text
new_text = textlib.replaceExcept(
new_text, replacement.old_regex, replacement.new,
- exceptions + get_exceptions(replacement.exceptions or {}),
+ exceptions + replacement.get_inside_exceptions(),
allowoverlap=self.allowoverlap, site=self.site)
if old_text != new_text:
applied.add(replacement)
--
To view, visit https://gerrit.wikimedia.org/r/206355
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5262878d096dd0b25dc22d77337aa5e351e0cbda
Gerrit-PatchSet: 9
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Eranroz <eranroz89(a)gmail.com>
Gerrit-Reviewer: Eranroz <eranroz89(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <Ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>