jenkins-bot has submitted this change and it was merged.
Change subject: Fix xml dump gen to use textlib correctly
......................................................................
Fix xml dump gen to use textlib correctly
Addressing the following issues:
* sending the correct parameters to replaceExcept in the correct order (e.g. site)
* using the exceptions of the replacement to avoid generating non-putative replacements
Change-Id: I5262878d096dd0b25dc22d77337aa5e351e0cbda
---
M scripts/replace.py
1 file changed, 25 insertions(+), 16 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/scripts/replace.py b/scripts/replace.py index 7e3f0d9..ac88936 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -189,6 +189,11 @@ exceptions[exceptionCategory] = patterns
+def _get_text_exceptions(exceptions): + """Get exceptions on text (inside exceptions).""" + return exceptions.get('inside-tags', []) + exceptions.get('inside', []) + + class ReplacementBase(object):
"""The replacement instructions.""" @@ -292,6 +297,10 @@ """Compile the search regex and exceptions.""" super(Replacement, self)._compile(use_regex, flags) precompile_exceptions(self.exceptions, use_regex, flags) + + def get_inside_exceptions(self): + """Get exceptions on text (inside exceptions).""" + return _get_text_exceptions(self.exceptions or {})
class ReplacementList(list): @@ -425,17 +434,20 @@ if entry.title != self.xmlStart: continue self.skipping = False - if not self.isTitleExcepted(entry.title) \ - and not self.isTextExcepted(entry.text): - new_text = entry.text - for replacement in self.replacements: - # This doesn't do an actual replacement but just - # checks if at least one does apply - new_text = textlib.replaceExcept( - new_text, replacement.old_regex, replacement.new, - self.excsInside, self.site) - if new_text != entry.text: - yield pywikibot.Page(self.site, entry.title) + if self.isTitleExcepted(entry.title) \ + or self.isTextExcepted(entry.text): + continue + new_text = entry.text + for replacement in self.replacements: + # This doesn't do an actual replacement but just + # checks if at least one does apply + new_text = textlib.replaceExcept( + new_text, replacement.old_regex, replacement.new, + self.excsInside + replacement.get_inside_exceptions(), + site=self.site) + if new_text != entry.text: + yield pywikibot.Page(self.site, entry.title) + except KeyboardInterrupt: try: if not self.skipping: @@ -588,15 +600,12 @@
@rtype: unicode, set """ - def get_exceptions(exceptions): - return exceptions.get('inside-tags', []) + exceptions.get('inside', []) - if page is None: pywikibot.warn( 'You must pass the target page as the "page" parameter to ' 'apply_replacements().', DeprecationWarning, stacklevel=2) new_text = original_text - exceptions = get_exceptions(self.exceptions) + exceptions = _get_text_exceptions(self.exceptions) skipped_containers = set() for replacement in self.replacements: if self.sleep is not None: @@ -622,7 +631,7 @@ old_text = new_text new_text = textlib.replaceExcept( new_text, replacement.old_regex, replacement.new, - exceptions + get_exceptions(replacement.exceptions or {}), + exceptions + replacement.get_inside_exceptions(), allowoverlap=self.allowoverlap, site=self.site) if old_text != new_text: applied.add(replacement)
pywikibot-commits@lists.wikimedia.org