jenkins-bot has submitted this change and it was merged.
Change subject: Fix xml dump gen to use textlib correctly
......................................................................
Fix xml dump gen to use textlib correctly
Addressing the following issues:
* sending the correct parameters to replaceExcept in
the correct order (e.g. site)
* using the exceptions of the replacement to avoid generating
non-putative replacements
Change-Id: I5262878d096dd0b25dc22d77337aa5e351e0cbda
---
M scripts/replace.py
1 file changed, 25 insertions(+), 16 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/replace.py b/scripts/replace.py
index 7e3f0d9..ac88936 100755
--- a/scripts/replace.py
+++ b/scripts/replace.py
@@ -189,6 +189,11 @@
exceptions[exceptionCategory] = patterns
+def _get_text_exceptions(exceptions):
+ """Get exceptions on text (inside exceptions)."""
+ return exceptions.get('inside-tags', []) + exceptions.get('inside',
[])
+
+
class ReplacementBase(object):
"""The replacement instructions."""
@@ -292,6 +297,10 @@
"""Compile the search regex and exceptions."""
super(Replacement, self)._compile(use_regex, flags)
precompile_exceptions(self.exceptions, use_regex, flags)
+
+ def get_inside_exceptions(self):
+ """Get exceptions on text (inside exceptions)."""
+ return _get_text_exceptions(self.exceptions or {})
class ReplacementList(list):
@@ -425,17 +434,20 @@
if entry.title != self.xmlStart:
continue
self.skipping = False
- if not self.isTitleExcepted(entry.title) \
- and not self.isTextExcepted(entry.text):
- new_text = entry.text
- for replacement in self.replacements:
- # This doesn't do an actual replacement but just
- # checks if at least one does apply
- new_text = textlib.replaceExcept(
- new_text, replacement.old_regex, replacement.new,
- self.excsInside, self.site)
- if new_text != entry.text:
- yield pywikibot.Page(self.site, entry.title)
+ if self.isTitleExcepted(entry.title) \
+ or self.isTextExcepted(entry.text):
+ continue
+ new_text = entry.text
+ for replacement in self.replacements:
+ # This doesn't do an actual replacement but just
+ # checks if at least one does apply
+ new_text = textlib.replaceExcept(
+ new_text, replacement.old_regex, replacement.new,
+ self.excsInside + replacement.get_inside_exceptions(),
+ site=self.site)
+ if new_text != entry.text:
+ yield pywikibot.Page(self.site, entry.title)
+
except KeyboardInterrupt:
try:
if not self.skipping:
@@ -588,15 +600,12 @@
@rtype: unicode, set
"""
- def get_exceptions(exceptions):
- return exceptions.get('inside-tags', []) +
exceptions.get('inside', [])
-
if page is None:
pywikibot.warn(
'You must pass the target page as the "page" parameter to
'
'apply_replacements().', DeprecationWarning, stacklevel=2)
new_text = original_text
- exceptions = get_exceptions(self.exceptions)
+ exceptions = _get_text_exceptions(self.exceptions)
skipped_containers = set()
for replacement in self.replacements:
if self.sleep is not None:
@@ -622,7 +631,7 @@
old_text = new_text
new_text = textlib.replaceExcept(
new_text, replacement.old_regex, replacement.new,
- exceptions + get_exceptions(replacement.exceptions or {}),
+ exceptions + replacement.get_inside_exceptions(),
allowoverlap=self.allowoverlap, site=self.site)
if old_text != new_text:
applied.add(replacement)
--
To view, visit
https://gerrit.wikimedia.org/r/206355
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5262878d096dd0b25dc22d77337aa5e351e0cbda
Gerrit-PatchSet: 9
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Eranroz <eranroz89(a)gmail.com>
Gerrit-Reviewer: Eranroz <eranroz89(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <Ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>