jenkins-bot has submitted this change and it was merged.
Change subject: [bugfix] new regex for retrieving page titles from special page. ......................................................................
[bugfix] new regex for retrieving page titles from special page.
The title part of the list elements may contain HTML entities, which will be interpreted as a section while instantiating the page and lead to an invalid title exception (e.g. invalid section in category).
Now the page title is fetched from the link text enclosed by the <a> tag instead.
Change-Id: I15383809fbb911e4d8722dfb87c04b71d182bf7f --- M redirect.py 1 file changed, 8 insertions(+), 6 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/redirect.py b/redirect.py index 1cd7c40..cb26269 100644 --- a/redirect.py +++ b/redirect.py @@ -69,6 +69,8 @@ import query import xmlreader
+SPECIALPAGE_REGEX = '<li><a href=".+?" title=".*?">(.+?)</a>' +
class RedirectGenerator: def __init__(self, xmlFilename=None, namespaces=[], offset=-1, @@ -127,7 +129,7 @@ if target.startswith('%s:' % code) \ or target.startswith(':%s:' % code): if code == self.site.language(): - # link to our wiki, but with the lang prefix + # link to our wiki, but with the lang prefix target = target[(len(code) + 1):] if target.startswith(':'): target = target[1:] @@ -234,7 +236,7 @@ params = { 'action': 'query', 'redirects': 1, - #'': '', + # '': '', } for apiQ in self._next_redirect_group(): params['pageids'] = apiQ @@ -294,7 +296,7 @@
# regular expression which finds redirects which point to a # non-existing page inside the HTML - Rredir = re.compile('<li><a href=".+?" title="(.*?)"') + Rredir = re.compile(SPECIALPAGE_REGEX)
redir_names = Rredir.findall(maintenance_txt) pywikibot.output(u'Retrieved %d redirects from special page.\n' @@ -350,7 +352,7 @@
# regular expression which finds redirects which point to # another redirect inside the HTML - Rredir = re.compile('<li><a href=".+?" title="(.*?)">') + Rredir = re.compile(SPECIALPAGE_REGEX) redir_names = Rredir.findall(maintenance_txt) pywikibot.output(u'Retrieved %i redirects from special page.\n' % len(redir_names)) @@ -521,8 +523,8 @@ u"Won't delete anything." % targetPage.title(asLink=True)) else: - #we successfully get the target page, meaning that - #it exists and is not a redirect: no reason to touch it. + # we successfully get the target page, meaning that + # it exists and is not a redirect: no reason to touch it. pywikibot.output( u'Redirect target %s does exist! Won't delete anything.' % targetPage.title(asLink=True))
pywikibot-commits@lists.wikimedia.org