jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/669432 )
Change subject: [bugfix] fix another treat process flow bug
......................................................................
[bugfix] fix another treat process flow bug
Change-Id: I5f7a6c902544efe3f2f6e8cb2f812d7e33165b7b
---
M scripts/reflinks.py
1 file changed, 2 insertions(+), 5 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index a27847b..2514ec0 100755
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -522,11 +522,9 @@
         textlib.removeDisabledParts(page.get())):
     link = match.group('url')
-    # debugging purpose
-    # print link
     if 'jstor.org' in link:
         # TODO: Clean URL blacklist
-        return
+        continue
ref = RefLink(link, match.group('name'), site=self.site)
@@ -593,8 +591,6 @@
     new_text = new_text.replace(match.group(), repl)
     continue

-    linkedpagetext = r.content
-
 except UnicodeError:
     # example:
     # http://www.adminet.com/jo/20010615%C2%A6/ECOC0100037D.html
@@ -615,6 +611,7 @@
         .format(ref.url, e))
     continue

+linkedpagetext = r.content
 # remove <script>/<style>/comments/CDATA tags
 linkedpagetext = self.NON_HTML.sub(b'', linkedpagetext)