jenkins-bot has submitted this change and it was merged.
Change subject: Deprecate page.isEmpty() and add function in interwiki to check for empty pages
Deprecate page.isEmpty() and add function in interwiki to check for empty pages
This change gives interwiki.py its own 'page_empty_check' function, which can be optimized further, instead of relying on page.isEmpty(), which is used by other tools and hence can't be changed.
Bug: T112340 Change-Id: I3070efc8dd568b6817e65be880e92974feb084ce --- M pywikibot/page.py M scripts/interwiki.py 2 files changed, 28 insertions(+), 6 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py index bdcc0b1..dc34774 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -746,6 +746,7 @@ return Category(Link(self._catredirect, self.site)) raise pywikibot.IsNotRedirectPage(self)
+ @deprecated("interwiki.page_empty_check(page)") def isEmpty(self): """Return True if the page text has less than 4 characters.
diff --git a/scripts/interwiki.py b/scripts/interwiki.py index fb20f58..dcd2095 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -1372,7 +1372,7 @@
# must be behind the page.isRedirectPage() part # otherwise a redirect error would be raised - elif not page.isCategory() and page.isEmpty(): + elif page_empty_check(page): globalvar.remove.append(unicode(page)) if not globalvar.quiet: pywikibot.output(u"NOTE: %s is empty. Skipping." % page) @@ -1839,7 +1839,7 @@ except pywikibot.NoPage: pywikibot.output(u"Not editing %s: page does not exist" % page) raise SaveError(u'Page doesn't exist') - if not page.isCategory() and page.isEmpty(): + if page_empty_check(page): pywikibot.output(u"Not editing %s: page is empty" % page) raise SaveError(u'Page is empty.')
@@ -2196,10 +2196,6 @@ if page.isTalkPage(): pywikibot.output(u'Skipping: %s is a talk page' % page) continue - # doesn't work: page must be preloaded for this test - # if page.isEmpty(): - # pywikibot.output(u'Skipping: %s is a empty page' % page.title()) - # continue if page.namespace() == 10: loc = None try: @@ -2458,6 +2454,31 @@ bot.add(page, hints=hintStrings)
+def page_empty_check(page): + """ + Return True if page should be skipped as it is almost empty. + + Pages in content namespaces are considered empty if they contain less than 50 + characters, and other pages are considered empty if they are not category + pages and contain less than 4 characters excluding interlanguage links and + categories. + + @rtype: bool + """ + # Check if the page is in content namespace + if page.namespace() == 0: + # Check if the page contains at least 50 characters + return len(page.text) < 50 + else: + if not page.isCategory(): + txt = page.get() + txt = textlib.removeLanguageLinks(txt, site=page.site) + txt = textlib.removeCategoryLinks(txt, site=page.site) + return len(txt) < 4 + else: + return False + + def main(*args): """ Process command line arguments and invoke bot.