jenkins-bot has submitted this change and it was merged.
Change subject: Deprecate page.isEmpty() and add function in interwiki to check for empty pages
......................................................................
Deprecate page.isEmpty() and add function in interwiki to check for empty pages
The change allows interwiki.py to have its own 'page_empty_check' function which
can be optimized further rather than use page.isEmpty() which is used by other
tools and hence can't be changed
Bug: T112340
Change-Id: I3070efc8dd568b6817e65be880e92974feb084ce
---
M pywikibot/page.py
M scripts/interwiki.py
2 files changed, 28 insertions(+), 6 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index bdcc0b1..dc34774 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -746,6 +746,7 @@
return Category(Link(self._catredirect, self.site))
raise pywikibot.IsNotRedirectPage(self)
+ @deprecated("interwiki.page_empty_check(page)")
def isEmpty(self):
"""Return True if the page text has less than 4 characters.
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index fb20f58..dcd2095 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1372,7 +1372,7 @@
# must be behind the page.isRedirectPage() part
# otherwise a redirect error would be raised
- elif not page.isCategory() and page.isEmpty():
+ elif page_empty_check(page):
globalvar.remove.append(unicode(page))
if not globalvar.quiet:
pywikibot.output(u"NOTE: %s is empty. Skipping." % page)
@@ -1839,7 +1839,7 @@
except pywikibot.NoPage:
pywikibot.output(u"Not editing %s: page does not exist" % page)
raise SaveError(u'Page doesn\'t exist')
- if not page.isCategory() and page.isEmpty():
+ if page_empty_check(page):
pywikibot.output(u"Not editing %s: page is empty" % page)
raise SaveError(u'Page is empty.')
@@ -2196,10 +2196,6 @@
if page.isTalkPage():
pywikibot.output(u'Skipping: %s is a talk page' % page)
continue
- # doesn't work: page must be preloaded for this test
- # if page.isEmpty():
- # pywikibot.output(u'Skipping: %s is a empty page' % page.title())
- # continue
if page.namespace() == 10:
loc = None
try:
@@ -2458,6 +2454,31 @@
bot.add(page, hints=hintStrings)
+def page_empty_check(page):
+ """
+ Return True if page should be skipped as it is almost empty.
+
+ Pages in content namespaces are considered empty if they contain less than 50
+ characters, and other pages are considered empty if they are not category
+ pages and contain less than 4 characters excluding interlanguage links and
+ categories.
+
+ @rtype: bool
+ """
+ # Check if the page is in content namespace
+ if page.namespace() == 0:
+ # Check if the page contains at least 50 characters
+ return len(page.text) < 50
+ else:
+ if not page.isCategory():
+ txt = page.get()
+ txt = textlib.removeLanguageLinks(txt, site=page.site)
+ txt = textlib.removeCategoryLinks(txt, site=page.site)
+ return len(txt) < 4
+ else:
+ return False
+
+
def main(*args):
"""
Process command line arguments and invoke bot.
--
To view, visit
https://gerrit.wikimedia.org/r/257367
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3070efc8dd568b6817e65be880e92974feb084ce
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Vadiraja.k <vadi.fedx(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>