jenkins-bot has submitted this change and it was merged.
Change subject: lonelypages.py: ported to core
......................................................................
lonelypages.py: ported to core
Ported lonelypages.py to pywikibot core. Removed some unnecessary inline
comments from the code. Also shifted messages to i18n TranslateWiki (see
I9b3506d1e801909a2b1d25f6d82063cdca195d26).
Change-Id: Ic832222ec22c90842388ab78c2220af15a567802
---
A scripts/lonelypages.py
1 file changed, 263 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/lonelypages.py b/scripts/lonelypages.py
new file mode 100644
index 0000000..b5ce898
--- /dev/null
+++ b/scripts/lonelypages.py
@@ -0,0 +1,263 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This is a script written to add the template "orphan" to the pages that
aren't
+linked by other pages. It can give some strange Errors sometime, I hope that
+all of them are fixed in this version.
+
+These command line parameters can be used to specify which pages to work on:
+
+¶ms;
+
+-xml Retrieve information from a local XML dump (pages-articles
+ or pages-meta-current, see
http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+
+-page Only edit a specific page.
+ Argument can also be given as "-page:pagetitle". You can
+ give this parameter multiple times to edit multiple pages.
+
+Furthermore, the following command line parameters are supported:
+
+-enable: - Enable or disable the bot via a Wiki Page.
+
+-disambig: - Set a page where the bot save the name of the disambig
+ pages found (default: skip the pages)
+
+-limit: - Set how many pages check.
+
+-always - Always say yes, won't ask
+
+--- FixMes ---
+* Check that all the code hasn't bugs
+
+--- Credit and Help ---
+This Script has been developed by Pietrodn and Filnik on botwiki. If you want
+to help us improving our script archive and pywikipediabot's archive or you
+simply need help you can find us here:
http://botwiki.sno.cc
+
+--- Examples ---
+python lonelypages.py -enable:User:Bot/CheckBot -always
+"""
+#
+# (C) Pietrodn, it.wiki 2006-2007
+# (C) Filnik, it.wiki 2007
+# (C) Pywikipedia bot team, 2008-2012
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import pywikibot
+from pywikibot import i18n
+from pywikibot import pagegenerators
+from pywikibot import re
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '¶ms;': pagegenerators.parameterHelp,
+}
+
+Template = {
+ 'ar': u'{{يتيمة|تاريخ={{نسخ:اسم_شهر}} {{نسخ:عام}}}}',
+ 'ca': u'{{Orfe|date={{subst:CURRENTMONTHNAME}}
{{subst:CURRENTYEAR}}}}',
+ 'en': u'{{Orphan|date={{subst:CURRENTMONTHNAME}}
{{subst:CURRENTYEAR}}}}',
+ 'it': u'{{O||mese={{subst:CURRENTMONTHNAME}}
{{subst:CURRENTYEAR}}}}',
+ 'jh': u'{{孤立|{{subst:DATE}}}}',
+ 'za': u'{{subst:Orphan/auto}}',
+}
+
+exception_regex = {
+ 'ar': [ur'\{\{(?:قالب:|)(يتيمة)[\|\}]'],
+ 'ca': [r'\{\{(?:template:|)(orfe)[\|\}]'],
+ 'en': [r'\{\{(?:template:|)(orphan)[\|\}]',
r'\{\{(?:template:|)(wi)[\|\}]'],
+ 'it': [r'\{\{(?:template:|)(o|a)[\|\}]'],
+ 'jh': [ur'\{\{(?:template:|)(孤立)[\|\}]'],
+ 'za': [r'\{\{(?:template:|)(orphan)[\|\}]'],
+}
+#####################################################
+# Here you have to put the config for your Project. #
+#####################################################
+
+
+def main():
+ enablePage = None # Check if someone set an enablePage or not
+ limit = 50000 # Hope that there aren't so many lonely pages in a project
+ generator = None # Check if bot should use default generator or not
+ # Load all default generators!
+ genFactory = pagegenerators.GeneratorFactory()
+ nwpages = False # Check variable for newpages
+ always = False # Check variable for always
+ disambigPage = None # If no disambigPage given, not use it.
+ # Arguments!
+ for arg in pywikibot.handleArgs():
+ if arg.startswith('-enable'):
+ if len(arg) == 7:
+ enablePage = pywikibot.input(u'Would you like to check if the \
+ bot should run or not?')
+ else:
+ enablePage = arg[8:]
+ if arg.startswith('-disambig'):
+ if len(arg) == 9:
+ disambigPage = pywikibot.input(u'In which page should the bot \
+ save the disambig pages?')
+ else:
+ disambigPage = arg[10:]
+ elif arg.startswith('-limit'):
+ if len(arg) == 6:
+ limit = int(pywikibot.input(u'How many pages do you want to \
+ check?'))
+ else:
+ limit = int(arg[7:])
+ elif arg.startswith('-newpages'):
+ if len(arg) == 9:
+ nwlimit = 50 # Default: 50 pages
+ else:
+ nwlimit = int(arg[10:])
+ generator = pywikibot.getSite().newpages(number=nwlimit)
+ nwpages = True
+ elif arg == '-always':
+ always = True
+ else:
+ genFactory.handleArg(arg)
+ # Retrive the site
+ wikiSite = pywikibot.getSite()
+
+ if not generator:
+ generator = genFactory.getCombinedGenerator()
+
+ # If the generator is not given, use the default one
+ if not generator:
+ generator = wikiSite.lonelypages(repeat=True, number=limit)
+ # Take the configurations according to our project
+ comment = i18n.twtranslate(wikiSite, 'lonelypages-comment-add-template')
+ commentdisambig = i18n.twtranslate(wikiSite,
'lonelypages-comment-add-disambig-template')
+ template = i18n.translate(wikiSite, Template, fallback=False)
+ exception = i18n.translate(wikiSite, exception_regex, fallback=False)
+ if template is None or exception is None:
+ raise Exception("Missing configuration for site %r" % wikiSite)
+ # EnablePage part
+ if enablePage is not None:
+ # Define the Page Object
+ enable = pywikibot.Page(wikiSite, enablePage)
+ # Loading the page's data
+ try:
+ getenable = enable.get()
+ except pywikibot.NoPage:
+ pywikibot.output(u"%s doesn't esist, I use the page as if it was \
+ blank!" % enable.title())
+ getenable = ''
+ except wikiepedia.IsRedirect:
+ pywikibot.output(u"%s is a redirect, skip!" % enable.title())
+ getenable = ''
+ # If the enable page is set to disable, turn off the bot
+ # (useful when the bot is run on a server)
+ if getenable != 'enable':
+ pywikibot.output('The bot is disabled')
+ return
+ # DisambigPage part
+ if disambigPage is not None:
+ disambigpage = pywikibot.Page(wikiSite, disambigPage)
+ try:
+ disambigtext = disambigpage.get()
+ except pywikibot.NoPage:
+ pywikibot.output(u"%s doesn't esist, skip!" %
disambigpage.title())
+ disambigtext = ''
+ except wikiepedia.IsRedirect:
+ pywikibot.output(
+ u"%s is a redirect, don't use it!" % disambigpage.title())
+ disambigPage = None
+ # Main Loop
+ for page in generator:
+ if nwpages is True:
+ # The newpages generator returns a tuple, not a Page object.
+ page = page[0]
+ pywikibot.output(u"Checking %s..." % page.title())
+ if page.isRedirectPage(): # If redirect, skip!
+ pywikibot.output(u'%s is a redirect! Skip...' % page.title())
+ continue
+ # refs is not a list, it's a generator while resList... is a list, yes.
+ refs = page.getReferences()
+ refsList = list()
+ for j in refs:
+ if j is None:
+ # We have to find out why the function returns that value
+ pywikibot.error(u'1 --> Skip page')
+ continue
+ refsList.append(j)
+ # This isn't possible with a generator
+ if refsList != []:
+ pywikibot.output(u"%s isn't orphan! Skip..." % page.title())
+ continue
+ # Never understood how a list can turn in "None", but it happened :-S
+ elif refsList is None:
+ # We have to find out why the function returns that value
+ pywikibot.error(u'2 --> Skip page')
+ continue
+ else:
+ # no refs, no redirect; check if there's already the template
+ try:
+ oldtxt = page.get()
+ except pywikibot.NoPage:
+ pywikibot.output(u"%s doesn't exist! Skip..." %
page.title())
+ continue
+ except pywikibot.IsRedirectPage:
+ pywikibot.output(u"%s is a redirect! Skip..." % page.title())
+ continue
+ # I've used a loop in a loop. If I use continue in the second loop,
+ # it won't do anything in the first. So let's create a variable to
+ # avoid this problem.
+ Find = False
+ for regexp in exception:
+ res = re.findall(regexp, oldtxt.lower())
+ # Found a template! Let's skip the page!
+ if res != []:
+ pywikibot.output(u'Your regex has found something in %s, \
+ skipping...' % page.title())
+ Find = True
+ break
+ # Skip the page..
+ if Find:
+ continue
+ # Is the page a disambig?
+ if page.isDisambig() and disambigPage is not None:
+ pywikibot.output(
+ u'%s is a disambig page, report..' % page.title())
+ if not page.title().lower() in disambigtext.lower():
+ disambigtext = u"%s\n*[[%s]]" % (disambigtext,
page.title())
+ disambigpage.put(disambigtext, commentdisambig)
+ continue
+ # Is the page a disambig but there's not disambigPage? Skip!
+ elif page.isDisambig():
+ pywikibot.output(
+ u'%s is a disambig page, skip...' % page.title())
+ continue
+ else:
+ # Ok, the page need the template. Let's put it there!
+ # Adding the template in the text
+ newtxt = u"%s\n%s" % (template, oldtxt)
+ # Showing the title
+ pywikibot.output(u"\t\t>>> %s <<<" %
page.title())
+ # Showing the changes
+ pywikibot.showDiff(oldtxt, newtxt)
+ choice = 'y' # Default answer
+ if not always:
+ choice = pywikibot.inputChoice(u'Orphan page found, add \
+ template?', ['Yes', 'No', 'All'],
['y', 'n', 'a'])
+ if choice == 'a':
+ always = True
+ choice = 'y'
+ if choice == 'y':
+ try:
+ page.put(newtxt, comment)
+ except pywikibot.EditConflict:
+ pywikibot.output(u'Edit Conflict! Skip...')
+ continue
+
+if __name__ == '__main__':
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit
https://gerrit.wikimedia.org/r/103106
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic832222ec22c90842388ab78c2220af15a567802
Gerrit-PatchSet: 16
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Strainu <wiki(a)strainu.ro>
Gerrit-Reviewer: jenkins-bot