[Pywikipedia-l] SVN: [4771] trunk/pywikipedia/lonelypages.py

filnik at svn.wikimedia.org filnik at svn.wikimedia.org
Fri Dec 28 17:16:52 UTC 2007

Revision: 4771
Author:   filnik
Date:     2007-12-28 17:16:52 +0000 (Fri, 28 Dec 2007)

Log Message:
Adding a new script to the framework. It is already used on it.wiki and en.wiki to tag lonely pages. It supports many generators (including the standard ones) and is a stable version, after two days of extensive testing.

Added Paths:

Added: trunk/pywikipedia/lonelypages.py
--- trunk/pywikipedia/lonelypages.py	                        (rev 0)
+++ trunk/pywikipedia/lonelypages.py	2007-12-28 17:16:52 UTC (rev 4771)
@@ -0,0 +1,241 @@
+# -*- coding: utf-8 -*-
+This is a script written to add the template "orphan" to the pages that aren't linked by other pages.
+It may occasionally raise some strange errors; I hope that all of them are fixed in this version.
+-enable:            - Enable or disable the bot via a Wiki Page.
+-disambig:          - Set a page where the bot saves the names of the disambig pages found (default: skip the pages)
+-limit:             - Set how many pages to check.
+-page:              - Work only on the page given.
+-always             - Always say yes, won't ask
+-newpages:          - Check the newpages (default: the first 50 pages)
+-standard arguments (like -start, -cat, -ref and so on)
+--- FixMes ---
+* Check that the code has no bugs
+--- Credit and Help ---
+This script has been developed by Pietrodn and Filnik on botwiki. If you want to help us
+improve our script archive and pywikipediabot's archive, or you simply need help,
+you can find us here: http://botwiki.sno.cc
+--- Examples ---
+python lonelypages.py -enable:User:Bot/CheckBot -always 
+# (C) Pietrodn, it.wiki 2006-2007
+# (C) Filnik, it.wiki 2007
+# Distributed under the terms of the MIT license.
+__version__ = '$Id: lonelypages.py,v 1.0 2007/12/28 19.16.00 filnik Exp$'
+import wikipedia, pagegenerators
+import re
+# Per-project configuration: each dictionary below is keyed by language code. #
+# ************* Modify only below! ************* #
+# Wikitext of the template that main() puts on top of the orphan pages
+Template = {
+            'en':u'{{Orphan|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}',
+            'it':u'{{O||mese={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}',
+            }
+# Edit summary that the bot uses when it adds the template to a page
+commento = {
+           'en':u'Bot: Orphan page, add template',
+           'it':u'Bot: Voce orfana, aggiungo template {{O}}',
+           }
+# Edit summary used when a disambig page is added to the list of disambig pages
+# (only used when a disambig page is given, see the -disambig argument)
+commenttodisambig = {
+           'en':u'Bot: Adding a disambig page',
+           'it':u'Bot: Aggiungo una disambigua',
+           }
+# Regexes that detect an already present template, so it is never added twice.
+# They are matched against the lowercased page text: write them in lower case.
+# If you need help with regex, ask on botwiki ( http://botwiki.sno.cc )
+# Warning: always put "()" inside the regex, so the bot will find "something"
+exception = {
+            'en': [r'\{\{(?:template:|)(orphan)[\|\}]', r'\{\{(?:template:|)(wi)[\|\}]'],
+            'it': [r'\{\{(?:template:|)(o)[\|\}]'],
+            }
+# ************* Modify only above! ************* #
+def main():
+    # Load the configurations in the function namespace
+    global commento; global Template; global disambigPage; global commenttodisambig
+    global exception
+    enablePage = None # Check if someone set an enablePage or not
+    limit = 50000 # All the pages! (I hope that there aren't so many lonely pages in a project..)
+    generator = None # Check if the bot should use the default generator or not
+    genFactory = pagegenerators.GeneratorFactory() # Load all the default generators!
+    nwpages = False # Check variable for newpages
+    always = False # Check variable for always
+    disambigPage = None # If no disambigPage given, not use it.
+    # Arguments!
+    for arg in wikipedia.handleArgs():
+        if arg.startswith('-enable'):
+            if len(arg) == 7:
+                enablePage = wikipedia.input(u'Would you like to check if the bot should run or not?')
+            else:
+                enablePage = arg[8:]
+        if arg.startswith('-disambig'):
+            if len(arg) == 9:
+                disambigPage = wikipedia.input(u'In which page should the bot save the disambig pages?')
+            else:
+                disambigPage = arg[10:]                
+        elif arg.startswith('-limit'):
+            if len(arg) == 6:
+                limit = int(wikipedia.input(u'How many pages do you want to check?'))
+            else:
+                limit = int(arg[7:])
+        elif arg.startswith('-newpages'):
+            if len(arg) == 9:
+                nwlimit = 50 # Default: 50 pages
+            else:
+                nwlimit = int(arg[10:])
+            generator = wikipedia.getSite().newpages(number = nwlimit)
+            nwpages = True
+        elif arg.startswith('-page'):
+            if len(arg) == 5:
+                generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'How many pages do you want to check?'))]
+            else:
+                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+        elif arg == '-always':
+            always = True       
+        else:
+            generator = genFactory.handleArg(arg)
+    # Retrive the site
+    wikiSite = wikipedia.getSite()
+    # If the generator is not given, use the default one
+    if generator == None:
+        generator = wikiSite.lonelypages(repeat = True, number = limit)
+    # Take the configurations according to our project
+    comment = wikipedia.translate(wikiSite, commento)
+    commentdisambig = wikipedia.translate(wikiSite, commenttodisambig)
+    template = wikipedia.translate(wikiSite, Template)
+    exception = wikipedia.translate(wikiSite, exception)
+    # EnablePage part
+    if enablePage != None:
+        # Define the Page Object
+        enable = wikipedia.Page(wikiSite, enablePage)
+        # Loading the page's data
+        try:
+            getenable = enable.get()
+        except wikipedia.NoPage:
+            wikipedia.output(u"%s doesn't esist, I use the page as if it was blank!" % enable.title())
+            getenable = ''
+        except wikiepedia.IsRedirect:
+            wikipedia.output(u"%s is a redirect, skip!" % enable.title())
+            getenable = ''
+        # If the enable page is set to disable, turn off the bot
+        # (useful when the bot is run on a server)
+        if getenable != 'enable':
+            wikipedia.output('The bot is disabled')
+            wikipedia.stopme()
+    # DisambigPage part
+    if disambigPage != None:
+        disambigpage = wikipedia.Page(wikiSite, disambigPage)
+        try:
+            disambigtext = disambigpage.get()
+        except wikipedia.NoPage:
+            wikipedia.output(u"%s doesn't esist, skip!" % disambigpage.title())
+            disambigtext = ''
+        except wikiepedia.IsRedirect:
+            wikipedia.output(u"%s is a redirect, don't use it!" % disambigpage.title())
+            disambigPage = None
+    # Main Loop
+    for page in generator:
+        if nwpages == True:
+            page = page[0] # The newpages generator returns a tuple, not a Page object.
+        wikipedia.output(u"Checking %s..." % page.title())
+        # Used to skip the first pages in test phase...
+        #if page.title()[0] in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']:
+            #continue
+        if page.isRedirectPage(): # If redirect, skip!
+            wikipedia.output(u'%s is a redirect! Skip...' % page.title())
+            continue
+        # refs is not a list, it's a generator while resList... is a list, yes.
+        refs = page.getReferences()
+        refsList = list()
+        for j in refs:
+            if j == None:
+                # We have to find out why the function returns that value
+                wikipedia.output(u'Error: 1 --> Skip page')
+                continue
+            refsList.append(j)
+        # This isn't possible with a generator
+        if refsList != []:
+            wikipedia.output(u"%s isn't orphan! Skip..." % page.title())
+            continue
+        # Never understood how a list can turn in "None", but it happened :-S
+        elif refsList == None:
+            # We have to find out why the function returns that value
+            wikipedia.output(u'Error: 2 --> Skip page')
+            continue
+        else:
+            # Ok, no refs, no redirect... let's check if there's already the template
+            try:
+                oldtxt = page.get()
+            except wikipedia.NoPage:
+                wikipedia.output(u"%s doesn't exist! Skip..." % page.title())
+                continue
+            except wikipedia.IsRedirectPage:
+                wikipedia.output(u"%s is a redirect! Skip..." % page.title())
+                continue
+            # I've used a loop in a loop. If I use continue in the second loop, it won't do anything
+            # in the first. So let's create a variable to avoid this problem.
+            Find = False
+            for regexp in exception:
+                res = re.findall(regexp, oldtxt.lower())
+                # Found a template! Let's skip the page!
+                if res != []:
+                    wikipedia.output(u'Your regex has found something in %s, skipping...' % page.title())
+                    Find = True
+                    break
+            # Skip the page..
+            if Find:
+                continue
+            # Is the page a disambig?
+            if page.isDisambig() and disambigPage != None:
+                wikipedia.output(u'%s is a disambig page, report..' % page.title())
+                disambigtext = u"%s\n*[[%s]]" % (disambigtext, page.title())
+                disambigpage.put(disambigtext, commentdisambig)
+                continue
+            # Is the page a disambig but there's not disambigPage? Skip!
+            elif page.isDisambig():
+                 wikipedia.output(u'%s is a disambig page, skip...' % page.title())
+                 continue
+            else:
+                # Ok, the page need the template. Let's put it there!
+                newtxt = u"%s\n%s" % (template, oldtxt) # Adding the template in the text
+                wikipedia.output(u"\t\t>>> %s <<<" % page.title()) # Showing the title
+                wikipedia.showDiff(oldtxt, newtxt) # Showing the changes
+                choice = 'y' # Default answer
+                if not always:
+                    choice = wikipedia.inputChoice(u'Orphan page found, shall I add the template?',  [u'Yes', u'No', u'All'], [u'y', u'n', u'a'], [u'Y', u'N', 'A'])
+                if choice.lower() in [u'a', u'all']:
+                    always = True
+                    choice = 'y'
+                if choice.lower() in [u'y', u'yes']:
+                    try:
+                        page.put(newtxt, comment)
+                    except wikipedia.EditConflict:
+                        wikipedia.output(u'Edit Conflict! Skip...')
+                        continue
+if __name__ == '__main__':
+    # Run the bot only when the file is executed as a script. The finally
+    # clause guarantees that wikipedia.stopme() is called even when main()
+    # raises an exception or is interrupted.
+    try:
+        main()
+    finally:
+        wikipedia.stopme()

More information about the Pywikipedia-l mailing list