SVN: [5675] trunk/pywikipedia - Pywikipedia-l

6 Jul 2008

Revision: 5675
Author:   siebrand
Date:     2008-07-05 18:21:03 +0000 (Sat, 05 Jul 2008)
Log Message:
-----------
svn:eol-style:native
Modified Paths:
--------------
    trunk/pywikipedia/archive/mediawiki_messages.py
    trunk/pywikipedia/category_redirect.py
    trunk/pywikipedia/commonsdelinker/plugins.txt
    trunk/pywikipedia/families/README-family.txt
    trunk/pywikipedia/protect.py
Property Changed:
----------------
    trunk/pywikipedia/archive/mediawiki_messages.py
    trunk/pywikipedia/category_redirect.py
    trunk/pywikipedia/commonsdelinker/plugins.txt
    trunk/pywikipedia/families/README-family.txt
    trunk/pywikipedia/protect.py
Modified: trunk/pywikipedia/archive/mediawiki_messages.py
===================================================================

--- trunk/pywikipedia/archive/mediawiki_messages.py	2008-07-05 12:57:13 UTC (rev 5674)
+++ trunk/pywikipedia/archive/mediawiki_messages.py	2008-07-05 18:21:03 UTC (rev 5675)
@@ -1,218 +1,218 @@
-# -*- coding: utf-8 -*-
-"""
-Allows access to the MediaWiki messages, that's the label texts of the MediaWiki
-software in the current language. These can be used in other bots.
-
-The function refresh_messages() downloads all the current messages and saves
-them to disk. It is run automatically when a bot first tries to access one of
-the messages. It can be updated manually by running this script, e.g. when
-somebody changed the current message at the wiki. The texts will also be
-reloaded automatically once a month.
-
-Syntax: python mediawiki_messages [-all]
-
-Command line options:
-    -refresh - Reloads messages for the home wiki or for the one defined via
-               the -lang and -family parameters.
-
-    -all     - Reloads messages for all wikis where messages are already present
-
-    If another parameter is given, it will be interpreted as a MediaWiki key.
-    The script will then output the respective value, without refreshing..
-    
-"""
-
-# (C) Daniel Herding, 2004
-#
-# Distributed under the terms of the MIT license.
-
-##THIS MODULE IS DEPRECATED AND HAS BEEN REPLACED BY NEW FUNCTIONALITY IN
-##WIKIPEDIA.PY.  It is being retained solely for compatibility in case any
-##custom-written bots rely upon it.  Bot authors should replace any uses
-##of this module as follows:
-##
-##    OLD:    mediawiki_messages.get(key, site)
-##    NEW:    site.mediawiki_message(key)
-##
-##    OLD:    mediawiki_messages.has(key, site)
-##    NEW:    site.has_mediawiki_message(key)
-##
-##    OLD:    mediawiki_messages.makepath(path)
-##    NEW:    wikipedia.makepath(path)
-##
-##########################################################################
-
-import warnings
-warnings.warn(
-"""The mediawiki_messages module is deprecated and no longer
-maintained; see the source code for new methods to replace
-calls to this module.""",
-            DeprecationWarning, stacklevel=2)
-
-
-import wikipedia
-import re, sys, pickle
-import os.path
-import time
-import codecs
-import urllib
-from BeautifulSoup import *
-
-__version__='$Id: mediawiki_messages.py 3731 2007-06-20 14:42:55Z russblau $'
-
-loaded = {}
-
-def get(key, site = None, allowreload = True):
-    site = site or wikipedia.getSite()
-    if loaded.has_key(site):
-        # Use cached copy if it exists.
-        dictionary = loaded[site]
-    else:
-        fn = 'mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)
-        try:
-            # find out how old our saved dump is (in seconds)
-            file_age = time.time() - os.path.getmtime(fn)
-            # if it's older than 1 month, reload it
-            if file_age > 30 * 24 * 60 * 60:
-                print 'Current MediaWiki message dump is one month old, reloading'
-                refresh_messages(site)
-        except OSError:
-            # no saved dumped exists yet
-            refresh_messages(site)
-        f = open(fn, 'r')
-        dictionary = pickle.load(f)
-        f.close()
-        loaded[site] = dictionary
-    key = key[0].lower() + key[1:]
-    if dictionary.has_key(key):
-        return dictionary[key]
-    elif allowreload:
-        refresh_messages(site = site)
-        return get(key, site = site, allowreload = False)
-    else:
-        raise KeyError('MediaWiki Key %s not found' % key)
-
-def has(key, site = None, allowreload = True):
-    try:
-        get(key, site, allowreload)
-        return True
-    except KeyError:
-        return False
-
-def makepath(path):
-    """ creates missing directories for the given path and
-        returns a normalized absolute version of the path.
-
-    - if the given path already exists in the filesystem
-      the filesystem is not modified.
-
-    - otherwise makepath creates directories along the given path
-      using the dirname() of the path. You may append
-      a '/' to the path if you want it to be a directory path.
-
-    from holger@trillke.net 2002/03/18
-    """
-    from os import makedirs
-    from os.path import normpath,dirname,exists,abspath
-
-    dpath = normpath(dirname(path))
-    if not exists(dpath): makedirs(dpath)
-    return normpath(abspath(path))
-    
-def refresh_messages(site = None):
-    site = site or wikipedia.getSite()
-    # get 'all messages' special page's path
-    path = site.allmessages_address()
-    print 'Retrieving MediaWiki messages for %s' % repr(site)
-    wikipedia.put_throttle() # It actually is a get, but a heavy one.
-    allmessages = site.getUrl(path)
-
-    print 'Parsing MediaWiki messages'
-    soup = BeautifulSoup(allmessages,
-                         convertEntities=BeautifulSoup.HTML_ENTITIES)
-    # The MediaWiki namespace in URL-encoded format, as it can contain
-    # non-ASCII characters and spaces.
-    quotedMwNs = urllib.quote(site.namespace(8).replace(' ', '_').encode(site.encoding()))
-    mw_url = site.path() + "?title=" + quotedMwNs + ":"
-    altmw_url = site.path() + "/" + quotedMwNs + ":"
-    nicemw_url = site.nice_get_address(quotedMwNs + ":")
-    shortmw_url = "/" + quotedMwNs + ":"
-    ismediawiki = lambda url:url and (url.startswith(mw_url)
-                                      or url.startswith(altmw_url)
-                                      or url.startswith(nicemw_url)
-                                      or url.startswith(shortmw_url))
-    # we will save the found key:value pairs here
-    dictionary = {}
-
-    try:
-        for keytag in soup('a', href=ismediawiki):
-            # Key strings only contain ASCII characters, so we can save them as
-            # strs
-            key = str(keytag.find(text=True))
-            keyrow = keytag.parent.parent
-            if keyrow['class'] == "orig":
-                valrow = keyrow.findNextSibling('tr')
-                assert valrow['class'] == "new"
-                value = unicode(valrow.td.string).strip()
-            elif keyrow['class'] == 'def':
-                value = unicode(keyrow('td')[1].string).strip()
-            else:
-                raise AssertionError("Unknown tr class value: %s" % keyrow['class'])
-            dictionary[key] = value
-    except Exception, e:
-        wikipedia.debugDump( 'MediaWiki_Msg', site, u'%s: %s while processing URL: %s' % (repr(e), str(e), unicode(path)), allmessages)
-        raise
-
-    # Save the dictionary to disk
-    # The file is stored in the mediawiki_messages subdir. Create if necessary. 
-    if dictionary == {}:
-        wikipedia.debugDump( 'MediaWiki_Msg', site, u'Error URL: '+unicode(path), allmessages )
-        sys.exit()
-    else:
-        f = open(makepath('mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)), 'w')
-        pickle.dump(dictionary, f)
-        f.close()
-    print "Loaded %i values from %s" % (len(dictionary.keys()), site)
-    #print dictionary['sitestatstext']
-
-def refresh_all_messages():
-    import dircache, time
-    filenames = dircache.listdir('mediawiki-messages')
-    message_filenameR = re.compile('mediawiki-messages-([a-z:]+)-([a-z:]+).dat')
-    for filename in filenames:
-        match = message_filenameR.match(filename)
-        if match:
-            family = match.group(1)
-            lang = match.group(2)
-            site = wikipedia.getSite(code = lang, fam = family)
-            refresh_messages(site)
-
-def main():
-    refresh_all = False
-    refresh = False
-    key = None
-    for arg in wikipedia.handleArgs():
-        if arg == '-all':
-            refresh_all = True
-        elif arg == '-refresh':
-            refresh = True
-        else:
-            key = arg
-    if key:
-        wikipedia.output(get(key), toStdout = True)
-    elif refresh_all:
-        refresh_all_messages()
-    elif refresh:
-        refresh_messages(wikipedia.getSite())
-    else:
-        wikipedia.showHelp('mediawiki_messages')
-
-if __name__ == "__main__":
-    try:
-        main()
-    except:
-        wikipedia.stopme()
-        raise
-    else:
-        wikipedia.stopme()
-
+# -*- coding: utf-8 -*-
+"""
+Allows access to the MediaWiki messages, that's the label texts of the MediaWiki
+software in the current language. These can be used in other bots.
+
+The function refresh_messages() downloads all the current messages and saves
+them to disk. It is run automatically when a bot first tries to access one of
+the messages. It can be updated manually by running this script, e.g. when
+somebody changed the current message at the wiki. The texts will also be
+reloaded automatically once a month.
+
+Syntax: python mediawiki_messages [-all]
+
+Command line options:
+    -refresh - Reloads messages for the home wiki or for the one defined via
+               the -lang and -family parameters.
+
+    -all     - Reloads messages for all wikis where messages are already present
+
+    If another parameter is given, it will be interpreted as a MediaWiki key.
+    The script will then output the respective value, without refreshing..
+    
+"""
+
+# (C) Daniel Herding, 2004
+#
+# Distributed under the terms of the MIT license.
+
+##THIS MODULE IS DEPRECATED AND HAS BEEN REPLACED BY NEW FUNCTIONALITY IN
+##WIKIPEDIA.PY.  It is being retained solely for compatibility in case any
+##custom-written bots rely upon it.  Bot authors should replace any uses
+##of this module as follows:
+##
+##    OLD:    mediawiki_messages.get(key, site)
+##    NEW:    site.mediawiki_message(key)
+##
+##    OLD:    mediawiki_messages.has(key, site)
+##    NEW:    site.has_mediawiki_message(key)
+##
+##    OLD:    mediawiki_messages.makepath(path)
+##    NEW:    wikipedia.makepath(path)
+##
+##########################################################################
+
+import warnings
+warnings.warn(
+"""The mediawiki_messages module is deprecated and no longer
+maintained; see the source code for new methods to replace
+calls to this module.""",
+            DeprecationWarning, stacklevel=2)
+
+
+import wikipedia
+import re, sys, pickle
+import os.path
+import time
+import codecs
+import urllib
+from BeautifulSoup import *
+
+__version__='$Id: mediawiki_messages.py 3731 2007-06-20 14:42:55Z russblau $'
+
+loaded = {}
+
+def get(key, site = None, allowreload = True):
+    site = site or wikipedia.getSite()
+    if loaded.has_key(site):
+        # Use cached copy if it exists.
+        dictionary = loaded[site]
+    else:
+        fn = 'mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)
+        try:
+            # find out how old our saved dump is (in seconds)
+            file_age = time.time() - os.path.getmtime(fn)
+            # if it's older than 1 month, reload it
+            if file_age > 30 * 24 * 60 * 60:
+                print 'Current MediaWiki message dump is one month old, reloading'
+                refresh_messages(site)
+        except OSError:
+            # no saved dumped exists yet
+            refresh_messages(site)
+        f = open(fn, 'r')
+        dictionary = pickle.load(f)
+        f.close()
+        loaded[site] = dictionary
+    key = key[0].lower() + key[1:]
+    if dictionary.has_key(key):
+        return dictionary[key]
+    elif allowreload:
+        refresh_messages(site = site)
+        return get(key, site = site, allowreload = False)
+    else:
+        raise KeyError('MediaWiki Key %s not found' % key)
+
+def has(key, site = None, allowreload = True):
+    try:
+        get(key, site, allowreload)
+        return True
+    except KeyError:
+        return False
+
+def makepath(path):
+    """ creates missing directories for the given path and
+        returns a normalized absolute version of the path.
+
+    - if the given path already exists in the filesystem
+      the filesystem is not modified.
+
+    - otherwise makepath creates directories along the given path
+      using the dirname() of the path. You may append
+      a '/' to the path if you want it to be a directory path.
+
+    from holger@trillke.net 2002/03/18
+    """
+    from os import makedirs
+    from os.path import normpath,dirname,exists,abspath
+
+    dpath = normpath(dirname(path))
+    if not exists(dpath): makedirs(dpath)
+    return normpath(abspath(path))
+    
+def refresh_messages(site = None):
+    site = site or wikipedia.getSite()
+    # get 'all messages' special page's path
+    path = site.allmessages_address()
+    print 'Retrieving MediaWiki messages for %s' % repr(site)
+    wikipedia.put_throttle() # It actually is a get, but a heavy one.
+    allmessages = site.getUrl(path)
+
+    print 'Parsing MediaWiki messages'
+    soup = BeautifulSoup(allmessages,
+                         convertEntities=BeautifulSoup.HTML_ENTITIES)
+    # The MediaWiki namespace in URL-encoded format, as it can contain
+    # non-ASCII characters and spaces.
+    quotedMwNs = urllib.quote(site.namespace(8).replace(' ', '_').encode(site.encoding()))
+    mw_url = site.path() + "?title=" + quotedMwNs + ":"
+    altmw_url = site.path() + "/" + quotedMwNs + ":"
+    nicemw_url = site.nice_get_address(quotedMwNs + ":")
+    shortmw_url = "/" + quotedMwNs + ":"
+    ismediawiki = lambda url:url and (url.startswith(mw_url)
+                                      or url.startswith(altmw_url)
+                                      or url.startswith(nicemw_url)
+                                      or url.startswith(shortmw_url))
+    # we will save the found key:value pairs here
+    dictionary = {}
+
+    try:
+        for keytag in soup('a', href=ismediawiki):
+            # Key strings only contain ASCII characters, so we can save them as
+            # strs
+            key = str(keytag.find(text=True))
+            keyrow = keytag.parent.parent
+            if keyrow['class'] == "orig":
+                valrow = keyrow.findNextSibling('tr')
+                assert valrow['class'] == "new"
+                value = unicode(valrow.td.string).strip()
+            elif keyrow['class'] == 'def':
+                value = unicode(keyrow('td')[1].string).strip()
+            else:
+                raise AssertionError("Unknown tr class value: %s" % keyrow['class'])
+            dictionary[key] = value
+    except Exception, e:
+        wikipedia.debugDump( 'MediaWiki_Msg', site, u'%s: %s while processing URL: %s' % (repr(e), str(e), unicode(path)), allmessages)
+        raise
+
+    # Save the dictionary to disk
+    # The file is stored in the mediawiki_messages subdir. Create if necessary. 
+    if dictionary == {}:
+        wikipedia.debugDump( 'MediaWiki_Msg', site, u'Error URL: '+unicode(path), allmessages )
+        sys.exit()
+    else:
+        f = open(makepath('mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)), 'w')
+        pickle.dump(dictionary, f)
+        f.close()
+    print "Loaded %i values from %s" % (len(dictionary.keys()), site)
+    #print dictionary['sitestatstext']
+
+def refresh_all_messages():
+    import dircache, time
+    filenames = dircache.listdir('mediawiki-messages')
+    message_filenameR = re.compile('mediawiki-messages-([a-z:]+)-([a-z:]+).dat')
+    for filename in filenames:
+        match = message_filenameR.match(filename)
+        if match:
+            family = match.group(1)
+            lang = match.group(2)
+            site = wikipedia.getSite(code = lang, fam = family)
+            refresh_messages(site)
+
+def main():
+    refresh_all = False
+    refresh = False
+    key = None
+    for arg in wikipedia.handleArgs():
+        if arg == '-all':
+            refresh_all = True
+        elif arg == '-refresh':
+            refresh = True
+        else:
+            key = arg
+    if key:
+        wikipedia.output(get(key), toStdout = True)
+    elif refresh_all:
+        refresh_all_messages()
+    elif refresh:
+        refresh_messages(wikipedia.getSite())
+    else:
+        wikipedia.showHelp('mediawiki_messages')
+
+if __name__ == "__main__":
+    try:
+        main()
+    except:
+        wikipedia.stopme()
+        raise
+    else:
+        wikipedia.stopme()
+
Property changes on: trunk/pywikipedia/archive/mediawiki_messages.py
___________________________________________________________________
Name: svn:eol-style
   + native
Modified: trunk/pywikipedia/category_redirect.py
===================================================================
--- trunk/pywikipedia/category_redirect.py	2008-07-05 12:57:13 UTC (rev 5674)
+++ trunk/pywikipedia/category_redirect.py	2008-07-05 18:21:03 UTC (rev 5675)
@@ -1,71 +1,71 @@
-#!/usr/bin/python
-# -*- coding: utf-8  -*-
-"""
-Script to clean up http://commons.wikimedia.org/wiki/Category:Non-empty_category_redirects
-
-Moves all images, pages and categories in redirect categories to the target category.
-
-"""
-
-#
-# (C) Multichill, 2008
-#
-# Distributed under the terms of the MIT license.
-#
-
-import wikipedia, config, catlib
-from category import *
-
-redirect_templates = [u'Category redirect', u'Categoryredirect', u'See cat', u'Seecat', u'Catredirect', u'Cat redirect', u'CatRed', u'Catredir']
-move_message = u'Moving from [[%s|%s]] to [[%s|%s]] (following [[Template:Category redirect|category redirect]])'
-
-def get_redirect_cat(category=None):
-    '''
-    Return the target category
-    '''
-    destination = None
-    site = wikipedia.getSite(u'commons', u'commons')
-    for template in category.templatesWithParams():
-        if ((template[0] in redirect_templates) and (len(template[1]) > 0)):
-            #destination = template[1][0];
-            destination =catlib.Category(site, template[1][0])
-            if not destination.exists():
-                return None
-    return destination
-    
-
-def main():
-    '''
-    Main loop. Loop over all categories of Category:Non-empty_category_redirects and move all content.
-    '''    
-
-    site = wikipedia.getSite(u'commons', u'commons')
-    dirtycat = catlib.Category(site, u'Category:Non-empty category redirects')
-    destination = None
-    catbot = None
-    
-    for old_category in dirtycat.subcategories():
-        destination = get_redirect_cat(old_category)
-        if destination:
-            wikipedia.output(destination.title())
-            for page in old_category.articles():
-                try:                                
-                    catlib.change_category(page, old_category, destination, move_message % (old_category.title(), old_category.titleWithoutNamespace(), destination.title(), destination.titleWithoutNamespace()))
-                except wikipedia.IsRedirectPage:
-                    wikipedia.output(page.title() + u' is a redirect!')
-            for cat in old_category.subcategories():
-                try:                
-                    catlib.change_category(cat, old_category, destination, move_message % (old_category.title(), old_category.titleWithoutNamespace(), destination.title(), destination.titleWithoutNamespace()))
-                except wikipedia.IsRedirectPage:
-                    wikipedia.output(page.title() + u' is a redirect!')
-        #Dummy edit to refresh the page, shouldnt show up in any logs.
-        try:
-            old_category.put(old_category.get())
-        except:
-            wikipedia.output(u'Dummy edit at ' + old_category.title() + u' failed')        
-
-if __name__ == "__main__":
-    try:
-        main()
-    finally:
-        wikipedia.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+"""
+Script to clean up http://commons.wikimedia.org/wiki/Category:Non-empty_category_redirects
+
+Moves all images, pages and categories in redirect categories to the target category.
+
+"""
+
+#
+# (C) Multichill, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+
+import wikipedia, config, catlib
+from category import *
+
+redirect_templates = [u'Category redirect', u'Categoryredirect', u'See cat', u'Seecat', u'Catredirect', u'Cat redirect', u'CatRed', u'Catredir']
+move_message = u'Moving from [[%s|%s]] to [[%s|%s]] (following [[Template:Category redirect|category redirect]])'
+
+def get_redirect_cat(category=None):
+    '''
+    Return the target category
+    '''
+    destination = None
+    site = wikipedia.getSite(u'commons', u'commons')
+    for template in category.templatesWithParams():
+        if ((template[0] in redirect_templates) and (len(template[1]) > 0)):
+            #destination = template[1][0];
+            destination =catlib.Category(site, template[1][0])
+            if not destination.exists():
+                return None
+    return destination
+    
+
+def main():
+    '''
+    Main loop. Loop over all categories of Category:Non-empty_category_redirects and move all content.
+    '''    
+
+    site = wikipedia.getSite(u'commons', u'commons')
+    dirtycat = catlib.Category(site, u'Category:Non-empty category redirects')
+    destination = None
+    catbot = None
+    
+    for old_category in dirtycat.subcategories():
+        destination = get_redirect_cat(old_category)
+        if destination:
+            wikipedia.output(destination.title())
+            for page in old_category.articles():
+                try:                                
+                    catlib.change_category(page, old_category, destination, move_message % (old_category.title(), old_category.titleWithoutNamespace(), destination.title(), destination.titleWithoutNamespace()))
+                except wikipedia.IsRedirectPage:
+                    wikipedia.output(page.title() + u' is a redirect!')
+            for cat in old_category.subcategories():
+                try:                
+                    catlib.change_category(cat, old_category, destination, move_message % (old_category.title(), old_category.titleWithoutNamespace(), destination.title(), destination.titleWithoutNamespace()))
+                except wikipedia.IsRedirectPage:
+                    wikipedia.output(page.title() + u' is a redirect!')
+        #Dummy edit to refresh the page, shouldnt show up in any logs.
+        try:
+            old_category.put(old_category.get())
+        except:
+            wikipedia.output(u'Dummy edit at ' + old_category.title() + u' failed')        
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        wikipedia.stopme()
Property changes on: trunk/pywikipedia/category_redirect.py
___________________________________________________________________
Name: svn:eol-style
   + native
Modified: trunk/pywikipedia/commonsdelinker/plugins.txt
===================================================================
--- trunk/pywikipedia/commonsdelinker/plugins.txt	2008-07-05 12:57:13 UTC (rev 5674)
+++ trunk/pywikipedia/commonsdelinker/plugins.txt	2008-07-05 18:21:03 UTC (rev 5675)
@@ -1,53 +1,53 @@
-CommonsDelinker supports a plugin system, which allows modifying the delink and
-replace parameters on a case by case basis. 
-
-Plugins should be registered in the configuration file. CommonsDelinker expects
-the configuration value CommonsDelinker['plugins'] to be an iterable object.
-The items of this iterable should be module.object strings of the plugin. The 
-plugin is expected to reside as module.py in commonsdelinker/plugins. The
-object should exist and should be a callable object or type with an attribute 
-'hook' being a string indicating the hook name.
-
-Some parameters are modyfiable by the plugin. Those include the mutable objects
-and some immutable object wrapped in an ImmutableByReference object. The value
-of such an object can be get/set by the get and set method. Modyfiable 
-parameters are preceded by an ampersand & in this documentation.
-
-A hook that gives False as return value will terminate the hook chain and for
-most hooks also terminate the caller.
-
-== List of hooks and their parameters ==
-
-before_delink(image, usage, timestamp, admin, reason, replacement)
-  Called once per image. Returing False will cancel delinking this image.
-
-simple_replace(page, summary, image, &replacement, match, groups)
-gallery_replace(page, summary, image, &replacement, match, groups)
-complex_replace(page, summary, image, &replacement, match, groups)
-  Called each time an occerence is to be replaced. Returning False will not 
-  replace this occerence.
-
-before_save(page, text, &new_text, &summary)
-  Called before the page is saved. Returning False will not save the page.
-
-after_delink(image, usage, timestamp, admin, reason, replacement)
-  Called once per image after delink.
-
-== Example ==
-# Saves a diff for every delink.
-import difflib
-
-class Diff(object):
-	hook = 'before_save'
-	def __init__(self, CommonsDelinker):
-		self.CommonsDelinker = CommonsDelinker
-	def __call__(self, page, text, new_text, summary):
-		diff = difflib.context_diff(
-			text.encode('utf-8').splitlines(True),
-			new_text.get().encode('utf-8').splitlines(True))
-			
-		f = open((u'diff/%s-%s-%s.txt' % (page.urlname().replace('/', '-'),
-			page.site().dbName(), page.editTime())).encode('utf-8', 'ignore'), 'w')
-						
-		f.writelines(diff)
-		f.close()
\ No newline at end of file
+CommonsDelinker supports a plugin system, which allows modifying the delink and
+replace parameters on a case by case basis.
+
+Plugins should be registered in the configuration file. CommonsDelinker expects
+the configuration value CommonsDelinker['plugins'] to be an iterable object.
+The items of this iterable should be module.object strings of the plugin. The
+plugin is expected to reside as module.py in commonsdelinker/plugins. The
+object should exist and should be a callable object or type with an attribute
+'hook' being a string indicating the hook name.
+
+Some parameters are modifiable by the plugin. Those include the mutable objects
+and some immutable objects wrapped in an ImmutableByReference object. The value
+of such an object can be read/written via its get and set methods. Modifiable
+parameters are preceded by an ampersand & in this documentation.
+
+A hook that gives False as return value will terminate the hook chain and for
+most hooks also terminate the caller.
+
+== List of hooks and their parameters ==
+
+before_delink(image, usage, timestamp, admin, reason, replacement)
+  Called once per image. Returning False will cancel delinking this image.
+
+simple_replace(page, summary, image, &replacement, match, groups)
+gallery_replace(page, summary, image, &replacement, match, groups)
+complex_replace(page, summary, image, &replacement, match, groups)
+  Called each time an occurrence is to be replaced. Returning False will not
+  replace this occurrence.
+
+before_save(page, text, &new_text, &summary)
+  Called before the page is saved. Returning False will not save the page.
+
+after_delink(image, usage, timestamp, admin, reason, replacement)
+  Called once per image after delink.
+
+== Example ==
+# Saves a diff for every delink.
+import difflib
+
+class Diff(object):
+	hook = 'before_save'
+	def __init__(self, CommonsDelinker):
+		self.CommonsDelinker = CommonsDelinker
+	def __call__(self, page, text, new_text, summary):
+		diff = difflib.context_diff(
+			text.encode('utf-8').splitlines(True),
+			new_text.get().encode('utf-8').splitlines(True))
+
+		f = open((u'diff/%s-%s-%s.txt' % (page.urlname().replace('/', '-'),
+			page.site().dbName(), page.editTime())).encode('utf-8', 'ignore'), 'w')
+
+		f.writelines(diff)
+		f.close()
Property changes on: trunk/pywikipedia/commonsdelinker/plugins.txt
___________________________________________________________________
Name: svn:eol-style
   + native
Modified: trunk/pywikipedia/families/README-family.txt
===================================================================
--- trunk/pywikipedia/families/README-family.txt	2008-07-05 12:57:13 UTC (rev 5674)
+++ trunk/pywikipedia/families/README-family.txt	2008-07-05 18:21:03 UTC (rev 5675)
@@ -1,183 +1,183 @@
-How to create a new family file to add a new wiki to the bot framework.
-
-(c) 2008, the Pywikipediabot team
-
-Copy and paste the text below "COPY HERE" into your favorite text editor, and
-save it as WIKINAME_family.py in the families/ subdirectory.  Replace
-WIKINAME with the name you want to use for the new wiki family, making sure
-that it doesn't duplicate any existing name.
-
-A "family" is any group of wikis located on the same server; usually they
-are versions of the same type of content in different languages, but this
-isn't required.  A family can consist of just one wiki, or more; if there is
-more than one wiki, each wiki needs to be identified by a unique code.
-
-After you copy the text, go through and edit it, based upon the comment
-lines.  First, do a global search-and-replace to change all instances of
-'WIKINAME' to your actual wiki name.  Everything in the example below is
-based on the bot's default settings, except for the namespace names, which
-are made-up examples. You only need to change it if your wiki's value is
-different from the default. You can delete anything that is not indicated as
-"REQUIRED", if your new wiki doesn't vary from the default settings.
-
-== COPY HERE ==
-
-# -*- coding: utf-8  -*-              # REQUIRED
-import config, family, urllib         # REQUIRED
-
-class Family(family.Family):          # REQUIRED
-    def __init__(self):               # REQUIRED
-        family.Family.__init__(self)  # REQUIRED
-        self.name = 'WIKINAME'        # REQUIRED; replace with actual name
-
-        self.langs = {                # REQUIRED
-            'en': 'www.example.com',  # Include one line for each wiki in family
-            'fr': 'www.example.fr',   # in the format 'code': 'hostname',
-        }
-
-        # Translation used on all wikis for the different namespaces.
-        # Most namespaces are inherited from family.Family.
-        # Check the family.py file (in main directory) to see the standard
-        # namespace translations for each known language.
-
-        # You only need to enter translations that differ from the default.
-        # There are two ways of entering namespace translations.
-        # 1.  If you only need to change the translation of a particular
-        #     namespace for one or two languages, use this format:
-        self.namespaces[2]['en'] = u'Wikiuser'
-        self.namespaces[3]['en'] = u'Wikiuser talk'
-
-        # 2.  If you need to change the translation for many languages
-        #     for the same namespace number, use this format (this is common
-        #     for namespaces 4 and 5, because these are usually given a
-        #     unique name for each wiki):
-        self.namespaces[4] = {
-            '_default': [u'WIKINAME', self.namespaces[4]['_default']], # REQUIRED
-            'de': 'Name des wiki',
-            'es': 'Nombre del wiki',
-            'fr': 'Nom du wiki',
-            # ETC.
-        }
-        
-        # Wikimedia wikis all use "bodyContent" as the id of the <div>
-        # element that contains the actual page content; change this for
-        # wikis that use something else (e.g., mozilla family)
-        self.content_id = "bodyContent"
-
-        # On most wikis page names must start with a capital letter, but some
-        # languages don't use this.  This should be a list of languages that
-        # _don't_ require the first letter to be capitalized; e.g.,
-        # self.nocapitalize = ['foo', 'bar']
-        self.nocapitalize = []
-
-        # SETTINGS FOR WIKIS THAT USE DISAMBIGUATION PAGES:
-        
-        # A list of disambiguation template names in different languages
-        self.disambiguationTemplates = {
-            'en': ['disambig', 'disambiguation'],
-        }
-
-        # A list with the name of the category containing disambiguation
-        # pages for the various languages. Only one category per language,
-        # and without the namespace, so add things like:
-        self.disambcatname = {
-            'en': "Disambiguation",
-        }
-
-        # SETTINGS FOR WIKIS THAT USE INTERLANGUAGE LINKS:
-        
-        # attop is a list of languages that prefer to have the interwiki
-        # links at the top of the page.
-        self.interwiki_attop = []
-
-        # on_one_line is a list of languages that want the interwiki links
-        # one-after-another on a single line
-        self.interwiki_on_one_line = []
-
-        # String used as separator between interwiki links and the text
-        self.interwiki_text_separator = '\r\n\r\n'
-
-        # Which languages have a special order for putting interlanguage links,
-        # and what order is it? If a language is not in interwiki_putfirst,
-        # alphabetical order on language code is used. For languages that are in
-        # interwiki_putfirst, interwiki_putfirst is checked first, and
-        # languages are put in the order given there. All other languages are put
-        # after those, in code-alphabetical order.
-        self.interwiki_putfirst = {}
-
-        # Languages in interwiki_putfirst_doubled should have a number plus a list
-        # of languages. If there are at least the number of interwiki links, all
-        # languages in the list should be placed at the front as well as in the
-        # normal list.
-        self.interwiki_putfirst_doubled = {}
-
-        # Some families, e. g. commons and meta, are not multilingual and
-        # forward interlanguage links to another family (wikipedia).
-        # These families can set this variable to the name of the target
-        # family.
-        self.interwiki_forward = None
-
-        # Which language codes no longer exist and by which language code
-        # should they be replaced. If for example the language with code xx:
-        # has been replaced by code yy:, add {'xx':'yy'} to obsolete.
-        # If all links to language xx: should be removed, add {'xx': None}.
-        self.obsolete = {}
-
-        # SETTINGS FOR CATEGORY LINKS:
-        
-        # Languages that want the category links at the top of the page
-        self.category_attop = []
-        
-        # languages that want the category links
-        # one-after-another on a single line
-        self.category_on_one_line = []
-
-        # String used as separator between category links and the text
-        self.category_text_separator = '\r\n\r\n'
-
-        # When both at the bottom should categories come after interwikilinks?
-        self.categories_last = []
-
-        # SETTINGS FOR LDAP AUTHENTICATION
-        # If your wiki uses:
-        #  http://www.mediawiki.org/wiki/Extension:LDAP_Authentication.
-        # then uncomment this line and define the user's domain required
-        # at login.
-        #self.name = 'domain here'
-
-    def protocol(self, code):
-        """
-        Can be overridden to return 'https'. Other protocols are not supported.
-        """
-        return 'http'
-
-    def scriptpath(self, code):
-        """The prefix used to locate scripts on this wiki.
-
-        This is the value displayed when you enter {{SCRIPTPATH}} on a
-        wiki page (often displayed at [[Help:Variables]] if the wiki has
-        copied the master help page correctly).
-
-        The default value is the one used on Wikimedia Foundation wikis,
-        but needs to be overridden in the family file for any wiki that
-        uses a different value.
-
-        """
-        return '/w'
-
-    # IMPORTANT: if your wiki does not support the api.php interface,
-    # you must uncomment the second line of this method:
-    def apipath(self, code):
-        # raise NotImplementedError, "%s wiki family does not support api.php" % self.name
-        return '%s/api.php' % self.scriptpath(code)
-
-    # Which version of MediaWiki is used?
-    def version(self, code):
-        # Replace with the actual version being run on your wiki
-        return '1.13alpha'
-
-    def code2encoding(self, code):
-        """Return the encoding for a specific language wiki"""
-        # Most wikis nowadays use UTF-8, but change this if yours uses
-        # a different encoding
-        return 'utf-8'
+How to create a new family file to add a new wiki to the bot framework.
+
+(c) 2008, the Pywikipediabot team
+
+Copy and paste the text below "COPY HERE" into your favorite text editor, and
+save it as WIKINAME_family.py in the families/ subdirectory.  Replace
+WIKINAME with the name you want to use for the new wiki family, making sure
+that it doesn't duplicate any existing name.
+
+A "family" is any group of wikis located on the same server; usually they
+are versions of the same type of content in different languages, but this
+isn't required.  A family can consist of just one wiki, or more; if there is
+more than one wiki, each wiki needs to be identified by a unique code.
+
+After you copy the text, go through and edit it, based upon the comment
+lines.  First, do a global search-and-replace to change all instances of
+'WIKINAME' to your actual wiki name.  Everything in the example below is
+based on the bot's default settings, except for the namespace names, which
+are made-up examples. You only need to change it if your wiki's value is
+different from the default. You can delete anything that is not indicated as
+"REQUIRED", if your new wiki doesn't vary from the default settings.
+
+== COPY HERE ==
+
+# -*- coding: utf-8  -*-              # REQUIRED
+import config, family, urllib         # REQUIRED
+
+class Family(family.Family):          # REQUIRED
+    def __init__(self):               # REQUIRED
+        family.Family.__init__(self)  # REQUIRED
+        self.name = 'WIKINAME'        # REQUIRED; replace with actual name
+
+        self.langs = {                # REQUIRED
+            'en': 'www.example.com',  # Include one line for each wiki in family
+            'fr': 'www.example.fr',   # in the format 'code': 'hostname',
+        }
+
+        # Translation used on all wikis for the different namespaces.
+        # Most namespaces are inherited from family.Family.
+        # Check the family.py file (in main directory) to see the standard
+        # namespace translations for each known language.
+
+        # You only need to enter translations that differ from the default.
+        # There are two ways of entering namespace translations.
+        # 1.  If you only need to change the translation of a particular
+        #     namespace for one or two languages, use this format:
+        self.namespaces[2]['en'] = u'Wikiuser'
+        self.namespaces[3]['en'] = u'Wikiuser talk'
+
+        # 2.  If you need to change the translation for many languages
+        #     for the same namespace number, use this format (this is common
+        #     for namespaces 4 and 5, because these are usually given a
+        #     unique name for each wiki):
+        self.namespaces[4] = {
+            '_default': [u'WIKINAME', self.namespaces[4]['_default']], # REQUIRED
+            'de': 'Name des wiki',
+            'es': 'Nombre del wiki',
+            'fr': 'Nom du wiki',
+            # ETC.
+        }
+
+        # Wikimedia wikis all use "bodyContent" as the id of the <div>
+        # element that contains the actual page content; change this for
+        # wikis that use something else (e.g., mozilla family)
+        self.content_id = "bodyContent"
+
+        # On most wikis page names must start with a capital letter, but some
+        # languages don't use this.  This should be a list of languages that
+        # _don't_ require the first letter to be capitalized; e.g.,
+        # self.nocapitalize = ['foo', 'bar']
+        self.nocapitalize = []
+
+        # SETTINGS FOR WIKIS THAT USE DISAMBIGUATION PAGES:
+
+        # A list of disambiguation template names in different languages
+        self.disambiguationTemplates = {
+            'en': ['disambig', 'disambiguation'],
+        }
+
+        # A list with the name of the category containing disambiguation
+        # pages for the various languages. Only one category per language,
+        # and without the namespace, so add things like:
+        self.disambcatname = {
+            'en': "Disambiguation",
+        }
+
+        # SETTINGS FOR WIKIS THAT USE INTERLANGUAGE LINKS:
+
+        # attop is a list of languages that prefer to have the interwiki
+        # links at the top of the page.
+        self.interwiki_attop = []
+
+        # on_one_line is a list of languages that want the interwiki links
+        # one-after-another on a single line
+        self.interwiki_on_one_line = []
+
+        # String used as separator between interwiki links and the text
+        self.interwiki_text_separator = '\r\n\r\n'
+
+        # Which languages have a special order for putting interlanguage links,
+        # and what order is it? If a language is not in interwiki_putfirst,
+        # alphabetical order on language code is used. For languages that are in
+        # interwiki_putfirst, interwiki_putfirst is checked first, and
+        # languages are put in the order given there. All other languages are put
+        # after those, in code-alphabetical order.
+        self.interwiki_putfirst = {}
+
+        # Languages in interwiki_putfirst_doubled should have a number plus a list
+        # of languages. If there are at least the number of interwiki links, all
+        # languages in the list should be placed at the front as well as in the
+        # normal list.
+        self.interwiki_putfirst_doubled = {}
+
+        # Some families, e. g. commons and meta, are not multilingual and
+        # forward interlanguage links to another family (wikipedia).
+        # These families can set this variable to the name of the target
+        # family.
+        self.interwiki_forward = None
+
+        # Which language codes no longer exist and by which language code
+        # should they be replaced. If for example the language with code xx:
+        # has been replaced by code yy:, add {'xx':'yy'} to obsolete.
+        # If all links to language xx: should be removed, add {'xx': None}.
+        self.obsolete = {}
+
+        # SETTINGS FOR CATEGORY LINKS:
+
+        # Languages that want the category links at the top of the page
+        self.category_attop = []
+
+        # languages that want the category links
+        # one-after-another on a single line
+        self.category_on_one_line = []
+
+        # String used as separator between category links and the text
+        self.category_text_separator = '\r\n\r\n'
+
+        # When both at the bottom should categories come after interwikilinks?
+        self.categories_last = []
+
+        # SETTINGS FOR LDAP AUTHENTICATION
+        # If your wiki uses:
+        #  http://www.mediawiki.org/wiki/Extension:LDAP_Authentication.
+        # then uncomment this line and define the user's domain required
+        # at login.
+        #self.name = 'domain here'
+
+    def protocol(self, code):
+        """
+        Can be overridden to return 'https'. Other protocols are not supported.
+        """
+        return 'http'
+
+    def scriptpath(self, code):
+        """The prefix used to locate scripts on this wiki.
+
+        This is the value displayed when you enter {{SCRIPTPATH}} on a
+        wiki page (often displayed at [[Help:Variables]] if the wiki has
+        copied the master help page correctly).
+
+        The default value is the one used on Wikimedia Foundation wikis,
+        but needs to be overridden in the family file for any wiki that
+        uses a different value.
+
+        """
+        return '/w'
+
+    # IMPORTANT: if your wiki does not support the api.php interface,
+    # you must uncomment the second line of this method:
+    def apipath(self, code):
+        # raise NotImplementedError, "%s wiki family does not support api.php" % self.name
+        return '%s/api.php' % self.scriptpath(code)
+
+    # Which version of MediaWiki is used?
+    def version(self, code):
+        # Replace with the actual version being run on your wiki
+        return '1.13alpha'
+
+    def code2encoding(self, code):
+        """Return the encoding for a specific language wiki"""
+        # Most wikis nowadays use UTF-8, but change this if yours uses
+        # a different encoding
+        return 'utf-8'
Property changes on: trunk/pywikipedia/families/README-family.txt
___________________________________________________________________
Name: svn:eol-style
   + native
Modified: trunk/pywikipedia/protect.py
===================================================================
--- trunk/pywikipedia/protect.py	2008-07-05 12:57:13 UTC (rev 5674)
+++ trunk/pywikipedia/protect.py	2008-07-05 18:21:03 UTC (rev 5675)
@@ -1,252 +1,252 @@
-# -*- coding: utf-8 -*-
-"""
-This script can be used to protect and unprotect pages en masse.
-Of course, you will need an admin account on the relevant wiki.
- 
-Syntax: python protect.py OPTION...
- 
-Command line options:
- 
--page:       Protect specified page
--cat:        Protect all pages in the given category.
--nosubcats:  Don't protect pages in the subcategories.
--links:      Protect all pages linked from a given page.
--file:       Protect all pages listed in a text file.
--ref:        Protect all pages referring from a given page.
--images:     Protect all images used on a given page.
--always:     Don't prompt to protect pages, just do it.
--summary:    Supply a custom edit summary.
--unprotect:   Actually unprotect pages instead of protecting
--edit:PROTECTION_LEVEL Set edit protection level to PROTECTION_LEVEL
--move:PROTECTION_LEVEL Set move protection level to PROTECTION_LEVEL
-
-## Without support ##
-## -create:PROTECTION_LEVEL Set move protection level to PROTECTION_LEVEL ##
- 
-Values for PROTECTION_LEVEL are: sysop, autoconfirmed, none.
-If an operation parameter (edit, move or create) is not specified, default
-protection level is 'sysop' (or 'none' if -unprotect).
- 
-Examples:
- 
-Protect everything in the category "To protect" prompting.
-    python protect.py -cat:"To protect" -always
- 
-Unprotect all pages listed in text file "unprotect.txt" without prompting.
-    python protect.py -file:unprotect.txt -unprotect
-"""
- 
-# Written by http://it.wikisource.org/wiki/Utente:Qualc1
-# Created by modifying delete.py
-__version__ = '$Id: delete.py 4946 2008-01-29 14:58:25Z wikipedian $'
- 
-#
-# Distributed under the terms of the MIT license.
-#
- 
-import wikipedia, catlib
-import pagegenerators
- 
-# Summary messages for protecting from a category.
-msg_simple_protect = {
-    'en': u'Bot: Protecting a list of files.',
-    'ar': u'بوت: حماية قائمة من الملفات.',
-    'it': u'Bot: Protezione di una lista di pagine.',
-    'pt': u'Bot: Protegendo uma lista de artigos.',
-}
-msg_protect_category = {
-    'en': u'Robot - Protecting all pages from category %s',
-    'ar': u'روبوت - حماية كل الصفحات من التصنيف %s',
-    'it': u'Bot: Protezione di tutte le pagine nella categoria %s.',
-    'pt': u'Bot: Protegendo todos os artigos da categoria %s',
-}
-msg_protect_links = {
-    'en': u'Robot - Protecting all pages linked from %s',
-    'ar': u'روبوت - حماية كل الصفحات الموصولة من %s',
-    'it': u'Bot: Protezione di tutte le pagine linkate da %s.',
-    'pt': u'Bot: Protegendo todos os artigos ligados a %s',
-}
-msg_protect_ref = {
-    'en': u'Robot - Protecting all pages referring from %s',
-    'ar': u'روبوت - حماية كل الصفحات الراجعة من %s',
-    'it': u'Bot: Protezione di tutte le pagine con link verso %s.',
-    'pt': u'Bot: Protegendo todos os artigos afluentes a %s',
-}
-msg_protect_images = {
-    'en': u'Robot - Protecting all images on page %s',
-    'ar': u'روبوت - حماية كل الصور في الصفحة %s',
-    'it': u'Bot: Protezione di tutte le immagini presenti in %s.',
-    'pt': u'Bot: Protegendo todas as imagens do artigo %s',
-}
- 
-class ProtectionRobot:
-    """
-    This robot allows protection of pages en masse.
-    """
- 
-    def __init__(self, generator, summary, always = False, unprotect=False,
-                edit='sysop', move='sysop', create='sysop'):
-        """
-        Arguments:
-            * generator - A page generator.
-            * always - Protect without prompting?
-            * edit, move, create - protection level for these operations
-            * unprotect - unprotect pages (and ignore edit, move, create params)
-        """
-        self.generator = generator
-        self.summary = summary
-        self.always = always
-        self.unprotect = unprotect
-        self.edit = edit
-        self.move = move
- 
-    def run(self):
-        """
-        Starts the robot's action.
-        """
-        #Loop through everything in the page generator and (un)protect it.
-        for page in self.generator:
-            wikipedia.output(u'Processing page %s' % page.title())
-            print self.edit, self.move#, self.create
-            page.protect(unprotect=self.unprotect, reason=self.summary, prompt=self.always,
-                        edit=self.edit, move=self.move)
- 
-# Asks a valid protection level for "operation".
-# Returns the protection level chosen by user.
-def choiceProtectionLevel(operation, default):
-    default = default[0]
-    firstChar = map(lambda level: level[0], protectionLevels)
-    choiceChar = wikipedia.inputChoice('Choice a protection level to %s:' % operation, 
-                            protectionLevels, firstChar, default = default)
-    for level in protectionLevels:
-        if level.startswith(choiceChar):
-            return level
- 
-def main():
-    global protectionLevels
-    protectionLevels = ['sysop', 'autoconfirmed', 'none']
- 
-    pageName = ''
-    summary = ''
-    always = False
-    doSinglePage = False
-    doCategory = False
-    protectSubcategories = True
-    doRef = False
-    doLinks = False
-    doImages = False
-    fileName = ''
-    gen = None
-    edit = ''
-    move = ''
-    defaultProtection = 'sysop'
- 
-    # read command line parameters
-    for arg in wikipedia.handleArgs():
-        if arg == '-always':
-            always = True
-        elif arg.startswith('-file'):
-            if len(arg) == len('-file'):
-                fileName = wikipedia.input(u'Enter name of file to protect pages from:')
-            else:
-                fileName = arg[len('-file:'):]
-        elif arg.startswith('-summary'):
-            if len(arg) == len('-summary'):
-                summary = wikipedia.input(u'Enter a reason for the protection:')
-            else:
-                summary = arg[len('-summary:'):]
-        elif arg.startswith('-cat'):
-            doCategory = True
-            if len(arg) == len('-cat'):
-                pageName = wikipedia.input(u'Enter the category to protect from:')
-            else:
-                pageName = arg[len('-cat:'):]
-        elif arg.startswith('-nosubcats'):
-            protectSubcategories = False
-        elif arg.startswith('-links'):
-            doLinks = True
-            if len(arg) == len('-links'):
-                pageName = wikipedia.input(u'Enter the page to protect from:')
-            else:
-                pageName = arg[len('-links:'):]
-        elif arg.startswith('-ref'):
-            doRef = True
-            if len(arg) == len('-ref'):
-                pageName = wikipedia.input(u'Enter the page to protect from:')
-            else:
-                pageName = arg[len('-ref:'):]
-        elif arg.startswith('-page'):
-            doSinglePage = True
-            if len(arg) == len('-page'):
-                pageName = wikipedia.input(u'Enter the page to protect:')
-            else:
-                pageName = arg[len('-page:'):]
-        elif arg.startswith('-images'):
-            doImages = True
-            if len(arg) == len('-images'):
-                pageName = wikipedia.input(u'Enter the page with the images to protect:')
-            else:
-                pageName = arg[len('-images:'):]
-        elif arg.startswith('-unprotect'):
-            defaultProtection = 'none'
-        elif arg.startswith('-edit'):
-            edit = arg[len('-edit:'):]
-            if edit not in protectionLevels:
-                edit = choiceProtectionLevel('edit', defaultProtection)
-        elif arg.startswith('-move'):
-            move = arg[len('-move:'):]
-            if move not in protectionLevels:
-                move = choiceProtectionLevel('move', defaultProtection)
-        elif arg.startswith('-create'):
-            create = arg[len('-create:'):]
-            if create not in protectionLevels:
-                create = choiceProtectionLevel('create', defaultProtection)
- 
-    mysite = wikipedia.getSite()
- 
-    if doSinglePage:
-        if not summary:
-            summary = wikipedia.input(u'Enter a reason for the protection:')
-        page = wikipedia.Page(mysite, pageName)
-        gen = iter([page])
-    elif doCategory:
-        if not summary:
-            summary = wikipedia.translate(mysite, msg_protect_category) % pageName
-        ns = mysite.category_namespace()
-        categoryPage = catlib.Category(mysite, ns + ':' + pageName)
-        gen = pagegenerators.CategorizedPageGenerator(categoryPage, recurse = protectSubcategories)
-    elif doLinks:
-        if not summary:
-            summary = wikipedia.translate(mysite, msg_protect_links) % pageName
-        linksPage = wikipedia.Page(mysite, pageName)
-        gen = pagegenerators.LinkedPageGenerator(linksPage)
-    elif doRef:
-        if not summary:
-            summary = wikipedia.translate(mysite, msg_protect_ref) % pageName
-        refPage = wikipedia.Page(mysite, pageName)
-        gen = pagegenerators.ReferringPageGenerator(refPage)
-    elif fileName:
-        if not summary:
-            summary = wikipedia.translate(mysite, msg_simple_protect)
-        gen = pagegenerators.TextfilePageGenerator(fileName)
-    elif doImages:
-        if not summary:
-            summary = wikipedia.translate(mysite, msg_protect_images) % pageName
-        gen = pagegenerators.ImagesPageGenerator(wikipedia.Page(mysite, pageName))
- 
-    if gen:
-        wikipedia.setAction(summary)
-        # We are just protecting pages, so we have no need of using a preloading page generator
-        # to actually get the text of those pages.
-        if not edit: edit = defaultProtection
-        if not move: move = defaultProtection
-        bot = ProtectionRobot(gen, summary, always, edit=edit, move=move)
-        bot.run()
-    else:
-        wikipedia.showHelp(u'protect')
- 
-if __name__ == "__main__":
-    try:
-        main()
-    finally:
-        wikipedia.stopme()
+# -*- coding: utf-8 -*-
+"""
+This script can be used to protect and unprotect pages en masse.
+Of course, you will need an admin account on the relevant wiki.
+ 
+Syntax: python protect.py OPTION...
+ 
+Command line options:
+ 
+-page:       Protect specified page
+-cat:        Protect all pages in the given category.
+-nosubcats:  Don't protect pages in the subcategories.
+-links:      Protect all pages linked from a given page.
+-file:       Protect all pages listed in a text file.
+-ref:        Protect all pages referring from a given page.
+-images:     Protect all images used on a given page.
+-always:     Don't prompt to protect pages, just do it.
+-summary:    Supply a custom edit summary.
+-unprotect:   Actually unprotect pages instead of protecting
+-edit:PROTECTION_LEVEL Set edit protection level to PROTECTION_LEVEL
+-move:PROTECTION_LEVEL Set move protection level to PROTECTION_LEVEL
+
+## Without support ##
+## -create:PROTECTION_LEVEL Set create protection level to PROTECTION_LEVEL ##
+ 
+Values for PROTECTION_LEVEL are: sysop, autoconfirmed, none.
+If an operation parameter (edit, move or create) is not specified, default
+protection level is 'sysop' (or 'none' if -unprotect).
+ 
+Examples:
+ 
+Protect everything in the category "To protect" prompting.
+    python protect.py -cat:"To protect" -always
+ 
+Unprotect all pages listed in text file "unprotect.txt" without prompting.
+    python protect.py -file:unprotect.txt -unprotect
+"""
+ 
+# Written by http://it.wikisource.org/wiki/Utente:Qualc1
+# Created by modifying delete.py
+__version__ = '$Id: delete.py 4946 2008-01-29 14:58:25Z wikipedian $'
+ 
+#
+# Distributed under the terms of the MIT license.
+#
+ 
+import wikipedia, catlib
+import pagegenerators
+ 
+# Summary messages for protecting from a category.
+msg_simple_protect = {
+    'en': u'Bot: Protecting a list of files.',
+    'ar': u'بوت: حماية قائمة من الملفات.',
+    'it': u'Bot: Protezione di una lista di pagine.',
+    'pt': u'Bot: Protegendo uma lista de artigos.',
+}
+msg_protect_category = {
+    'en': u'Robot - Protecting all pages from category %s',
+    'ar': u'روبوت - حماية كل الصفحات من التصنيف %s',
+    'it': u'Bot: Protezione di tutte le pagine nella categoria %s.',
+    'pt': u'Bot: Protegendo todos os artigos da categoria %s',
+}
+msg_protect_links = {
+    'en': u'Robot - Protecting all pages linked from %s',
+    'ar': u'روبوت - حماية كل الصفحات الموصولة من %s',
+    'it': u'Bot: Protezione di tutte le pagine linkate da %s.',
+    'pt': u'Bot: Protegendo todos os artigos ligados a %s',
+}
+msg_protect_ref = {
+    'en': u'Robot - Protecting all pages referring from %s',
+    'ar': u'روبوت - حماية كل الصفحات الراجعة من %s',
+    'it': u'Bot: Protezione di tutte le pagine con link verso %s.',
+    'pt': u'Bot: Protegendo todos os artigos afluentes a %s',
+}
+msg_protect_images = {
+    'en': u'Robot - Protecting all images on page %s',
+    'ar': u'روبوت - حماية كل الصور في الصفحة %s',
+    'it': u'Bot: Protezione di tutte le immagini presenti in %s.',
+    'pt': u'Bot: Protegendo todas as imagens do artigo %s',
+}
+ 
+class ProtectionRobot:
+    """
+    This robot allows protection of pages en masse.
+    """
+ 
+    def __init__(self, generator, summary, always = False, unprotect=False,
+                edit='sysop', move='sysop', create='sysop'):
+        """
+        Arguments:
+            * generator - A page generator.
+            * always - Protect without prompting?
+            * edit, move, create - protection level for these operations
+            * unprotect - unprotect pages (and ignore edit, move, create params)
+        """
+        self.generator = generator
+        self.summary = summary
+        self.always = always
+        self.unprotect = unprotect
+        self.edit = edit
+        self.move = move
+ 
+    def run(self):
+        """
+        Starts the robot's action.
+        """
+        #Loop through everything in the page generator and (un)protect it.
+        for page in self.generator:
+            wikipedia.output(u'Processing page %s' % page.title())
+            print self.edit, self.move#, self.create
+            page.protect(unprotect=self.unprotect, reason=self.summary, prompt=self.always,
+                        edit=self.edit, move=self.move)
+ 
+# Asks a valid protection level for "operation".
+# Returns the protection level chosen by user.
+def choiceProtectionLevel(operation, default):
+    default = default[0]
+    firstChar = map(lambda level: level[0], protectionLevels)
+    choiceChar = wikipedia.inputChoice('Choice a protection level to %s:' % operation, 
+                            protectionLevels, firstChar, default = default)
+    for level in protectionLevels:
+        if level.startswith(choiceChar):
+            return level
+ 
+def main():
+    """
+    Parses the command line, builds a page generator and runs the robot.
+
+    Exactly one page source is used, checked in this order: -page, -cat,
+    -links, -ref, -file, -images. If none was given, the help text for
+    this script is shown instead.
+
+    The -edit, -move and -create options take a protection level; when
+    the given value is not one of protectionLevels, the user is asked to
+    pick one. Levels that were not given on the command line fall back to
+    defaultProtection ('sysop', or 'none' once -unprotect has been seen).
+    """
+    # choiceProtectionLevel() reads this list as a module-level global.
+    global protectionLevels
+    protectionLevels = ['sysop', 'autoconfirmed', 'none']
+
+    pageName = ''
+    summary = ''
+    always = False
+    doSinglePage = False
+    doCategory = False
+    protectSubcategories = True
+    doRef = False
+    doLinks = False
+    doImages = False
+    fileName = ''
+    gen = None
+    edit = ''
+    move = ''
+    # Initialised like edit/move so that it is always bound, whether or
+    # not -create appears on the command line.
+    create = ''
+    defaultProtection = 'sysop'
+
+    # read command line parameters
+    for arg in wikipedia.handleArgs():
+        if arg == '-always':
+            always = True
+        elif arg.startswith('-file'):
+            if len(arg) == len('-file'):
+                fileName = wikipedia.input(u'Enter name of file to protect pages from:')
+            else:
+                fileName = arg[len('-file:'):]
+        elif arg.startswith('-summary'):
+            if len(arg) == len('-summary'):
+                summary = wikipedia.input(u'Enter a reason for the protection:')
+            else:
+                summary = arg[len('-summary:'):]
+        elif arg.startswith('-cat'):
+            doCategory = True
+            if len(arg) == len('-cat'):
+                pageName = wikipedia.input(u'Enter the category to protect from:')
+            else:
+                pageName = arg[len('-cat:'):]
+        elif arg.startswith('-nosubcats'):
+            protectSubcategories = False
+        elif arg.startswith('-links'):
+            doLinks = True
+            if len(arg) == len('-links'):
+                pageName = wikipedia.input(u'Enter the page to protect from:')
+            else:
+                pageName = arg[len('-links:'):]
+        elif arg.startswith('-ref'):
+            doRef = True
+            if len(arg) == len('-ref'):
+                pageName = wikipedia.input(u'Enter the page to protect from:')
+            else:
+                pageName = arg[len('-ref:'):]
+        elif arg.startswith('-page'):
+            doSinglePage = True
+            if len(arg) == len('-page'):
+                pageName = wikipedia.input(u'Enter the page to protect:')
+            else:
+                pageName = arg[len('-page:'):]
+        elif arg.startswith('-images'):
+            doImages = True
+            if len(arg) == len('-images'):
+                pageName = wikipedia.input(u'Enter the page with the images to protect:')
+            else:
+                pageName = arg[len('-images:'):]
+        elif arg.startswith('-unprotect'):
+            # Only changes the fallback level; levels given explicitly
+            # via -edit/-move/-create are kept as they are.
+            defaultProtection = 'none'
+        elif arg.startswith('-edit'):
+            edit = arg[len('-edit:'):]
+            if edit not in protectionLevels:
+                edit = choiceProtectionLevel('edit', defaultProtection)
+        elif arg.startswith('-move'):
+            move = arg[len('-move:'):]
+            if move not in protectionLevels:
+                move = choiceProtectionLevel('move', defaultProtection)
+        elif arg.startswith('-create'):
+            create = arg[len('-create:'):]
+            if create not in protectionLevels:
+                create = choiceProtectionLevel('create', defaultProtection)
+
+    mysite = wikipedia.getSite()
+
+    if doSinglePage:
+        if not summary:
+            summary = wikipedia.input(u'Enter a reason for the protection:')
+        page = wikipedia.Page(mysite, pageName)
+        gen = iter([page])
+    elif doCategory:
+        if not summary:
+            summary = wikipedia.translate(mysite, msg_protect_category) % pageName
+        ns = mysite.category_namespace()
+        categoryPage = catlib.Category(mysite, ns + ':' + pageName)
+        gen = pagegenerators.CategorizedPageGenerator(categoryPage, recurse = protectSubcategories)
+    elif doLinks:
+        if not summary:
+            summary = wikipedia.translate(mysite, msg_protect_links) % pageName
+        linksPage = wikipedia.Page(mysite, pageName)
+        gen = pagegenerators.LinkedPageGenerator(linksPage)
+    elif doRef:
+        if not summary:
+            summary = wikipedia.translate(mysite, msg_protect_ref) % pageName
+        refPage = wikipedia.Page(mysite, pageName)
+        gen = pagegenerators.ReferringPageGenerator(refPage)
+    elif fileName:
+        if not summary:
+            summary = wikipedia.translate(mysite, msg_simple_protect)
+        gen = pagegenerators.TextfilePageGenerator(fileName)
+    elif doImages:
+        if not summary:
+            summary = wikipedia.translate(mysite, msg_protect_images) % pageName
+        gen = pagegenerators.ImagesPageGenerator(wikipedia.Page(mysite, pageName))
+
+    if gen:
+        wikipedia.setAction(summary)
+        # We are just protecting pages, so we have no need of using a preloading page generator
+        # to actually get the text of those pages.
+        if not edit: edit = defaultProtection
+        if not move: move = defaultProtection
+        if not create: create = defaultProtection
+        # Forward the create level as well; the robot's constructor
+        # accepts it alongside edit and move.
+        bot = ProtectionRobot(gen, summary, always, edit=edit, move=move,
+                              create=create)
+        bot.run()
+    else:
+        wikipedia.showHelp(u'protect')
+ 
+# Script entry point: run main() and call wikipedia.stopme() afterwards.
+# The call sits in the finally clause, so it also runs when main() raises.
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        wikipedia.stopme()
Property changes on: trunk/pywikipedia/protect.py
___________________________________________________________________
Name: svn:eol-style
   + native