Revision: 6212
Author: cydeweys
Date: 2008-12-27 17:55:58 +0000 (Sat, 27 Dec 2008)
Log Message:
-----------
Added the functionality to process pages from a list of categories.
Modified Paths:
--------------
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2008-12-27 15:20:35 UTC (rev 6211)
+++ trunk/pywikipedia/replace.py 2008-12-27 17:55:58 UTC (rev 6212)
@@ -16,6 +16,10 @@
Argument can also be given as "-page:pagetitle". You can
give this parameter multiple times to edit multiple pages.
+-category Works on all of the pages in a specific category. Specify
+ this argument multiple times to work on multiple categories
+ simultaneously.
+
Furthermore, the following command line parameters are supported:
-regex Make replacements using regular expressions. If this argument
@@ -501,6 +505,8 @@
# Between a regex and another (using -fix) sleep some time (not to waste
# too much CPU)
sleep = None
+ # A list of categories whose pages we should process.
+ categories = []
# Read commandline parameters.
for arg in wikipedia.handleArgs():
@@ -520,6 +526,8 @@
xmlFilename = arg[5:]
elif arg =='-sql':
useSql = True
+ elif arg.startswith('-category'):
+ categories.append(arg[len('-category:'):])
elif arg.startswith('-page'):
if len(arg) == 5:
PageTitles.append(wikipedia.input(
@@ -547,14 +555,14 @@
elif arg == '-nocase':
caseInsensitive = True
elif arg.startswith('-addcat:'):
- add_cat = arg[8:]
+ add_cat = arg[len('-addcat:'):]
elif arg.startswith('-namespace:'):
try:
namespaces.append(int(arg[11:]))
except ValueError:
namespaces.append(arg[11:])
elif arg.startswith('-summary:'):
- wikipedia.setAction(arg[9:])
+ wikipedia.setAction(arg[len('-summary:'):])
summary_commandline = True
elif arg.startswith('-allowoverlap'):
allowoverlap = True
@@ -632,7 +640,7 @@
exceptions = fix['exceptions']
replacements = fix['replacements']
- # already compile all regular expressions here to save time later
+ # Pre-compile all regular expressions here to save time later
for i in range(len(replacements)):
old, new = replacements[i]
if not regex:
@@ -681,7 +689,9 @@
%s
LIMIT 200""" % (whereClause, exceptClause)
gen = pagegenerators.MySQLPageGenerator(query)
-
+ elif categories:
+ gens = [pagegenerators.CategorizedPageGenerator(catlib.Category(wikipedia.getSite(), 'Category:' + t)) for t in categories]
+ gen = pagegenerators.DuplicateFilterPageGenerator(pagegenerators.CombinedPageGenerator(gens))
elif PageTitles:
pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
for PageTitle in PageTitles]