Revision: 4251 Author: wikipedian Date: 2007-09-12 10:36:03 +0000 (Wed, 12 Sep 2007)
Log Message: ----------- usability: scripts now not only allow -namespace:4, but also e.g. -namespace:Wikipédia on fr:, -namespace:Wikipedia on all Wikipedias, and -namespace:Project on all wikis.
Modified Paths: -------------- trunk/pywikipedia/capitalize_redirects.py trunk/pywikipedia/copyright.py trunk/pywikipedia/family.py trunk/pywikipedia/noreferences.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/redirect.py trunk/pywikipedia/refcheck.py trunk/pywikipedia/replace.py trunk/pywikipedia/selflink.py trunk/pywikipedia/standardize_notes.py trunk/pywikipedia/template.py trunk/pywikipedia/templatecount.py trunk/pywikipedia/unlink.py trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/capitalize_redirects.py =================================================================== --- trunk/pywikipedia/capitalize_redirects.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/capitalize_redirects.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -19,12 +19,13 @@
-start Work on all pages on the home wiki, starting at the named page. - + -page Work on a single page.
-namespace Run over especific namespace. - Argument can also be given as "-namespace:100". - + Argument can also be given as "-namespace:100" or + "-namespace:Image". + -always Don't prompt to make changes, just do them.
Example: "python capitalize_redirects.py -start:B -always" @@ -129,7 +130,10 @@ elif arg == '-always': acceptall = True elif arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) else: commandline_replacements.append(arg)
Modified: trunk/pywikipedia/copyright.py =================================================================== --- trunk/pywikipedia/copyright.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/copyright.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -47,7 +47,7 @@ -links - Work on all pages that are linked to from a certain page. Argument can also be given as "-links:linkingpagetitle". -start - Work on all pages in the wiki, starting at a given page. --namespace:n - Number of namespace to process. The parameter can be used +-namespace:n - Number or name of namespace to process. The parameter can be used multiple times.
Examples: @@ -961,7 +961,10 @@ else: PageTitles.append(arg[6:]) elif arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) elif arg.startswith('-forceupdate'): load_pages(force_update = True) elif arg == '-repeat':
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/family.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -2239,18 +2239,18 @@ v = self.namespaces[ns_number][fallback] else: raise KeyError('ERROR: title for namespace %d in language %s unknown' % (ns_number, code)) - - if all: - if type(v) == type([]): - return tuple(v) - else: - return (v, ) - else: - if type(v) == type([]): - return v[0] - else: - return v
+ if all: + if type(v) == type([]): + return tuple(v) + else: + return (v, ) + else: + if type(v) == type([]): + return v[0] + else: + return v + def isDefinedNS(self, ns_number): """Return True if the namespace has been defined in this family. """
Modified: trunk/pywikipedia/noreferences.py =================================================================== --- trunk/pywikipedia/noreferences.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/noreferences.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -14,8 +14,8 @@ or pages-meta-current, see http://download.wikimedia.org). Argument can also be given as "-xml:filename".
- -namespace:n Number of namespace to process. The parameter can be used - multiple times. It works in combination with all other + -namespace:n Number or name of namespace to process. The parameter can be + used multiple times. It works in combination with all other parameters, except for the -start parameter. If you e.g. want to iterate over all categories starting at M, use -start:Category:M. @@ -306,7 +306,10 @@ xmlFilename = arg[5:] gen = XmlDumpNoReferencesPageGenerator(xmlFilename) elif arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) elif arg == '-always': always = True else:
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/pagegenerators.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -417,9 +417,20 @@
def NamespaceFilterPageGenerator(generator, namespaces): """ - Wraps around another generator. Yields only those pages that are in a list - of specific namespace. + Wraps around another generator. Yields only those pages that are in one + of the given namespaces. + + The namespace list can contain both integers (namespace numbers) and + strings/unicode strings (namespace names). """ + # convert namespace names to namespace numbers + for i in xrange(len(namespaces)): + ns = namespaces[i] + if isinstance(ns, unicode) or isinstance(ns, str): + index = wikipedia.getSite().getNamespaceIndex(ns) + if index is None: + raise ValueError(u'Unknown namespace: %s' % ns) + namespaces[i] = index for page in generator: if page.namespace() in namespaces: yield page
Modified: trunk/pywikipedia/redirect.py =================================================================== --- trunk/pywikipedia/redirect.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/redirect.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -292,7 +292,10 @@ else: xmlFilename = arg[5:] elif arg.startswith('-namespace:'): - namespace = int(arg[11:]) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) elif arg.startswith('-restart:'): restart = int(arg[9:]) else:
Modified: trunk/pywikipedia/refcheck.py =================================================================== --- trunk/pywikipedia/refcheck.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/refcheck.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -1,7 +1,11 @@ -""" +""" This script checks references to see if they are properly formatted. Right now it just counts the total number of transclusions of any number of given templates.
+NOTE: This script is not capable of handling the <ref></ref> syntax. It just +handles the {{ref}} syntax, which is still used, but DEPRECATED on the English +Wikipedia. + Syntax: python refcheck.py command [arguments]
Command line options: @@ -50,7 +54,10 @@ if arg == '-count': doCount = True elif arg.startswith('-namespace:'): - namespaces.append(int(arg[len('-namespace:'):])) + try: + namespaces.append(int(arg[len('-namespace:'):])) + except ValueError: + namespaces.append(arg[len('-namespace:'):]) else: argsList.append(arg)
Modified: trunk/pywikipedia/replace.py =================================================================== --- trunk/pywikipedia/replace.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/replace.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -351,7 +351,10 @@ elif arg == '-nocase': caseInsensitive = True elif arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) elif arg.startswith('-summary:'): wikipedia.setAction(arg[9:]) summary_commandline = True
Modified: trunk/pywikipedia/selflink.py =================================================================== --- trunk/pywikipedia/selflink.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/selflink.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -212,7 +212,10 @@ LIMIT 100""" gen = pagegenerators.MySQLPageGenerator(query) elif arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) else: generator = genFactory.handleArg(arg) if generator:
Modified: trunk/pywikipedia/standardize_notes.py =================================================================== --- trunk/pywikipedia/standardize_notes.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/standardize_notes.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -6,6 +6,10 @@
At present it converts to [[Wikipedia:Footnote3]] format (ref/note).
+NOTE: This script is not capable of handling the <ref></ref> syntax. It just +handles the {{ref}} syntax, which is still used, but DEPRECATED on the English +Wikipedia. + You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file. @@ -1062,7 +1066,10 @@ elif arg == '-always': acceptall = True elif arg.startswith('-namespace:'): - namespace = int(arg[11:]) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) else: commandline_replacements.append(arg)
Modified: trunk/pywikipedia/template.py =================================================================== --- trunk/pywikipedia/template.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/template.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -333,7 +333,10 @@ else: xmlfilename = arg[5:] elif arg.startswith('-namespace:'): - namespaces.append(int(arg[len('-namespace:'):])) + try: + namespaces.append(int(arg[len('-namespace:'):])) + except ValueError: + namespaces.append(arg[len('-namespace:'):]) elif arg.startswith('-category:'): addedCat = arg[len('-category:'):] elif arg.startswith('-summary:'):
Modified: trunk/pywikipedia/templatecount.py =================================================================== --- trunk/pywikipedia/templatecount.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/templatecount.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -1,4 +1,4 @@ -""" +""" This script will display the list of pages transcluding a given list of templates. It can also be used to simply count the number of pages (rather than listing each individually). @@ -33,75 +33,78 @@ import datetime
class TemplateCountRobot: - #def __init__(self): - #Nothing - def countTemplates(self, templates, namespaces): - mysite = wikipedia.getSite() - finalText = [u'Number of transclusions per template',u'------------------------------------'] - total = 0 - for template in templates: - gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mysite.template_namespace() + ':' + template), onlyTemplateInclusion = True) - if namespaces: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) - count = 0 - for page in gen: - count = count + 1 - finalText.append(u'%s: %d' % (template, count)) - total = total + count - for line in finalText: - wikipedia.output(line, toStdout=True) - wikipedia.output(u'TOTAL: %d' % total, toStdout=True) - wikipedia.output(u'Report generated on %s' % datetime.datetime.utcnow().isoformat(), toStdout=True) + #def __init__(self): + #Nothing + def countTemplates(self, templates, namespaces): + mysite = wikipedia.getSite() + finalText = [u'Number of transclusions per template',u'------------------------------------'] + total = 0 + for template in templates: + gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mysite.template_namespace() + ':' + template), onlyTemplateInclusion = True) + if namespaces: + gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) + count = 0 + for page in gen: + count = count + 1 + finalText.append(u'%s: %d' % (template, count)) + total = total + count + for line in finalText: + wikipedia.output(line, toStdout=True) + wikipedia.output(u'TOTAL: %d' % total, toStdout=True) + wikipedia.output(u'Report generated on %s' % datetime.datetime.utcnow().isoformat(), toStdout=True)
- def listTemplates(self, templates, namespaces): - mysite = wikipedia.getSite() - count = 0 - finalText = [u'List of pages transcluding templates:'] - for template in templates: - finalText.append(u'* %s' % template) - finalText.append(u'------------------------------------') - for template in templates: - gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mysite.template_namespace() + ':' + template), onlyTemplateInclusion = True) - if namespaces: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) - for page in gen: - finalText.append(u'%s' % page.title()) - count = count + 1 - finalText.append(u'Total page count: %d' % count) - for line in finalText: - wikipedia.output(line, toStdout=True) - wikipedia.output(u'Report generated on %s' % datetime.datetime.utcnow().isoformat(), toStdout=True) + def listTemplates(self, templates, namespaces): + mysite = wikipedia.getSite() + count = 0 + finalText = [u'List of pages transcluding templates:'] + for template in templates: + finalText.append(u'* %s' % template) + finalText.append(u'------------------------------------') + for template in templates: + gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mysite.template_namespace() + ':' + template), onlyTemplateInclusion = True) + if namespaces: + gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) + for page in gen: + finalText.append(u'%s' % page.title()) + count = count + 1 + finalText.append(u'Total page count: %d' % count) + for line in finalText: + wikipedia.output(line, toStdout=True) + wikipedia.output(u'Report generated on %s' % datetime.datetime.utcnow().isoformat(), toStdout=True)
def main(): - operation = "None" - doCount = False - doList = False - argsList = [] - namespaces = [] + operation = "None" + doCount = False + doList = False + argsList = [] + namespaces = []
- for arg in wikipedia.handleArgs(): - if arg == '-count': - operation = "Count" - elif arg == '-list': - operation = "List" - elif arg.startswith('-namespace:'): - namespaces.append(int(arg[len('-namespace:'):])) - else: - argsList.append(arg) + for arg in wikipedia.handleArgs(): + if arg == '-count': + operation = "Count" + elif arg == '-list': + operation = "List" + elif arg.startswith('-namespace:'): + try: + namespaces.append(int(arg[len('-namespace:'):])) + except ValueError: + namespaces.append(arg[len('-namespace:'):]) + else: + argsList.append(arg)
- if operation == "None": - wikipedia.output(__doc__, 'utf-8') - else: - robot = TemplateCountRobot() - if not argsList: - argsList = ['ref', 'note', 'ref label', 'note label'] - if operation == "Count": - robot.countTemplates(argsList, namespaces) - elif operation == "List": - robot.listTemplates(argsList, namespaces) + if operation == "None": + wikipedia.output(__doc__, 'utf-8') + else: + robot = TemplateCountRobot() + if not argsList: + argsList = ['ref', 'note', 'ref label', 'note label'] + if operation == "Count": + robot.countTemplates(argsList, namespaces) + elif operation == "List": + robot.listTemplates(argsList, namespaces)
if __name__ == "__main__": - try: - main() - finally: - wikipedia.stopme() + try: + main() + finally: + wikipedia.stopme()
Modified: trunk/pywikipedia/unlink.py =================================================================== --- trunk/pywikipedia/unlink.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/unlink.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -145,7 +145,10 @@
for arg in wikipedia.handleArgs(): if arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) else: pageTitleParts.append(arg)
Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-09-12 10:07:51 UTC (rev 4250) +++ trunk/pywikipedia/weblinkchecker.py 2007-09-12 10:36:03 UTC (rev 4251) @@ -669,7 +669,10 @@ elif arg == '-notalk': config.report_dead_links_on_talk = False elif arg.startswith('-namespace:'): - namespaces.append(int(arg[11:])) + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) elif arg == '-repeat': gen = RepeatPageGenerator() else:
pywikipedia-l@lists.wikimedia.org