Revision: 7574 Author: xqt Date: 2009-10-30 09:51:24 +0000 (Fri, 30 Oct 2009)
Log Message: ----------- refcheck.py merged to templatecount.py and archived
Modified Paths: -------------- trunk/pywikipedia/templatecount.py
Added Paths: ----------- trunk/pywikipedia/archive/refcheck.py
Removed Paths: ------------- trunk/pywikipedia/refcheck.py
Copied: trunk/pywikipedia/archive/refcheck.py (from rev 7572, trunk/pywikipedia/refcheck.py) =================================================================== --- trunk/pywikipedia/archive/refcheck.py (rev 0) +++ trunk/pywikipedia/archive/refcheck.py 2009-10-30 09:51:24 UTC (rev 7574) @@ -0,0 +1,95 @@ +#!/usr/bin/python +""" +################################################## +This script with all its function has been merged +to templatecount.py. please use: + + templatecount.py -count + +xqt 2009-10-30 +################################################## +This script checks references to see if they are properly formatted. Right now +it just counts the total number of transclusions of any number of given templates. + +NOTE: This script is not capable of handling the <ref></ref> syntax. It just +handles the {{ref}} syntax, which is still used, but DEPRECATED on the English +Wikipedia. + +Syntax: python refcheck.py command [arguments] + +Command line options: + +-count Counts the number of times each template (passed in as an argument) + is transcluded. +-namespace: Filters the search to a given namespace. If this is specified + multiple times it will search all given namespaces + +Examples: + +Counts how many time {{ref}} and {{note}} are transcluded in articles. 
+ + python refcheck.py -count ref note -namespace:0 + +""" +__version__ = '$Id$' + +import wikipedia, config +import replace, pagegenerators +import re, sys, string + +templates = ['ref', 'note', 'ref label', 'note label', 'reflist'] + +class ReferencesRobot: + #def __init__(self): + #Nothing + def countRefs(self, templates, namespaces): + mysite = wikipedia.getSite() + mytpl = mysite.template_namespace()+':' + finalText = [u'Number of transclusions per template',u'------------------------------------'] + for template in templates: + gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mytpl + template), onlyTemplateInclusion = True) + if namespaces: + gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) + count = 0 + for page in gen: + count += 1 + finalText.append(u'%s: %d' % (template, count)) + for line in finalText: + wikipedia.output(line) + +def main(): + doCount = False + argsList = [] + namespaces = [] + for arg in wikipedia.handleArgs(): + if arg == '-count': + doCount = True + elif arg.startswith('-namespace:'): + try: + namespaces.append(int(arg[len('-namespace:'):])) + except ValueError: + namespaces.append(arg[len('-namespace:'):]) + else: + argsList.append(arg) + + if doCount: + robot = ReferencesRobot() + if not argsList: + argsList = templates + choice = '' + if 'reflist' in argsList: + wikipedia.output(u'NOTE: it will take a long time to count "reflist".') + choice = wikipedia.inputChoice(u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y') + if choice == 's': + argsList.remove('reflist') + if choice <> 'n': + robot.countRefs(argsList, namespaces) + else: + wikipedia.showHelp('refcheck') + +if __name__ == "__main__": + try: + main() + finally: + wikipedia.stopme() +
Deleted: trunk/pywikipedia/refcheck.py =================================================================== --- trunk/pywikipedia/refcheck.py 2009-10-30 06:00:55 UTC (rev 7573) +++ trunk/pywikipedia/refcheck.py 2009-10-30 09:51:24 UTC (rev 7574) @@ -1,86 +0,0 @@ -""" -This script checks references to see if they are properly formatted. Right now -it just counts the total number of transclusions of any number of given templates. - -NOTE: This script is not capable of handling the <ref></ref> syntax. It just -handles the {{ref}} syntax, which is still used, but DEPRECATED on the English -Wikipedia. - -Syntax: python refcheck.py command [arguments] - -Command line options: - --count Counts the number of times each template (passed in as an argument) - is transcluded. --namespace: Filters the search to a given namespace. If this is specified - multiple times it will search all given namespaces - -Examples: - -Counts how many time {{ref}} and {{note}} are transcluded in articles. - - python refcheck.py -count ref note -namespace:0 - -""" -__version__ = '$Id$' - -import wikipedia, config -import replace, pagegenerators -import re, sys, string - -templates = ['ref', 'note', 'ref label', 'note label', 'reflist'] - -class ReferencesRobot: - #def __init__(self): - #Nothing - def countRefs(self, templates, namespaces): - mysite = wikipedia.getSite() - mytpl = mysite.template_namespace()+':' - finalText = [u'Number of transclusions per template',u'------------------------------------'] - for template in templates: - gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mytpl + template), onlyTemplateInclusion = True) - if namespaces: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) - count = 0 - for page in gen: - count += 1 - finalText.append(u'%s: %d' % (template, count)) - for line in finalText: - wikipedia.output(line) - -def main(): - doCount = False - argsList = [] - namespaces = [] - for arg in wikipedia.handleArgs(): - if arg == 
'-count': - doCount = True - elif arg.startswith('-namespace:'): - try: - namespaces.append(int(arg[len('-namespace:'):])) - except ValueError: - namespaces.append(arg[len('-namespace:'):]) - else: - argsList.append(arg) - - if doCount: - robot = ReferencesRobot() - if not argsList: - argsList = templates - choice = '' - if 'reflist' in argsList: - wikipedia.output(u'NOTE: it will take a long time to count "reflist".') - choice = wikipedia.inputChoice(u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y') - if choice == 's': - argsList.remove('reflist') - if choice <> 'n': - robot.countRefs(argsList, namespaces) - else: - wikipedia.showHelp('refcheck') - -if __name__ == "__main__": - try: - main() - finally: - wikipedia.stopme() -
Modified: trunk/pywikipedia/templatecount.py =================================================================== --- trunk/pywikipedia/templatecount.py 2009-10-30 06:00:55 UTC (rev 7573) +++ trunk/pywikipedia/templatecount.py 2009-10-30 09:51:24 UTC (rev 7574) @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- """ This script will display the list of pages transcluding a given list of templates. It can also be used to simply count the number of pages (rather than listing each @@ -7,10 +9,12 @@
Command line options:
--count Counts the number of times each template (passed in as an argument) - is transcluded. --list Gives the list of all of the pages transcluding the templates (rather - than just counting them). +-count Counts the number of times each template (passed in as an + argument) is transcluded. + +-list Gives the list of all of the pages transcluding the templates + (rather than just counting them). + -namespace: Filters the search to a given namespace. If this is specified multiple times it will search all given namespaces
@@ -25,6 +29,9 @@ python templatecount.py -list -namespace:14 cfd cfdu
""" +# +# Distributed under the terms of the MIT license. +# __version__ = '$Id$'
import wikipedia, config @@ -32,17 +39,20 @@ import re, sys, string import datetime
+templates = ['ref', 'note', 'ref label', 'note label', 'reflist'] + class TemplateCountRobot: #def __init__(self): #Nothing def countTemplates(self, templates, namespaces): mysite = wikipedia.getSite() + mytpl = mysite.template_namespace()+':' finalText = [u'Number of transclusions per template',u'------------------------------------'] total = 0 # The names of the templates are the keys, and the numbers of transclusions are the values. templateDict = {} for template in templates: - gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mysite.template_namespace() + ':' + template), onlyTemplateInclusion = True) + gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mytpl + template), onlyTemplateInclusion = True) if namespaces: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) count = 0 @@ -50,7 +60,7 @@ count += 1 templateDict[template] = count finalText.append(u'%s: %d' % (template, count)) - total = total + count + total += count for line in finalText: wikipedia.output(line, toStdout=True) wikipedia.output(u'TOTAL: %d' % total, toStdout=True) @@ -105,8 +115,16 @@ else: robot = TemplateCountRobot() if not argsList: - argsList = ['ref', 'note', 'ref label', 'note label'] - if operation == "Count": + argsList = templates + choice = '' + if 'reflist' in argsList: + wikipedia.output(u'NOTE: it will take a long time to count "reflist".') + choice = wikipedia.inputChoice(u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y') + if choice == 's': + argsList.remove('reflist') + if choice == 'n': + return + elif operation == "Count": robot.countTemplates(argsList, namespaces) elif operation == "List": robot.listTemplates(argsList, namespaces)
pywikipedia-svn@lists.wikimedia.org