Revision: 5292 Author: siebrand Date: 2008-05-02 00:05:37 +0000 (Fri, 02 May 2008)
Log Message: ----------- eol style native
Modified Paths: -------------- trunk/pywikipedia/commonscat.py trunk/pywikipedia/delinker.py trunk/pywikipedia/featuredcount.py trunk/pywikipedia/lonelypages.py
Property Changed: ---------------- trunk/pywikipedia/add_text.py trunk/pywikipedia/checkimages.py trunk/pywikipedia/commonscat.py trunk/pywikipedia/copyright_clean.py trunk/pywikipedia/copyright_put.py trunk/pywikipedia/delinker.py trunk/pywikipedia/featuredcount.py trunk/pywikipedia/generate_user_files.py trunk/pywikipedia/lonelypages.py trunk/pywikipedia/noreferences.py trunk/pywikipedia/pageimport.py
Property changes on: trunk/pywikipedia/add_text.py ___________________________________________________________________ Name: svn:eol-style + native
Property changes on: trunk/pywikipedia/checkimages.py ___________________________________________________________________ Name: svn:eol-style + native
Modified: trunk/pywikipedia/commonscat.py =================================================================== --- trunk/pywikipedia/commonscat.py 2008-05-01 18:50:57 UTC (rev 5291) +++ trunk/pywikipedia/commonscat.py 2008-05-02 00:05:37 UTC (rev 5292) @@ -1,229 +1,229 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" -With this tool you can add the template {{commonscat}} to categories. -The tool works by following the interwiki links. If the template is present on -another langauge page, the bot will use it. - -You could probably use it at articles as well, but this isnt tested. - -This bot uses pagegenerators to get a list of pages. For example to go through all categories: -commonscat.py -start:Category:! - -Commonscat bot: - -Take a page. Follow the interwiki's and look for the commonscat template -*Found zero templates. Done. -*Found one template. Add this template -*Found more templates. Ask the user <- still have to implement this - -TODO: -*Update interwiki's at commons -*Collect all possibilities also if local wiki already has link. -*Better support for other templates (translations) / redundant templates. -*Check mode, only check pages which already have the template -*More efficient like interwiki.py -*Possibility to update other languages in the same run - -""" - -# -# (C) Multichill, 2008 -# -# Distributed under the terms of the MIT license. 
-# - -import wikipedia, config, pagegenerators, add_text - -commonscatTemplates = { - 'af' : u'CommonsKategorie', - 'ar' : u'تصنيف كومنز', - 'als' : u'Commonscat', - 'az' : u'CommonsKat', - 'bg' : u'Commonscat', - 'ca' : u'Commonscat', - 'cs' : u'Commonscat', - 'da' : u'Commonscat', - 'de' : u'Commonscat', - 'en' : u'Commonscat', - 'eo' : u'Commonscat', - 'es' : u'Commonscat', - 'eu' : u'Commonskat', - 'fi' : u'Commonscat', - 'fr' : u'Commonscat', - 'hr' : u'Commonscat', - 'hu' : u'Közvagyonkat', - 'id' : u'Commonscat', - 'io' : u'Commonscat', - 'is' : u'CommonsCat', - 'it' : u'Commonscat', - 'ja' : u'Commonscat', - 'ko' : u'Commonscat', - 'lt' : u'Commonscat', - 'lv' : u'Commonscat', - 'mk' : u'Ризница-врска', - 'ms' : u'Commonscat', - 'nl' : u'Commonscat', - 'nn' : u'Commonscat', - 'no' : u'Commonscat', - 'oc' : u'Commonscat', - 'os' : u'Commonscat', - 'pl' : u'Commonscat', - 'pt' : u'Commonscat', - 'ro' : u'Commonscat', - 'ru' : u'Commonscat', - 'scn' : u'Commonscat', - 'sh' : u'Commonscat', - 'simple' : u'Commonscat', - 'sk' : u'Commonscat', - 'sl' : u'Kategorija v Zbirki', - 'sr' : u'Commonscat', - 'su' : u'Commonscat', - 'sv' : u'Commonscat', - 'th' : u'Commonscat', - 'tr' : u'CommonsKat', - 'uk' : u'Commonscat', - 'vi' : u'Commonscat', - 'zh' : u'Commonscat', - 'zh-yue' : u'同享類' -} - -def getTemplate (lang = None): - ''' - Get the template name in a language. Expects the language code, returns the translation. - ''' - if commonscatTemplates.has_key(lang): - return commonscatTemplates[lang] - else: - return u'Commonscat' - -def updateInterwiki (wikipediaPage = None, commonsPage = None): - ''' - Update the interwiki's at commons from a wikipedia page. The bot just replaces the interwiki links at the commons page with the interwiki's from the wikipedia page. - This should probably be more intelligent. We could use add all the interwiki's and remove duplicates. Or only remove language links if multiple language links to the same language exist. 
- - This function is disabled for the moment untill i figure out what the best way is to update the interwiki's. - ''' - interwikis = {} - comment= u'' - interwikilist = wikipediaPage.interwiki() - interwikilist.append(wikipediaPage) - - for interwikiPage in interwikilist: - interwikis[interwikiPage.site()]=interwikiPage - oldtext = commonsPage.get() - # The commonssite object doesnt work with interwiki's - newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl')) - comment = u'Updating interwiki's from [[' + wikipediaPage.site().language() + u':' + wikipediaPage.title() + u']]' - - if newtext != oldtext: - #This doesnt seem to work. Newtext has some trailing whitespace - wikipedia.showDiff(oldtext, newtext) - commonsPage.put(newtext=newtext, comment=comment) - - -def addCommonscat (page = None, summary = None, always = False): - ''' - Take a page. Go to all the interwiki page looking for a commonscat template. - When all the interwiki's links are checked and a proper category is found add it to the page. - ''' - commonscat = "" - commonscatpage = None - commonscats = [] - - wikipedia.output("Working on " + page.title()); - if getTemplate(page.site().language()) in page.templates(): - wikipedia.output("Commonscat template is already on " + page.title()); - #for template in page.templatesWithParams(): - # if ((template[0]==getTemplate(page.site().language())) and (len(template[1]) > 0)): - # commonscatpage = getCommonscat(template[1][0]) - # if commonscatpage != None: - # updateInterwiki (page, commonscatpage) - # #Should remove the template if something is wrong - - else: - #Follow the interwiki's - for ipage in page.interwiki(): - #See if commonscat is present - if getTemplate(ipage.site().language()) in ipage.templates(): - #Go through all the templates at the page - for template in ipage.templatesWithParams(): - #We found the template and it has the parameter set. 
- if ((template[0]==getTemplate(ipage.site().language())) and (len(template[1]) > 0)): - commonscatpage = getCommonscat(template[1][0]) - if commonscatpage != None: - commonscats.append(commonscatpage); - wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + commonscatpage.title() + "."); - commonscatpage = None - if len(commonscats) > 0: - commonscatpage = commonscats.pop(); - commonscat = commonscatpage.titleWithoutNamespace() - #We found one or more commonscat links, build the template and add it to our page - #TODO: We should check if we found more than one different link. - commonscat = "{{" + getTemplate(page.site().language()) + "|" + commonscat + "}}"; - add_text.add_text(page, commonscat, summary, None, None, always); - #updateInterwiki(page, commonscatpage) - return (True, always); - -def getCommonscat (name = ""): - ''' - This function will retun a page object of the commons page - If the page is a redirect this function tries to follow it. - If the page doesnt exists the function will return None - ''' - #wikipedia.output("getCommonscat: " + name ); - result = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name); - if not result.exists(): - #wikipedia.output("getCommonscat : The category doesnt exist."); - return None - elif result.isRedirectPage(): - #wikipedia.output("getCommonscat : The category is a redirect"); - return result.getRedirectTarget(); - elif "Category redirect" in result.templates(): - #wikipedia.output("getCommonscat : The category is a category redirect"); - for template in result.templatesWithParams(): - if ((template[0]=="Category redirect") and (len(template[1]) > 0)): - return getCommonscat(template[1][0]) - elif result.isDisambig(): - #wikipedia.output("getCommonscat : The category is disambigu"); - return None - else: - return result - -def main(): - ''' - Parse the command line arguments and get a pagegenerator to work on. 
- Iterate through all the pages. - ''' - summary = None; generator = None; always = False - # Load a lot of default generators - genFactory = pagegenerators.GeneratorFactory() - - for arg in wikipedia.handleArgs(): - if arg.startswith('-summary'): - if len(arg) == 8: - summary = wikipedia.input(u'What summary do you want to use?') - else: - summary = arg[9:] - elif arg.startswith('-page'): - if len(arg) == 5: - generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))] - else: - generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])] - elif arg == '-always': - always = True - else: - generator = genFactory.handleArg(arg) - if not generator: - raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!') - - pregenerator = pagegenerators.PreloadingGenerator(generator) - - for page in pregenerator: - (status, always) = addCommonscat(page, summary, always) - -if __name__ == "__main__": - try: - main() - finally: - wikipedia.stopme() +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +With this tool you can add the template {{commonscat}} to categories. +The tool works by following the interwiki links. If the template is present on +another langauge page, the bot will use it. + +You could probably use it at articles as well, but this isnt tested. + +This bot uses pagegenerators to get a list of pages. For example to go through all categories: +commonscat.py -start:Category:! + +Commonscat bot: + +Take a page. Follow the interwiki's and look for the commonscat template +*Found zero templates. Done. +*Found one template. Add this template +*Found more templates. Ask the user <- still have to implement this + +TODO: +*Update interwiki's at commons +*Collect all possibilities also if local wiki already has link. +*Better support for other templates (translations) / redundant templates. 
+*Check mode, only check pages which already have the template +*More efficient like interwiki.py +*Possibility to update other languages in the same run + +""" + +# +# (C) Multichill, 2008 +# +# Distributed under the terms of the MIT license. +# + +import wikipedia, config, pagegenerators, add_text + +commonscatTemplates = { + 'af' : u'CommonsKategorie', + 'ar' : u'تصنيف كومنز', + 'als' : u'Commonscat', + 'az' : u'CommonsKat', + 'bg' : u'Commonscat', + 'ca' : u'Commonscat', + 'cs' : u'Commonscat', + 'da' : u'Commonscat', + 'de' : u'Commonscat', + 'en' : u'Commonscat', + 'eo' : u'Commonscat', + 'es' : u'Commonscat', + 'eu' : u'Commonskat', + 'fi' : u'Commonscat', + 'fr' : u'Commonscat', + 'hr' : u'Commonscat', + 'hu' : u'Közvagyonkat', + 'id' : u'Commonscat', + 'io' : u'Commonscat', + 'is' : u'CommonsCat', + 'it' : u'Commonscat', + 'ja' : u'Commonscat', + 'ko' : u'Commonscat', + 'lt' : u'Commonscat', + 'lv' : u'Commonscat', + 'mk' : u'Ризница-врска', + 'ms' : u'Commonscat', + 'nl' : u'Commonscat', + 'nn' : u'Commonscat', + 'no' : u'Commonscat', + 'oc' : u'Commonscat', + 'os' : u'Commonscat', + 'pl' : u'Commonscat', + 'pt' : u'Commonscat', + 'ro' : u'Commonscat', + 'ru' : u'Commonscat', + 'scn' : u'Commonscat', + 'sh' : u'Commonscat', + 'simple' : u'Commonscat', + 'sk' : u'Commonscat', + 'sl' : u'Kategorija v Zbirki', + 'sr' : u'Commonscat', + 'su' : u'Commonscat', + 'sv' : u'Commonscat', + 'th' : u'Commonscat', + 'tr' : u'CommonsKat', + 'uk' : u'Commonscat', + 'vi' : u'Commonscat', + 'zh' : u'Commonscat', + 'zh-yue' : u'同享類' +} + +def getTemplate (lang = None): + ''' + Get the template name in a language. Expects the language code, returns the translation. + ''' + if commonscatTemplates.has_key(lang): + return commonscatTemplates[lang] + else: + return u'Commonscat' + +def updateInterwiki (wikipediaPage = None, commonsPage = None): + ''' + Update the interwiki's at commons from a wikipedia page. 
The bot just replaces the interwiki links at the commons page with the interwiki's from the wikipedia page. + This should probably be more intelligent. We could use add all the interwiki's and remove duplicates. Or only remove language links if multiple language links to the same language exist. + + This function is disabled for the moment untill i figure out what the best way is to update the interwiki's. + ''' + interwikis = {} + comment= u'' + interwikilist = wikipediaPage.interwiki() + interwikilist.append(wikipediaPage) + + for interwikiPage in interwikilist: + interwikis[interwikiPage.site()]=interwikiPage + oldtext = commonsPage.get() + # The commonssite object doesnt work with interwiki's + newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl')) + comment = u'Updating interwiki's from [[' + wikipediaPage.site().language() + u':' + wikipediaPage.title() + u']]' + + if newtext != oldtext: + #This doesnt seem to work. Newtext has some trailing whitespace + wikipedia.showDiff(oldtext, newtext) + commonsPage.put(newtext=newtext, comment=comment) + + +def addCommonscat (page = None, summary = None, always = False): + ''' + Take a page. Go to all the interwiki page looking for a commonscat template. + When all the interwiki's links are checked and a proper category is found add it to the page. 
+ ''' + commonscat = "" + commonscatpage = None + commonscats = [] + + wikipedia.output("Working on " + page.title()); + if getTemplate(page.site().language()) in page.templates(): + wikipedia.output("Commonscat template is already on " + page.title()); + #for template in page.templatesWithParams(): + # if ((template[0]==getTemplate(page.site().language())) and (len(template[1]) > 0)): + # commonscatpage = getCommonscat(template[1][0]) + # if commonscatpage != None: + # updateInterwiki (page, commonscatpage) + # #Should remove the template if something is wrong + + else: + #Follow the interwiki's + for ipage in page.interwiki(): + #See if commonscat is present + if getTemplate(ipage.site().language()) in ipage.templates(): + #Go through all the templates at the page + for template in ipage.templatesWithParams(): + #We found the template and it has the parameter set. + if ((template[0]==getTemplate(ipage.site().language())) and (len(template[1]) > 0)): + commonscatpage = getCommonscat(template[1][0]) + if commonscatpage != None: + commonscats.append(commonscatpage); + wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + commonscatpage.title() + "."); + commonscatpage = None + if len(commonscats) > 0: + commonscatpage = commonscats.pop(); + commonscat = commonscatpage.titleWithoutNamespace() + #We found one or more commonscat links, build the template and add it to our page + #TODO: We should check if we found more than one different link. + commonscat = "{{" + getTemplate(page.site().language()) + "|" + commonscat + "}}"; + add_text.add_text(page, commonscat, summary, None, None, always); + #updateInterwiki(page, commonscatpage) + return (True, always); + +def getCommonscat (name = ""): + ''' + This function will retun a page object of the commons page + If the page is a redirect this function tries to follow it. 
+ If the page doesnt exists the function will return None + ''' + #wikipedia.output("getCommonscat: " + name ); + result = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name); + if not result.exists(): + #wikipedia.output("getCommonscat : The category doesnt exist."); + return None + elif result.isRedirectPage(): + #wikipedia.output("getCommonscat : The category is a redirect"); + return result.getRedirectTarget(); + elif "Category redirect" in result.templates(): + #wikipedia.output("getCommonscat : The category is a category redirect"); + for template in result.templatesWithParams(): + if ((template[0]=="Category redirect") and (len(template[1]) > 0)): + return getCommonscat(template[1][0]) + elif result.isDisambig(): + #wikipedia.output("getCommonscat : The category is disambigu"); + return None + else: + return result + +def main(): + ''' + Parse the command line arguments and get a pagegenerator to work on. + Iterate through all the pages. + ''' + summary = None; generator = None; always = False + # Load a lot of default generators + genFactory = pagegenerators.GeneratorFactory() + + for arg in wikipedia.handleArgs(): + if arg.startswith('-summary'): + if len(arg) == 8: + summary = wikipedia.input(u'What summary do you want to use?') + else: + summary = arg[9:] + elif arg.startswith('-page'): + if len(arg) == 5: + generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))] + else: + generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])] + elif arg == '-always': + always = True + else: + generator = genFactory.handleArg(arg) + if not generator: + raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!') + + pregenerator = pagegenerators.PreloadingGenerator(generator) + + for page in pregenerator: + (status, always) = addCommonscat(page, summary, always) + +if __name__ == "__main__": + try: + main() + finally: + wikipedia.stopme()
Property changes on: trunk/pywikipedia/commonscat.py ___________________________________________________________________ Name: svn:eol-style + native
Property changes on: trunk/pywikipedia/copyright_clean.py ___________________________________________________________________ Name: svn:eol-style + native
Property changes on: trunk/pywikipedia/copyright_put.py ___________________________________________________________________ Name: svn:eol-style + native
Modified: trunk/pywikipedia/delinker.py =================================================================== --- trunk/pywikipedia/delinker.py 2008-05-01 18:50:57 UTC (rev 5291) +++ trunk/pywikipedia/delinker.py 2008-05-02 00:05:37 UTC (rev 5292) @@ -1,17 +1,17 @@ -# Helper script for delinker and image_replacer - -__version__ = '$Id: $' - -import wikipedia, config - -import sys, os -sys.path.insert(0, 'commonsdelinker') - -module = 'delinker' -if len(sys.argv) > 1: - if sys.argv[1] == 'replacer': - del sys.argv[1] - module = 'image_replacer' - -bot = __import__(module) +# Helper script for delinker and image_replacer + +__version__ = '$Id: $' + +import wikipedia, config + +import sys, os +sys.path.insert(0, 'commonsdelinker') + +module = 'delinker' +if len(sys.argv) > 1: + if sys.argv[1] == 'replacer': + del sys.argv[1] + module = 'image_replacer' + +bot = __import__(module) bot.main() \ No newline at end of file
Property changes on: trunk/pywikipedia/delinker.py ___________________________________________________________________ Name: svn:eol-style + native
Modified: trunk/pywikipedia/featuredcount.py =================================================================== --- trunk/pywikipedia/featuredcount.py 2008-05-01 18:50:57 UTC (rev 5291) +++ trunk/pywikipedia/featuredcount.py 2008-05-02 00:05:37 UTC (rev 5292) @@ -1,43 +1,43 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" -This script only counts how many have featured articles all wikipedias. - -usage: featuredcount.py - -""" -__version__ = '$Id: featured.py 4811 2008-01-05 16:22:45Z leogregianin $' - -# -# Distributed under the terms of the MIT license. -# - -import sys -import wikipedia, catlib -from featured import featured_name - -def featuredArticles(site): - method=featured_name[site.lang][0] - name=featured_name[site.lang][1] - args=featured_name[site.lang][2:] - raw=method(site, name, *args) - arts=[] - for p in raw: - if p.namespace()==0: - arts.append(p) - elif p.namespace()==1: - arts.append(wikipedia.Page(p.site(), p.titleWithoutNamespace())) - wikipedia.output('\03{lightred}** wikipedia:%s has %i featured articles\03{default}' % (site.lang, len(arts))) - -if __name__=="__main__": - mysite=wikipedia.getSite() - fromlang=featured_name.keys() - fromlang.sort() - try: - for ll in fromlang: - fromsite=wikipedia.Site(ll) - if not fromsite==wikipedia.getSite(): - arts=featuredArticles(fromsite) - arts_mysite=featuredArticles(mysite) - finally: - wikipedia.stopme() +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +This script only counts how many have featured articles all wikipedias. + +usage: featuredcount.py + +""" +__version__ = '$Id: featured.py 4811 2008-01-05 16:22:45Z leogregianin $' + +# +# Distributed under the terms of the MIT license. 
+# + +import sys +import wikipedia, catlib +from featured import featured_name + +def featuredArticles(site): + method=featured_name[site.lang][0] + name=featured_name[site.lang][1] + args=featured_name[site.lang][2:] + raw=method(site, name, *args) + arts=[] + for p in raw: + if p.namespace()==0: + arts.append(p) + elif p.namespace()==1: + arts.append(wikipedia.Page(p.site(), p.titleWithoutNamespace())) + wikipedia.output('\03{lightred}** wikipedia:%s has %i featured articles\03{default}' % (site.lang, len(arts))) + +if __name__=="__main__": + mysite=wikipedia.getSite() + fromlang=featured_name.keys() + fromlang.sort() + try: + for ll in fromlang: + fromsite=wikipedia.Site(ll) + if not fromsite==wikipedia.getSite(): + arts=featuredArticles(fromsite) + arts_mysite=featuredArticles(mysite) + finally: + wikipedia.stopme()
Property changes on: trunk/pywikipedia/featuredcount.py ___________________________________________________________________ Name: svn:eol-style + native
Property changes on: trunk/pywikipedia/generate_user_files.py ___________________________________________________________________ Name: svn:eol-style + native
Modified: trunk/pywikipedia/lonelypages.py =================================================================== --- trunk/pywikipedia/lonelypages.py 2008-05-01 18:50:57 UTC (rev 5291) +++ trunk/pywikipedia/lonelypages.py 2008-05-02 00:05:37 UTC (rev 5292) @@ -1,265 +1,265 @@ -#!/usr/bin/python -# -*- coding: utf-8 -*- -""" -This is a script written to add the template "orphan" to the pages that aren't linked by other pages. -It can give some strange Errors sometime, I hope that all of them are fixed in this version. - -These command line parameters can be used to specify which pages to work on: - -&params; - --xml Retrieve information from a local XML dump (pages-articles - or pages-meta-current, see http://download.wikimedia.org). - Argument can also be given as "-xml:filename". - --page Only edit a specific page. - Argument can also be given as "-page:pagetitle". You can - give this parameter multiple times to edit multiple pages. - -Furthermore, the following command line parameters are supported: - --enable: - Enable or disable the bot via a Wiki Page. - --disambig: - Set a page where the bot save the name of the disambig pages found (default: skip the pages) - --limit: - Set how many pages check. - --always - Always say yes, won't ask - ---- FixMes --- -* Check that all the code hasn't bugs - ---- Credit and Help --- -This Script has been developed by Pietrodn and Filnik on botwiki. If you want to help us -improving our script archive and pywikipediabot's archive or you simply need help -you can find us here: http://botwiki.sno.cc - ---- Examples --- -python lonelypages.py -enable:User:Bot/CheckBot -always -""" -# -# (C) Pietrodn, it.wiki 2006-2007 -# (C) Filnik, it.wiki 2007 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id: lonelypages.py,v 1.0 2007/12/28 19.16.00 filnik Exp$' -# - -import wikipedia, pagegenerators -import re - -# This is required for the text that is shown when you run this script -# with the parameter -help.
-docuReplacements = { - '&params;': pagegenerators.parameterHelp, -} - -##################################################### -# Here you have to put the config for your Project. # -##################################################### - -# ************* Modify only below! ************* # - -# Template to add in the orphan pages -Template = { - 'en':u'{{Orphan|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}', - 'it':u'{{O||mese={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}', - 'zh':u'{{subst:Orphan/auto}}', - } - -# Comment that the Bot will use to put the template -commento = { - 'en':u'Bot: Orphan page, add template', - 'it':u'Bot: Voce orfana, aggiungo template {{O}}', - 'zh':u'機器人: 本頁的鏈入頁面太少', - } - -# When you add a disambig to the list of disambig pages -#(if you set disambigPage to None, you can put here nothing) -commenttodisambig = { - 'en':u'Bot: Adding a disambig page', - 'it':u'Bot: Aggiungo una disambigua', - 'zh':u'機器人: 增加消歧義頁面', - } - -# Use regex to prevent to put the same template twice! -# If you need help with regex, ask on botwiki ( http://botwiki.sno.cc ) -# Warning: put always "()" inside the regex, so the bot will find "something" -exception = { - 'en': [r'{{(?:template:|)(orphan)[|}]', r'{{(?:template:|)(wi)[|}]'], - 'it': [r'{{(?:template:|)(o)[|}]'], - 'zh': [r'{{(?:template:|)(orphan)[|}]'], - } - -# ************* Modify only above! ************* # - -def main(): - # Load the configurations in the function namespace - global commento; global Template; global disambigPage; global commenttodisambig - global exception - - enablePage = None # Check if someone set an enablePage or not - limit = 50000 # All the pages! (I hope that there aren't so many lonely pages in a project..) - generator = None # Check if the bot should use the default generator or not - genFactory = pagegenerators.GeneratorFactory() # Load all the default generators!
- nwpages = False # Check variable for newpages - always = False # Check variable for always - disambigPage = None # If no disambigPage given, not use it. - # Arguments! - for arg in wikipedia.handleArgs(): - if arg.startswith('-enable'): - if len(arg) == 7: - enablePage = wikipedia.input(u'Would you like to check if the bot should run or not?') - else: - enablePage = arg[8:] - if arg.startswith('-disambig'): - if len(arg) == 9: - disambigPage = wikipedia.input(u'In which page should the bot save the disambig pages?') - else: - disambigPage = arg[10:] - elif arg.startswith('-limit'): - if len(arg) == 6: - limit = int(wikipedia.input(u'How many pages do you want to check?')) - else: - limit = int(arg[7:]) - elif arg.startswith('-newpages'): - if len(arg) == 9: - nwlimit = 50 # Default: 50 pages - else: - nwlimit = int(arg[10:]) - generator = wikipedia.getSite().newpages(number = nwlimit) - nwpages = True - elif arg.startswith('-page'): - if len(arg) == 5: - generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'How many pages do you want to check?'))] - else: - generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])] - elif arg == '-always': - always = True - else: - generator = genFactory.handleArg(arg) - # Retrive the site - wikiSite = wikipedia.getSite() - # If the generator is not given, use the default one - if generator == None: - generator = wikiSite.lonelypages(repeat = True, number = limit) - # Take the configurations according to our project - comment = wikipedia.translate(wikiSite, commento) - commentdisambig = wikipedia.translate(wikiSite, commenttodisambig) - template = wikipedia.translate(wikiSite, Template) - exception = wikipedia.translate(wikiSite, exception) - # EnablePage part - if enablePage != None: - # Define the Page Object - enable = wikipedia.Page(wikiSite, enablePage) - # Loading the page's data - try: - getenable = enable.get() - except wikipedia.NoPage: - wikipedia.output(u"%s doesn't esist, I use the page as if it was 
blank!" % enable.title()) - getenable = '' - except wikiepedia.IsRedirect: - wikipedia.output(u"%s is a redirect, skip!" % enable.title()) - getenable = '' - # If the enable page is set to disable, turn off the bot - # (useful when the bot is run on a server) - if getenable != 'enable': - wikipedia.output('The bot is disabled') - wikipedia.stopme() - # DisambigPage part - if disambigPage != None: - disambigpage = wikipedia.Page(wikiSite, disambigPage) - try: - disambigtext = disambigpage.get() - except wikipedia.NoPage: - wikipedia.output(u"%s doesn't esist, skip!" % disambigpage.title()) - disambigtext = '' - except wikiepedia.IsRedirect: - wikipedia.output(u"%s is a redirect, don't use it!" % disambigpage.title()) - disambigPage = None - # Main Loop - for page in generator: - if nwpages == True: - page = page[0] # The newpages generator returns a tuple, not a Page object. - wikipedia.output(u"Checking %s..." % page.title()) - # Used to skip the first pages in test phase... - #if page.title()[0] in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']: - #continue - if page.isRedirectPage(): # If redirect, skip! - wikipedia.output(u'%s is a redirect! Skip...' % page.title()) - continue - # refs is not a list, it's a generator while resList... is a list, yes. - refs = page.getReferences() - refsList = list() - for j in refs: - if j == None: - # We have to find out why the function returns that value - wikipedia.output(u'Error: 1 --> Skip page') - continue - refsList.append(j) - # This isn't possible with a generator - if refsList != []: - wikipedia.output(u"%s isn't orphan! Skip..." % page.title()) - continue - # Never understood how a list can turn in "None", but it happened :-S - elif refsList == None: - # We have to find out why the function returns that value - wikipedia.output(u'Error: 2 --> Skip page') - continue - else: - # Ok, no refs, no redirect... 
let's check if there's already the template - try: - oldtxt = page.get() - except wikipedia.NoPage: - wikipedia.output(u"%s doesn't exist! Skip..." % page.title()) - continue - except wikipedia.IsRedirectPage: - wikipedia.output(u"%s is a redirect! Skip..." % page.title()) - continue - # I've used a loop in a loop. If I use continue in the second loop, it won't do anything - # in the first. So let's create a variable to avoid this problem. - Find = False - for regexp in exception: - res = re.findall(regexp, oldtxt.lower()) - # Found a template! Let's skip the page! - if res != []: - wikipedia.output(u'Your regex has found something in %s, skipping...' % page.title()) - Find = True - break - # Skip the page.. - if Find: - continue - # Is the page a disambig? - if page.isDisambig() and disambigPage != None: - wikipedia.output(u'%s is a disambig page, report..' % page.title()) - if not page.title().lower() in disambigtext.lower(): - disambigtext = u"%s\n*[[%s]]" % (disambigtext, page.title()) - disambigpage.put(disambigtext, commentdisambig) - continue - # Is the page a disambig but there's not disambigPage? Skip! - elif page.isDisambig(): - wikipedia.output(u'%s is a disambig page, skip...' % page.title()) - continue - else: - # Ok, the page need the template. Let's put it there! - newtxt = u"%s\n%s" % (template, oldtxt) # Adding the template in the text - wikipedia.output(u"\t\t>>> %s <<<" % page.title()) # Showing the title - wikipedia.showDiff(oldtxt, newtxt) # Showing the changes - choice = 'y' # Default answer - if not always: - choice = wikipedia.inputChoice(u'Orphan page found, shall I add the template?', [u'Yes', u'No', u'All'], [u'y', u'n', u'a'], [u'Y', u'N', 'A']) - if choice.lower() in [u'a', u'all']: - always = True - choice = 'y' - if choice.lower() in [u'y', u'yes']: - try: - page.put(newtxt, comment) - except wikipedia.EditConflict: - wikipedia.output(u'Edit Conflict! 
Skip...') - continue -if __name__ == '__main__': - try: - main() - finally: - wikipedia.stopme() +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +This is a script written to add the template "orphan" to the pages that aren't linked by other pages. +It can give some strange Errors sometime, I hope that all of them are fixed in this version. + +These command line parameters can be used to specify which pages to work on: + +&params; + +-xml Retrieve information from a local XML dump (pages-articles + or pages-meta-current, see http://download.wikimedia.org). + Argument can also be given as "-xml:filename". + +-page Only edit a specific page. + Argument can also be given as "-page:pagetitle". You can + give this parameter multiple times to edit multiple pages. + +Furthermore, the following command line parameters are supported: + +-enable: - Enable or disable the bot via a Wiki Page. + +-disambig: - Set a page where the bot save the name of the disambig pages found (default: skip the pages) + +-limit: - Set how many pages check. + +-always - Always say yes, won't ask + +--- FixMes --- +* Check that all the code hasn't bugs + +--- Credit and Help --- +This Script has been developed by Pietrodn and Filnik on botwiki. If you want to help us +improving our script archive and pywikipediabot's archive or you simply need help +you can find us here: http://botwiki.sno.cc + +--- Examples --- +python lonelypages.py -enable:User:Bot/CheckBot -always +""" +# +# (C) Pietrodn, it.wiki 2006-2007 +# (C) Filnik, it.wiki 2007 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id: lonelypages.py,v 1.0 2007/12/28 19.16.00 filnik Exp$' +# + +import wikipedia, pagegenerators +import re + +# This is required for the text that is shown when you run this script +# with the parameter -help. +docuReplacements = { + '&params;': pagegenerators.parameterHelp, +} + +##################################################### +# Here you have to put the config for your Project.
# +##################################################### + +# ************* Modify only below! ************* # + +# Template to add in the orphan pages +Template = { + 'en':u'{{Orphan|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}', + 'it':u'{{O||mese={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}', + 'zh':u'{{subst:Orphan/auto}}', + } + +# Comment that the Bot will use to put the template +commento = { + 'en':u'Bot: Orphan page, add template', + 'it':u'Bot: Voce orfana, aggiungo template {{O}}', + 'zh':u'機器人: 本頁的鏈入頁面太少', + } + +# When you add a disambig to the list of disambig pages +#(if you set disambigPage to None, you can put here nothing) +commenttodisambig = { + 'en':u'Bot: Adding a disambig page', + 'it':u'Bot: Aggiungo una disambigua', + 'zh':u'機器人: 增加消歧義頁面', + } + +# Use regex to prevent to put the same template twice! +# If you need help with regex, ask on botwiki ( http://botwiki.sno.cc ) +# Warning: put always "()" inside the regex, so the bot will find "something" +exception = { + 'en': [r'{{(?:template:|)(orphan)[|}]', r'{{(?:template:|)(wi)[|}]'], + 'it': [r'{{(?:template:|)(o)[|}]'], + 'zh': [r'{{(?:template:|)(orphan)[|}]'], + } + +# ************* Modify only above! ************* # + +def main(): + # Load the configurations in the function namespace + global commento; global Template; global disambigPage; global commenttodisambig + global exception + + enablePage = None # Check if someone set an enablePage or not + limit = 50000 # All the pages! (I hope that there aren't so many lonely pages in a project..) + generator = None # Check if the bot should use the default generator or not + genFactory = pagegenerators.GeneratorFactory() # Load all the default generators! + nwpages = False # Check variable for newpages + always = False # Check variable for always + disambigPage = None # If no disambigPage given, not use it. + # Arguments! 
+ for arg in wikipedia.handleArgs(): + if arg.startswith('-enable'): + if len(arg) == 7: + enablePage = wikipedia.input(u'Would you like to check if the bot should run or not?') + else: + enablePage = arg[8:] + if arg.startswith('-disambig'): + if len(arg) == 9: + disambigPage = wikipedia.input(u'In which page should the bot save the disambig pages?') + else: + disambigPage = arg[10:] + elif arg.startswith('-limit'): + if len(arg) == 6: + limit = int(wikipedia.input(u'How many pages do you want to check?')) + else: + limit = int(arg[7:]) + elif arg.startswith('-newpages'): + if len(arg) == 9: + nwlimit = 50 # Default: 50 pages + else: + nwlimit = int(arg[10:]) + generator = wikipedia.getSite().newpages(number = nwlimit) + nwpages = True + elif arg.startswith('-page'): + if len(arg) == 5: + generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'How many pages do you want to check?'))] + else: + generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])] + elif arg == '-always': + always = True + else: + generator = genFactory.handleArg(arg) + # Retrive the site + wikiSite = wikipedia.getSite() + # If the generator is not given, use the default one + if generator == None: + generator = wikiSite.lonelypages(repeat = True, number = limit) + # Take the configurations according to our project + comment = wikipedia.translate(wikiSite, commento) + commentdisambig = wikipedia.translate(wikiSite, commenttodisambig) + template = wikipedia.translate(wikiSite, Template) + exception = wikipedia.translate(wikiSite, exception) + # EnablePage part + if enablePage != None: + # Define the Page Object + enable = wikipedia.Page(wikiSite, enablePage) + # Loading the page's data + try: + getenable = enable.get() + except wikipedia.NoPage: + wikipedia.output(u"%s doesn't esist, I use the page as if it was blank!" % enable.title()) + getenable = '' + except wikiepedia.IsRedirect: + wikipedia.output(u"%s is a redirect, skip!" 
% enable.title()) + getenable = '' + # If the enable page is set to disable, turn off the bot + # (useful when the bot is run on a server) + if getenable != 'enable': + wikipedia.output('The bot is disabled') + wikipedia.stopme() + # DisambigPage part + if disambigPage != None: + disambigpage = wikipedia.Page(wikiSite, disambigPage) + try: + disambigtext = disambigpage.get() + except wikipedia.NoPage: + wikipedia.output(u"%s doesn't esist, skip!" % disambigpage.title()) + disambigtext = '' + except wikiepedia.IsRedirect: + wikipedia.output(u"%s is a redirect, don't use it!" % disambigpage.title()) + disambigPage = None + # Main Loop + for page in generator: + if nwpages == True: + page = page[0] # The newpages generator returns a tuple, not a Page object. + wikipedia.output(u"Checking %s..." % page.title()) + # Used to skip the first pages in test phase... + #if page.title()[0] in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']: + #continue + if page.isRedirectPage(): # If redirect, skip! + wikipedia.output(u'%s is a redirect! Skip...' % page.title()) + continue + # refs is not a list, it's a generator while resList... is a list, yes. + refs = page.getReferences() + refsList = list() + for j in refs: + if j == None: + # We have to find out why the function returns that value + wikipedia.output(u'Error: 1 --> Skip page') + continue + refsList.append(j) + # This isn't possible with a generator + if refsList != []: + wikipedia.output(u"%s isn't orphan! Skip..." % page.title()) + continue + # Never understood how a list can turn in "None", but it happened :-S + elif refsList == None: + # We have to find out why the function returns that value + wikipedia.output(u'Error: 2 --> Skip page') + continue + else: + # Ok, no refs, no redirect... let's check if there's already the template + try: + oldtxt = page.get() + except wikipedia.NoPage: + wikipedia.output(u"%s doesn't exist! Skip..." 
% page.title()) + continue + except wikipedia.IsRedirectPage: + wikipedia.output(u"%s is a redirect! Skip..." % page.title()) + continue + # I've used a loop in a loop. If I use continue in the second loop, it won't do anything + # in the first. So let's create a variable to avoid this problem. + Find = False + for regexp in exception: + res = re.findall(regexp, oldtxt.lower()) + # Found a template! Let's skip the page! + if res != []: + wikipedia.output(u'Your regex has found something in %s, skipping...' % page.title()) + Find = True + break + # Skip the page.. + if Find: + continue + # Is the page a disambig? + if page.isDisambig() and disambigPage != None: + wikipedia.output(u'%s is a disambig page, report..' % page.title()) + if not page.title().lower() in disambigtext.lower(): + disambigtext = u"%s\n*[[%s]]" % (disambigtext, page.title()) + disambigpage.put(disambigtext, commentdisambig) + continue + # Is the page a disambig but there's not disambigPage? Skip! + elif page.isDisambig(): + wikipedia.output(u'%s is a disambig page, skip...' % page.title()) + continue + else: + # Ok, the page need the template. Let's put it there! + newtxt = u"%s\n%s" % (template, oldtxt) # Adding the template in the text + wikipedia.output(u"\t\t>>> %s <<<" % page.title()) # Showing the title + wikipedia.showDiff(oldtxt, newtxt) # Showing the changes + choice = 'y' # Default answer + if not always: + choice = wikipedia.inputChoice(u'Orphan page found, shall I add the template?', [u'Yes', u'No', u'All'], [u'y', u'n', u'a'], [u'Y', u'N', 'A']) + if choice.lower() in [u'a', u'all']: + always = True + choice = 'y' + if choice.lower() in [u'y', u'yes']: + try: + page.put(newtxt, comment) + except wikipedia.EditConflict: + wikipedia.output(u'Edit Conflict! Skip...') + continue +if __name__ == '__main__': + try: + main() + finally: + wikipedia.stopme()
Property changes on: trunk/pywikipedia/lonelypages.py ___________________________________________________________________ Name: svn:eol-style + native
Property changes on: trunk/pywikipedia/noreferences.py ___________________________________________________________________ Name: svn:eol-style + native
Property changes on: trunk/pywikipedia/pageimport.py ___________________________________________________________________ Name: svn:eol-style + native