Revision: 5294
Author: filnik
Date: 2008-05-02 11:05:19 +0000 (Fri, 02 May 2008)
Log Message:
-----------
Bugfix. For deleted images there's no hash, so let's skip them instead of raising an error
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-05-02 00:13:27 UTC (rev 5293)
+++ trunk/pywikipedia/checkimages.py 2008-05-02 11:05:19 UTC (rev 5294)
@@ -59,7 +59,6 @@
* Add the "catch the language" function for commons.
* Fix and reorganise the new documentation
* Add a report for the image tagged.
-* Implement: Special:FileDuplicateSearch/Image.jpg
"""
#
@@ -77,25 +76,20 @@
locale.setlocale(locale.LC_ALL, '')
-class NoHash(wikipedia.Error):
- """ The APIs don't return any Hash for the image searched.
- Really Strange, better to raise an error.
- """
-
#########################################################################################################################
# <------------------------------------------- Change only below! ----------------------------------------------------->#
#########################################################################################################################
# That's what you want that will be added. (i.e. the {{no source}} with the right day/month/year )
n_txt = {
- 'commons':'\n{{subst:nld}}',
+ 'commons':u'\n{{subst:nld}}',
'de' :u'{{Benutzer:ABF/D|~~~~}} {{Dateiüberprüfung/benachrichtigt (Kategorie)|{{subst:LOCALYEAR}}|{{subst:LOCALMONTH}}|{{subst:LOCALDAY}}}} {{Dateiüberprüfung/benachrichtigt (Text)|Lizenz|||||}} --This was added by ~~~~-- ',
- 'en' :'\n{{subst:nld}}',
- 'it' :'\n{{subst:unverdata}}',
- 'ja' :'{{subst:Nsd}}',
+ 'en' :u'\n{{subst:nld}}',
+ 'it' :u'\n{{subst:unverdata}}',
+ 'ja' :u'{{subst:Nsd}}',
'hu' :u'\n{{nincslicenc|~~~~~}}',
- 'ta' :'\n{{subst:nld}}',
- 'zh' :'{{subst:No license/auto}}',
+ 'ta' :u'\n{{subst:nld}}',
+ 'zh' :u'{{subst:No license/auto}}',
}
# Text that the bot will try to see if there's already or not. If there's a
@@ -129,22 +123,22 @@
# When the Bot find that the usertalk is empty is not pretty to put only the no source without the welcome, isn't it?
empty = {
- 'commons':'{{subst:welcome}}\n~~~~\n',
- 'de':'{{subst:willkommen}} ~~~~',
- 'en' :'{{welcome}}\n~~~~\n',
- 'it' :'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->',
- 'ja':'{{welcome}}\n--~~~~\n',
+ 'commons':u'{{subst:welcome}}\n~~~~\n',
+ 'de' :u'{{subst:willkommen}} ~~~~',
+ 'en' :u'{{welcome}}\n~~~~\n',
+ 'it' :u'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->',
+ 'ja' :u'{{welcome}}\n--~~~~\n',
'hu' :u'{{subst:Üdvözlet|~~~~}}\n',
- 'zh':'{{subst:welcome|sign=~~~~}}',
+ 'zh' :u'{{subst:welcome|sign=~~~~}}',
}
# Summary that the bot use when it notify the problem with the image's license
comm2 = {
- 'ar' :"بوت: طلب معلومات المصدر." ,
- 'commons':"Bot: Requesting source information." ,
+ 'ar' :u"بوت: طلب معلومات المصدر." ,
+ 'commons':u"Bot: Requesting source information." ,
'de' :u'Bot:Notify User',
- 'en' :"Bot: Requesting source information." ,
- 'it' :"Bot: Notifico l'unverified",
+ 'en' :u"Bot: Requesting source information." ,
+ 'it' :u"Bot: Notifico l'unverified",
'ja' :u"ロボットによる:出典とライセンス明記のお願い",
'hu' :u'Robot: Forrásinformáció kérése',
'ja' :u'{{welcome}}\n--~~~~\n',
@@ -159,7 +153,7 @@
'commons':u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}",
'en' :u"{{db-meta|The file has .%s as extension.}}",
'it' :u'{{cancella subito|motivo=Il file ha come estensione ".%s"}}',
- 'ja':u'{{db|知らないファイルフォーマット%s}}',
+ 'ja' :u'{{db|知らないファイルフォーマット%s}}',
'hu' :u'{{azonnali|A fájlnak .%s a kiterjesztése}}',
'ta' :u'{{delete|இந்தக் கோப்பு .%s என்றக் கோப்பு நீட்சியைக் கொண்டுள்ளது.}}',
'zh' :u'{{delete|未知檔案格式%s}}',
@@ -228,7 +222,7 @@
# NOTE: YOUR Botnick is automatically added. It's not required to add it twice.
bot_list = {
'commons':[u'Siebot', u'CommonsDelinker', u'Filbot', u'John Bot', u'Sz-iwbot', u'ABFbot'],
- 'de' :['ABFbot'],
+ 'de' :[u'ABFbot'],
'en' :[u'OrphanBot'],
'it' :[u'Filbot', u'Nikbot', u'.snoopyBot.'],
'ja' :[u'alexbot'],
@@ -287,14 +281,14 @@
# The summary of the report
comm10 = {
'commons':u'Bot: Updating the log',
- 'ar':u'بوت: تحديث السجل',
- 'de': u'Bot:schreibe Log',
- 'en':u'Bot: Updating the log',
- 'it':u'Bot: Aggiorno il log',
- 'ja': u'ロボットによる:更新',
- 'hu': u'Robot: A napló frissítése',
- 'ta': u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்',
- 'zh': u'機器人:更新記錄',
+ 'ar' :u'بوت: تحديث السجل',
+ 'de' :u'Bot:schreibe Log',
+ 'en' :u'Bot: Updating the log',
+ 'it' :u'Bot: Aggiorno il log',
+ 'ja' :u'ロボットによる:更新',
+ 'hu' :u'Robot: A napló frissítése',
+ 'ta' :u'தானியங்கி:பட்டியலை இற்றைப்படுத்தல்',
+ 'zh' :u'機器人:更新記錄',
}
# If a template isn't a license but it's included on a lot of images, that can be skipped to
@@ -303,12 +297,12 @@
# Warning 2: The bot will use regex, make the names compatible, please (don't add "Template:" or {{
# because they are already put in the regex).
HiddenTemplate = {
- 'commons':['information', 'trademarked', 'trademark'],
+ 'commons':[u'information', u'trademarked', u'trademark'],
'de':[u'information'],
- 'en':['information'],
- 'it':['edp', 'informazioni[ _]file', 'information', 'trademark'],
+ 'en':[u'information'],
+ 'it':[u'edp', u'informazioni[ _]file', u'information', u'trademark'],
'ja':[u'Information'],
- 'hu':[u'információ','enwiki', 'azonnali'],
+ 'hu':[u'információ', u'enwiki', u'azonnali'],
'ta':[u'information'],
'zh':[u'information'],
}
@@ -316,7 +310,7 @@
# Template added when the bot finds only an hidden template and nothing else.
# Note: every __botnick__ will be repleaced with your bot's nickname (feel free not to use if you don't need it)
HiddenTemplateNotification = {
- 'commons': """\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
+ 'commons': u"""\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
'de': None,
'en': None,
'it': u"{{subst:Utente:Filbot/Template_insufficiente|%s}} --~~~~",
@@ -324,19 +318,25 @@
}
# Add your project (in alphabetical order) if you want that the bot start
-project_inserted = ['ar', 'commons', 'de', 'en', 'ja', 'hu', 'it', 'ta', 'zh']
+project_inserted = [u'ar', u'commons', u'de', u'en', u'ja', u'hu', u'it', u'ta', u'zh']
# Ok, that's all. What is below, is the rest of code, now the code is fixed and it will run correctly in your project.
#########################################################################################################################
# <------------------------------------------- Change only above! ----------------------------------------------------> #
#########################################################################################################################
+# Error Classes
class LogIsFull(wikipedia.Error):
"""An exception indicating that the log is full and the Bot cannot add other data to prevent Errors."""
class NothingFound(wikipedia.Error):
""" An exception indicating that a regex has return [] instead of results."""
+class NoHash(wikipedia.Error):
+ """ The APIs don't return any Hash for the image searched.
+ Really Strange, better to raise an error. """
+
+# Other common useful functions
def printWithTimeZone(message):
""" Function to print the messages followed by the TimeZone encoded correctly. """
if message[-1] != ' ':
@@ -578,7 +578,11 @@
if hash_found_list != []:
hash_found = hash_found_list[0]
else:
- raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
+ if imagePage.exists():
+ raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
+ else:
+ wikipedia.output(u'Image deleted before getting the Hash. Skipping...')
+ return False # Error, we need to skip the page.
get_duplicates = self.site.getUrl('/w/api.php?action=query&format=xml&list=allimages&aisha1=%s' % hash_found)
duplicates = re.findall(r'<img name="(.*?)".*?/>', get_duplicates)
if len(duplicates) > 1:
@@ -591,7 +595,8 @@
if duplicate == self.image:
continue # the image itself, not report also this as duplicate
repme += "\n**[[:Image:%s]]" % duplicate
- self.report_image(self.image, self.rep_page, self.com, repme + '\n', addings = False, regex = duplicateRegex)
+ self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = duplicateRegex)
+ return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None):
""" Function to report the images in the report page when needed. """
@@ -984,13 +989,6 @@
# the user has set 0 as images to skip
wikipedia.output(u'\t\t>> No images to skip...<<')
skip_list.append('skip = Off') # Only to print it once
- # Check on commons if there's already an image with the same name
- if commonsActive == True:
- response = mainClass.checkImageOnCommons(imageName)
- if response == False:
- continue
- if duplicatesActive == True:
- mainClass.checkImageDuplicated(imageName)
parentesi = False # parentesi are these in italian: { ( ) } []
delete = False
tagged = False
@@ -1006,6 +1004,16 @@
except wikipedia.IsRedirectPage:
wikipedia.output(u"The file description for %s is a redirect?!" % imageName )
continue
+ # Check on commons if there's already an image with the same name
+ if commonsActive == True:
+ response = mainClass.checkImageOnCommons(imageName)
+ if response == False:
+ continue
+ # Check if there are duplicates of the image on the project selected
+ if duplicatesActive == True:
+ response2 = mainClass.checkImageDuplicated(imageName)
+ if response2 == False:
+ continue
# Is the image already tagged? If yes, no need to double-check, skip
for i in TextFind:
# If there are {{ use regex, otherwise no (if there's not the {{ may not be a template
Revision: 5293
Author: siebrand
Date: 2008-05-02 00:13:27 +0000 (Fri, 02 May 2008)
Log Message:
-----------
More eol-style:native props
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
trunk/pywikipedia/commonsdelinker/plugins/__init__.py
trunk/pywikipedia/commonsdelinker/plugins/books.py
trunk/pywikipedia/commonsdelinker/plugins/debug.py
trunk/pywikipedia/commonsdelinker/plugins/flags.py
trunk/pywikipedia/families/wikia_family.py
trunk/pywikipedia/fixing_redirects.py
trunk/pywikipedia/maintenance/readtalk.py
Property Changed:
----------------
trunk/pywikipedia/blockpageschecker.py
trunk/pywikipedia/commonsdelinker/plugins/__init__.py
trunk/pywikipedia/commonsdelinker/plugins/books.py
trunk/pywikipedia/commonsdelinker/plugins/debug.py
trunk/pywikipedia/commonsdelinker/plugins/flags.py
trunk/pywikipedia/families/wikia_family.py
trunk/pywikipedia/fixing_redirects.py
trunk/pywikipedia/maintenance/readtalk.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/blockpageschecker.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,402 +1,402 @@
-# -*- coding: utf-8 -*-
-"""
-This is a script originally written by Wikihermit and then rewritten by Filnik,
-to delete the templates used to warn in the pages that a page is blocked,
-when the page isn't blocked at all. Indeed, very often sysops block the pages
-for a setted time but then the forget to delete the warning! This script is useful
-if you want to delete those useless warning left in these pages.
-
-Parameters:
-
-These command line parameters can be used to specify which pages to work on:
-
-¶ms;
-
--xml Retrieve information from a local XML dump (pages-articles
- or pages-meta-current, see http://download.wikimedia.org).
- Argument can also be given as "-xml:filename".
-
--page Only edit a specific page.
- Argument can also be given as "-page:pagetitle". You can
- give this parameter multiple times to edit multiple pages.
-
--protectedpages: Check all the blocked pages (useful when you have not categories
- or when you have problems with them. (add the namespace after ":" where
- you want to check - default checks all protected pages)
-
--moveprotected: Same as -protectedpages, for moveprotected pages
-
-Furthermore, the following command line parameters are supported:
-
--always Doesn't ask every time if the bot should make the change or not, do it always.
-
--debug When the bot can't delete the template from the page (wrong regex or something like that)
- it will ask you if it should open the page on your browser.
- (attention: pages included may give false positives..)
-
--move The bot will check if the page is blocked also for the move option, not only for edit
-
---- Warning! ---
-You have to edit this script in order to add your preferences
-otherwise the script won't work!
-
-If you have problems, ask on botwiki ( http://botwiki.sno.cc )
-or on IRC (#pywikipediabot)
-
---- Example of how to use the script ---
-
-python blockpageschecker.py -always
-
-python blockpageschecker.py -cat:Geography -always
-
-python blockpageschecker.py -debug -protectedpages:4
-
-"""
-#
-# (C) Monobi a.k.a. Wikihermit, 2007
-# (C) Filnik, 2007-2008
-# (C) NicDumZ, 2008
-#
-# Distributed under the terms of the MIT license.
-#
-__version__ = '$Id: blockpageschecker.py,v 1.5 2008/04/24 19.40.00 filnik Exp$'
-#
-
-import re, webbrowser
-import wikipedia, catlib, pagegenerators, config
-
-# This is required for the text that is shown when you run this script
-# with the parameter -help.
-docuReplacements = {
- '¶ms;': pagegenerators.parameterHelp,
-}
-
-#######################################################
-#--------------------- PREFERENCES -------------------#
-################### -- Edit below! -- #################
-
-# Added a new feature! Please update and add the settings in order
-# to improve the intelligence of this script ;-)
-# Regex to get the semi-protection template
-templateSemiProtection = {
- 'en': None,
- 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad\|.*?|\|.*?)\}\}',
- r'\{\{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad\|(?:.*?))\}\}'],
- 'fr': [ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^\}]*)\}\}'],
- 'ja':[ur'\{\{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
- 'zh':[ur'\{\{(?:[Tt]emplate:|)Protected|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)'],
- }
-# Regex to get the total-protection template
-templateTotalProtection = {
- 'en': None,
- 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad\|(?:.*?)|minaccia|cancellata)\}\}',
- r'\{\{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee].*?)\}\}', r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'],
- 'fr':[ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^\}]*)\}\}',
- ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^\}]*)\}\}'],
- 'ja':[ur'\{\{(?:[Tt]emplate:|)保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
- 'zh':[r'\{\{(?:[Tt]emplate:|)Protected|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)'],
- }
-# Regex to get the semi-protection move template
-templateSemiMoveProtection = {
- 'en': None,
- 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
- 'ja':[ur'\{\{(?:[Tt]emplate:|)移動半保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
- 'zh':[r'\{\{(?:[Tt]emplate:|)Protected|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Mini-protected|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Protected-logo|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)'],
- }
-# Regex to get the total-protection move template
-templateTotalMoveProtection = {
- 'en': None,
- 'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
- 'ja':[ur'\{\{(?:[Tt]emplate:|)移動保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
- 'zh':[ur'\{\{(?:[Tt]emplate:|)Protected|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)'],
- }
-# Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move
-templateNoRegex = {
- 'it':['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None],
- 'fr':['{{Semi-protection}}', '{{Protection}}', None, None],
- 'ja':[u'{{半保護}}', u'{{保護}}', u'{{移動半保護}}',u'{{移動保護}}'],
- 'zh':[u'{{Protected/semi}}',u'{{Protected}}',u'{{Protected/ms}}',u'{{Protected/move}}'],
- }
-
-# Category where the bot will check
-categoryToCheck = {
- 'en':[u'Category:Protected'],
- 'fr':[u'Category:Page semi-protégée', u'Category:Page protégée', u'Catégorie:Article protégé'],
- 'he':[u'קטגוריה:ויקיפדיה: דפים מוגנים', u'קטגוריה:ויקיפדיה: דפים מוגנים חלקית'],
- 'it':[u'Categoria:Pagine semiprotette', u'Categoria:Voci_protette'],
- 'ja':[u'Category:編集保護中の記事',u'Category:編集半保護中の記事',
- u'Category:移動保護中の記事',],
- 'pt':[u'Category:!Páginas protegidas', u'Category:!Páginas semiprotegidas'],
- 'zh':[u'Category:被保护的页面',u'Category:被保護的模板',u'Category:暂时不能移动的页面',
- u'Category:被半保护的页面',],
- }
-# Comment used when the Bot edits
-comment = {
- 'en':u'Bot: Deleting out-dated template',
- 'fr':u'Robot: Mise à jour des bandeaux de protection',
- 'he':u'בוט: מסיר תבנית שעבר זמנה',
- 'it':u'Bot: Tolgo o sistemo template di avviso blocco',
- 'ja':u'ロボットによる: 保護テンプレート除去',
- 'pt':u'Bot: Retirando predefinição de proteção',
- 'zh':u'機器人: 移除過期的保護模板',
- }
-# Check list to block the users that haven't set their preferences
-project_inserted = ['en', 'fr', 'it', 'ja', 'pt', 'zh']
-
-#######################################################
-#------------------ END PREFERENCES ------------------#
-################## -- Edit above! -- ##################
-
-def understandBlock(text, TTP, TSP, TSMP, TTMP):
- """ Understand if the page is blocked and if it has the right template """
- for catchRegex in TTP: # TTP = templateTotalProtection
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return ('sysop-total', catchRegex)
- for catchRegex in TSP:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return ('autoconfirmed-total', catchRegex)
- if TSMP != None and TTMP != None and TTP != TTMP and TSP != TSMP:
- for catchRegex in TSMP:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return ('sysop-move', catchRegex)
- for catchRegex in TTMP:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return ('autoconfirmed-move', catchRegex)
- return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
-
-def debugQuest(site, page):
- quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
- pathWiki = site.family.nicepath(site.lang)
- url = 'http://%s%s%s?&redirect=no' % (wikipedia.getSite().hostname(), pathWiki, page.urlname())
- while 1:
- if quest.lower() in ['b', 'B']:
- webbrowser.open(url)
- break
- elif quest.lower() in ['g', 'G']:
- import editarticle
- editor = editarticle.TextEditor()
- text = editor.edit(page.get())
- break
- elif quest.lower() in ['n', 'N']:
- break
- else:
- wikipedia.output(u'wrong entry, type "b", "g" or "n"')
- continue
-
-def main():
- """ Main Function """
- # Loading the comments
- global categoryToCheck; global comment; global project_inserted
- if config.mylang not in project_inserted:
- wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
- wikipedia.stopme()
- # always, define a generator to understand if the user sets one, defining what's genFactory
- always = False; generator = False; debug = False
- moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory()
- # To prevent Infinite loops
- errorCount = 0
- # Load the right site
- site = wikipedia.getSite()
- # Loading the default options.
- for arg in wikipedia.handleArgs():
- if arg == '-always':
- always = True
- elif arg == '-move':
- moveBlockCheck = True
- elif arg == '-debug':
- debug = True
- elif arg.startswith('-protectedpages'):
- if len(arg) == 15:
- generator = site.protectedpages(namespace = 0)
- else:
- generator = site.protectedpages(namespace = int(arg[16:]))
- elif arg.startswith('-moveprotected'):
- if len(arg) == 14:
- generator = site.protectedpages(namespace = 0, type = 'move')
- else:
- generator = site.protectedpages(namespace = int(arg[16:]),
- type = 'move')
- elif arg.startswith('-page'):
- if len(arg) == 5:
- generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
- else:
- generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
- else:
- generator = genFactory.handleArg(arg)
- # Take the right templates to use, the category and the comment
- TSP = wikipedia.translate(site, templateSemiProtection)
- TTP = wikipedia.translate(site, templateTotalProtection)
- TSMP = wikipedia.translate(site, templateSemiMoveProtection)
- TTMP = wikipedia.translate(site, templateTotalMoveProtection)
- TNR = wikipedia.translate(site, templateNoRegex)
-
- category = wikipedia.translate(site, categoryToCheck)
- commentUsed = wikipedia.translate(site, comment)
- if not generator:
- generator = list()
- wikipedia.output(u'Loading categories...')
- # Define the category if no other generator has been setted
- for CAT in category:
- cat = catlib.Category(site, CAT)
- # Define the generator
- gen = pagegenerators.CategorizedPageGenerator(cat)
- for pageCat in gen:
- generator.append(pageCat)
- wikipedia.output(u'Categories loaded, start!')
- # Main Loop
- preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60)
- for page in preloadingGen:
- pagename = page.aslink()
- wikipedia.output('Loading %s...' % pagename)
- try:
- text = page.get()
- restrictions = page.getRestrictions()
- except wikipedia.NoPage:
- wikipedia.output("%s doesn't exist! Skipping..." % pagename)
- continue
- except wikipedia.IsRedirectPage:
- wikipedia.output("%s is a redirect! Skipping..." % pagename)
- if debug:
- debugQuest(site, page)
- continue
- """
- # This check does not work :
- # PreloadingGenerator cannot set correctly page.editRestriction
- # (see bug #1949476 )
- if not page.canBeEdited():
- wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
- continue
- """
- editRestr = restrictions['edit']
- if editRestr and editRestr[0] == 'sysop':
- try:
- config.sysopnames[site.family.name][site.lang]
- except:
- wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
- continue
-
- # Understand, according to the template in the page, what should be the protection
- # and compare it with what there really is.
- TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP)
- # Only to see if the text is the same or not...
- oldtext = text
- # keep track of the changes for each step (edit then move)
- changes = -1
-
- if not editRestr:
- # page is not edit-protected
- # Deleting the template because the page doesn't need it.
- replaceToPerform = u'|'.join(TTP + TSP)
- texti, changes = re.subn('(?:<noinclude>|)(%s)(?:</noinclude>|)' % replaceToPerform, '', text)
- wikipedia.output(u'The page is editable for all, deleting the template...')
-
- elif editRestr[0] == 'sysop':
- # total edit protection
- if TemplateInThePage[0] == 'sysop-total' and TTP != None:
- msg = 'The page is protected to the sysop'
- if not moveBlockCheck:
- msg += ', skipping...'
- wikipedia.output(msg)
- else:
- wikipedia.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...')
- text, changes = re.subn(TemplateInThePage[1], TNR[1], text)
-
- elif TSP != None:
- # implicitely editRestr[0] = 'autoconfirmed', edit-Semi-protection
- if TemplateInThePage[0] == 'autoconfirmed-total':
- msg = 'The page is editable only for the autoconfirmed users'
- if not moveBlockCheck:
- msg += ', skipping...'
- wikipedia.output(msg)
- else:
- wikipedia.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...')
- text, changes = re.subn(TemplateInThePage[1], TNR[0], text)
-
- if changes == 0:
- # We tried to fix edit-protection templates, but it did not work.
- wikipedia.output('Warning : No edit-protection template could be found')
-
- if moveBlockCheck:
- # checking move protection now
- moveRestr = restrictions['move']
- changes = -1
-
- if not moveRestr:
- wikipedia.output(u'The page is movable for all, deleting the template...')
- # Deleting the template because the page doesn't need it.
- replaceToPerform = u'|'.join(TSMP + TTMP)
- text, changes = re.subn('(?:<noinclude>|)(%s)(?:</noinclude>|)' % replaceToPerform, '', text)
-
- elif moveRestr[0] == 'sysop':
- # move-total-protection
- if TemplateInThePage[0] == 'sysop-move' and TTMP != None:
- wikipedia.output(u'The page is protected from moving to the sysop, skipping...')
- else:
- wikipedia.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...')
- text, changes = re.subn(TemplateInThePage[1], TNR[3], text)
-
- elif TSMP != None:
- # implicitely moveRestr[0] = 'autoconfirmed', move-semi-protection
- if TemplateInThePage[0] == 'autoconfirmed-move':
- wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...')
- else:
- wikipedia.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...')
- text, changes = re.subn(TemplateInThePage[1], TNR[2], text)
-
- if changes == 0:
- # We tried to fix move-protection templates, but it did not work.
- wikipedia.output('Warning : No move-protection template could be found')
-
-
- if oldtext != text:
- # Ok, asking if the change has to be performed and do it if yes.
- wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
- wikipedia.showDiff(oldtext, text)
- choice = ''
- while 1:
- if not always:
- choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
- if choice.lower() in ['a', 'all']:
- always = True
- if choice.lower() in ['n', 'no']:
- break
- if choice.lower() in ['y', 'yes'] or always:
- try:
- page.put(text, commentUsed, force=True)
- except wikipedia.EditConflict:
- wikipedia.output(u'Edit conflict! skip!')
- break
- except wikipedia.ServerError:
- # Sometimes there is this error that's quite annoying because
- # can block the whole process for nothing.
- errorCount += 1
- if errorCount < 5:
- wikipedia.output(u'Server Error! Wait..')
- time.sleep(3)
- continue
- else:
- # Prevent Infinite Loops
- raise wikipedia.ServerError(u'Fifth Server Error!')
- except wikipedia.SpamfilterError, e:
- wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
- break
- except wikipedia.PageNotSaved, error:
- wikipedia.output(u'Error putting page: %s' % (error.args,))
- break
- except wikipedia.LockedPage:
- wikipedia.output(u'The page is still protected. Skipping...')
- break
- else:
- # Break only if the errors are one after the other
- errorCount = 0
- break
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- wikipedia.stopme()
+# -*- coding: utf-8 -*-
+"""
+This is a script originally written by Wikihermit and then rewritten by Filnik,
+to delete the templates used to warn in the pages that a page is blocked,
+when the page isn't blocked at all. Indeed, very often sysops block the pages
+for a setted time but then the forget to delete the warning! This script is useful
+if you want to delete those useless warning left in these pages.
+
+Parameters:
+
+These command line parameters can be used to specify which pages to work on:
+
+¶ms;
+
+-xml Retrieve information from a local XML dump (pages-articles
+ or pages-meta-current, see http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+
+-page Only edit a specific page.
+ Argument can also be given as "-page:pagetitle". You can
+ give this parameter multiple times to edit multiple pages.
+
+-protectedpages: Check all the blocked pages (useful when you have not categories
+ or when you have problems with them. (add the namespace after ":" where
+ you want to check - default checks all protected pages)
+
+-moveprotected: Same as -protectedpages, for moveprotected pages
+
+Furthermore, the following command line parameters are supported:
+
+-always Doesn't ask every time if the bot should make the change or not, do it always.
+
+-debug When the bot can't delete the template from the page (wrong regex or something like that)
+ it will ask you if it should open the page on your browser.
+ (attention: pages included may give false positives..)
+
+-move The bot will check if the page is blocked also for the move option, not only for edit
+
+--- Warning! ---
+You have to edit this script in order to add your preferences
+otherwise the script won't work!
+
+If you have problems, ask on botwiki ( http://botwiki.sno.cc )
+or on IRC (#pywikipediabot)
+
+--- Example of how to use the script ---
+
+python blockpageschecker.py -always
+
+python blockpageschecker.py -cat:Geography -always
+
+python blockpageschecker.py -debug -protectedpages:4
+
+"""
+#
+# (C) Monobi a.k.a. Wikihermit, 2007
+# (C) Filnik, 2007-2008
+# (C) NicDumZ, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: blockpageschecker.py,v 1.5 2008/04/24 19.40.00 filnik Exp$'
+#
+
+import re, webbrowser
+import wikipedia, catlib, pagegenerators, config
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '¶ms;': pagegenerators.parameterHelp,
+}
+
+#######################################################
+#--------------------- PREFERENCES -------------------#
+################### -- Edit below! -- #################
+
+# Added a new feature! Please update and add the settings in order
+# to improve the intelligence of this script ;-)
+# Regex to get the semi-protection template
+# NOTE(review): a value of None means "not configured for this language";
+# understandBlock() iterates these lists directly, so a None entry (e.g. 'en')
+# would raise TypeError there -- verify how unsupported languages are filtered.
+templateSemiProtection = {
+        'en': None,
+        'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccoparziale(?:|[ _]scad\|.*?|\|.*?)\}\}',
+          r'\{\{(?:[Tt]emplate:|)[Aa]bp(?:|[ _]scad\|(?:.*?))\}\}'],
+        'fr': [ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Ss]emi[- ]?protection(|[^\}]*)\}\}'],
+        'ja':[ur'\{\{(?:[Tt]emplate:|)半保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
+        # NOTE(review): the unescaped '|' after "Protected" makes the pattern an
+        # alternation ("...Protected" OR the suffix group), not a literal pipe --
+        # confirm this matches the intended zh template names.
+        'zh':[ur'\{\{(?:[Tt]emplate:|)Protected|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Ss]|[Ss]emi|半)(?:\|.+|)\}\}(\n+?|)'],
+        }
+# Regex to get the total-protection template
+templateTotalProtection = {
+        'en': None,
+        'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisoblocco(?:|[ _]scad\|(?:.*?)|minaccia|cancellata)\}\}',
+          r'\{\{(?:[Tt]emplate:|)(?:[Cc][Tt]|[Cc]anc fatte|[Cc][Ee].*?)\}\}', r'<div class="toccolours[ _]itwiki[ _]template[ _]avviso">(?:\s|\n)*?[Qq]uesta pagina'],
+        'fr':[ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)[Pp]rotection(|[^\}]*)\}\}',
+          ur'\{\{(?:[Tt]emplate:|[Mm]odèle:|)(?:[Pp]age|[Aa]rchive|[Mm]odèle) protégée?(|[^\}]*)\}\}'],
+        'ja':[ur'\{\{(?:[Tt]emplate:|)保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
+        'zh':[r'\{\{(?:[Tt]emplate:|)Protected|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Nn]|[Nn]ormal)(?:\|.+|)\}\}(\n+?|)'],
+        }
+# Regex to get the semi-protection move template
+templateSemiMoveProtection = {
+        'en': None,
+        'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
+        'ja':[ur'\{\{(?:[Tt]emplate:|)移動半保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
+        'zh':[r'\{\{(?:[Tt]emplate:|)Protected|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Mini-protected|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)',r'\{\{(?:[Tt]emplate:|)Protected-logo|(?:MS|ms)(?:\|.+|)\}\}(\n+?|)'],
+        }
+# Regex to get the total-protection move template
+# NOTE(review): the 'it' list here is identical to templateSemiMoveProtection['it'];
+# understandBlock() skips the move checks when TSP == TSMP or TTP == TTMP -- confirm
+# the duplication is intentional for it.wiki.
+templateTotalMoveProtection = {
+        'en': None,
+        'it':[r'\{\{(?:[Tt]emplate:|)[Aa]vvisobloccospostamento(?:|[ _]scad\|.*?|\|.*?)\}\}'],
+        'ja':[ur'\{\{(?:[Tt]emplate:|)移動保護(?:[Ss]|)(?:\|.+|)\}\}(\n+?|)'],
+        'zh':[ur'\{\{(?:[Tt]emplate:|)Protected|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Mini-protected|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)',ur'\{\{(?:[Tt]emplate:|)Protected-logo|(?:[Mm]|[Mm]ove|移[動动])(?:\|.+|)\}\}(\n+?|)'],
+        }
+# Array: 0 => Semi-block, 1 => Total Block, 2 => Semi-Move, 3 => Total-Move
+# (replacement template text used by main() when the wrong template is found)
+templateNoRegex = {
+        'it':['{{Avvisobloccoparziale}}', '{{Avvisoblocco}}', None, None],
+        'fr':['{{Semi-protection}}', '{{Protection}}', None, None],
+        'ja':[u'{{半保護}}', u'{{保護}}', u'{{移動半保護}}',u'{{移動保護}}'],
+        'zh':[u'{{Protected/semi}}',u'{{Protected}}',u'{{Protected/ms}}',u'{{Protected/move}}'],
+        }
+
+# Category where the bot will check
+categoryToCheck = {
+        'en':[u'Category:Protected'],
+        'fr':[u'Category:Page semi-protégée', u'Category:Page protégée', u'Catégorie:Article protégé'],
+        'he':[u'קטגוריה:ויקיפדיה: דפים מוגנים', u'קטגוריה:ויקיפדיה: דפים מוגנים חלקית'],
+        'it':[u'Categoria:Pagine semiprotette', u'Categoria:Voci_protette'],
+        'ja':[u'Category:編集保護中の記事',u'Category:編集半保護中の記事',
+            u'Category:移動保護中の記事',],
+        'pt':[u'Category:!Páginas protegidas', u'Category:!Páginas semiprotegidas'],
+        'zh':[u'Category:被保护的页面',u'Category:被保護的模板',u'Category:暂时不能移动的页面',
+            u'Category:被半保护的页面',],
+        }
+# Comment used when the Bot edits
+comment = {
+        'en':u'Bot: Deleting out-dated template',
+        'fr':u'Robot: Mise à jour des bandeaux de protection',
+        'he':u'בוט: מסיר תבנית שעבר זמנה',
+        'it':u'Bot: Tolgo o sistemo template di avviso blocco',
+        'ja':u'ロボットによる: 保護テンプレート除去',
+        'pt':u'Bot: Retirando predefinição de proteção',
+        'zh':u'機器人: 移除過期的保護模板',
+        }
+# Check list to block the users that haven't set their preferences
+# NOTE(review): 'he' has category and comment entries above but is missing
+# here, so he.wiki users are rejected by main() -- confirm whether intentional.
+project_inserted = ['en', 'fr', 'it', 'ja', 'pt', 'zh']
+
+#######################################################
+#------------------ END PREFERENCES ------------------#
+################## -- Edit above! -- ##################
+
+def understandBlock(text, TTP, TSP, TSMP, TTMP):
+    """ Understand if the page is blocked and if it has the right template """
+    # Returns a (state, regex) tuple: "state" names the protection template
+    # found in the wikitext and "regex" is the pattern that matched it, later
+    # fed to re.subn() by main() to swap in the correct template.
+    # NOTE(review): TTP/TSP are None for some languages in the tables above
+    # ('en'), which would make the loops below raise TypeError -- verify that
+    # callers for those languages never reach this function.
+    for catchRegex in TTP: # TTP = templateTotalProtection
+        resultCatch = re.findall(catchRegex, text)
+        if resultCatch:
+            return ('sysop-total', catchRegex)
+    for catchRegex in TSP:
+        resultCatch = re.findall(catchRegex, text)
+        if resultCatch:
+            return ('autoconfirmed-total', catchRegex)
+    # Move-protection templates are only checked when both move tables exist
+    # and differ from the edit-protection tables (avoids double-matching when
+    # a language reuses the same regexes, e.g. 'it' move templates).
+    if TSMP != None and TTMP != None and TTP != TTMP and TSP != TSMP:
+        # NOTE(review): a TSMP (semi-move) match is labelled 'sysop-move' and a
+        # TTMP (total-move) match 'autoconfirmed-move', while main() pairs
+        # 'sysop-move' with the sysop move restriction -- the two labels look
+        # swapped relative to the table names; confirm against main()'s logic.
+        for catchRegex in TSMP:
+            resultCatch = re.findall(catchRegex, text)
+            if resultCatch:
+                return ('sysop-move', catchRegex)
+        for catchRegex in TTMP:
+            resultCatch = re.findall(catchRegex, text)
+            if resultCatch:
+                return ('autoconfirmed-move', catchRegex)
+    return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
+
+def debugQuest(site, page):
+    # Interactive helper used with -debug: offer to open the problematic page
+    # in a web browser, in the text-editor GUI, or to do nothing.
+    quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
+    pathWiki = site.family.nicepath(site.lang)
+    url = 'http://%s%s%s?&redirect=no' % (wikipedia.getSite().hostname(), pathWiki, page.urlname())
+    # NOTE(review): quest is read once *before* the loop, so an invalid answer
+    # makes the else-branch loop forever without re-prompting -- the input call
+    # presumably belongs inside the while.
+    while 1:
+        # quest.lower() can never equal 'B'; the upper-case alternatives in
+        # these membership tests are dead.
+        if quest.lower() in ['b', 'B']:
+            webbrowser.open(url)
+            break
+        elif quest.lower() in ['g', 'G']:
+            import editarticle
+            editor = editarticle.TextEditor()
+            text = editor.edit(page.get())
+            break
+        elif quest.lower() in ['n', 'N']:
+            break
+        else:
+            wikipedia.output(u'wrong entry, type "b", "g" or "n"')
+            continue
+
+def main():
+    """ Main Function """
+    # Parse command-line options, collect the pages to check (from an explicit
+    # generator or from the per-language protection categories), then for each
+    # page compare its actual protection level against the protection template
+    # found in its text, fixing or removing the template as needed.
+    # Loading the comments
+    global categoryToCheck; global comment; global project_inserted
+    if config.mylang not in project_inserted:
+        wikipedia.output(u"Your project is not supported by this script. You have to edit the script and add it!")
+        wikipedia.stopme()
+        # NOTE(review): stopme() does not terminate the process -- execution
+        # falls through and the unsupported language continues below; a
+        # sys.exit() here was presumably intended.
+    # always, define a generator to understand if the user sets one, defining what's genFactory
+    always = False; generator = False; debug = False
+    moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory()
+    # To prevent Infinite loops
+    errorCount = 0
+    # Load the right site
+    site = wikipedia.getSite()
+    # Loading the default options.
+    for arg in wikipedia.handleArgs():
+        if arg == '-always':
+            always = True
+        elif arg == '-move':
+            moveBlockCheck = True
+        elif arg == '-debug':
+            debug = True
+        elif arg.startswith('-protectedpages'):
+            if len(arg) == 15:
+                generator = site.protectedpages(namespace = 0)
+            else:
+                # '-protectedpages:' is 16 chars, so arg[16:] is the namespace
+                generator = site.protectedpages(namespace = int(arg[16:]))
+        elif arg.startswith('-moveprotected'):
+            if len(arg) == 14:
+                generator = site.protectedpages(namespace = 0, type = 'move')
+            else:
+                # NOTE(review): '-moveprotected:' is only 15 chars, so arg[16:]
+                # drops the first digit of the namespace (arg[15:] expected --
+                # compare the '-protectedpages' branch above); verify.
+                generator = site.protectedpages(namespace = int(arg[16:]),
+                                                type = 'move')
+        elif arg.startswith('-page'):
+            if len(arg) == 5:
+                generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+            else:
+                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+        else:
+            generator = genFactory.handleArg(arg)
+    # Take the right templates to use, the category and the comment
+    TSP = wikipedia.translate(site, templateSemiProtection)
+    TTP = wikipedia.translate(site, templateTotalProtection)
+    TSMP = wikipedia.translate(site, templateSemiMoveProtection)
+    TTMP = wikipedia.translate(site, templateTotalMoveProtection)
+    TNR = wikipedia.translate(site, templateNoRegex)
+
+    category = wikipedia.translate(site, categoryToCheck)
+    commentUsed = wikipedia.translate(site, comment)
+    if not generator:
+        generator = list()
+        wikipedia.output(u'Loading categories...')
+        # Define the category if no other generator has been setted
+        for CAT in category:
+            cat = catlib.Category(site, CAT)
+            # Define the generator
+            gen = pagegenerators.CategorizedPageGenerator(cat)
+            for pageCat in gen:
+                generator.append(pageCat)
+        wikipedia.output(u'Categories loaded, start!')
+    # Main Loop
+    preloadingGen = pagegenerators.PreloadingGenerator(generator, pageNumber = 60)
+    for page in preloadingGen:
+        pagename = page.aslink()
+        wikipedia.output('Loading %s...' % pagename)
+        try:
+            text = page.get()
+            restrictions = page.getRestrictions()
+        except wikipedia.NoPage:
+            wikipedia.output("%s doesn't exist! Skipping..." % pagename)
+            continue
+        except wikipedia.IsRedirectPage:
+            wikipedia.output("%s is a redirect! Skipping..." % pagename)
+            if debug:
+                debugQuest(site, page)
+            continue
+        """
+        # This check does not work :
+        # PreloadingGenerator cannot set correctly page.editRestriction
+        # (see bug #1949476 )
+        if not page.canBeEdited():
+            wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+            continue
+        """
+        editRestr = restrictions['edit']
+        # Skip sysop-protected pages unless a sysop account is configured.
+        if editRestr and editRestr[0] == 'sysop':
+            try:
+                config.sysopnames[site.family.name][site.lang]
+            except:
+                wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+                continue
+
+        # Understand, according to the template in the page, what should be the protection
+        # and compare it with what there really is.
+        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP)
+        # Only to see if the text is the same or not...
+        oldtext = text
+        # keep track of the changes for each step (edit then move)
+        changes = -1
+
+        if not editRestr:
+            # page is not edit-protected
+            # Deleting the template because the page doesn't need it.
+            replaceToPerform = u'|'.join(TTP + TSP)
+            # NOTE(review): the subn() result is assigned to 'texti', so the
+            # template removal never reaches 'text' and is silently discarded
+            # -- looks like a typo for 'text'.
+            texti, changes = re.subn('(?:<noinclude>|)(%s)(?:</noinclude>|)' % replaceToPerform, '', text)
+            wikipedia.output(u'The page is editable for all, deleting the template...')
+
+        elif editRestr[0] == 'sysop':
+            # total edit protection
+            if TemplateInThePage[0] == 'sysop-total' and TTP != None:
+                msg = 'The page is protected to the sysop'
+                if not moveBlockCheck:
+                    msg += ', skipping...'
+                wikipedia.output(msg)
+            else:
+                wikipedia.output(u'The page is protected to the sysop, but the template seems not correct. Fixing...')
+                # TNR[1] = total-block template for this language
+                text, changes = re.subn(TemplateInThePage[1], TNR[1], text)
+
+        elif TSP != None:
+            # implicitely editRestr[0] = 'autoconfirmed', edit-Semi-protection
+            if TemplateInThePage[0] == 'autoconfirmed-total':
+                msg = 'The page is editable only for the autoconfirmed users'
+                if not moveBlockCheck:
+                    msg += ', skipping...'
+                wikipedia.output(msg)
+            else:
+                wikipedia.output(u'The page is editable only for the autoconfirmed users, but the template seems not correct. Fixing...')
+                # TNR[0] = semi-block template for this language
+                text, changes = re.subn(TemplateInThePage[1], TNR[0], text)
+
+        if changes == 0:
+            # We tried to fix edit-protection templates, but it did not work.
+            wikipedia.output('Warning : No edit-protection template could be found')
+
+        if moveBlockCheck:
+            # checking move protection now
+            moveRestr = restrictions['move']
+            changes = -1
+
+            if not moveRestr:
+                wikipedia.output(u'The page is movable for all, deleting the template...')
+                # Deleting the template because the page doesn't need it.
+                replaceToPerform = u'|'.join(TSMP + TTMP)
+                text, changes = re.subn('(?:<noinclude>|)(%s)(?:</noinclude>|)' % replaceToPerform, '', text)
+
+            elif moveRestr[0] == 'sysop':
+                # move-total-protection
+                if TemplateInThePage[0] == 'sysop-move' and TTMP != None:
+                    wikipedia.output(u'The page is protected from moving to the sysop, skipping...')
+                else:
+                    wikipedia.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...')
+                    # TNR[3] = total-move template for this language
+                    text, changes = re.subn(TemplateInThePage[1], TNR[3], text)
+
+            elif TSMP != None:
+                # implicitely moveRestr[0] = 'autoconfirmed', move-semi-protection
+                if TemplateInThePage[0] == 'autoconfirmed-move':
+                    wikipedia.output(u'The page is movable only for the autoconfirmed users, skipping...')
+                else:
+                    wikipedia.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...')
+                    # TNR[2] = semi-move template for this language
+                    text, changes = re.subn(TemplateInThePage[1], TNR[2], text)
+
+            if changes == 0:
+                # We tried to fix move-protection templates, but it did not work.
+                wikipedia.output('Warning : No move-protection template could be found')
+
+
+        if oldtext != text:
+            # Ok, asking if the change has to be performed and do it if yes.
+            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+            wikipedia.showDiff(oldtext, text)
+            choice = ''
+            while 1:
+                if not always:
+                    choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+                if choice.lower() in ['a', 'all']:
+                    always = True
+                if choice.lower() in ['n', 'no']:
+                    break
+                if choice.lower() in ['y', 'yes'] or always:
+                    try:
+                        page.put(text, commentUsed, force=True)
+                    except wikipedia.EditConflict:
+                        wikipedia.output(u'Edit conflict! skip!')
+                        break
+                    except wikipedia.ServerError:
+                        # Sometimes there is this error that's quite annoying because
+                        # can block the whole process for nothing.
+                        errorCount += 1
+                        if errorCount < 5:
+                            wikipedia.output(u'Server Error! Wait..')
+                            # NOTE(review): 'time' is never imported at the top
+                            # of this file -- this line would raise NameError.
+                            time.sleep(3)
+                            continue
+                        else:
+                            # Prevent Infinite Loops
+                            raise wikipedia.ServerError(u'Fifth Server Error!')
+                    except wikipedia.SpamfilterError, e:
+                        wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
+                        break
+                    except wikipedia.PageNotSaved, error:
+                        wikipedia.output(u'Error putting page: %s' % (error.args,))
+                        break
+                    except wikipedia.LockedPage:
+                        wikipedia.output(u'The page is still protected. Skipping...')
+                        break
+                    else:
+                        # Break only if the errors are one after the other
+                        errorCount = 0
+                        break
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        # Always release the framework's throttle/login state, even on error.
+        wikipedia.stopme()
Property changes on: trunk/pywikipedia/blockpageschecker.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/commonsdelinker/plugins/__init__.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/plugins/__init__.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/commonsdelinker/plugins/__init__.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1 +1 @@
-__version__ = '$Id: $'
+__version__ = '$Id: $'
Property changes on: trunk/pywikipedia/commonsdelinker/plugins/__init__.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/commonsdelinker/plugins/books.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/plugins/books.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/commonsdelinker/plugins/books.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,15 +1,15 @@
-__version__ = '$Id: $'
-
-import re
-
-class FrPhotographie(object):
- hook = 'before_replace'
- def __init__(self, CommonsDelinker):
- self.CommonsDelinker = CommonsDelinker
- def __call__(self, page, summary, image, replacement):
- site = page.site()
- if (site.lang, site.family.name) == ('fr', 'wikibooks') and replacement.get() is None:
- if page.title().startswith('Photographie/') or page.title().startswith('Tribologie/'):
- replacement.set('IMG.svg')
- self.CommonsDelinker.output(u'%s Replaced %s by IMG.svg on %s.' % \
- (self, image.get(), replacement.get()))
+__version__ = '$Id: $'
+
+import re
+
+class FrPhotographie(object):
+    # CommonsDelinker plugin: on fr.wikibooks, instead of delinking a removed
+    # image from the "Photographie/" or "Tribologie/" book pages, substitute
+    # the placeholder IMG.svg.
+    hook = 'before_replace'
+    def __init__(self, CommonsDelinker):
+        self.CommonsDelinker = CommonsDelinker
+    def __call__(self, page, summary, image, replacement):
+        site = page.site()
+        # Only act when no replacement has been decided yet.
+        if (site.lang, site.family.name) == ('fr', 'wikibooks') and replacement.get() is None:
+            if page.title().startswith('Photographie/') or page.title().startswith('Tribologie/'):
+                replacement.set('IMG.svg')
+                # NOTE(review): the "on %s" placeholder is filled with
+                # replacement.get() (now 'IMG.svg'), not the page -- the log
+                # message arguments look mismatched; verify intent.
+                self.CommonsDelinker.output(u'%s Replaced %s by IMG.svg on %s.' % \
+                    (self, image.get(), replacement.get()))
Property changes on: trunk/pywikipedia/commonsdelinker/plugins/books.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/commonsdelinker/plugins/debug.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/plugins/debug.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/commonsdelinker/plugins/debug.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,17 +1,17 @@
-import difflib
-__version__ = '$Id: $'
-
-class Diff(object):
- hook = 'before_save'
- def __init__(self, CommonsDelinker):
- self.CommonsDelinker = CommonsDelinker
- def __call__(self, page, text, new_text, summary):
- diff = difflib.context_diff(
- text.encode('utf-8').splitlines(True),
- new_text.get().encode('utf-8').splitlines(True))
-
- f = open((u'diff/%s-%s-%s.txt' % (page.urlname().replace('/', '-'),
- page.site().dbName(), page.editTime())).encode('utf-8', 'ignore'), 'w')
-
- f.writelines(diff)
+import difflib
+__version__ = '$Id: $'
+
+class Diff(object):
+    # CommonsDelinker debug plugin: before each save, dump a context diff of
+    # the pending change to a file under diff/ named after the page, the wiki
+    # database name and the page's last edit time.
+    hook = 'before_save'
+    def __init__(self, CommonsDelinker):
+        self.CommonsDelinker = CommonsDelinker
+    def __call__(self, page, text, new_text, summary):
+        diff = difflib.context_diff(
+            text.encode('utf-8').splitlines(True),
+            new_text.get().encode('utf-8').splitlines(True))
+
+        # NOTE(review): assumes a writable diff/ directory already exists --
+        # open() would raise IOError otherwise; verify setup.
+        f = open((u'diff/%s-%s-%s.txt' % (page.urlname().replace('/', '-'),
+            page.site().dbName(), page.editTime())).encode('utf-8', 'ignore'), 'w')
+
+        f.writelines(diff)
f.close()
\ No newline at end of file
Property changes on: trunk/pywikipedia/commonsdelinker/plugins/debug.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/commonsdelinker/plugins/flags.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/plugins/flags.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/commonsdelinker/plugins/flags.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,32 +1,32 @@
-__version__ = '$Id: $'
-
-import re
-
-class NlWiki(object):
- hook = 'gallery_replace'
- def __init__(self, CommonsDelinker):
- self.CommonsDelinker = CommonsDelinker
- def __call__(self, page, summary, image, replacement, match, groups):
- site = page.site()
- if (site.lang, site.family.name) == ('nl', 'wikipedia') and replacement.get() is None:
- commands = self.CommonsDelinker.SummaryCache.get(site, 'Vlaggen', default = '')
-
- flags = re.findall(r'(?s)\<\!\-\-begin\-flags (.*?)\-\-\>(.*?)\<\!\-\-end\-flags\-\-\>', commands)
- text = page.get()
-
- namespace = site.namespace(14)
- r_namespace = r'(?:[Cc]ategory)|(?:[%s%s]%s)' % \
- (namespace[0], namespace[0].lower(), namespace[1:])
-
- for new_image, categories in flags:
- for category in categories.split('\n'):
- if category.strip() == '': continue
-
- r_cat = r'\[\[\s*%s\s*\:\s*%s\s*(?:\|.*?)?\s*\]\]' % (r_namespace,
- re.sub(r'\\[ _]', '[ _]', re.escape(category.strip())))
- if re.search(r_cat, text):
- self.CommonsDelinker.output(
- u'%s %s replaced by %s in category %s' % \
- (self, image, new_image, category))
- replacement.set(new_image.replace(' ', '_'))
+__version__ = '$Id: $'
+
+import re
+
+class NlWiki(object):
+    # CommonsDelinker plugin for nl.wikipedia: when a flag image is removed,
+    # look up an on-wiki configuration page ('Vlaggen') mapping replacement
+    # images to category lists, and substitute the mapped image if the page
+    # belongs to one of those categories.
+    hook = 'gallery_replace'
+    def __init__(self, CommonsDelinker):
+        self.CommonsDelinker = CommonsDelinker
+    def __call__(self, page, summary, image, replacement, match, groups):
+        site = page.site()
+        if (site.lang, site.family.name) == ('nl', 'wikipedia') and replacement.get() is None:
+            commands = self.CommonsDelinker.SummaryCache.get(site, 'Vlaggen', default = '')
+
+            # Config format: <!--begin-flags NEWIMAGE-->cat1\ncat2...<!--end-flags-->
+            flags = re.findall(r'(?s)\<\!\-\-begin\-flags (.*?)\-\-\>(.*?)\<\!\-\-end\-flags\-\-\>', commands)
+            text = page.get()
+
+            # Accept both the canonical 'Category' prefix and the localized
+            # namespace name (namespace 14), case-insensitive on the first letter.
+            namespace = site.namespace(14)
+            r_namespace = r'(?:[Cc]ategory)|(?:[%s%s]%s)' % \
+                (namespace[0], namespace[0].lower(), namespace[1:])
+
+            for new_image, categories in flags:
+                for category in categories.split('\n'):
+                    if category.strip() == '': continue
+
+                    # Spaces and underscores are interchangeable in titles.
+                    r_cat = r'\[\[\s*%s\s*\:\s*%s\s*(?:\|.*?)?\s*\]\]' % (r_namespace,
+                        re.sub(r'\\[ _]', '[ _]', re.escape(category.strip())))
+                    if re.search(r_cat, text):
+                        self.CommonsDelinker.output(
+                            u'%s %s replaced by %s in category %s' % \
+                            (self, image, new_image, category))
+                        replacement.set(new_image.replace(' ', '_'))
\ No newline at end of file
Property changes on: trunk/pywikipedia/commonsdelinker/plugins/flags.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/families/wikia_family.py
===================================================================
--- trunk/pywikipedia/families/wikia_family.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/families/wikia_family.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,43 +1,43 @@
-# -*- coding: utf-8 -*-
-
-__version__ = '$Id: $'
-
-import family
-
-# The Wikia Search family
-# user-config.py: usernames['wikia']['wikia'] = 'User name'
-
-class Family(family.Family):
- def __init__(self):
- family.Family.__init__(self)
- self.name = u'wikia'
-
- self.langs = {
- u'wikia': u'search.wikia.com',
- }
-
- self.namespaces[4] = {
- '_default': [u'search', self.namespaces[4]['_default']],
- }
- self.namespaces[5] = {
- '_default': [u'search talk', self.namespaces[5]['_default']],
- }
- self.namespaces[100] = {
- '_default': u'Forum',
- }
- self.namespaces[101] = {
- '_default': u'Forum talk',
- }
- self.namespaces[112] = {
- '_default': u'Mini',
- }
- self.namespaces[113] = {
- '_default': u'Mini talk',
- }
-
- def code2encoding(self, code):
- return 'iso-8859-1'
-
- def version(self, code):
- return "1.12alpha"
-
+# -*- coding: utf-8 -*-
+
+__version__ = '$Id: $'
+
+import family
+
+# The Wikia Search family
+# user-config.py: usernames['wikia']['wikia'] = 'User name'
+
+class Family(family.Family):
+    # Family definition for the Wikia Search wiki (search.wikia.com), with its
+    # custom namespaces (Forum, Mini) on top of the framework defaults.
+    def __init__(self):
+        family.Family.__init__(self)
+        self.name = u'wikia'
+
+        self.langs = {
+            u'wikia': u'search.wikia.com',
+            }
+
+        # Project namespace (4) and its talk namespace (5) keep the default
+        # names as aliases alongside the site-specific ones.
+        self.namespaces[4] = {
+            '_default': [u'search', self.namespaces[4]['_default']],
+        }
+        self.namespaces[5] = {
+            '_default': [u'search talk', self.namespaces[5]['_default']],
+        }
+        self.namespaces[100] = {
+            '_default': u'Forum',
+        }
+        self.namespaces[101] = {
+            '_default': u'Forum talk',
+        }
+        self.namespaces[112] = {
+            '_default': u'Mini',
+        }
+        self.namespaces[113] = {
+            '_default': u'Mini talk',
+        }
+
+    def code2encoding(self, code):
+        # The site serves Latin-1, not the usual UTF-8.
+        return 'iso-8859-1'
+
+    def version(self, code):
+        # MediaWiki version run by the site, used for API feature detection.
+        return "1.12alpha"
+
+
Property changes on: trunk/pywikipedia/families/wikia_family.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/fixing_redirects.py
===================================================================
--- trunk/pywikipedia/fixing_redirects.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/fixing_redirects.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,191 +1,191 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-This script has the intention to correct all redirect
-links in featured pages or only one page of each wiki.
-
-Can be using with:
--featured Run over featured pages
--page:XXX Run over only one page
-
-"""
-#
-# This script based on disambredir.py and solve_disambiguation.py
-#
-# Distributed under the terms of the MIT license.
-#
-__version__='$Id: disambredir.py 4407 2007-10-03 17:27:14Z leogregianin $'
-#
-import wikipedia
-import pagegenerators
-import re, sys
-
-msg = {
- 'ar': u'بوت: إصلاح التحويلات',
- 'en': u'Bot: Fixing redirects',
- 'he': u'בוט: מתקן הפניות',
- 'ja': u'ロボットによる:リダイレクト回避',
- 'nn': u'robot: retta omdirigeringar',
- 'no': u'Robot: Retter omdirigeringer',
- 'pt': u'Bot: Arrumando redirects',
- 'sv': u'Bot: Rättar omdirigeringar',
- 'zh': u'機器人: 修復重定向',
-}
-
-featured_articles = {
- 'ar': u'ويكيبيديا:مقالات مختارة',
- 'de': u'Wikipedia:Exzellente_Artikel',
- 'en': u'Wikipedia:Featured_articles',
- 'es': u'Wikipedia:Artículos_destacados',
- 'fr': u'Wikipédia:Articles_de_qualité',
- 'he': u'פורטל:ערכים_מומלצים',
- 'it': u'Wikipedia:Articoli_in_vetrina',
- 'ja': u'Wikipedia:秀逸な記事',
- 'nl': u'Wikipedia:Etalage',
- 'nn': u'Wikipedia:Gode artiklar',
- 'no': u'Wikipedia:Anbefalte artikler',
- 'pt': u'Wikipedia:Os_melhores_artigos',
- 'sv': u'Wikipedia:Utvalda_artiklar',
- 'zh': u'Wikipedia:特色条目',
-}
-
-def firstcap(string):
- return string[0].upper()+string[1:]
-
-def treat(text, linkedPage, targetPage):
- """
- Based on the method of the same name in solve_disambiguation.py
- """
- # make a backup of the original text so we can show the changes later
- linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
- curpos = 0
- # This loop will run until we have finished the current page
- while True:
- m = linkR.search(text, pos = curpos)
- if not m:
- break
- # Make sure that next time around we will not find this same hit.
- curpos = m.start() + 1
- # ignore interwiki links and links to sections of the same page
- if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
- continue
- else:
- actualLinkPage = wikipedia.Page(page.site(), m.group('title'))
- # Check whether the link found is to page.
- if actualLinkPage != linkedPage:
- continue
-
- # how many bytes should be displayed around the current link
- context = 15
- # at the beginning of the link, start red color.
- # at the end of the link, reset the color to default
- wikipedia.output(text[max(0, m.start() - context) : m.start()] + '\03{lightred}' + text[m.start() : m.end()] + '\03{default}' + text[m.end() : m.end() + context])
- choice = 'y'
-
- # The link looks like this:
- # [[page_title|link_text]]trailing_chars
- page_title = m.group('title')
- link_text = m.group('label')
-
- if not link_text:
- # or like this: [[page_title]]trailing_chars
- link_text = page_title
- if m.group('section') == None:
- section = ''
- else:
- section = m.group('section')
- trailing_chars = m.group('linktrail')
- if trailing_chars:
- link_text += trailing_chars
-
- if choice in "uU":
- # unlink - we remove the section if there's any
- text = text[:m.start()] + link_text + text[m.end():]
- continue
- replaceit = choice in "rR"
-
- if link_text[0].isupper():
- new_page_title = targetPage.title()
- else:
- new_page_title = targetPage.title()[0].lower() + targetPage.title()[1:]
- if replaceit and trailing_chars:
- newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
- elif replaceit or (new_page_title == link_text and not section):
- newlink = "[[%s]]" % new_page_title
- # check if we can create a link with trailing characters instead of a pipelink
- elif len(new_page_title) <= len(link_text) and firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title) and re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
- newlink = "[[%s]]%s" % (link_text[:len(new_page_title)], link_text[len(new_page_title):])
- else:
- newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
- text = text[:m.start()] + newlink + text[m.end():]
- continue
- return text
-
-def workon(page):
- try:
- text = page.get()
- except wikipedia.IsRedirectPage:
- return
- wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
- links = page.linkedPages()
- wikipedia.getall(mysite,links)
- for page2 in links:
- try:
- target = page2.getRedirectTarget()
- except (wikipedia.Error,wikipedia.SectionError):
- continue
- text = treat(text, page2, target)
- if text != page.get():
- comment = wikipedia.translate(mysite, msg)
- page.put(text, comment)
-
-try:
- start = '!'
- featured = False
- title = None
- namespace = None
-
- for arg in wikipedia.handleArgs():
- if arg.startswith('-start'):
- if len(arg) == 6:
- start = wikipedia.input(u'Which start where?')
- else:
- start = arg[7:]
- elif arg == '-featured':
- featured = True
- elif arg.startswith('-page'):
- if len(arg) == 5:
- title = wikipedia.input(u'Which page should be processed?')
- else:
- title = arg[6:]
- elif arg.startswith('-namespace'):
- if len(arg) == 10:
- namespace = int(wikipedia.input(u'Which namespace should be processed?'))
- else:
- namespace = int(arg[11:])
-
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This script has the intention to correct all redirect
+links in featured pages or only one page of each wiki.
+
+Can be using with:
+-featured Run over featured pages
+-page:XXX Run over only one page
+
+"""
+#
+# This script based on disambredir.py and solve_disambiguation.py
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id: disambredir.py 4407 2007-10-03 17:27:14Z leogregianin $'
+#
+import wikipedia
+import pagegenerators
+import re, sys
+
+# Edit summaries by language, resolved via wikipedia.translate().
+msg = {
+    'ar': u'بوت: إصلاح التحويلات',
+    'en': u'Bot: Fixing redirects',
+    'he': u'בוט: מתקן הפניות',
+    'ja': u'ロボットによる:リダイレクト回避',
+    'nn': u'robot: retta omdirigeringar',
+    'no': u'Robot: Retter omdirigeringer',
+    'pt': u'Bot: Arrumando redirects',
+    'sv': u'Bot: Rättar omdirigeringar',
+    'zh': u'機器人: 修復重定向',
+}
+
+# Per-language list page of featured articles; with -featured the bot works
+# on every article referring to this page.
+featured_articles = {
+    'ar': u'ويكيبيديا:مقالات مختارة',
+    'de': u'Wikipedia:Exzellente_Artikel',
+    'en': u'Wikipedia:Featured_articles',
+    'es': u'Wikipedia:Artículos_destacados',
+    'fr': u'Wikipédia:Articles_de_qualité',
+    'he': u'פורטל:ערכים_מומלצים',
+    'it': u'Wikipedia:Articoli_in_vetrina',
+    'ja': u'Wikipedia:秀逸な記事',
+    'nl': u'Wikipedia:Etalage',
+    'nn': u'Wikipedia:Gode artiklar',
+    'no': u'Wikipedia:Anbefalte artikler',
+    'pt': u'Wikipedia:Os_melhores_artigos',
+    'sv': u'Wikipedia:Utvalda_artiklar',
+    'zh': u'Wikipedia:特色条目',
+}
+
+def firstcap(string):
+    # Upper-case only the first character, leaving the rest untouched
+    # (unlike str.capitalize, which also lowercases the remainder).
+    return string[0].upper()+string[1:]
+
+def treat(text, linkedPage, targetPage):
+    """
+    Based on the method of the same name in solve_disambiguation.py
+    """
+    # Rewrite every wikilink in `text` that points at `linkedPage` (a redirect)
+    # so that it points directly at `targetPage`, preserving labels, sections
+    # and trailing characters. Returns the modified text.
+    # NOTE(review): relies on the module-level globals `linktrail`, `mysite`
+    # and `page` being set by the script body below -- verify before reuse.
+    # make a backup of the original text so we can show the changes later
+    linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+    curpos = 0
+    # This loop will run until we have finished the current page
+    while True:
+        m = linkR.search(text, pos = curpos)
+        if not m:
+            break
+        # Make sure that next time around we will not find this same hit.
+        curpos = m.start() + 1
+        # ignore interwiki links and links to sections of the same page
+        if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
+            continue
+        else:
+            actualLinkPage = wikipedia.Page(page.site(), m.group('title'))
+            # Check whether the link found is to page.
+            if actualLinkPage != linkedPage:
+                continue
+
+        # how many bytes should be displayed around the current link
+        context = 15
+        # at the beginning of the link, start red color.
+        # at the end of the link, reset the color to default
+        wikipedia.output(text[max(0, m.start() - context) : m.start()] + '\03{lightred}' + text[m.start() : m.end()] + '\03{default}' + text[m.end() : m.end() + context])
+        # NOTE(review): choice is hard-coded to 'y', so the 'u' (unlink) and
+        # 'r' (replace) branches below can never trigger -- presumably a
+        # leftover from the interactive original in solve_disambiguation.py.
+        choice = 'y'
+
+        # The link looks like this:
+        # [[page_title|link_text]]trailing_chars
+        page_title = m.group('title')
+        link_text = m.group('label')
+
+        if not link_text:
+            # or like this: [[page_title]]trailing_chars
+            link_text = page_title
+        if m.group('section') == None:
+            section = ''
+        else:
+            section = m.group('section')
+        trailing_chars = m.group('linktrail')
+        if trailing_chars:
+            link_text += trailing_chars
+
+        if choice in "uU":
+            # unlink - we remove the section if there's any
+            text = text[:m.start()] + link_text + text[m.end():]
+            continue
+        replaceit = choice in "rR"
+
+        # Preserve the capitalisation of the original link.
+        if link_text[0].isupper():
+            new_page_title = targetPage.title()
+        else:
+            new_page_title = targetPage.title()[0].lower() + targetPage.title()[1:]
+        if replaceit and trailing_chars:
+            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
+        elif replaceit or (new_page_title == link_text and not section):
+            newlink = "[[%s]]" % new_page_title
+        # check if we can create a link with trailing characters instead of a pipelink
+        elif len(new_page_title) <= len(link_text) and firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title) and re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
+            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)], link_text[len(new_page_title):])
+        else:
+            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
+        text = text[:m.start()] + newlink + text[m.end():]
+        continue
+    return text
+
+def workon(page):
+    # Fix, in a single page, every link that goes through a redirect: fetch
+    # all linked pages in one batch, resolve each redirect target, rewrite the
+    # links via treat(), and save if anything changed.
+    try:
+        text = page.get()
+    except wikipedia.IsRedirectPage:
+        return
+    wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+    links = page.linkedPages()
+    # Preload all linked pages with one request instead of one fetch each.
+    wikipedia.getall(mysite,links)
+    for page2 in links:
+        try:
+            target = page2.getRedirectTarget()
+        except (wikipedia.Error,wikipedia.SectionError):
+            # Not a redirect (or broken): leave links to this page alone.
+            continue
+        text = treat(text, page2, target)
+    # page.get() is cached, so this compares against the original text.
+    if text != page.get():
+        comment = wikipedia.translate(mysite, msg)
+        page.put(text, comment)
+
+try:
+    # Script entry: parse options, pick a page source (-featured, -page,
+    # -namespace) and run workon() over it; the try/finally guarantees
+    # wikipedia.stopme() runs on any exit path.
+    start = '!'
+    featured = False
+    title = None
+    namespace = None
+
+    for arg in wikipedia.handleArgs():
+        if arg.startswith('-start'):
+            if len(arg) == 6:
+                start = wikipedia.input(u'Which start where?')
+            else:
+                start = arg[7:]
+        elif arg == '-featured':
+            featured = True
+        elif arg.startswith('-page'):
+            if len(arg) == 5:
+                title = wikipedia.input(u'Which page should be processed?')
+            else:
+                title = arg[6:]
+        elif arg.startswith('-namespace'):
+            if len(arg) == 10:
+                namespace = int(wikipedia.input(u'Which namespace should be processed?'))
+            else:
+                namespace = int(arg[11:])
+
    mysite = wikipedia.getSite()
    if mysite.sitename() == 'wikipedia:nl':
        wikipedia.output(u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
        sys.exit()
-
-    linktrail = mysite.linktrail()
-    if featured:
-        featuredList = wikipedia.translate(mysite, featured_articles)
-        ref = wikipedia.Page(wikipedia.getSite(), featuredList)
-        gen = pagegenerators.ReferringPageGenerator(ref)
-        generator = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
-        for page in generator:
-            workon(page)
-    elif title is not None:
-        page = wikipedia.Page(wikipedia.getSite(), title)
-        workon(page)
-    elif namespace is not None:
-        for page in pagegenerators.AllpagesPageGenerator(start=start, namespace=namespace, includeredirects=False):
-            workon(page)
-    else:
-        wikipedia.showHelp('fixing_redirects')
-        sys.exit()
-
-finally:
-    wikipedia.stopme()
+
+    # linktrail is the module-level global consumed by treat() above.
+    linktrail = mysite.linktrail()
+    if featured:
+        featuredList = wikipedia.translate(mysite, featured_articles)
+        ref = wikipedia.Page(wikipedia.getSite(), featuredList)
+        gen = pagegenerators.ReferringPageGenerator(ref)
+        generator = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
+        for page in generator:
+            workon(page)
+    elif title is not None:
+        page = wikipedia.Page(wikipedia.getSite(), title)
+        workon(page)
+    elif namespace is not None:
+        for page in pagegenerators.AllpagesPageGenerator(start=start, namespace=namespace, includeredirects=False):
+            workon(page)
+    else:
+        wikipedia.showHelp('fixing_redirects')
+        sys.exit()
+
+finally:
+    wikipedia.stopme()
Property changes on: trunk/pywikipedia/fixing_redirects.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/maintenance/readtalk.py
===================================================================
--- trunk/pywikipedia/maintenance/readtalk.py 2008-05-02 00:05:37 UTC (rev 5292)
+++ trunk/pywikipedia/maintenance/readtalk.py 2008-05-02 00:13:27 UTC (rev 5293)
@@ -1,30 +1,30 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-Tool to read all your talk pages.
-
-This tool will go through all the normal (not sysop) accounts configured in user-config and output the contents of the talk page.
-
-TODO:
-*Error checking
-"""
-import sys, re
-sys.path.append(re.sub('/[^/]*$', '', sys.path[0])) #sys.path.append('..')
-import wikipedia, config
-
-def main():
- # Get a dictionary of all the usernames
- namedict = config.usernames
- for familyName in namedict.iterkeys():
- for lang in namedict[familyName].iterkeys():
- site = wikipedia.getSite(code=lang, fam=familyName)
- username = config.usernames[familyName][lang]
- page = wikipedia.Page(site, u'User_Talk:' + username)
- wikipedia.output(u'Reading talk page from ' + lang + u' ' + familyName)
- wikipedia.output(page.get (nofollow_redirects=True))
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- wikipedia.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Tool to read all your talk pages.
+
+This tool will go through all the normal (not sysop) accounts configured in user-config and output the contents of the talk page.
+
+TODO:
+*Error checking
+"""
+import sys, re
+sys.path.append(re.sub('/[^/]*$', '', sys.path[0])) #sys.path.append('..')
+import wikipedia, config
+
+def main():
+ # Get a dictionary of all the usernames
+ namedict = config.usernames
+ for familyName in namedict.iterkeys():
+ for lang in namedict[familyName].iterkeys():
+ site = wikipedia.getSite(code=lang, fam=familyName)
+ username = config.usernames[familyName][lang]
+ page = wikipedia.Page(site, u'User_Talk:' + username)
+ wikipedia.output(u'Reading talk page from ' + lang + u' ' + familyName)
+ wikipedia.output(page.get (nofollow_redirects=True))
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ wikipedia.stopme()
Property changes on: trunk/pywikipedia/maintenance/readtalk.py
___________________________________________________________________
Name: svn:eol-style
+ native
Revision: 5292
Author: siebrand
Date: 2008-05-02 00:05:37 +0000 (Fri, 02 May 2008)
Log Message:
-----------
eol style native
Modified Paths:
--------------
trunk/pywikipedia/commonscat.py
trunk/pywikipedia/delinker.py
trunk/pywikipedia/featuredcount.py
trunk/pywikipedia/lonelypages.py
Property Changed:
----------------
trunk/pywikipedia/add_text.py
trunk/pywikipedia/checkimages.py
trunk/pywikipedia/commonscat.py
trunk/pywikipedia/copyright_clean.py
trunk/pywikipedia/copyright_put.py
trunk/pywikipedia/delinker.py
trunk/pywikipedia/featuredcount.py
trunk/pywikipedia/generate_user_files.py
trunk/pywikipedia/lonelypages.py
trunk/pywikipedia/noreferences.py
trunk/pywikipedia/pageimport.py
Property changes on: trunk/pywikipedia/add_text.py
___________________________________________________________________
Name: svn:eol-style
+ native
Property changes on: trunk/pywikipedia/checkimages.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/commonscat.py
===================================================================
--- trunk/pywikipedia/commonscat.py 2008-05-01 18:50:57 UTC (rev 5291)
+++ trunk/pywikipedia/commonscat.py 2008-05-02 00:05:37 UTC (rev 5292)
@@ -1,229 +1,229 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-With this tool you can add the template {{commonscat}} to categories.
-The tool works by following the interwiki links. If the template is present on
-another langauge page, the bot will use it.
-
-You could probably use it at articles as well, but this isnt tested.
-
-This bot uses pagegenerators to get a list of pages. For example to go through all categories:
-commonscat.py -start:Category:!
-
-Commonscat bot:
-
-Take a page. Follow the interwiki's and look for the commonscat template
-*Found zero templates. Done.
-*Found one template. Add this template
-*Found more templates. Ask the user <- still have to implement this
-
-TODO:
-*Update interwiki's at commons
-*Collect all possibilities also if local wiki already has link.
-*Better support for other templates (translations) / redundant templates.
-*Check mode, only check pages which already have the template
-*More efficient like interwiki.py
-*Possibility to update other languages in the same run
-
-"""
-
-#
-# (C) Multichill, 2008
-#
-# Distributed under the terms of the MIT license.
-#
-
-import wikipedia, config, pagegenerators, add_text
-
-commonscatTemplates = {
- 'af' : u'CommonsKategorie',
- 'ar' : u'تصنيف كومنز',
- 'als' : u'Commonscat',
- 'az' : u'CommonsKat',
- 'bg' : u'Commonscat',
- 'ca' : u'Commonscat',
- 'cs' : u'Commonscat',
- 'da' : u'Commonscat',
- 'de' : u'Commonscat',
- 'en' : u'Commonscat',
- 'eo' : u'Commonscat',
- 'es' : u'Commonscat',
- 'eu' : u'Commonskat',
- 'fi' : u'Commonscat',
- 'fr' : u'Commonscat',
- 'hr' : u'Commonscat',
- 'hu' : u'Közvagyonkat',
- 'id' : u'Commonscat',
- 'io' : u'Commonscat',
- 'is' : u'CommonsCat',
- 'it' : u'Commonscat',
- 'ja' : u'Commonscat',
- 'ko' : u'Commonscat',
- 'lt' : u'Commonscat',
- 'lv' : u'Commonscat',
- 'mk' : u'Ризница-врска',
- 'ms' : u'Commonscat',
- 'nl' : u'Commonscat',
- 'nn' : u'Commonscat',
- 'no' : u'Commonscat',
- 'oc' : u'Commonscat',
- 'os' : u'Commonscat',
- 'pl' : u'Commonscat',
- 'pt' : u'Commonscat',
- 'ro' : u'Commonscat',
- 'ru' : u'Commonscat',
- 'scn' : u'Commonscat',
- 'sh' : u'Commonscat',
- 'simple' : u'Commonscat',
- 'sk' : u'Commonscat',
- 'sl' : u'Kategorija v Zbirki',
- 'sr' : u'Commonscat',
- 'su' : u'Commonscat',
- 'sv' : u'Commonscat',
- 'th' : u'Commonscat',
- 'tr' : u'CommonsKat',
- 'uk' : u'Commonscat',
- 'vi' : u'Commonscat',
- 'zh' : u'Commonscat',
- 'zh-yue' : u'同享類'
-}
-
-def getTemplate (lang = None):
- '''
- Get the template name in a language. Expects the language code, returns the translation.
- '''
- if commonscatTemplates.has_key(lang):
- return commonscatTemplates[lang]
- else:
- return u'Commonscat'
-
-def updateInterwiki (wikipediaPage = None, commonsPage = None):
- '''
- Update the interwiki's at commons from a wikipedia page. The bot just replaces the interwiki links at the commons page with the interwiki's from the wikipedia page.
- This should probably be more intelligent. We could use add all the interwiki's and remove duplicates. Or only remove language links if multiple language links to the same language exist.
-
- This function is disabled for the moment untill i figure out what the best way is to update the interwiki's.
- '''
- interwikis = {}
- comment= u''
- interwikilist = wikipediaPage.interwiki()
- interwikilist.append(wikipediaPage)
-
- for interwikiPage in interwikilist:
- interwikis[interwikiPage.site()]=interwikiPage
- oldtext = commonsPage.get()
- # The commonssite object doesnt work with interwiki's
- newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl'))
- comment = u'Updating interwiki\'s from [[' + wikipediaPage.site().language() + u':' + wikipediaPage.title() + u']]'
-
- if newtext != oldtext:
- #This doesnt seem to work. Newtext has some trailing whitespace
- wikipedia.showDiff(oldtext, newtext)
- commonsPage.put(newtext=newtext, comment=comment)
-
-
-def addCommonscat (page = None, summary = None, always = False):
- '''
- Take a page. Go to all the interwiki page looking for a commonscat template.
- When all the interwiki's links are checked and a proper category is found add it to the page.
- '''
- commonscat = ""
- commonscatpage = None
- commonscats = []
-
- wikipedia.output("Working on " + page.title());
- if getTemplate(page.site().language()) in page.templates():
- wikipedia.output("Commonscat template is already on " + page.title());
- #for template in page.templatesWithParams():
- # if ((template[0]==getTemplate(page.site().language())) and (len(template[1]) > 0)):
- # commonscatpage = getCommonscat(template[1][0])
- # if commonscatpage != None:
- # updateInterwiki (page, commonscatpage)
- # #Should remove the template if something is wrong
-
- else:
- #Follow the interwiki's
- for ipage in page.interwiki():
- #See if commonscat is present
- if getTemplate(ipage.site().language()) in ipage.templates():
- #Go through all the templates at the page
- for template in ipage.templatesWithParams():
- #We found the template and it has the parameter set.
- if ((template[0]==getTemplate(ipage.site().language())) and (len(template[1]) > 0)):
- commonscatpage = getCommonscat(template[1][0])
- if commonscatpage != None:
- commonscats.append(commonscatpage);
- wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + commonscatpage.title() + ".");
- commonscatpage = None
- if len(commonscats) > 0:
- commonscatpage = commonscats.pop();
- commonscat = commonscatpage.titleWithoutNamespace()
- #We found one or more commonscat links, build the template and add it to our page
- #TODO: We should check if we found more than one different link.
- commonscat = "{{" + getTemplate(page.site().language()) + "|" + commonscat + "}}";
- add_text.add_text(page, commonscat, summary, None, None, always);
- #updateInterwiki(page, commonscatpage)
- return (True, always);
-
-def getCommonscat (name = ""):
- '''
- This function will retun a page object of the commons page
- If the page is a redirect this function tries to follow it.
- If the page doesnt exists the function will return None
- '''
- #wikipedia.output("getCommonscat: " + name );
- result = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name);
- if not result.exists():
- #wikipedia.output("getCommonscat : The category doesnt exist.");
- return None
- elif result.isRedirectPage():
- #wikipedia.output("getCommonscat : The category is a redirect");
- return result.getRedirectTarget();
- elif "Category redirect" in result.templates():
- #wikipedia.output("getCommonscat : The category is a category redirect");
- for template in result.templatesWithParams():
- if ((template[0]=="Category redirect") and (len(template[1]) > 0)):
- return getCommonscat(template[1][0])
- elif result.isDisambig():
- #wikipedia.output("getCommonscat : The category is disambigu");
- return None
- else:
- return result
-
-def main():
- '''
- Parse the command line arguments and get a pagegenerator to work on.
- Iterate through all the pages.
- '''
- summary = None; generator = None; always = False
- # Load a lot of default generators
- genFactory = pagegenerators.GeneratorFactory()
-
- for arg in wikipedia.handleArgs():
- if arg.startswith('-summary'):
- if len(arg) == 8:
- summary = wikipedia.input(u'What summary do you want to use?')
- else:
- summary = arg[9:]
- elif arg.startswith('-page'):
- if len(arg) == 5:
- generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
- else:
- generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
- elif arg == '-always':
- always = True
- else:
- generator = genFactory.handleArg(arg)
- if not generator:
- raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
-
- pregenerator = pagegenerators.PreloadingGenerator(generator)
-
- for page in pregenerator:
- (status, always) = addCommonscat(page, summary, always)
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- wikipedia.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+With this tool you can add the template {{commonscat}} to categories.
+The tool works by following the interwiki links. If the template is present on
+another langauge page, the bot will use it.
+
+You could probably use it at articles as well, but this isnt tested.
+
+This bot uses pagegenerators to get a list of pages. For example to go through all categories:
+commonscat.py -start:Category:!
+
+Commonscat bot:
+
+Take a page. Follow the interwiki's and look for the commonscat template
+*Found zero templates. Done.
+*Found one template. Add this template
+*Found more templates. Ask the user <- still have to implement this
+
+TODO:
+*Update interwiki's at commons
+*Collect all possibilities also if local wiki already has link.
+*Better support for other templates (translations) / redundant templates.
+*Check mode, only check pages which already have the template
+*More efficient like interwiki.py
+*Possibility to update other languages in the same run
+
+"""
+
+#
+# (C) Multichill, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+
+import wikipedia, config, pagegenerators, add_text
+
+commonscatTemplates = {
+ 'af' : u'CommonsKategorie',
+ 'ar' : u'تصنيف كومنز',
+ 'als' : u'Commonscat',
+ 'az' : u'CommonsKat',
+ 'bg' : u'Commonscat',
+ 'ca' : u'Commonscat',
+ 'cs' : u'Commonscat',
+ 'da' : u'Commonscat',
+ 'de' : u'Commonscat',
+ 'en' : u'Commonscat',
+ 'eo' : u'Commonscat',
+ 'es' : u'Commonscat',
+ 'eu' : u'Commonskat',
+ 'fi' : u'Commonscat',
+ 'fr' : u'Commonscat',
+ 'hr' : u'Commonscat',
+ 'hu' : u'Közvagyonkat',
+ 'id' : u'Commonscat',
+ 'io' : u'Commonscat',
+ 'is' : u'CommonsCat',
+ 'it' : u'Commonscat',
+ 'ja' : u'Commonscat',
+ 'ko' : u'Commonscat',
+ 'lt' : u'Commonscat',
+ 'lv' : u'Commonscat',
+ 'mk' : u'Ризница-врска',
+ 'ms' : u'Commonscat',
+ 'nl' : u'Commonscat',
+ 'nn' : u'Commonscat',
+ 'no' : u'Commonscat',
+ 'oc' : u'Commonscat',
+ 'os' : u'Commonscat',
+ 'pl' : u'Commonscat',
+ 'pt' : u'Commonscat',
+ 'ro' : u'Commonscat',
+ 'ru' : u'Commonscat',
+ 'scn' : u'Commonscat',
+ 'sh' : u'Commonscat',
+ 'simple' : u'Commonscat',
+ 'sk' : u'Commonscat',
+ 'sl' : u'Kategorija v Zbirki',
+ 'sr' : u'Commonscat',
+ 'su' : u'Commonscat',
+ 'sv' : u'Commonscat',
+ 'th' : u'Commonscat',
+ 'tr' : u'CommonsKat',
+ 'uk' : u'Commonscat',
+ 'vi' : u'Commonscat',
+ 'zh' : u'Commonscat',
+ 'zh-yue' : u'同享類'
+}
+
+def getTemplate (lang = None):
+ '''
+ Get the template name in a language. Expects the language code, returns the translation.
+ '''
+ if commonscatTemplates.has_key(lang):
+ return commonscatTemplates[lang]
+ else:
+ return u'Commonscat'
+
+def updateInterwiki (wikipediaPage = None, commonsPage = None):
+ '''
+ Update the interwiki's at commons from a wikipedia page. The bot just replaces the interwiki links at the commons page with the interwiki's from the wikipedia page.
+ This should probably be more intelligent. We could use add all the interwiki's and remove duplicates. Or only remove language links if multiple language links to the same language exist.
+
+ This function is disabled for the moment untill i figure out what the best way is to update the interwiki's.
+ '''
+ interwikis = {}
+ comment= u''
+ interwikilist = wikipediaPage.interwiki()
+ interwikilist.append(wikipediaPage)
+
+ for interwikiPage in interwikilist:
+ interwikis[interwikiPage.site()]=interwikiPage
+ oldtext = commonsPage.get()
+ # The commonssite object doesnt work with interwiki's
+ newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl'))
+ comment = u'Updating interwiki\'s from [[' + wikipediaPage.site().language() + u':' + wikipediaPage.title() + u']]'
+
+ if newtext != oldtext:
+ #This doesnt seem to work. Newtext has some trailing whitespace
+ wikipedia.showDiff(oldtext, newtext)
+ commonsPage.put(newtext=newtext, comment=comment)
+
+
+def addCommonscat (page = None, summary = None, always = False):
+ '''
+ Take a page. Go to all the interwiki page looking for a commonscat template.
+ When all the interwiki's links are checked and a proper category is found add it to the page.
+ '''
+ commonscat = ""
+ commonscatpage = None
+ commonscats = []
+
+ wikipedia.output("Working on " + page.title());
+ if getTemplate(page.site().language()) in page.templates():
+ wikipedia.output("Commonscat template is already on " + page.title());
+ #for template in page.templatesWithParams():
+ # if ((template[0]==getTemplate(page.site().language())) and (len(template[1]) > 0)):
+ # commonscatpage = getCommonscat(template[1][0])
+ # if commonscatpage != None:
+ # updateInterwiki (page, commonscatpage)
+ # #Should remove the template if something is wrong
+
+ else:
+ #Follow the interwiki's
+ for ipage in page.interwiki():
+ #See if commonscat is present
+ if getTemplate(ipage.site().language()) in ipage.templates():
+ #Go through all the templates at the page
+ for template in ipage.templatesWithParams():
+ #We found the template and it has the parameter set.
+ if ((template[0]==getTemplate(ipage.site().language())) and (len(template[1]) > 0)):
+ commonscatpage = getCommonscat(template[1][0])
+ if commonscatpage != None:
+ commonscats.append(commonscatpage);
+ wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + commonscatpage.title() + ".");
+ commonscatpage = None
+ if len(commonscats) > 0:
+ commonscatpage = commonscats.pop();
+ commonscat = commonscatpage.titleWithoutNamespace()
+ #We found one or more commonscat links, build the template and add it to our page
+ #TODO: We should check if we found more than one different link.
+ commonscat = "{{" + getTemplate(page.site().language()) + "|" + commonscat + "}}";
+ add_text.add_text(page, commonscat, summary, None, None, always);
+ #updateInterwiki(page, commonscatpage)
+ return (True, always);
+
+def getCommonscat (name = ""):
+ '''
+ This function will retun a page object of the commons page
+ If the page is a redirect this function tries to follow it.
+ If the page doesnt exists the function will return None
+ '''
+ #wikipedia.output("getCommonscat: " + name );
+ result = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name);
+ if not result.exists():
+ #wikipedia.output("getCommonscat : The category doesnt exist.");
+ return None
+ elif result.isRedirectPage():
+ #wikipedia.output("getCommonscat : The category is a redirect");
+ return result.getRedirectTarget();
+ elif "Category redirect" in result.templates():
+ #wikipedia.output("getCommonscat : The category is a category redirect");
+ for template in result.templatesWithParams():
+ if ((template[0]=="Category redirect") and (len(template[1]) > 0)):
+ return getCommonscat(template[1][0])
+ elif result.isDisambig():
+ #wikipedia.output("getCommonscat : The category is disambigu");
+ return None
+ else:
+ return result
+
+def main():
+ '''
+ Parse the command line arguments and get a pagegenerator to work on.
+ Iterate through all the pages.
+ '''
+ summary = None; generator = None; always = False
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
+
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-summary'):
+ if len(arg) == 8:
+ summary = wikipedia.input(u'What summary do you want to use?')
+ else:
+ summary = arg[9:]
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg == '-always':
+ always = True
+ else:
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
+
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+
+ for page in pregenerator:
+ (status, always) = addCommonscat(page, summary, always)
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ wikipedia.stopme()
Property changes on: trunk/pywikipedia/commonscat.py
___________________________________________________________________
Name: svn:eol-style
+ native
Property changes on: trunk/pywikipedia/copyright_clean.py
___________________________________________________________________
Name: svn:eol-style
+ native
Property changes on: trunk/pywikipedia/copyright_put.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/delinker.py
===================================================================
--- trunk/pywikipedia/delinker.py 2008-05-01 18:50:57 UTC (rev 5291)
+++ trunk/pywikipedia/delinker.py 2008-05-02 00:05:37 UTC (rev 5292)
@@ -1,17 +1,17 @@
-# Helper script for delinker and image_replacer
-
-__version__ = '$Id: $'
-
-import wikipedia, config
-
-import sys, os
-sys.path.insert(0, 'commonsdelinker')
-
-module = 'delinker'
-if len(sys.argv) > 1:
- if sys.argv[1] == 'replacer':
- del sys.argv[1]
- module = 'image_replacer'
-
-bot = __import__(module)
+# Helper script for delinker and image_replacer
+
+__version__ = '$Id: $'
+
+import wikipedia, config
+
+import sys, os
+sys.path.insert(0, 'commonsdelinker')
+
+module = 'delinker'
+if len(sys.argv) > 1:
+ if sys.argv[1] == 'replacer':
+ del sys.argv[1]
+ module = 'image_replacer'
+
+bot = __import__(module)
bot.main()
\ No newline at end of file
Property changes on: trunk/pywikipedia/delinker.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/featuredcount.py
===================================================================
--- trunk/pywikipedia/featuredcount.py 2008-05-01 18:50:57 UTC (rev 5291)
+++ trunk/pywikipedia/featuredcount.py 2008-05-02 00:05:37 UTC (rev 5292)
@@ -1,43 +1,43 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-This script only counts how many have featured articles all wikipedias.
-
-usage: featuredcount.py
-
-"""
-__version__ = '$Id: featured.py 4811 2008-01-05 16:22:45Z leogregianin $'
-
-#
-# Distributed under the terms of the MIT license.
-#
-
-import sys
-import wikipedia, catlib
-from featured import featured_name
-
-def featuredArticles(site):
- method=featured_name[site.lang][0]
- name=featured_name[site.lang][1]
- args=featured_name[site.lang][2:]
- raw=method(site, name, *args)
- arts=[]
- for p in raw:
- if p.namespace()==0:
- arts.append(p)
- elif p.namespace()==1:
- arts.append(wikipedia.Page(p.site(), p.titleWithoutNamespace()))
- wikipedia.output('\03{lightred}** wikipedia:%s has %i featured articles\03{default}' % (site.lang, len(arts)))
-
-if __name__=="__main__":
- mysite=wikipedia.getSite()
- fromlang=featured_name.keys()
- fromlang.sort()
- try:
- for ll in fromlang:
- fromsite=wikipedia.Site(ll)
- if not fromsite==wikipedia.getSite():
- arts=featuredArticles(fromsite)
- arts_mysite=featuredArticles(mysite)
- finally:
- wikipedia.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This script only counts how many have featured articles all wikipedias.
+
+usage: featuredcount.py
+
+"""
+__version__ = '$Id: featured.py 4811 2008-01-05 16:22:45Z leogregianin $'
+
+#
+# Distributed under the terms of the MIT license.
+#
+
+import sys
+import wikipedia, catlib
+from featured import featured_name
+
+def featuredArticles(site):
+ method=featured_name[site.lang][0]
+ name=featured_name[site.lang][1]
+ args=featured_name[site.lang][2:]
+ raw=method(site, name, *args)
+ arts=[]
+ for p in raw:
+ if p.namespace()==0:
+ arts.append(p)
+ elif p.namespace()==1:
+ arts.append(wikipedia.Page(p.site(), p.titleWithoutNamespace()))
+ wikipedia.output('\03{lightred}** wikipedia:%s has %i featured articles\03{default}' % (site.lang, len(arts)))
+
+if __name__=="__main__":
+ mysite=wikipedia.getSite()
+ fromlang=featured_name.keys()
+ fromlang.sort()
+ try:
+ for ll in fromlang:
+ fromsite=wikipedia.Site(ll)
+ if not fromsite==wikipedia.getSite():
+ arts=featuredArticles(fromsite)
+ arts_mysite=featuredArticles(mysite)
+ finally:
+ wikipedia.stopme()
Property changes on: trunk/pywikipedia/featuredcount.py
___________________________________________________________________
Name: svn:eol-style
+ native
Property changes on: trunk/pywikipedia/generate_user_files.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/lonelypages.py
===================================================================
--- trunk/pywikipedia/lonelypages.py 2008-05-01 18:50:57 UTC (rev 5291)
+++ trunk/pywikipedia/lonelypages.py 2008-05-02 00:05:37 UTC (rev 5292)
@@ -1,265 +1,265 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-"""
-This is a script written to add the template "orphan" to the pages that aren't linked by other pages.
-It can give some strange Errors sometime, I hope that all of them are fixed in this version.
-
-These command line parameters can be used to specify which pages to work on:
-
-¶ms;
-
--xml Retrieve information from a local XML dump (pages-articles
- or pages-meta-current, see http://download.wikimedia.org).
- Argument can also be given as "-xml:filename".
-
--page Only edit a specific page.
- Argument can also be given as "-page:pagetitle". You can
- give this parameter multiple times to edit multiple pages.
-
-Furthermore, the following command line parameters are supported:
-
--enable: - Enable or disable the bot via a Wiki Page.
-
--disambig: - Set a page where the bot save the name of the disambig pages found (default: skip the pages)
-
--limit: - Set how many pages check.
-
--always - Always say yes, won't ask
-
---- FixMes ---
-* Check that all the code hasn't bugs
-
---- Credit and Help ---
-This Script has been developed by Pietrodn and Filnik on botwiki. If you want to help us
-improving our script archive and pywikipediabot's archive or you simply need help
-you can find us here: http://botwiki.sno.cc
-
---- Examples ---
-python lonelypages.py -enable:User:Bot/CheckBot -always
-"""
-#
-# (C) Pietrodn, it.wiki 2006-2007
-# (C) Filnik, it.wiki 2007
-#
-# Distributed under the terms of the MIT license.
-#
-__version__ = '$Id: lonelypages.py,v 1.0 2007/12/28 19.16.00 filnik Exp$'
-#
-
-import wikipedia, pagegenerators
-import re
-
-# This is required for the text that is shown when you run this script
-# with the parameter -help.
-docuReplacements = {
- '¶ms;': pagegenerators.parameterHelp,
-}
-
-#####################################################
-# Here you have to put the config for your Project. #
-#####################################################
-
-# ************* Modify only below! ************* #
-
-# Template to add in the orphan pages
-Template = {
- 'en':u'{{Orphan|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}',
- 'it':u'{{O||mese={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}',
- 'zh':u'{{subst:Orphan/auto}}',
- }
-
-# Comment that the Bot will use to put the template
-commento = {
- 'en':u'Bot: Orphan page, add template',
- 'it':u'Bot: Voce orfana, aggiungo template {{O}}',
- 'zh':u'機器人: 本頁的鏈入頁面太少',
- }
-
-# When you add a disambig to the list of disambig pages
-#(if you set disambigPage to None, you can put here nothing)
-commenttodisambig = {
- 'en':u'Bot: Adding a disambig page',
- 'it':u'Bot: Aggiungo una disambigua',
- 'zh':u'機器人: 增加消歧義頁面',
- }
-
-# Use regex to prevent to put the same template twice!
-# If you need help with regex, ask on botwiki ( http://botwiki.sno.cc )
-# Warning: put always "()" inside the regex, so the bot will find "something"
-exception = {
- 'en': [r'\{\{(?:template:|)(orphan)[\|\}]', r'\{\{(?:template:|)(wi)[\|\}]'],
- 'it': [r'\{\{(?:template:|)(o)[\|\}]'],
- 'zh': [r'\{\{(?:template:|)(orphan)[\|\}]'],
- }
-
-# ************* Modify only above! ************* #
-
-def main():
- # Load the configurations in the function namespace
- global commento; global Template; global disambigPage; global commenttodisambig
- global exception
-
- enablePage = None # Check if someone set an enablePage or not
- limit = 50000 # All the pages! (I hope that there aren't so many lonely pages in a project..)
- generator = None # Check if the bot should use the default generator or not
- genFactory = pagegenerators.GeneratorFactory() # Load all the default generators!
- nwpages = False # Check variable for newpages
- always = False # Check variable for always
- disambigPage = None # If no disambigPage given, not use it.
- # Arguments!
- for arg in wikipedia.handleArgs():
- if arg.startswith('-enable'):
- if len(arg) == 7:
- enablePage = wikipedia.input(u'Would you like to check if the bot should run or not?')
- else:
- enablePage = arg[8:]
- if arg.startswith('-disambig'):
- if len(arg) == 9:
- disambigPage = wikipedia.input(u'In which page should the bot save the disambig pages?')
- else:
- disambigPage = arg[10:]
- elif arg.startswith('-limit'):
- if len(arg) == 6:
- limit = int(wikipedia.input(u'How many pages do you want to check?'))
- else:
- limit = int(arg[7:])
- elif arg.startswith('-newpages'):
- if len(arg) == 9:
- nwlimit = 50 # Default: 50 pages
- else:
- nwlimit = int(arg[10:])
- generator = wikipedia.getSite().newpages(number = nwlimit)
- nwpages = True
- elif arg.startswith('-page'):
- if len(arg) == 5:
- generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'How many pages do you want to check?'))]
- else:
- generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
- elif arg == '-always':
- always = True
- else:
- generator = genFactory.handleArg(arg)
- # Retrive the site
- wikiSite = wikipedia.getSite()
- # If the generator is not given, use the default one
- if generator == None:
- generator = wikiSite.lonelypages(repeat = True, number = limit)
- # Take the configurations according to our project
- comment = wikipedia.translate(wikiSite, commento)
- commentdisambig = wikipedia.translate(wikiSite, commenttodisambig)
- template = wikipedia.translate(wikiSite, Template)
- exception = wikipedia.translate(wikiSite, exception)
- # EnablePage part
- if enablePage != None:
- # Define the Page Object
- enable = wikipedia.Page(wikiSite, enablePage)
- # Loading the page's data
- try:
- getenable = enable.get()
- except wikipedia.NoPage:
- wikipedia.output(u"%s doesn't esist, I use the page as if it was blank!" % enable.title())
- getenable = ''
- except wikiepedia.IsRedirect:
- wikipedia.output(u"%s is a redirect, skip!" % enable.title())
- getenable = ''
- # If the enable page is set to disable, turn off the bot
- # (useful when the bot is run on a server)
- if getenable != 'enable':
- wikipedia.output('The bot is disabled')
- wikipedia.stopme()
- # DisambigPage part
- if disambigPage != None:
- disambigpage = wikipedia.Page(wikiSite, disambigPage)
- try:
- disambigtext = disambigpage.get()
- except wikipedia.NoPage:
- wikipedia.output(u"%s doesn't esist, skip!" % disambigpage.title())
- disambigtext = ''
- except wikiepedia.IsRedirect:
- wikipedia.output(u"%s is a redirect, don't use it!" % disambigpage.title())
- disambigPage = None
- # Main Loop
- for page in generator:
- if nwpages == True:
- page = page[0] # The newpages generator returns a tuple, not a Page object.
- wikipedia.output(u"Checking %s..." % page.title())
- # Used to skip the first pages in test phase...
- #if page.title()[0] in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']:
- #continue
- if page.isRedirectPage(): # If redirect, skip!
- wikipedia.output(u'%s is a redirect! Skip...' % page.title())
- continue
- # refs is not a list, it's a generator while resList... is a list, yes.
- refs = page.getReferences()
- refsList = list()
- for j in refs:
- if j == None:
- # We have to find out why the function returns that value
- wikipedia.output(u'Error: 1 --> Skip page')
- continue
- refsList.append(j)
- # This isn't possible with a generator
- if refsList != []:
- wikipedia.output(u"%s isn't orphan! Skip..." % page.title())
- continue
- # Never understood how a list can turn in "None", but it happened :-S
- elif refsList == None:
- # We have to find out why the function returns that value
- wikipedia.output(u'Error: 2 --> Skip page')
- continue
- else:
- # Ok, no refs, no redirect... let's check if there's already the template
- try:
- oldtxt = page.get()
- except wikipedia.NoPage:
- wikipedia.output(u"%s doesn't exist! Skip..." % page.title())
- continue
- except wikipedia.IsRedirectPage:
- wikipedia.output(u"%s is a redirect! Skip..." % page.title())
- continue
- # I've used a loop in a loop. If I use continue in the second loop, it won't do anything
- # in the first. So let's create a variable to avoid this problem.
- Find = False
- for regexp in exception:
- res = re.findall(regexp, oldtxt.lower())
- # Found a template! Let's skip the page!
- if res != []:
- wikipedia.output(u'Your regex has found something in %s, skipping...' % page.title())
- Find = True
- break
- # Skip the page..
- if Find:
- continue
- # Is the page a disambig?
- if page.isDisambig() and disambigPage != None:
- wikipedia.output(u'%s is a disambig page, report..' % page.title())
- if not page.title().lower() in disambigtext.lower():
- disambigtext = u"%s\n*[[%s]]" % (disambigtext, page.title())
- disambigpage.put(disambigtext, commentdisambig)
- continue
- # Is the page a disambig but there's not disambigPage? Skip!
- elif page.isDisambig():
- wikipedia.output(u'%s is a disambig page, skip...' % page.title())
- continue
- else:
- # Ok, the page need the template. Let's put it there!
- newtxt = u"%s\n%s" % (template, oldtxt) # Adding the template in the text
- wikipedia.output(u"\t\t>>> %s <<<" % page.title()) # Showing the title
- wikipedia.showDiff(oldtxt, newtxt) # Showing the changes
- choice = 'y' # Default answer
- if not always:
- choice = wikipedia.inputChoice(u'Orphan page found, shall I add the template?', [u'Yes', u'No', u'All'], [u'y', u'n', u'a'], [u'Y', u'N', 'A'])
- if choice.lower() in [u'a', u'all']:
- always = True
- choice = 'y'
- if choice.lower() in [u'y', u'yes']:
- try:
- page.put(newtxt, comment)
- except wikipedia.EditConflict:
- wikipedia.output(u'Edit Conflict! Skip...')
- continue
-if __name__ == '__main__':
- try:
- main()
- finally:
- wikipedia.stopme()
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This is a script written to add the template "orphan" to the pages that aren't linked by other pages.
+It can give some strange Errors sometime, I hope that all of them are fixed in this version.
+
+These command line parameters can be used to specify which pages to work on:
+
+¶ms;
+
+-xml Retrieve information from a local XML dump (pages-articles
+ or pages-meta-current, see http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+
+-page Only edit a specific page.
+ Argument can also be given as "-page:pagetitle". You can
+ give this parameter multiple times to edit multiple pages.
+
+Furthermore, the following command line parameters are supported:
+
+-enable: - Enable or disable the bot via a Wiki Page.
+
+-disambig: - Set a page where the bot save the name of the disambig pages found (default: skip the pages)
+
+-limit: - Set how many pages check.
+
+-always - Always say yes, won't ask
+
+--- FixMes ---
+* Check that all the code hasn't bugs
+
+--- Credit and Help ---
+This Script has been developed by Pietrodn and Filnik on botwiki. If you want to help us
+improving our script archive and pywikipediabot's archive or you simply need help
+you can find us here: http://botwiki.sno.cc
+
+--- Examples ---
+python lonelypages.py -enable:User:Bot/CheckBot -always
+"""
+#
+# (C) Pietrodn, it.wiki 2006-2007
+# (C) Filnik, it.wiki 2007
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: lonelypages.py,v 1.0 2007/12/28 19.16.00 filnik Exp$'
+#
+
+import wikipedia, pagegenerators
+import re
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '¶ms;': pagegenerators.parameterHelp,
+}
+
+#####################################################
+# Here you have to put the config for your Project. #
+#####################################################
+
+# ************* Modify only below! ************* #
+
+# Template to add in the orphan pages
+Template = {
+ 'en':u'{{Orphan|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}',
+ 'it':u'{{O||mese={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}',
+ 'zh':u'{{subst:Orphan/auto}}',
+ }
+
+# Comment that the Bot will use to put the template
+commento = {
+ 'en':u'Bot: Orphan page, add template',
+ 'it':u'Bot: Voce orfana, aggiungo template {{O}}',
+ 'zh':u'機器人: 本頁的鏈入頁面太少',
+ }
+
+# When you add a disambig to the list of disambig pages
+#(if you set disambigPage to None, you can put here nothing)
+commenttodisambig = {
+ 'en':u'Bot: Adding a disambig page',
+ 'it':u'Bot: Aggiungo una disambigua',
+ 'zh':u'機器人: 增加消歧義頁面',
+ }
+
+# Use regex to prevent to put the same template twice!
+# If you need help with regex, ask on botwiki ( http://botwiki.sno.cc )
+# Warning: put always "()" inside the regex, so the bot will find "something"
+exception = {
+ 'en': [r'\{\{(?:template:|)(orphan)[\|\}]', r'\{\{(?:template:|)(wi)[\|\}]'],
+ 'it': [r'\{\{(?:template:|)(o)[\|\}]'],
+ 'zh': [r'\{\{(?:template:|)(orphan)[\|\}]'],
+ }
+
+# ************* Modify only above! ************* #
+
+def main():
+ # Load the configurations in the function namespace
+ global commento; global Template; global disambigPage; global commenttodisambig
+ global exception
+
+ enablePage = None # Check if someone set an enablePage or not
+ limit = 50000 # All the pages! (I hope that there aren't so many lonely pages in a project..)
+ generator = None # Check if the bot should use the default generator or not
+ genFactory = pagegenerators.GeneratorFactory() # Load all the default generators!
+ nwpages = False # Check variable for newpages
+ always = False # Check variable for always
+ disambigPage = None # If no disambigPage given, not use it.
+ # Arguments!
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-enable'):
+ if len(arg) == 7:
+ enablePage = wikipedia.input(u'Would you like to check if the bot should run or not?')
+ else:
+ enablePage = arg[8:]
+ if arg.startswith('-disambig'):
+ if len(arg) == 9:
+ disambigPage = wikipedia.input(u'In which page should the bot save the disambig pages?')
+ else:
+ disambigPage = arg[10:]
+ elif arg.startswith('-limit'):
+ if len(arg) == 6:
+ limit = int(wikipedia.input(u'How many pages do you want to check?'))
+ else:
+ limit = int(arg[7:])
+ elif arg.startswith('-newpages'):
+ if len(arg) == 9:
+ nwlimit = 50 # Default: 50 pages
+ else:
+ nwlimit = int(arg[10:])
+ generator = wikipedia.getSite().newpages(number = nwlimit)
+ nwpages = True
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'How many pages do you want to check?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg == '-always':
+ always = True
+ else:
+ generator = genFactory.handleArg(arg)
+ # Retrive the site
+ wikiSite = wikipedia.getSite()
+ # If the generator is not given, use the default one
+ if generator == None:
+ generator = wikiSite.lonelypages(repeat = True, number = limit)
+ # Take the configurations according to our project
+ comment = wikipedia.translate(wikiSite, commento)
+ commentdisambig = wikipedia.translate(wikiSite, commenttodisambig)
+ template = wikipedia.translate(wikiSite, Template)
+ exception = wikipedia.translate(wikiSite, exception)
+ # EnablePage part
+ if enablePage != None:
+ # Define the Page Object
+ enable = wikipedia.Page(wikiSite, enablePage)
+ # Loading the page's data
+ try:
+ getenable = enable.get()
+ except wikipedia.NoPage:
+ wikipedia.output(u"%s doesn't esist, I use the page as if it was blank!" % enable.title())
+ getenable = ''
+ except wikiepedia.IsRedirect:
+ wikipedia.output(u"%s is a redirect, skip!" % enable.title())
+ getenable = ''
+ # If the enable page is set to disable, turn off the bot
+ # (useful when the bot is run on a server)
+ if getenable != 'enable':
+ wikipedia.output('The bot is disabled')
+ wikipedia.stopme()
+ # DisambigPage part
+ if disambigPage != None:
+ disambigpage = wikipedia.Page(wikiSite, disambigPage)
+ try:
+ disambigtext = disambigpage.get()
+ except wikipedia.NoPage:
+ wikipedia.output(u"%s doesn't esist, skip!" % disambigpage.title())
+ disambigtext = ''
+ except wikiepedia.IsRedirect:
+ wikipedia.output(u"%s is a redirect, don't use it!" % disambigpage.title())
+ disambigPage = None
+ # Main Loop
+ for page in generator:
+ if nwpages == True:
+ page = page[0] # The newpages generator returns a tuple, not a Page object.
+ wikipedia.output(u"Checking %s..." % page.title())
+ # Used to skip the first pages in test phase...
+ #if page.title()[0] in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q']:
+ #continue
+ if page.isRedirectPage(): # If redirect, skip!
+ wikipedia.output(u'%s is a redirect! Skip...' % page.title())
+ continue
+ # refs is not a list, it's a generator while resList... is a list, yes.
+ refs = page.getReferences()
+ refsList = list()
+ for j in refs:
+ if j == None:
+ # We have to find out why the function returns that value
+ wikipedia.output(u'Error: 1 --> Skip page')
+ continue
+ refsList.append(j)
+ # This isn't possible with a generator
+ if refsList != []:
+ wikipedia.output(u"%s isn't orphan! Skip..." % page.title())
+ continue
+ # Never understood how a list can turn in "None", but it happened :-S
+ elif refsList == None:
+ # We have to find out why the function returns that value
+ wikipedia.output(u'Error: 2 --> Skip page')
+ continue
+ else:
+ # Ok, no refs, no redirect... let's check if there's already the template
+ try:
+ oldtxt = page.get()
+ except wikipedia.NoPage:
+ wikipedia.output(u"%s doesn't exist! Skip..." % page.title())
+ continue
+ except wikipedia.IsRedirectPage:
+ wikipedia.output(u"%s is a redirect! Skip..." % page.title())
+ continue
+ # I've used a loop in a loop. If I use continue in the second loop, it won't do anything
+ # in the first. So let's create a variable to avoid this problem.
+ Find = False
+ for regexp in exception:
+ res = re.findall(regexp, oldtxt.lower())
+ # Found a template! Let's skip the page!
+ if res != []:
+ wikipedia.output(u'Your regex has found something in %s, skipping...' % page.title())
+ Find = True
+ break
+ # Skip the page..
+ if Find:
+ continue
+ # Is the page a disambig?
+ if page.isDisambig() and disambigPage != None:
+ wikipedia.output(u'%s is a disambig page, report..' % page.title())
+ if not page.title().lower() in disambigtext.lower():
+ disambigtext = u"%s\n*[[%s]]" % (disambigtext, page.title())
+ disambigpage.put(disambigtext, commentdisambig)
+ continue
+ # Is the page a disambig but there's not disambigPage? Skip!
+ elif page.isDisambig():
+ wikipedia.output(u'%s is a disambig page, skip...' % page.title())
+ continue
+ else:
+ # Ok, the page need the template. Let's put it there!
+ newtxt = u"%s\n%s" % (template, oldtxt) # Adding the template in the text
+ wikipedia.output(u"\t\t>>> %s <<<" % page.title()) # Showing the title
+ wikipedia.showDiff(oldtxt, newtxt) # Showing the changes
+ choice = 'y' # Default answer
+ if not always:
+ choice = wikipedia.inputChoice(u'Orphan page found, shall I add the template?', [u'Yes', u'No', u'All'], [u'y', u'n', u'a'], [u'Y', u'N', 'A'])
+ if choice.lower() in [u'a', u'all']:
+ always = True
+ choice = 'y'
+ if choice.lower() in [u'y', u'yes']:
+ try:
+ page.put(newtxt, comment)
+ except wikipedia.EditConflict:
+ wikipedia.output(u'Edit Conflict! Skip...')
+ continue
+if __name__ == '__main__':
+ try:
+ main()
+ finally:
+ wikipedia.stopme()
Property changes on: trunk/pywikipedia/lonelypages.py
___________________________________________________________________
Name: svn:eol-style
+ native
Property changes on: trunk/pywikipedia/noreferences.py
___________________________________________________________________
Name: svn:eol-style
+ native
Property changes on: trunk/pywikipedia/pageimport.py
___________________________________________________________________
Name: svn:eol-style
+ native
Revision: 5291
Author: filnik
Date: 2008-05-01 18:50:57 +0000 (Thu, 01 May 2008)
Log Message:
-----------
Adding de settings, tidying the others a bit. De settings by ABF
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-05-01 16:18:50 UTC (rev 5290)
+++ trunk/pywikipedia/checkimages.py 2008-05-01 18:50:57 UTC (rev 5291)
@@ -88,13 +88,14 @@
# That's what you want that will be added. (i.e. the {{no source}} with the right day/month/year )
n_txt = {
- 'commons':'\n{{subst:nld}}',
- 'en' :'\n{{subst:nld}}',
- 'it' :'\n{{subst:unverdata}}',
- 'ja' :'{{subst:Nsd}}',
- 'hu' :u'\n{{nincslicenc|~~~~~}}',
- 'ta' :'\n{{subst:nld}}',
- 'zh' :'{{subst:No license/auto}}',
+ 'commons':'\n{{subst:nld}}',
+ 'de' :u'{{Benutzer:ABF/D|~~~~}} {{Dateiüberprüfung/benachrichtigt (Kategorie)|{{subst:LOCALYEAR}}|{{subst:LOCALMONTH}}|{{subst:LOCALDAY}}}} {{Dateiüberprüfung/benachrichtigt (Text)|Lizenz|||||}} --This was added by ~~~~-- ',
+ 'en' :'\n{{subst:nld}}',
+ 'it' :'\n{{subst:unverdata}}',
+ 'ja' :'{{subst:Nsd}}',
+ 'hu' :u'\n{{nincslicenc|~~~~~}}',
+ 'ta' :'\n{{subst:nld}}',
+ 'zh' :'{{subst:No license/auto}}',
}
# Text that the bot will try to see if there's already or not. If there's a
@@ -103,19 +104,21 @@
# '{{nld' --> '\{\{(?:template:|)no[ _]license ?(?:\||\n|\}) ?' (case insensitive).
# If there's not a {{ it will work as usual (if x in Text)
txt_find = {
- 'commons':[u'{{no license', u'{{nld', u'{{no permission since'],
- 'en':[u'{{nld', u'{{no license'],
- 'hu':[u'{{nincsforrás',u'{{nincslicenc'],
- 'it':[u'{{unverdata', u'{{unverified'],
- 'ja':[u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',],
- 'ta':[u'{{no source', u'{{nld', u'{{no license'],
- 'zh':[u'{{no source', u'{{unknown', u'{{No license',],
+ 'commons':[u'{{no license', u'{{nld', u'{{no permission since'],
+ 'de':[u'{{DÜP', u'{{Dateiüberprüfung'],
+ 'en':[u'{{nld', u'{{no license'],
+ 'hu':[u'{{nincsforrás',u'{{nincslicenc'],
+ 'it':[u'{{unverdata', u'{{unverified'],
+ 'ja':[u'{{no source', u'{{unknown', u'{{non free', u'<!--削除についての議論が終了するまで',],
+ 'ta':[u'{{no source', u'{{nld', u'{{no license'],
+ 'zh':[u'{{no source', u'{{unknown', u'{{No license',],
}
# Summary for when the will add the no source
comm = {
+ 'ar' :u'بوت: التعليم على ملف مرفوع حديثا غير موسوم',
'commons':u'Bot: Marking newly uploaded untagged file',
- 'ar' :u'بوت: التعليم على ملف مرفوع حديثا غير موسوم',
+ 'de' :u'Bot: Markierung als Bild ohne Lizenz',
'en' :u'Bot: Marking newly uploaded untagged file',
'hu' :u'Robot: Frissen feltöltött licencsablon nélküli fájl megjelölése',
'it' :u"Bot: Aggiungo unverified",
@@ -127,6 +130,7 @@
# When the Bot find that the usertalk is empty is not pretty to put only the no source without the welcome, isn't it?
empty = {
'commons':'{{subst:welcome}}\n~~~~\n',
+ 'de':'{{subst:willkommen}} ~~~~',
'en' :'{{welcome}}\n~~~~\n',
'it' :'<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n<!-- fine template di benvenuto -->',
'ja':'{{welcome}}\n--~~~~\n',
@@ -136,8 +140,9 @@
# Summary that the bot use when it notify the problem with the image's license
comm2 = {
+ 'ar' :"بوت: طلب معلومات المصدر." ,
'commons':"Bot: Requesting source information." ,
- 'ar' :"بوت: طلب معلومات المصدر." ,
+ 'de' :u'Bot:Notify User',
'en' :"Bot: Requesting source information." ,
'it' :"Bot: Notifico l'unverified",
'ja' :u"ロボットによる:出典とライセンス明記のお願い",
@@ -149,7 +154,7 @@
}
# if the file has an unknown extension it will be tagged with this template.
-# In reality, there aren't unknown extension, they are only not allewed... ^__^
+# In reality, there aren't unknown extension, they are only not allowed...
delete_immediately = {
'commons':u"{{speedy|The file has .%s as extension. Is it ok? Please check.}}",
'en' :u"{{db-meta|The file has .%s as extension.}}",
@@ -172,8 +177,8 @@
# Text that will be add if the bot find a unknown extension.
delete_immediately_notification = {
+ 'ar' :u'الملف [[:Image:%s]] يبدو أن امتداده خاطيء, من فضلك تحقق. ~~~~',
'commons':u'The [[:Image:%s]] file seems to have a wrong extension, please check. ~~~~',
- 'ar' :u'الملف [[:Image:%s]] يبدو أن امتداده خاطيء, من فضلك تحقق. ~~~~',
'en' :u'The [[:Image:%s]] file seems to have a wrong extension, please check. ~~~~',
'it' :u'{{subst:Utente:Filbot/Ext|%s}} --~~~~',
'hu' :u'A [[:Kép:%s]] fájlnak rossz a kiterjesztése, kérlek ellenőrízd. ~~~~',
@@ -182,8 +187,8 @@
}
# Summary of the delate immediately. (f.e: Adding {{db-meta|The file has .%s as extension.}})
del_comm = {
+ 'ar' :u'بوت: إضافة %s',
'commons':u'Bot: Adding %s',
- 'ar' :u'بوت: إضافة %s',
'en' :u'Bot: Adding %s',
'it' :u'Bot: Aggiungo %s',
'ja' :u'ロボットによる: 追加 %s',
@@ -195,8 +200,9 @@
# This is the most important header, because it will be used a lot. That's the header that the bot
# will add if the image hasn't the license.
nothing_head = {
+ 'ar' :u"\n== صورة بدون ترخيص ==\n",
'commons':u"",# Nothing, the template has already the header inside.
- 'ar' :u"\n== صورة بدون ترخيص ==\n",
+ 'de' :u"\n== Bild ohne Lizenz ==\n",
'en' :u"\n== Image without license ==\n",
'ja' :u'',
'it' :u"\n\n== Immagine senza licenza ==\n",
@@ -209,6 +215,7 @@
nothing_notification = {
'commons':u"\n{{subst:User:Filnik/untagged|Image:%s}}\n\n''This message was '''added automatically by [[User:" + \
"__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~",
+ 'de' :u'\n{{subst:Benutzer:ABF/D2|%s}} ~~~~ ',
'en' :u"{{subst:image source|Image:%s}} --~~~~",
'it' :u"{{subst:Utente:Filbot/Senza licenza|%s}} --~~~~",
'ja' :u"\n{{subst:image source|Image:%s}}--~~~~",
@@ -221,6 +228,7 @@
# NOTE: YOUR Botnick is automatically added. It's not required to add it twice.
bot_list = {
'commons':[u'Siebot', u'CommonsDelinker', u'Filbot', u'John Bot', u'Sz-iwbot', u'ABFbot'],
+ 'de' :['ABFbot'],
'en' :[u'OrphanBot'],
'it' :[u'Filbot', u'Nikbot', u'.snoopyBot.'],
'ja' :[u'alexbot'],
@@ -231,6 +239,7 @@
# The message that the bot will add the second time that find another license problem.
second_message_without_license = {
'commons':None,
+ 'de':None,
'en': None,
'it':u':{{subst:Utente:Filbot/Senza licenza2|%s}} --~~~~',
'hu':u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi kocsmafalhoz]]. Köszönöm --~~~~',
@@ -242,6 +251,7 @@
# That's useful if you are running the bot on Toolserver.
page_with_settings = {
'commons':u'User:Filbot/Settings',
+ 'de':None,
'en':None,
'hu':None,
'it':u'Progetto:Coordinamento/Immagini/Bot/Settings#Settings',
@@ -253,6 +263,7 @@
# This is the page where the bot will store them.
report_page = {
'commons':u'User:Filbot/Report',
+ 'de' :u'Benutzer:ABFbot/Report',
'en' :u'User:Filnik/Report',
'it' :u'Progetto:Coordinamento/Immagini/Bot/Report',
'ja' :u'User:Alexbot/report',
@@ -265,6 +276,7 @@
# The text added in the report
report_text = {
'commons':u"\n*[[:Image:%s]] " + timeselected,
+ 'de':u"\n*[[:Bild:%s]] " + timeselected,
'en':u"\n*[[:Image:%s]] " + timeselected,
'it':u"\n*[[:Immagine:%s]] " + timeselected,
'ja':u"\n*[[:Immagine:%s]] " + timeselected,
@@ -276,6 +288,7 @@
comm10 = {
'commons':u'Bot: Updating the log',
'ar':u'بوت: تحديث السجل',
+ 'de': u'Bot:schreibe Log',
'en':u'Bot: Updating the log',
'it':u'Bot: Aggiorno il log',
'ja': u'ロボットによる:更新',
@@ -291,6 +304,7 @@
# because they are already put in the regex).
HiddenTemplate = {
'commons':['information', 'trademarked', 'trademark'],
+ 'de':[u'information'],
'en':['information'],
'it':['edp', 'informazioni[ _]file', 'information', 'trademark'],
'ja':[u'Information'],
@@ -303,13 +317,14 @@
# Note: every __botnick__ will be repleaced with your bot's nickname (feel free not to use if you don't need it)
HiddenTemplateNotification = {
'commons': """\n{{subst:User:Filnik/whitetemplate|Image:%s}}\n\n''This message was '''added automatically by [[User:__botnick__|__botnick__]]''', if you need some help about it, ask its master (~~~) or go to the [[Commons:Help desk]]''. --~~~~""",
+ 'de': None,
'en': None,
'it': u"{{subst:Utente:Filbot/Template_insufficiente|%s}} --~~~~",
'ta': None,
}
# Add your project (in alphabetical order) if you want that the bot start
-project_inserted = ['commons', 'en', 'ja', 'hu', 'it', 'ta', 'zh']
+project_inserted = ['ar', 'commons', 'de', 'en', 'ja', 'hu', 'it', 'ta', 'zh']
# Ok, that's all. What is below, is the rest of code, now the code is fixed and it will run correctly in your project.
#########################################################################################################################
Revision: 5290
Author: russblau
Date: 2008-05-01 16:18:50 +0000 (Thu, 01 May 2008)
Log Message:
-----------
Use logging instead of pywikibot.output for all progress-related messages; use absolute import
Modified Paths:
--------------
branches/rewrite/pywikibot/login.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/login.py
===================================================================
--- branches/rewrite/pywikibot/login.py 2008-05-01 15:15:46 UTC (rev 5289)
+++ branches/rewrite/pywikibot/login.py 2008-05-01 16:18:50 UTC (rev 5290)
@@ -169,7 +169,7 @@
if match:
id = match.group('id')
if not config.solve_captcha:
- raise pywikibot.CaptchaError(id)
+ raise CaptchaError(id)
url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id)
answer = wikipedia.ui.askForCaptcha(url)
return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer)
@@ -227,17 +227,17 @@
# self.password = self.password.encode(self.site.encoding())
- pywikibot.output(u"Logging in to %s as %s" % (self.site, self.username))
+ logging.info(u"Logging in to %s as %s" % (self.site, self.username))
cookiedata = self.getCookie()
if cookiedata:
self.storecookiedata(cookiedata)
- pywikibot.output(u"Should be logged in now")
+ logging.info(u"Should be logged in now")
# Show a warning according to the local bot policy
if not self.botAllowed():
- pywikibot.output(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.lang])
+ logging.error(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.lang])
return True
else:
- pywikibot.output(u"Login failed. Wrong password or CAPTCHA answer?")
+ logging.error(u"Login failed. Wrong password or CAPTCHA answer?")
if retry:
self.password = None
return self.login(retry = True)
@@ -276,7 +276,7 @@
for lang in namedict[familyName].iterkeys():
site = pywikibot.getSite(code=lang, fam=familyName)
if not forceLogin and site.loggedInAs(sysop = sysop) != None:
- pywikibot.output(u'Already logged in on %s' % site)
+ logging.info(u'Already logged in on %s' % site)
else:
loginMan = LoginManager(password, sysop = sysop, site = site)
loginMan.login()
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-05-01 15:15:46 UTC (rev 5289)
+++ branches/rewrite/pywikibot/page.py 2008-05-01 16:18:50 UTC (rev 5290)
@@ -735,7 +735,7 @@
logging.debug(
u"Page.move: throttle option is deprecated.")
if reason is None:
- pywikibot.output(u'Moving %s to [[%s]].'
+ logging.info(u'Moving %s to [[%s]].'
% (self.title(asLink=True), newtitle))
reason = pywikibot.input(u'Please enter a reason for the move:')
return self.site().move(self, newtitle, reason,
@@ -755,7 +755,7 @@
logging.debug(
u"Page.delete: throttle option is deprecated.")
if reason is None:
- pywikibot.output(u'Deleting %s.' % (self.title(asLink=True)))
+ logging.info(u'Deleting %s.' % (self.title(asLink=True)))
reason = pywikibot.input(u'Please enter a reason for the deletion:')
answer = u'y'
if prompt and not hasattr(self.site(), '_noDeletePrompt'):
@@ -833,7 +833,7 @@
logging.debug(
u"Page.undelete: throttle option is deprecated.")
if comment is None:
- pywikibot.output(u'Preparing to undelete %s.'
+ logging.info(u'Preparing to undelete %s.'
% (self.title(asLink=True)))
comment = pywikibot.input(
u'Please enter a reason for the undeletion:')
@@ -863,7 +863,7 @@
un = u'un'
else:
un = u''
- pywikibot.output(u'Preparing to %sprotect %s.'
+ logging.info(u'Preparing to %sprotect %s.'
% (un, self.title(asLink=True)))
reason = pywikibot.input(u'Please enter a reason for the action:')
if unprotect:
@@ -1158,11 +1158,11 @@
catname = self.site().category_namespace() + ':' + catname
targetCat = Category(self.site(), catname)
if targetCat.exists():
- pywikibot.output('Target page %s already exists!'
+ logging.warn('Target page %s already exists!'
% targetCat.title())
return False
else:
- pywikibot.output('Moving text from %s to %s.'
+ logging.info('Moving text from %s to %s.'
% (self.title(), targetCat.title()))
authors = ', '.join(self.contributingUsers())
creationSummary = pywikibot.translate(
@@ -1194,11 +1194,11 @@
catname = self.site().category_namespace() + ':' + catname
targetCat = Category(self.site(), catname)
if targetCat.exists():
- pywikibot.output('Target page %s already exists!'
+ logging.warn('Target page %s already exists!'
% targetCat.title())
return False
else:
- pywikibot.output('Moving text from %s to %s.'
+ logging.info('Moving text from %s to %s.'
% (self.title(), targetCat.title()))
authors = ', '.join(self.contributingUsers())
creationSummary = pywikibot.translate(
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-05-01 15:15:46 UTC (rev 5289)
+++ branches/rewrite/pywikibot/site.py 2008-05-01 16:18:50 UTC (rev 5290)
@@ -13,7 +13,7 @@
import pywikibot
from pywikibot.throttle import Throttle
from pywikibot.data import api
-from exceptions import *
+from pywikibot.exceptions import *
import config
import logging
@@ -49,15 +49,13 @@
exec "import %s_family as myfamily" % fam
except ImportError:
if fatal:
- output(u"""\
+ logging.exception(u"""\
Error importing the %s family. This probably means the family
does not exist. Also check your configuration file."""
% fam)
- import traceback
- traceback.print_stack()
sys.exit(1)
else:
- raise ValueError("Family %s does not exist" % repr(fam))
+ raise Error("Family %s does not exist" % fam)
return myfamily.Family()
Revision: 5289
Author: filnik
Date: 2008-05-01 15:15:46 +0000 (Thu, 01 May 2008)
Log Message:
-----------
Adding new functionality: checking whether an image has duplicates through the APIs. Also adding some documentation, an error class for the new functionality, and a partial rewrite of the report_image function
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-04-30 17:31:44 UTC (rev 5288)
+++ trunk/pywikipedia/checkimages.py 2008-05-01 15:15:46 UTC (rev 5289)
@@ -18,6 +18,8 @@
-commons - The Bot will check if an image on Commons has the same name
and if true it report the image.
+ -duplicates - Checking if the image has duplicates.
+
-break - To break the bot after the first check (default: recursive)
-time[:#] - Time in seconds between repeat runs (default: 30)
@@ -55,9 +57,9 @@
---- Known issues/FIXMEs: ----
* Fix the "real-time" regex and function
* Add the "catch the language" function for commons.
-* Add new documentation
+* Fix and reorganise the new documentation
* Add a report for the image tagged.
-* Fix the settings part when the bot save the data (make it better)
+* Implement: Special:FileDuplicateSearch/Image.jpg
"""
#
@@ -70,12 +72,16 @@
__version__ = '$Id$'
#
-import re, time, urllib2
-import wikipedia, config, os, locale, sys
-import cPickle, pagegenerators, catlib
+import re, time, urllib, urllib2, os, locale, sys
+import wikipedia, config, pagegenerators, catlib
locale.setlocale(locale.LC_ALL, '')
+class NoHash(wikipedia.Error):
+ """ The APIs don't return any Hash for the image searched.
+ Really Strange, better to raise an error.
+ """
+
#########################################################################################################################
# <------------------------------------------- Change only below! ----------------------------------------------------->#
#########################################################################################################################
@@ -214,12 +220,12 @@
# This is a list of what bots used this script in your project.
# NOTE: YOUR Botnick is automatically added. It's not required to add it twice.
bot_list = {
- 'commons':['Siebot', 'CommonsDelinker'],
- 'en' :['OrphanBot'],
- 'it' :['Filbot', 'Nikbot', '.snoopyBot.'],
- 'ja' :['alexbot'],
- 'ta' :['TrengarasuBOT'],
- 'zh' :['alexbot'],
+ 'commons':[u'Siebot', u'CommonsDelinker', u'Filbot', u'John Bot', u'Sz-iwbot', u'ABFbot'],
+ 'en' :[u'OrphanBot'],
+ 'it' :[u'Filbot', u'Nikbot', u'.snoopyBot.'],
+ 'ja' :[u'alexbot'],
+ 'ta' :[u'TrengarasuBOT'],
+ 'zh' :[u'alexbot'],
}
# The message that the bot will add the second time that find another license problem.
@@ -248,7 +254,7 @@
report_page = {
'commons':u'User:Filbot/Report',
'en' :u'User:Filnik/Report',
- 'it' :u'Progetto:Coordinamento/Immagini/Bot/NowCommons',
+ 'it' :u'Progetto:Coordinamento/Immagini/Bot/Report',
'ja' :u'User:Alexbot/report',
'hu' :u'User:Bdamokos/Report',
'ta' :u'Trengarasu/commonsimages',
@@ -415,8 +421,8 @@
talk_page = wikipedia.Page(self.site, pagina_discussione)
self.talk_page = talk_page
return True
- # There is the function to put the advise in talk page.
def put_talk(self, notification, head, notification2 = None, commx = None):
+ """ Function to put the warning in talk page of the uploader."""
commento2 = wikipedia.translate(self.site, comm2)
talk_page = self.talk_page
notification = self.notification
@@ -476,6 +482,7 @@
talk_page.put(testoattuale + head + notification, comment = commentox, minorEdit = False)
def untaggedGenerator(self, untaggedProject, limit):
+ """ Generator that yield the images without license. It's based on a tool of the toolserver. """
lang = untaggedProject.split('.', 1)[0]
project = '.%s' % untaggedProject.split('.', 1)[1]
if lang == 'commons':
@@ -494,6 +501,7 @@
yield wikiPage
def regexGenerator(self, regexp, textrun):
+ """ Generator used when an user use a regex parsing a page to yield the results """
pos = 0
done = list()
ext_list = list()
@@ -510,15 +518,15 @@
yield image
#continue
- def checkImage(self, image):
+ def checkImageOnCommons(self, image):
+ """ Checking if the image is on commons """
self.image = image
- # Search regular expression to find links like this (and the class attribute is optional too)
- # title="Immagine:Nvidia.jpg"
- wikipedia.output(u'Checking if %s is on commons...' % image)
- commons = wikipedia.getSite('commons', 'commons')
- if wikipedia.Page(commons, u'Image:%s' % image).exists():
- wikipedia.output(u'%s is on commons!' % image)
- imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % image)
+ wikipedia.output(u'Checking if %s is on commons...' % self.image)
+ commons = wikipedia.getSite('commons', 'commons')
+ regexOnCommons = r"\n\*\[\[:Image:%s\]\] is also on '''Commons''': \[\[commons:Image:%s\]\]$" % (self.image, self.image)
+ if wikipedia.Page(commons, u'Image:%s' % self.image).exists():
+ wikipedia.output(u'%s is on commons!' % self.image)
+ imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.image)
on_commons_text = imagePage.getImagePageHtml()
if "<div class='sharedUploadNotice'>" in on_commons_text:
wikipedia.output(u"But, the image doesn't exist on your project! Skip...")
@@ -529,39 +537,68 @@
wikipedia.output(u'%s has "stemma" inside, means that it\'s ok.' % image)
return True # Problems? No, it's only not on commons but the image needs a check
else:
- repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]"
- self.report_image(self.image, self.rep_page, self.com, repme)
+ repme = "\n*[[:Image:%s]] is also on '''Commons''': [[commons:Image:%s]]" % (self.image, self.image)
+ self.report_image(self.image, self.rep_page, self.com, repme, addings = False, regex = regexOnCommons)
# Problems? No, return True
return True
else:
# Problems? No, return True
return True
- def report_image(self, image, rep_page = None, com = None, rep_text = None):
- if rep_page == None:
- rep_page = self.rep_page
- if com == None:
- com = self.com
- if rep_text == None:
- rep_text = self.rep_text
+
+ def convert_to_url(self, page):
+ # Function stolen from wikipedia.py
+ """The name of the page this Page refers to, in a form suitable for the URL of the page."""
+ title = page.replace(" ", "_")
+ encodedTitle = title.encode(self.site.encoding())
+ return urllib.quote(encodedTitle)
+
+ def checkImageDuplicated(self, image):
+ """ Function to check for duplicated images. """
+ self.image = image
+ duplicateRegex = r'\n\*(?:\[\[:Image:%s\]\] has the following duplicates:|\*\[\[:Image:%s\]\])$' % (self.image, self.image)
+ imagePage = wikipedia.ImagePage(self.site, 'Image:%s' % self.image)
+ wikipedia.output(u'Checking if %s has duplicates...' % image)
+ get_hash = self.site.getUrl('/w/api.php?action=query&format=xml&titles=Image:%s&prop=imageinfo&iiprop=sha1' % self.convert_to_url(self.image))
+ hash_found_list = re.findall(r'<ii sha1="(.*?)" />', get_hash)
+ if hash_found_list != []:
+ hash_found = hash_found_list[0]
+ else:
+ raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
+ get_duplicates = self.site.getUrl('/w/api.php?action=query&format=xml&list=allimages&aisha1=%s' % hash_found)
+ duplicates = re.findall(r'<img name="(.*?)".*?/>', get_duplicates)
+ if len(duplicates) > 1:
+ if len(duplicates) == 2:
+ wikipedia.output(u'%s has a duplicate! Reporting it...' % self.image)
+ else:
+ wikipedia.output(u'%s has %s duplicates! Reporting them...' % (self.image, len(duplicates) - 1))
+ repme = "\n*[[:Image:%s]] has the following duplicates:" % self.image
+ for duplicate in duplicates:
+ if duplicate == self.image:
+ continue # skip the image itself; don't report it as its own duplicate
+ repme += "\n**[[:Image:%s]]" % duplicate
+ self.report_image(self.image, self.rep_page, self.com, repme + '\n', addings = False, regex = duplicateRegex)
+
+ def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None):
+ """ Function to report images on the report page when needed. """
+ if rep_page == None: rep_page = self.rep_page
+ if com == None: com = self.com
+ if rep_text == None: rep_text = self.rep_text
another_page = wikipedia.Page(self.site, rep_page)
-
- if another_page.exists():
+ if regex == None: regex = image
+ if another_page.exists():
text_get = another_page.get()
else:
text_get = str()
if len(text_get) >= self.logFulNumber:
raise LogIsFull("The log page (%s) is full! Please delete the old images reported." % another_page.title())
pos = 0
- # The talk page includes "_" between the two names, in this way i replace them to " "
- regex = image
- n = re.compile(regex, re.UNICODE)
+ # The talk page includes "_" between the two names, in this way i replace them to " "
+ n = re.compile(regex, re.UNICODE|re.M)
y = n.search(text_get, pos)
if y == None:
- # Adding the log :)
- if "\'\'\'Commons\'\'\'" in rep_text:
- rep_text = rep_text % (image, image)
- else:
- rep_text = rep_text % image
+ # Adding the log
+ if addings:
+ rep_text = rep_text % image # Adding the name of the image in the report if not done already
another_page.put(text_get + rep_text, comment = com, minorEdit = False)
wikipedia.output(u"...Reported...")
reported = True
@@ -572,6 +609,7 @@
return reported
def takesettings(self):
+ """ Function to take the settings from the wiki. """
pos = 0
if self.settings == None: lista = None
else:
@@ -609,6 +647,7 @@
return lista
def load(self, raw):
+ """ Load a list of objects from a string using a regex. """
list_loaded = list()
pos = 0
load_2 = True
@@ -693,6 +732,7 @@
regexGen = False # Use the regex generator
untagged = False # Use the untagged generator
skip_list = list() # Inizialize the skip list used below
+ duplicatesActive = False
# Here below there are the parameters.
for arg in wikipedia.handleArgs():
@@ -710,6 +750,8 @@
repeat = False
elif arg == '-commons':
commonsActive = True
+ elif arg == '-duplicates':
+ duplicatesActive = True
elif arg.startswith('-skip'):
if len(arg) == 5:
skip = True
@@ -874,8 +916,7 @@
else: wikipedia.output(u'\t >> No additional settings found! <<')
# Not the main, but the most important loop.
#parsed = False
- for image in generator:
-
+ for image in generator:
# When you've a lot of image to skip before working use this workaround, otherwise
# let this commented, thanks. [ decoment also parsed = False if you want to use it
#
@@ -930,9 +971,11 @@
skip_list.append('skip = Off') # Only to print it once
# Check on commons if there's already an image with the same name
if commonsActive == True:
- response = mainClass.checkImage(imageName)
+ response = mainClass.checkImageOnCommons(imageName)
if response == False:
continue
+ if duplicatesActive == True:
+ mainClass.checkImageDuplicated(imageName)
parentesi = False # parentesi are these in italian: { ( ) } []
delete = False
tagged = False