Revision: 5261
Author: rotem
Date: 2008-04-23 13:56:51 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
* The error was also shown when cascading protection was not enabled.
* Code style cleanup.
* The term is 'protect', not 'block'.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-23 13:26:33 UTC (rev 5260)
+++ trunk/pywikipedia/wikipedia.py 2008-04-23 13:56:51 UTC (rev 5261)
@@ -2297,17 +2297,23 @@
token = self.site().getToken(self, sysop = True)
- #Translate 'none' to ''
+ # Translate 'none' to ''
if edit == 'none': edit = ''
if move == 'none': move = ''
+
+ # Translate no duration to infinite
if duration == 'none' or duration == None: duration = 'infinite'
- if cascading == False: cascading = '0'
- else: cascading = '1'
- if edit != 'sysop' or move != 'sysop':
- # You can't block a page as autoconfirmed and cascading, prevent the error
+ # Get cascading
+ if cascading == False:
cascading = '0'
- output(u"NOTE: The page can't be blocked with cascading and not also with only-sysop. Set cascading \"off\"")
+ else:
+ if edit != 'sysop' or move != 'sysop':
+ # You can't protect a page as autoconfirmed and cascading, prevent the error
+ cascading = '0'
+ output(u"NOTE: The page can't be protected with cascading and not also with only-sysop. Set cascading \"off\"")
+ else:
+ cascading = '1'
predata = {
'mwProtect-cascade': cascading,
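For illustration, the decision logic in this hunk reduces to a small standalone function. This is a sketch only, not committed code; it assumes the same string-typed form values as above:

    def cascading_flag(cascading, edit, move):
        # Compute the '0'/'1' string for the mwProtect-cascade form field.
        if not cascading:
            return '0'
        if edit != 'sysop' or move != 'sysop':
            # MediaWiki rejects cascading protection below sysop level,
            # so fall back to non-cascading (the method also prints a NOTE).
            return '0'
        return '1'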
Revision: 5260
Author: wikipedian
Date: 2008-04-23 13:26:33 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
when several dead links are reported on the same talk page, name the headers
"Dead link", "Dead link 2", "Dead link 3" etc.
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2008-04-23 13:23:19 UTC (rev 5259)
+++ trunk/pywikipedia/weblinkchecker.py 2008-04-23 13:26:33 UTC (rev 5260)
@@ -123,22 +123,40 @@
# The second %s will be replaced by a hint to the Internet Archive,
# in case the page has been archived there.
talk_report = {
- 'ar': u'== وصلة ميتة ==\n\nخلال عدة عمليات أوتوماتيكية من البوت الوصلة الخارجية التالية كانت غير متوفرة. من فضلك تحقق من أن الوصلة لا تعمل وأزلها أو أصلحها في هذه الحالة!\n\n%s\n%s--~~~~',
- 'de': u'== Toter Weblink ==\n\nBei mehreren automatisierten Botläufen wurde der folgende Weblink als nicht verfügbar erkannt. Bitte überprüfe, ob der Link tatsächlich unerreichbar ist, und korrigiere oder entferne ihn in diesem Fall!\n\n%s\n%s--~~~~',
- 'en': u'== Dead link ==\n\nDuring several automated bot runs the following external link was found to be unavailable. Please check if the link is in fact down and fix or remove it in that case!\n\n%s\n%s--~~~~',
- 'fr': u'== Lien mort ==\n\nPendant plusieurs patrouilles par bot, le lien suivant a été inaccessible. Veuillez vérifier si le lien est effectivement mort et si oui corrigez ou retirez-le.\n\n%s\n%s--~~~~',
- 'he': u'== קישור שבור ==\n\nבמהלך מספר ריצות אוטומטיות של הבוט, נמצא שהקישור החיצוני הבא אינו זמין. אנא בדקו אם הקישור אכן שבור, ותקנו אותו או הסירו אותו במקרה זה!\n\n%s\n%s--~~~~',
- 'ia': u'== Ligamine defuncte ==\n\nDurante plure sessiones automatic, le robot ha constatate que le sequente ligamine externe non es disponibile. Per favor confirma que le ligamine de facto es defuncte, e in caso de si, repara o elimina lo!\n\n%s\n%s--~~~~',
- 'kk': u'== Өлі сілтеме ==\n\nӨздікті бот бірнеше жегілгенде келесі сыртқы сілтемеге қатынай алмады. Бұл сілтеменің қатыналуын тексеріп шығыңыз да, не түзетіңіз, не аластаңыз!\n\n%s\n%s--~~~~',
- 'ksh': u'== Han enne kappodde Weblengk jefonge ==\n\nEsch han bonge die Weblingks paa Mol jetschäck. Se han allemoolde nit jedon Doht ens donnoh loore, un dä Lengk reparreere odo eruß nämme.\n\n%s\n%s--~~~~',
- 'nds': u'== Weblenk geiht nich mehr ==\n\nDe Bot hett en poor Mal al versöcht, disse Siet optoropen un kunn dor nich bikamen. Schall man een nakieken, wat de Siet noch dor is un den Lenk richten oder rutnehmen.\n\n%s\n%s--~~~~',
- 'nl': u'== Dode link ==\nTijdens enkele automatische controles bleek de onderstaande externe link onbereikbaar. Controleer alstublieft of de link inderdaad onbereikbaar is. Verwijder deze tekst alstublieft na een succesvolle controle of na het verwijderen of corrigeren van de externe link.\n\n%s\n%s--~~~~[[Categorie:Wikipedia:Onbereikbare externe link]]',
- 'no': u'{{subst:Bruker:JhsBot/Død lenke}}\n\n%s\n%s~~~~\n\n{{ødelagt lenke}}',
- 'pl': u'== Martwy link ==\n\nW czasie kilku automatycznych przebiegów bota, poniższy link zewnętrzny był niedostępny. Proszę sprawdzić czy odnośnik jest faktycznie niedziałający i ewentualnie go usunąć.\n\n%s\n%s--~~~~',
- 'pt': u'== Link quebrado ==\n\nFoi checado os links externos deste artigo por vários minutos. Alguém verifique por favor se a ligação estiver fora do ar e tente arrumá-lo ou removê-la!\n\n%s\n --~~~~ ',
- 'sr': u'== Покварене спољашње повезнице ==\n\nТоком неколико аутоматски провера, бот је пронашао покварене спољашње повезнице. Молимо вас проверите да ли је повезница добра, поправите је или је уклоните!\n\n%s\n%s--~~~~',
+ 'ar': u'== %s ==\n\nخلال عدة عمليات أوتوماتيكية من البوت الوصلة الخارجية التالية كانت غير متوفرة. من فضلك تحقق من أن الوصلة لا تعمل وأزلها أو أصلحها في هذه الحالة!\n\n%s\n%s--~~~~',
+ 'de': u'== %s ==\n\nBei mehreren automatisierten Botläufen wurde der folgende Weblink als nicht verfügbar erkannt. Bitte überprüfe, ob der Link tatsächlich unerreichbar ist, und korrigiere oder entferne ihn in diesem Fall!\n\n%s\n%s--~~~~',
+ 'en': u'== %s ==\n\nDuring several automated bot runs the following external link was found to be unavailable. Please check if the link is in fact down and fix or remove it in that case!\n\n%s\n%s--~~~~',
+ 'fr': u'== %s ==\n\nPendant plusieurs patrouilles par bot, le lien suivant a été inaccessible. Veuillez vérifier si le lien est effectivement mort et si oui corrigez ou retirez-le.\n\n%s\n%s--~~~~',
+ 'he': u'== %s ==\n\nבמהלך מספר ריצות אוטומטיות של הבוט, נמצא שהקישור החיצוני הבא אינו זמין. אנא בדקו אם הקישור אכן שבור, ותקנו אותו או הסירו אותו במקרה זה!\n\n%s\n%s--~~~~',
+ 'ia': u'== %s ==\n\nDurante plure sessiones automatic, le robot ha constatate que le sequente ligamine externe non es disponibile. Per favor confirma que le ligamine de facto es defuncte, e in caso de si, repara o elimina lo!\n\n%s\n%s--~~~~',
+ 'kk': u'== %s ==\n\nӨздікті бот бірнеше жегілгенде келесі сыртқы сілтемеге қатынай алмады. Бұл сілтеменің қатыналуын тексеріп шығыңыз да, не түзетіңіз, не аластаңыз!\n\n%s\n%s--~~~~',
+ 'ksh': u'== %s ==\n\nEsch han bonge die Weblingks paa Mol jetschäck. Se han allemoolde nit jedon Doht ens donnoh loore, un dä Lengk reparreere odo eruß nämme.\n\n%s\n%s--~~~~',
+ 'nds': u'== %s ==\n\nDe Bot hett en poor Mal al versöcht, disse Siet optoropen un kunn dor nich bikamen. Schall man een nakieken, wat de Siet noch dor is un den Lenk richten oder rutnehmen.\n\n%s\n%s--~~~~',
+ 'nl': u'== %s ==\nTijdens enkele automatische controles bleek de onderstaande externe link onbereikbaar. Controleer alstublieft of de link inderdaad onbereikbaar is. Verwijder deze tekst alstublieft na een succesvolle controle of na het verwijderen of corrigeren van de externe link.\n\n%s\n%s--~~~~[[Categorie:Wikipedia:Onbereikbare externe link]]',
+ # This is not a good solution as it only works on the Norwegian Wikipedia, not on Wiktionary etc.
+ 'no': u'%s{{subst:Bruker:JhsBot/Død lenke}}\n\n%s\n%s~~~~\n\n{{ødelagt lenke}}',
+ 'pl': u'== %s ==\n\nW czasie kilku automatycznych przebiegów bota, poniższy link zewnętrzny był niedostępny. Proszę sprawdzić czy odnośnik jest faktycznie niedziałający i ewentualnie go usunąć.\n\n%s\n%s--~~~~',
+ 'pt': u'== %s ==\n\nFoi checado os links externos deste artigo por vários minutos. Alguém verifique por favor se a ligação estiver fora do ar e tente arrumá-lo ou removê-la!\n\n%s\n --~~~~ ',
+ 'sr': u'== %s ==\n\nТоком неколико аутоматски провера, бот је пронашао покварене спољашње повезнице. Молимо вас проверите да ли је повезница добра, поправите је или је уклоните!\n\n%s\n%s--~~~~',
}
+talk_report_header = {
+ 'ar': u'وصلة ميتة',
+ 'de': u'Toter Weblink',
+ 'en': u'Dead link',
+ 'fr': u'Lien mort',
+ 'he': u'קישור שבור',
+ 'ia': u'Ligamine defuncte',
+ 'kk': u'Өлі сілтем',
+ 'ksh': u'Han enne kappodde Weblengk jefonge',
+ 'nds': u'Weblenk geiht nich mehr',
+ 'nl': u'Dode link',
+ 'no': u'',
+ 'pl': u'Martwy link',
+ 'pt': u'Link quebrado',
+ 'sr': u'Покварене спољашње повезнице',
+}
+
talk_report_archive = {
'ar': u'\nصفحة الويب تم حفظها بواسطة أرشيف الإنترنت. من فضلك ضع في الاعتبار الوصل لنسخة مؤرشفة مناسبة: [%s]. ',
'de': u'Die Webseite wurde vom Internet Archive gespeichert. Bitte verlinke gegebenenfalls eine geeignete archivierte Version: [%s]. ',
@@ -537,10 +555,11 @@
txtfile.write('=== %s ===\n' % containingPage.title()[:3])
txtfile.write(errorReport)
txtfile.close()
+
if self.reportThread and not containingPage.isTalkPage():
self.reportThread.report(url, errorReport, containingPage, archiveURL)
-
-
+
+
def setLinkDead(self, url, error, page):
"""
Adds the fact that the link was found dead to the .dat file.
@@ -645,7 +664,15 @@
archiveMsg = wikipedia.translate(wikipedia.getSite(), talk_report_archive) % archiveURL
else:
archiveMsg = u''
- content += wikipedia.translate(wikipedia.getSite(), talk_report) % (errorReport, archiveMsg)
+ # The header will default to "Dead link". But if there is already such a header, we'll
+ # use "Dead link 2", "Dead link 3", etc.
+ header = wikipedia.translate(wikipedia.getSite(), talk_report_header)
+ i = 1
+ # Check if there is already such a header on the talk page.
+ while re.search('= *' + header + ' *=', content) is not None:
+ i += 1
+ header = wikipedia.translate(wikipedia.getSite(), talk_report_header) + " " + str(i)
+ content += wikipedia.translate(wikipedia.getSite(), talk_report) % (header, errorReport, archiveMsg)
try:
talkPage.put(content)
except wikipedia.SpamfilterError, error:
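The numbering scheme above boils down to the following standalone sketch (not committed code; re.escape() is added here as a precaution for translated headers that might contain regex metacharacters, which the committed loop does not do):

    import re

    def unique_header(content, base):
        # Try "Dead link" first, then "Dead link 2", "Dead link 3", ...
        header = base
        i = 1
        while re.search('= *' + re.escape(header) + ' *=', content):
            i += 1
            header = base + ' ' + str(i)
        return header

    # unique_header(u'== Dead link ==\n...', u'Dead link') -> u'Dead link 2'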
Revision: 5259
Author: nicdumz
Date: 2008-04-23 13:23:19 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
Adding UserContributionsGenerator :)
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2008-04-23 12:46:19 UTC (rev 5258)
+++ trunk/pywikipedia/pagegenerators.py 2008-04-23 13:23:19 UTC (rev 5259)
@@ -106,6 +106,9 @@
Argument can be given as "-unwatched:n" where
n is the maximum number of articles to work on.
+-usercontribs Work on all articles that were edited by a certain user :
+ Example : -usercontribs:DumZiBoT
+
-weblink Work on all articles that contain an external link to
a given URL; may be given as "-weblink:url"
@@ -414,6 +417,40 @@
for page in site.linksearch(link, limit=step):
yield page
+def UserContributionsGenerator(username, number = 250, namespaces = [], site = None ):
+ """
+ Yields number unique pages edited by user:username
+ namespaces : list of namespace numbers to fetch contribs from
+ """
+ import urllib
+ if site is None:
+ site = wikipedia.getSite()
+ if number > 500:
+ # the api does not allow more than 500 results for anonymous users
+ number = 500
+ apiQ = site.api_address() + 'action=query&list=usercontribs&ucuser='
+ apiQ += urllib.quote(username.encode(site.encoding()))
+ apiQ += '&ucprop=title&uclimit=%s&format=xml' % number
+ if namespaces:
+ apiQ += '&ucnamespace=%s' % '|'.join(map(str, namespaces))
+ titlesRe = re.compile('title="(.*?)"')
+ ucstartRe = re.compile('ucstart="(.*?)"')
+ ucstart = ''
+ # An user is likely to contribute on several pages,
+ # keeping track of titles
+ titleList = []
+ while True:
+ result = site.getUrl(apiQ + ucstart)
+ for title in titlesRe.findall(result):
+ if not title in titleList:
+ titleList.append(title)
+ yield wikipedia.Page(site, title)
+ m = ucstartRe.search(result)
+ if m:
+ ucstart = '&ucstart=' + m.group(1)
+ else:
+ break
+
def SearchPageGenerator(query, number = 100, namespaces = None, site = None):
"""
Provides a list of results using the internal MediaWiki search engine
@@ -815,6 +852,8 @@
gen = UnwatchedPagesPageGenerator()
else:
gen = UnwatchedPagesPageGenerator(number = int(arg[11:]))
+ elif arg.startswith('-usercontribs'):
+ gen = UserContributionsGenerator(arg[14:])
elif arg.startswith('-withoutinterwiki'):
if len(arg) == 17:
gen = WithoutInterwikiPageGenerator()
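A usage sketch for the new generator (assumes a working user-config; the username is the one from the help text above, the namespace filter is illustrative):

    import wikipedia, pagegenerators

    # Yield up to 100 unique main-namespace pages edited by DumZiBoT.
    gen = pagegenerators.UserContributionsGenerator(u'DumZiBoT',
                                                    number=100, namespaces=[0])
    for page in gen:
        wikipedia.output(page.title())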
Revision: 5258
Author: wikipedian
Date: 2008-04-23 12:46:19 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
Bugfix: }} and | were regarded as part of the link when there is a URL in a nested template.
See for example:
http://de.wikipedia.org/w/index.php?title=Benutzer:Head/Spielwiese&action=e…
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2008-04-23 08:51:40 UTC (rev 5257)
+++ trunk/pywikipedia/weblinkchecker.py 2008-04-23 12:46:19 UTC (rev 5258)
@@ -203,7 +203,13 @@
# MediaWiki parses templates before parsing external links. Thus, there
# might be a | or a } directly after a URL which does not belong to
# the URL itself.
- # Blow up templates with spaces to avoid these problems.
+
+ # First, remove the curly braces of inner templates:
+ nestedTemplateR = re.compile(r'{{([^}]*?){{(.*?)}}(.*?)}}')
+ while nestedTemplateR.search(text):
+ text = nestedTemplateR.sub(r'{{\1 \2 \3}}', text)
+
+ # Then blow up the templates with spaces so that the | and }} will not be regarded as part of the link:.
templateWithParamsR = re.compile(r'{{([^}]*?[^ ])\|([^ ][^}]*?)}}', re.DOTALL)
while templateWithParamsR.search(text):
text = templateWithParamsR.sub(r'{{ \1 | \2 }}', text)
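A worked example of the two passes (standalone sketch; the template names and the URL are made up):

    import re

    text = u'{{cite|url=http://example.org|note={{small|x}}}}'

    # First pass: remove the curly braces of the inner template.
    nestedTemplateR = re.compile(r'{{([^}]*?){{(.*?)}}(.*?)}}')
    while nestedTemplateR.search(text):
        text = nestedTemplateR.sub(r'{{\1 \2 \3}}', text)

    # Second pass: blow up the template with spaces.
    templateWithParamsR = re.compile(r'{{([^}]*?[^ ])\|([^ ][^}]*?)}}', re.DOTALL)
    while templateWithParamsR.search(text):
        text = templateWithParamsR.sub(r'{{ \1 | \2 }}', text)

    # The URL is now immediately followed by a space rather than by '|' or
    # '}}', so the link-matching regex no longer swallows template syntax.
    print(text)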
Bugs item #1949476 was opened at 2008-04-23 11:09
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1949476&group_…
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: NicDumZ — Nicolas Dumazet (nicdumz)
Assigned to: Nobody/Anonymous (nobody)
Summary: Nasty weird bug involving preloadingGenerator & canBeEdited
Initial Comment:
I realized that sometimes, in blockpageschecker.py, someTotalProtectedPage.canBeEdited() would wrongly return True (I have no sysop account, so it should return False on every [edit=sysop] protected page).
I first tried a single canBeEdited() call on that fully protected page:
>>> import wikipedia; s = wikipedia.Site('fr', 'wikipedia'); protectedpage = wikipedia.Page(s, 'Zentrum')
Checked for running processes. 1 processes currently running, including the current process.
>>> protectedpage.canBeEdited()
Getting 1 pages from wikipedia:fr...
False
Which is fine.
However:
>>> import wikipedia; s = wikipedia.Site('fr', 'wikipedia'); protectedpage = wikipedia.Page(s, 'Zentrum')
Checked for running processes. 1 processes currently running, including the current process.
>>> import pagegenerators; gen = pagegenerators.PreloadingGenerator([protectedpage])
>>> for p in gen : print p.canBeEdited()
...
Getting 1 pages from wikipedia:fr...
True
>>> protectedpage.canBeEdited()
True
Which is wrong.
I then thought that calling canBeEdited() on a fully protected page from PreloadingGenerator would wrongly return True _every_ time, but I was wrong:
>>> import wikipedia; s = wikipedia.Site('en', 'wikipedia'); protectedpage = wikipedia.Page(s, 'Wikipedia:site support')
Checked for running processes. 1 processes currently running, including the current process.
>>> import pagegenerators; gen = pagegenerators.PreloadingGenerator([protectedpage])
>>> for p in gen : print p.canBeEdited()
...
Getting 1 pages from wikipedia:en...
False
So apparently, in some special cases, calling canBeEdited() on a fully protected page from PreloadingGenerator returns True. However, I have not been able to find out *when* exactly this happens.
Any thoughts?
PS: An easy fix for this is to get rid of the current implementation of canBeEdited and to use the new API-based getRestrictions instead (getRestrictions()['edit']). But we should find out what is happening here, since we might be missing something important.
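A sketch of that suggested workaround (hypothetical helper, not part of the framework; it assumes getRestrictions()['edit'] yields the protection level string, e.g. 'sysop' or 'autoconfirmed', or None):

    import wikipedia

    def canBeEditedViaApi(page):
        # Ask the API for the live protection level instead of trusting the
        # state cached by PreloadingGenerator. Good enough for a non-sysop,
        # autoconfirmed bot account.
        try:
            return page.getRestrictions()['edit'] != 'sysop'
        except wikipedia.NoPage:
            # getRestrictions() raises NoPage for missing pages (see r5254).
            return True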
Revision: 5257
Author: nicdumz
Date: 2008-04-23 08:51:40 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
Rewriting lacksReferences, addReferences, and createReferenceSection to take pageText as an argument instead of a Page object.
The idea is to be able to call lacksReferences() or addReferences() from outside the class, on a specific pagetext, without the page being saved.
Concrete application example:
* You process <ref> tags in a page, and get a modified pageText
* Since you may have added new references, you want to check for <references/>:
    if lacksReferences(pageText, verbose=False):
        pageText = addReferences(pageText)
        # further processing on pageText
You may have noticed it, I also added a verbose arg in lacksReferences :) (defaulting to True for b/c, since this is the current behavior)
Modified Paths:
--------------
trunk/pywikipedia/noreferences.py
Modified: trunk/pywikipedia/noreferences.py
===================================================================
--- trunk/pywikipedia/noreferences.py 2008-04-23 08:24:05 UTC (rev 5256)
+++ trunk/pywikipedia/noreferences.py 2008-04-23 08:51:40 UTC (rev 5257)
@@ -211,49 +211,41 @@
except KeyError:
self.referencesTemplates = []
- def lacksReferences(self, page):
+ def lacksReferences(self, text, verbose = True):
"""
Checks whether or not the page is lacking a references tag.
"""
- # Show the title of the page we're working on.
- # Highlight the title in purple.
- wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
- try:
- oldText = page.get()
- oldTextCleaned = wikipedia.removeDisabledParts(oldText)
- if not self.refR.search(oldTextCleaned):
+ oldTextCleaned = wikipedia.removeDisabledParts(text)
+ if not self.refR.search(oldTextCleaned):
+ if verbose:
wikipedia.output(u'No changes necessary: no ref tags found.')
- return False
- elif self.referencesR.search(oldTextCleaned):
+ return False
+ elif self.referencesR.search(oldTextCleaned):
+ if verbose:
wikipedia.output(u'No changes necessary: references tag found.')
+ return False
+ else:
+ templateR =''
+ part = '\{\{('
+ for template in self.referencesTemplates:
+ templateR += part + template
+ part = '|'
+ templateR+=')'
+ if re.search(templateR,oldTextCleaned,re.I):
+ if verbose:
+ wikipedia.output(u'No changes necessary: references template found.')
return False
- else:
- templateR =''
- part = '\{\{('
- for template in self.referencesTemplates:
- templateR += part + template
- part = '|'
- templateR+=')'
- if re.search(templateR,oldTextCleaned,re.I):
- wikipedia.output(u'No changes necessary: references template found.')
- return False
+ if verbose:
wikipedia.output(u'Found ref without references.')
- return True
- except wikipedia.NoPage:
- wikipedia.output(u"Page %s does not exist?!" % page.aslink())
- except wikipedia.IsRedirectPage:
- wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
- except wikipedia.LockedPage:
- wikipedia.output(u"Page %s is locked?!" % page.aslink())
- return False
+ return True
- def addReferences(self, page):
+ def addReferences(self, oldText):
"""
Tries to add a references tag into an existing section where it fits
into. If there is no such section, creates a new section containing
the references tag.
+ * Returns : The modified pagetext
"""
- oldText = page.get()
# Is there an existing section where we can add the references tag?
for section in wikipedia.translate(self.site, referencesSections):
@@ -268,8 +260,7 @@
else:
wikipedia.output(u'Adding references tag to existing %s section...\n' % section)
newText = oldText[:match.end()] + u'\n<references/>\n' + oldText[match.end():]
- self.save(page, newText)
- return
+ return newText
else:
break
@@ -288,8 +279,7 @@
wikipedia.output(u'Adding references section before %s section...\n' % section)
index = match.start()
ident = match.group('ident')
- self.createReferenceSection(page, index, ident)
- return
+ return self.createReferenceSection(oldText, index, ident)
else:
break
# This gets complicated: we want to place the new references
@@ -317,13 +307,11 @@
break
wikipedia.output(u'Found no section that can be preceeded by a new references section. Placing it before interwiki links, categories, and bottom templates.')
index = len(tmpText)
- self.createReferenceSection(page, index)
+ return self.createReferenceSection(oldText, index)
- def createReferenceSection(self, page, index, ident = '=='):
- oldText = page.get()
+ def createReferenceSection(self, oldText, index, ident = '=='):
newSection = u'\n%s %s %s\n\n<references/>\n' % (ident, wikipedia.translate(self.site, referencesSections)[0], ident)
- newText = oldText[:index] + newSection + oldText[index:]
- self.save(page, newText)
+ return oldText[:index] + newSection + oldText[index:]
def save(self, page, newText):
"""
@@ -362,8 +350,23 @@
wikipedia.setAction(comment)
for page in self.generator:
- if self.lacksReferences(page):
- self.addReferences(page)
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+ try:
+ text = page.get()
+ except wikipedia.NoPage:
+ wikipedia.output(u"Page %s does not exist?!" % page.aslink())
+ continue
+ except wikipedia.IsRedirectPage:
+ wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
+ continue
+ except wikipedia.LockedPage:
+ wikipedia.output(u"Page %s is locked?!" % page.aslink())
+ continue
+ if self.lacksReferences(text):
+ newText = self.addReferences(text)
+ self.save(page, newText)
if self.site.messages() and not self.ignoreMsg:
wikipedia.output(u'NOTE: You have unread messages, stopping...')
wikipedia.stopme()
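Put together, the new calling convention looks roughly like this (sketch only; bot is assumed to be an already-constructed NoReferencesBot and page an existing Page):

    text = page.get()
    if bot.lacksReferences(text, verbose=False):
        newText = bot.addReferences(text)
        # ... further processing on newText is now possible ...
        bot.save(page, newText)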
Revision: 5256
Author: nicdumz
Date: 2008-04-23 08:24:05 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
Getting rid of a mysite global (?!); defining a 'site' class field so that self.site is used everywhere instead of a mix of page.site(), wikipedia.getSite(), etc.
Modified Paths:
--------------
trunk/pywikipedia/noreferences.py
Modified: trunk/pywikipedia/noreferences.py
===================================================================
--- trunk/pywikipedia/noreferences.py 2008-04-23 08:17:57 UTC (rev 5255)
+++ trunk/pywikipedia/noreferences.py 2008-04-23 08:24:05 UTC (rev 5256)
@@ -37,8 +37,6 @@
import wikipedia, pagegenerators, catlib
import editarticle
import re, sys
-global mysite
-mysite =wikipedia.getSite()
# This is required for the text that is shown when you run this script
# with the parameter -help.
@@ -193,12 +191,11 @@
def __iter__(self):
import xmlreader
- mysite = wikipedia.getSite()
dump = xmlreader.XmlDump(self.xmlFilename)
for entry in dump.parse():
text = wikipedia.removeDisabledParts(entry.text)
if self.refR.search(text) and not self.referencesR.search(text):
- yield wikipedia.Page(mysite, entry.title)
+ yield wikipedia.Page(wikipedia.getSite(), entry.title)
class NoReferencesBot:
@@ -206,7 +203,7 @@
self.generator = generator
self.always = always
self.ignoreMsg = ignoreMsg
- mysite = wikipedia.getSite()
+ self.site = wikipedia.getSite()
self.refR = re.compile('</ref>', re.IGNORECASE)
self.referencesR = re.compile('<references */>', re.IGNORECASE)
try:
@@ -259,7 +256,7 @@
oldText = page.get()
# Is there an existing section where we can add the references tag?
- for section in wikipedia.translate(page.site(), referencesSections):
+ for section in wikipedia.translate(self.site, referencesSections):
sectionR = re.compile(r'\r\n=+ *%s *=+\r\n' % section)
index = 0
while index < len(oldText):
@@ -277,7 +274,7 @@
break
# Create a new section for the references tag
- for section in wikipedia.translate(page.site(), placeBeforeSections):
+ for section in wikipedia.translate(self.site, placeBeforeSections):
# Find out where to place the new section
sectionR = re.compile(r'\r\n(?P<ident>=+) *%s *=+\r\n' % section)
index = 0
@@ -304,7 +301,7 @@
# keep removing interwiki links, templates etc. from the bottom.
# At the end, look at the length of the temp text. That's the position
# where we'll insert the references section.
- catNamespaces = '|'.join(page.site().category_namespaces())
+ catNamespaces = '|'.join(self.site.category_namespaces())
categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
# won't work with nested templates
@@ -324,7 +321,7 @@
def createReferenceSection(self, page, index, ident = '=='):
oldText = page.get()
- newSection = u'\n%s %s %s\n\n<references/>\n' % (ident, wikipedia.translate(page.site(), referencesSections)[0], ident)
+ newSection = u'\n%s %s %s\n\n<references/>\n' % (ident, wikipedia.translate(self.site, referencesSections)[0], ident)
newText = oldText[:index] + newSection + oldText[index:]
self.save(page, newText)
@@ -343,7 +340,7 @@
if self.always:
try:
page.put_async(newText)
- if mysite.messages() and not self.ignoreMsg:
+ if self.site.messages() and not self.ignoreMsg:
wikipedia.output(u'NOTE: You have unread messages, stopping...')
wikipedia.stopme()
except wikipedia.EditConflict:
@@ -355,20 +352,19 @@
else:
# Save the page in the background. No need to catch exceptions.
page.put_async(newText)
- if mysite.messages() and not self.ignoreMsg:
+ if self.site.messages() and not self.ignoreMsg:
wikipedia.output(u'NOTE: You have unread messages, stopping...')
wikipedia.stopme()
return
def run(self):
- mysite =wikipedia.getSite()
- comment = wikipedia.translate(wikipedia.getSite(), msg)
+ comment = wikipedia.translate(self.site, msg)
wikipedia.setAction(comment)
for page in self.generator:
if self.lacksReferences(page):
self.addReferences(page)
- if mysite.messages() and not self.ignoreMsg:
+ if self.site.messages() and not self.ignoreMsg:
wikipedia.output(u'NOTE: You have unread messages, stopping...')
wikipedia.stopme()
Revision: 5255
Author: nicdumz
Date: 2008-04-23 08:17:57 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
* Getting rid of those calls to wikipedia.crash (siebrand?!!?)
* output -> wikipedia.output
* Using Site.messages() instead of a strange message detection method
* I hate it when my bot stops on new messages: introducing '-ignoremsg'
Modified Paths:
--------------
trunk/pywikipedia/noreferences.py
Modified: trunk/pywikipedia/noreferences.py
===================================================================
--- trunk/pywikipedia/noreferences.py 2008-04-23 08:07:44 UTC (rev 5254)
+++ trunk/pywikipedia/noreferences.py 2008-04-23 08:17:57 UTC (rev 5255)
@@ -21,6 +21,7 @@
-start:Category:M.
-always Don't prompt you for each replacement.
+ -ignoremsg Don't stop when the bot has new messages
All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
@@ -201,9 +202,10 @@
class NoReferencesBot:
- def __init__(self, generator, always = False):
+ def __init__(self, generator, always = False, ignoreMsg = False):
self.generator = generator
self.always = always
+ self.ignoreMsg = ignoreMsg
mysite = wikipedia.getSite()
self.refR = re.compile('</ref>', re.IGNORECASE)
self.referencesR = re.compile('<references */>', re.IGNORECASE)
@@ -341,8 +343,9 @@
if self.always:
try:
page.put_async(newText)
- if mysite.messages:
- wikipedia.crash()
+ if mysite.messages() and not self.ignoreMsg:
+ wikipedia.output(u'NOTE: You have unread messages, stopping...')
+ wikipedia.stopme()
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict' % (page.title(),))
except wikipedia.SpamfilterError, e:
@@ -352,11 +355,9 @@
else:
# Save the page in the background. No need to catch exceptions.
page.put_async(newText)
- path = mysite.put_address('Non-existing_page')
- text = mysite.getUrl(path)
- if '<div class="usermessage">' in text:
- output(u'NOTE: You have unread messages on %s' % self)
- wikipedia.crash()
+ if mysite.messages() and not self.ignoreMsg:
+ wikipedia.output(u'NOTE: You have unread messages, stopping...')
+ wikipedia.stopme()
return
def run(self):
@@ -367,11 +368,9 @@
for page in self.generator:
if self.lacksReferences(page):
self.addReferences(page)
- path = mysite.put_address('Non-existing_page')
- text = mysite.getUrl(path)
- if '<div class="usermessage">' in text:
- output(u'NOTE: You have unread messages on %s' % self)
- wikipedia.crash()
+ if mysite.messages() and not self.ignoreMsg:
+ wikipedia.output(u'NOTE: You have unread messages, stopping...')
+ wikipedia.stopme()
def main():
#page generator
@@ -384,6 +383,8 @@
namespaces = []
# Never ask before changing a page
always = False
+ # Stop when the bot has new messages
+ ignoreMsg = False
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
@@ -403,6 +404,8 @@
namespaces.append(arg[11:])
elif arg == '-always':
always = True
+ elif arg == '-ignoremsg':
+ ignoreMsg = True
else:
generator = genFactory.handleArg(arg)
if generator:
@@ -419,7 +422,7 @@
if namespaces != []:
gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
preloadingGen = pagegenerators.PreloadingGenerator(gen,pageNumber=500)
- bot = NoReferencesBot(preloadingGen, always = always)
+ bot = NoReferencesBot(preloadingGen, always, ignoreMsg)
bot.run()
if __name__ == "__main__":
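A hypothetical invocation combining the new flag with existing options (the start page is just an example):

    python noreferences.py -start:A -always -ignoremsg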
Revision: 5254
Author: nicdumz
Date: 2008-04-23 08:07:44 +0000 (Wed, 23 Apr 2008)
Log Message:
-----------
Page.getRestrictions : Raise NoPage when the page does not exist
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-22 21:03:00 UTC (rev 5253)
+++ trunk/pywikipedia/wikipedia.py 2008-04-23 08:07:44 UTC (rev 5254)
@@ -1141,8 +1141,12 @@
"""
api_url = '/w/api.php?action=query&prop=info&inprop=protection&format=xml&titles=%s' % self.urlname()
text = self.site().getUrl(api_url)
- if not 'pageid="' in text: # Avoid errors when you can't reach the API
- raise Error("API problem, can't reach the API!")
+ if 'missing=""' in text:
+ raise NoPage('Page %s does not exist' % self.aslink())
+ elif not 'pageid="' in text:
+ # I don't know what may happen here.
+ # We may want to have better error handling
+ raise Error("BUG> API problem.")
match = re.findall(r'<protection>(.*?)</protection>', text)
restrictions = { 'edit': None, 'move': None }
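Callers of getRestrictions() now have to be prepared for the exception; a minimal usage sketch (the page title is hypothetical):

    import wikipedia

    page = wikipedia.Page(wikipedia.getSite(), u'Some deleted page')
    try:
        restrictions = page.getRestrictions()
        wikipedia.output(u'edit restriction: %s' % restrictions['edit'])
    except wikipedia.NoPage:
        wikipedia.output(u'%s does not exist.' % page.aslink())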
Revision: 5253
Author: erwin85
Date: 2008-04-22 21:03:00 +0000 (Tue, 22 Apr 2008)
Log Message:
-----------
* disabled fixing_redirects.py for nlwiki
There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.
Modified Paths:
--------------
trunk/pywikipedia/fixing_redirects.py
Modified: trunk/pywikipedia/fixing_redirects.py
===================================================================
--- trunk/pywikipedia/fixing_redirects.py 2008-04-22 20:15:23 UTC (rev 5252)
+++ trunk/pywikipedia/fixing_redirects.py 2008-04-22 21:03:00 UTC (rev 5253)
@@ -164,7 +164,11 @@
else:
namespace = int(arg[11:])
- mysite = wikipedia.getSite()
+ mysite = wikipedia.getSite()
+ if mysite.sitename() == 'wikipedia:nl':
+ wikipedia.output(u'\03{lightred}There is consensus on the Dutch Wikipedia that bots should not be used to fix redirects.\03{default}')
+ sys.exit()
+
linktrail = mysite.linktrail()
if featured:
featuredList = wikipedia.translate(mysite, featured_articles)