Pywikipedia-l May 2008

pywikipedia-l@lists.wikimedia.org

32 participants
376 discussions

SVN: [5344] trunk/pywikipedia/nowcommons.py
by siebrand＠svn.wikimedia.org 09 May '08

09 May '08

Revision: 5344 Author: siebrand Date: 2008-05-09 17:08:13 +0000 (Fri, 09 May 2008) Log Message: ----------- Add switch -replaceonly when working on a wiki without sysop rights Modified Paths: -------------- trunk/pywikipedia/nowcommons.py Modified: trunk/pywikipedia/nowcommons.py =================================================================== --- trunk/pywikipedia/nowcommons.py 2008-05-09 16:32:01 UTC (rev 5343) +++ trunk/pywikipedia/nowcommons.py 2008-05-09 17:08:13 UTC (rev 5344) @@ -9,8 +9,8 @@ the source wiki. If multiple versions of the file exist, the script will not delete. If the MD5 comparison is not equal, the script will not delete. -A sysop account on the local wiki is required if you want this script to work -properly. +A sysop account on the local wiki is required if you want all features of +this script to work properly. This script understands various command-line arguments: -autonomous: run automatically, do not ask any questions. All files @@ -30,6 +30,9 @@ or in galleries. However, it can also make more mistakes. + -replaceonly: Use this if you do not have a local sysop account, but do + wish to replace links from the NowCommons template. + Known issues. Please fix these if you are capable and motivated: - if a file marked nowcommons is not present on Wikimedia Commons, the bot will exit. @@ -53,6 +56,7 @@ replace = False replacealways = False replaceloose = False +replaceonly = False for arg in wikipedia.handleArgs(): if arg == '-autonomous': @@ -64,6 +68,8 @@ replacealways = True if arg == '-replaceloose': replaceloose = True + if arg == '-replaceonly': + replaceonly = True nowCommons = { '_default': [ @@ -193,20 +199,21 @@ else: wikipedia.output(u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.' 
% localImagePage.titleWithoutNamespace()) commonsText = commonsImagePage.get() - if md5 == commonsImagePage.getFileMd5Sum(): - wikipedia.output(u'The image is identical to the one on Commons.') - if autonomous == False: - wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % page.title()) - wikipedia.output(localImagePage.get()) - wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % commonsImagePage.title()) - wikipedia.output(commonsText) - choice = wikipedia.inputChoice(u'Does the description on Commons contain all required source and license information?', ['yes', 'no'], ['y', 'N'], 'N') - if choice == 'y': + if replaceonly == False: + if md5 == commonsImagePage.getFileMd5Sum(): + wikipedia.output(u'The image is identical to the one on Commons.') + if autonomous == False: + wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % page.title()) + wikipedia.output(localImagePage.get()) + wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % commonsImagePage.title()) + wikipedia.output(commonsText) + choice = wikipedia.inputChoice(u'Does the description on Commons contain all required source and license information?', ['yes', 'no'], ['y', 'N'], 'N') + if choice == 'y': + localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False) + else: localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False) else: - localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False) - else: - wikipedia.output(u'The image is not identical to the one on Commons.') + wikipedia.output(u'The image is not identical to the one on Commons.') except (wikipedia.NoPage, wikipedia.IsRedirectPage), e: wikipedia.output(u'%s' % e) continue

1 0

SVN: [5343] trunk/pywikipedia/nowcommons.py
by siebrand＠svn.wikimedia.org 09 May '08

09 May '08

Revision: 5343 Author: siebrand Date: 2008-05-09 16:32:01 +0000 (Fri, 09 May 2008) Log Message: ----------- Add feature to replace local image with shared repo image. New switches: -replace: replace with confirmation -replacealways: replace without confirmation (switch from image.py) -replaceloose: use loose pattern matching when replacing (switch from image.py) Modified Paths: -------------- trunk/pywikipedia/nowcommons.py Modified: trunk/pywikipedia/nowcommons.py =================================================================== --- trunk/pywikipedia/nowcommons.py 2008-05-09 11:35:15 UTC (rev 5342) +++ trunk/pywikipedia/nowcommons.py 2008-05-09 16:32:01 UTC (rev 5343) @@ -9,13 +9,27 @@ the source wiki. If multiple versions of the file exist, the script will not delete. If the MD5 comparison is not equal, the script will not delete. -A sysop account is required for this script to work. +A sysop account on the local wiki is required if you want this script to work +properly. This script understands various command-line arguments: - -autonomous: run automatically, do not ask any questions. All files - that qualify for deletion are deleted. Reduced screen - output. + -autonomous: run automatically, do not ask any questions. All files + that qualify for deletion are deleted. Reduced screen + output. + -replace: replace links if the files are equal and the file names + differ + + -replacealways: replace links if the files are equal and the file names + differ without asking for confirmation + + -replaceloose: Do loose replacements. This will replace all occurences + of the name of the image (and not just explicit image + syntax). This should work to catch all instances of the + file, including where it is used as a template parameter + or in galleries. However, it can also make more + mistakes. + Known issues. Please fix these if you are capable and motivated: - if a file marked nowcommons is not present on Wikimedia Commons, the bot will exit. 
@@ -31,14 +45,25 @@ import sys, re import wikipedia, pagegenerators +import image # only for nowCommonsMessage from imagetransfer import nowCommonsMessage autonomous = False +replace = False +replacealways = False +replaceloose = False for arg in wikipedia.handleArgs(): if arg == '-autonomous': autonomous = True + if arg == '-replace': + replace = True + if arg == '-replacealways': + replace = True + replacealways = True + if arg == '-replaceloose': + replaceloose = True nowCommons = { '_default': [ @@ -65,7 +90,7 @@ u'גם בוויקישיתוף' ], 'ja':[ - u'NowCommons', + u'NowCommons', ], 'ia': [ u'OraInCommons' @@ -82,7 +107,7 @@ u'NowCommons' ], 'zh':[ - u'NowCommons', + u'NowCommons', u'Nowcommons', u'NCT', ], @@ -104,7 +129,7 @@ def __init__(self): self.site = wikipedia.getSite() if repr(self.site) == 'commons:commons': - sys.exit('Don\'t run this bot on Commons!') + sys.exit('Do not run this bot on Commons!') ncList = self.ncTemplates() self.nowCommonsTemplate = wikipedia.Page(self.site, 'Template:' + ncList[0]) @@ -150,23 +175,23 @@ if not filenameOnCommons: wikipedia.output(u'NowCommons template not found.') continue - commonsImagePage = wikipedia.ImagePage(commons, - 'Image:%s' % filenameOnCommons) + commonsImagePage = wikipedia.ImagePage(commons, 'Image:%s' % filenameOnCommons) if len(localImagePage.getFileVersionHistory()) > 1: - wikipedia.output(u"""\ -This image has a version history. Please delete it manually after making sure -that the old versions aren't worth keeping.""") + wikipedia.output(u"This image has a version history. Please delete it manually after making sure that the old versions are not worth keeping.""") continue if localImagePage.titleWithoutNamespace() != commonsImagePage.titleWithoutNamespace(): usingPages = list(localImagePage.usingPages()) if usingPages and usingPages != [localImagePage]: - wikipedia.output( - '%s is still used in %i pages. Please change them manually.' 
- % (localImagePage.title(), len(usingPages))) + wikipedia.output(u'\"\03{lightred}%s\03{default}\" is still used in %i pages.' % (localImagePage.titleWithoutNamespace(), len(usingPages))) + if replace == True: + wikipedia.output(u'Replacing \"\03{lightred}%s\03{default}\" by \"\03{lightgreen}%s\03{default}\".' % (localImagePage.titleWithoutNamespace(), commonsImagePage.titleWithoutNamespace())) + oImageRobot = image.ImageRobot(pagegenerators.FileLinksGenerator(localImagePage), localImagePage.titleWithoutNamespace(), commonsImagePage.titleWithoutNamespace(), '', replacealways, replaceloose) + oImageRobot.run() + else: + wikipedia.output(u'Please change them manually.') continue else: - wikipedia.output('No page is using %s anymore.' - % localImagePage.title()) + wikipedia.output(u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.' % localImagePage.titleWithoutNamespace()) commonsText = commonsImagePage.get() if md5 == commonsImagePage.getFileMd5Sum(): wikipedia.output(u'The image is identical to the one on Commons.') @@ -180,6 +205,8 @@ localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False) else: localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False) + else: + wikipedia.output(u'The image is not identical to the one on Commons.') except (wikipedia.NoPage, wikipedia.IsRedirectPage), e: wikipedia.output(u'%s' % e) continue

1 0

Developer access for Purodha B Blissenbach
by Daniel Herding 09 May '08

09 May '08

Hi all, I'd like to suggest granting developer rights to Purodha B Blissenbach ([[de:Benutzer:Purodha]]). He has contributed some useful patches, but sometimes they just starve because nobody seems to have time to review them. I have met Purodha in person, and I think he's very careful in testing his changes before uploading. One example is his extensive bug report [ 1903113 ] interwiki.py looses access to Wikipedias, see: https://sourceforge.net/tracker/index.php?func=detail&aid=1903113&group_id=… He has submitted two patches, 1907586 and 1918278. I think the approach is worth a try, but the patches seem to be already outdated. So, Purodha, would you like to have developer access to the SVN repository? And what do others think about my suggestion? Cheers Daniel

6 7

output problem using pagegenerators.CategorizedPageGenerator(cat)
by ruud habets 09 May '08

09 May '08

Hi, I recently started using pywikipedia-scripts again after some time-out. Now when I try to output page content to screen using gen = pagegenerators.CategorizedPageGenerator(cat) for page in gen: text = page.get() print page.title The correct output is (for example for a page with the name 1095) <bound method Page.site of Page{[[1095]]}> Can anyone tell me how to eliminate the <bound method Page.site of Page{[[]]}>? I only need the page name. Thanks in advance. Ruud -- Met vriendelijke groet, Ruud Habets ---------------------------------- Ruud Habets mail ruud(a)kgv.nl www http://www.kgv.nl tel. 045-5418899 (p) tel. 0650-844386 (m) tel. 045-4006037 (w) ----------------------------------

2 1

Rewrite status report #2
by Russell Blau 09 May '08

09 May '08

By request, here is an update for interested persons on the status of the rewrite branch. The major purpose of the rewrite branch is to implement a bot framework that uses the new MediaWiki API (see http://www.mediawiki.org/wiki/API for details) instead of the old approach of scraping HTML wiki pages. Note that many other potential areas for a rewrite have been suggested on this list, and at http://www.botwiki.sno.cc/wiki/Rewrite, but most of those are not currently being pursued due to a lack of resources. The software in the rewrite branch currently is runnable, but incomplete, and with limited documentation. For the most part, the bot programming interface is intended to be very similar to the interface used in the current pywikipedia trunk, so that bot programs can be ported easily, but there are significant changes that we have started to document in the file README-conversion.txt. To date, most methods of the Page object that read from the wiki have been implemented; you can, for example, instantiate a Page object, get its text, and retrieve its templates, links, categories, backlinks, and so forth (interwiki links other than language links are not yet implemented). Methods that provide Site-wide lists of pages (allpages, allcategories, randompages, etc.) have not yet been implemented, but this is next on my to-do list. Most methods in the existing framework that manipulate wiki text have not yet been ported (things like replaceCategoryLinks), but these should require very few changes. The ability to save changes to the wiki is *not* yet implemented. Note that the MediaWiki API does now have the ability to edit pages, but this has not yet been activated on any WMF wikis, so once editing is implemented in the bot framework, it still will be of limited use. At the moment, I am doing most of the development work on this branch. 
Valhallasw contributed the http interface, nicdumz has contributed some user-related methods, and although he has not contributed directly, I have stolen^H^H^H^H^H^Hbeen inspired by some of Bryan Tong Minh's ideas from his mwclient project. Let me make it clear that I do not consider this "my" project by any means; anyone who is willing and able to contribute will be most welcome. It may be helpful for any new contributors to announce which aspects of the code they are planning to work on, to avoid duplication of effort. Russ

4 4

Using -excepttext
by Bináris 09 May '08

09 May '08

Please help, I can't do it. :-( I had a working command: replace.py -links:User:BinBot/try -fix:datumjav -recursive -allowoverlap Now I don't want to edit the articles containing templates {{szinnyei}} or {{pallas}} because they are not worth dealing with. I tried to insert -excepttext:'szinnyei' -excepttext:szinnyei -excepttext:{{szinnyei}} -excepttext:'\{\{szinnyei\}\}' and so on, and none of them worked, it always edits those terrible articles. I also tried except instead of excepttext, because the help inside the replace.py states that. I am a bit confused because http://meta.wikimedia.org/wiki/Replace.py states that only older versions used except, and my replace.py contains the LockedPage exception, so it must be the newest according to http://pywikipediabot.cvs.sourceforge.net/pywikipediabot/pywikipedia/replac…, but the inside help still says except. Anyhow, I cannot avoid editing articles with these templates. What would be the correct syntax? Bináris

3 3

SVN: [5342] branches/rewrite/pywikibot/site.py
by nicdumz＠svn.wikimedia.org 09 May '08

09 May '08

Revision: 5342 Author: nicdumz Date: 2008-05-09 11:35:15 +0000 (Fri, 09 May 2008) Log Message: ----------- * Adding a batch parameter to categorymembers to set the numbers of pages retrieved at once * Trying to address NoPage issues Modified Paths: -------------- branches/rewrite/pywikibot/site.py Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2008-05-09 10:56:35 UTC (rev 5341) +++ branches/rewrite/pywikibot/site.py 2008-05-09 11:35:15 UTC (rev 5342) @@ -744,7 +744,7 @@ for ns in namespaces) return tlgen - def categorymembers(self, category, namespaces=None): + def categorymembers(self, category, namespaces=None, batch=None): """Iterate members of specified category. @param category: The Category to iterate. @@ -754,6 +754,8 @@ however, that the iterated values are always Page objects, even if in the Category or Image namespace. @type namespaces: list of ints + @param batch: the number of pages to fetch each time. + @type batch: int """ if category.namespace() != 14: @@ -766,6 +768,11 @@ if namespaces is not None: cmgen.request[u"gcmnamespace"] = u"|".join(unicode(ns) for ns in namespaces) + if batch is not None: + if batch > 5000: + logging.debug("No more than 5000 rows can be fetched at once.") + batch=5000 + cmgen.request[u'cmlimit'] = str(batch) return cmgen def loadrevisions(self, page=None, getText=False, revids=None, @@ -881,6 +888,8 @@ raise Error( u"loadrevisions: Query on %s returned data on '%s'" % (page, pagedata['title'])) + if pagedata.has_key('missing'): + raise NoPage(u'Page %s does not exist' % page.title(asLink=True)) else: page = Page(self, pagedata['title']) api.update_page(page, pagedata)

1 0

SVN: [5341] trunk/pywikipedia/noreferences.py
by wikipedian＠svn.wikimedia.org 09 May '08

09 May '08

Revision: 5341 Author: wikipedian Date: 2008-05-09 10:56:35 +0000 (Fri, 09 May 2008) Log Message: ----------- bugfix: the <references/> tag can now have a "group" attribute. Modified Paths: -------------- trunk/pywikipedia/noreferences.py Modified: trunk/pywikipedia/noreferences.py =================================================================== --- trunk/pywikipedia/noreferences.py 2008-05-08 19:49:52 UTC (rev 5340) +++ trunk/pywikipedia/noreferences.py 2008-05-09 10:56:35 UTC (rev 5341) @@ -198,7 +198,8 @@ """ self.xmlFilename = xmlFilename self.refR = re.compile('</ref>', re.IGNORECASE) - self.referencesR = re.compile('<references */>', re.IGNORECASE) + # The references tab can contain additional spaces and a group attribute. + self.referencesR = re.compile('<references.*?/>', re.IGNORECASE) def __iter__(self): import xmlreader @@ -215,7 +216,7 @@ self.always = always self.site = wikipedia.getSite() self.refR = re.compile('</ref>', re.IGNORECASE) - self.referencesR = re.compile('<references */>', re.IGNORECASE) + self.referencesR = re.compile('<references.*?/>', re.IGNORECASE) try: self.referencesTemplates = referencesTemplates[wikipedia.getSite().family.name][wikipedia.getSite().lang] except KeyError:

1 0

SVN: [5340] trunk/pywikipedia/reflinks.py
by nicdumz＠svn.wikimedia.org 08 May '08

08 May '08

Revision: 5340 Author: nicdumz Date: 2008-05-08 19:49:52 +0000 (Thu, 08 May 2008) Log Message: ----------- Follow up to Daniel's last commit. I myself had locally disabled the "stop on new messages" feature in reflinks.py . Committing it :) Modified Paths: -------------- trunk/pywikipedia/reflinks.py Modified: trunk/pywikipedia/reflinks.py =================================================================== --- trunk/pywikipedia/reflinks.py 2008-05-08 19:37:31 UTC (rev 5339) +++ trunk/pywikipedia/reflinks.py 2008-05-08 19:49:52 UTC (rev 5340) @@ -6,8 +6,8 @@ the link to use it as the title of the wiki link in the reference, i.e. <ref>[http://www.google.fr/search?q=test test - Google Search]</ref> -The bot checks every 20 edits its talk page and a special stop page : if -one of these page has been edited, it stops. +The bot checks every 20 edits a special stop page : if +the page has been edited, it stops. DumZiBoT is running that script on en: & fr: at every new dump, running it on de: is not allowed anymore. @@ -493,9 +493,6 @@ if actualRev != self.stopPageRevId: wikipedia.output(u'[[%s]] has been edited : Someone wants us to stop.' % self.stopPage) return - if self.site.messages(): - wikipedia.output(u'Bot has new messages. Better stop to check.') - return def main(): genFactory = pagegenerators.GeneratorFactory()

1 0

SVN: [5339] trunk/pywikipedia/noreferences.py
by wikipedian＠svn.wikimedia.org 08 May '08

08 May '08

Revision: 5339 Author: wikipedian Date: 2008-05-08 19:37:31 +0000 (Thu, 08 May 2008) Log Message: ----------- rolled back more of the changes from the update of Betacommand / siebrand, revision 4985: * Putting pages asynchronously in always-yes mode is not useful. The asynchronous putting feature is meant to reduce waiting times in interactive mode. * Removed the stop-on-new-messages feature. This is really annoying, most bot operators don't read their messages with their bot accounts. Also, it doesn't belong here. If you want such a feature, add code to wikipedia.py which stops the bot on new messages. Use a config variable for this, and set its default so that the bot continues when there are new messages. Modified Paths: -------------- trunk/pywikipedia/noreferences.py Modified: trunk/pywikipedia/noreferences.py =================================================================== --- trunk/pywikipedia/noreferences.py 2008-05-08 19:17:04 UTC (rev 5338) +++ trunk/pywikipedia/noreferences.py 2008-05-08 19:37:31 UTC (rev 5339) @@ -21,7 +21,6 @@ -start:Category:M. -always Don't prompt you for each replacement. - -ignoremsg Don't stop when the bot has new messages All other parameters will be regarded as part of the title of a single page, and the bot will only work on that single page. 
@@ -211,10 +210,9 @@ class NoReferencesBot: - def __init__(self, generator, always = False, ignoreMsg = False): + def __init__(self, generator, always = False): self.generator = generator self.always = always - self.ignoreMsg = ignoreMsg self.site = wikipedia.getSite() self.refR = re.compile('</ref>', re.IGNORECASE) self.referencesR = re.compile('<references */>', re.IGNORECASE) @@ -335,10 +333,7 @@ if self.always: try: - page.put_async(newText) - if self.site.messages() and not self.ignoreMsg: - wikipedia.output(u'NOTE: You have unread messages, stopping...') - wikipedia.stopme() + page.put(newText) except wikipedia.EditConflict: wikipedia.output(u'Skipping %s because of edit conflict' % (page.title(),)) except wikipedia.SpamfilterError, e: @@ -348,9 +343,6 @@ else: # Save the page in the background. No need to catch exceptions. page.put_async(newText) - if self.site.messages() and not self.ignoreMsg: - wikipedia.output(u'NOTE: You have unread messages, stopping...') - wikipedia.stopme() return def run(self): @@ -375,9 +367,6 @@ if self.lacksReferences(text): newText = self.addReferences(text) self.save(page, newText) - if self.site.messages() and not self.ignoreMsg: - wikipedia.output(u'NOTE: You have unread messages, stopping...') - wikipedia.stopme() def main(): #page generator @@ -390,8 +379,6 @@ namespaces = [] # Never ask before changing a page always = False - # Stop when the bot has new messages - ignoreMsg = False # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. 
@@ -411,8 +398,6 @@ namespaces.append(arg[11:]) elif arg == '-always': always = True - elif arg == '-ignoremsg': - ignoreMsg = True else: generator = genFactory.handleArg(arg) if generator: @@ -429,7 +414,7 @@ if namespaces != []: gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) preloadingGen = pagegenerators.PreloadingGenerator(gen) - bot = NoReferencesBot(preloadingGen, always, ignoreMsg) + bot = NoReferencesBot(preloadingGen, always) bot.run() if __name__ == "__main__":

1 0

← Newer
1
...
28
29
30
31
32
33
34
...
38
Older →

Jump to page:

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

Pywikipedia-l May 2008