Revision: 8631
Author: xqt
Date: 2010-10-09 21:01:00 +0000 (Sat, 09 Oct 2010)
Log Message:
-----------
import wikipedia as pywikibot for merging to rewrite
Modified Paths:
--------------
trunk/pywikipedia/statistics_in_wikitable.py
trunk/pywikipedia/table2wiki.py
trunk/pywikipedia/templatecount.py
trunk/pywikipedia/testfamily.py
trunk/pywikipedia/unlink.py
trunk/pywikipedia/us-states.py
Modified: trunk/pywikipedia/statistics_in_wikitable.py
===================================================================
--- trunk/pywikipedia/statistics_in_wikitable.py 2010-10-09 19:32:57 UTC (rev 8630)
+++ trunk/pywikipedia/statistics_in_wikitable.py 2010-10-09 21:01:00 UTC (rev 8631)
@@ -1,8 +1,6 @@
-#!/usr/bin/python
+#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-
-
\03{lightyellow}This bot renders statistics provided by [[Special:Statistics]] in a table on a wiki page.\03{default}
Thus it creates and updates a Statistics wikitable.
@@ -15,8 +13,10 @@
If existing, it is updated.
"""
__version__ = '$Id$'
-import wikipedia, pagegenerators, query
+#
import time
+import wikipedia as pywikibot
+import pagegenerators, query
# This is the title of the wikipage where to render stats.
your_page = "Logstats"
@@ -39,16 +39,16 @@
self.screen = screen
self.your_page = your_page
self.dict = self.getdata() # Try to get data.
- self.site = wikipedia.getSite()
+ self.site = pywikibot.getSite()
def run(self):
if self.screen:
- wikipedia.output("Bot is running to output stats.")
+ pywikibot.output("Bot is running to output stats.")
self.idle(1) # Run a function to idle
self.outputall()
if not self.screen:
self.outputall() # Output all datas on screen.
- wikipedia.output("\nBot is running. Going to treat \03{lightpurple}%s\03{default}..." % self.your_page )
+ pywikibot.output("\nBot is running. Going to treat \03{lightpurple}%s\03{default}..." % self.your_page )
self.idle(2)
self.treat()
@@ -60,49 +60,49 @@
'meta' :'siteinfo',
'siprop' :'statistics',
}
- wikipedia.output("\nQuerying api for json-formatted data...")
+ pywikibot.output("\nQuerying api for json-formatted data...")
try:
data = query.GetData(params,self.site, encodeTitle = False)
except:
url = self.site.protocol() + '://' + self.site.hostname() + self.site.api_address()
- wikipedia.output("The query has failed. Have you check the API? Cookies are working?")
- wikipedia.output(u"\n>> \03{lightpurple}%s\03{default} <<" % url)
+ pywikibot.output("The query has failed. Have you check the API? Cookies are working?")
+ pywikibot.output(u"\n>> \03{lightpurple}%s\03{default} <<" % url)
if data != None:
- wikipedia.output("Extracting statistics...")
+ pywikibot.output("Extracting statistics...")
data = data['query'] # "query" entry of data.
dict = data['statistics'] # "statistics" entry of "query" dict.
return dict
def treat(self):
- page = wikipedia.Page(self.site, self.your_page)
+ page = pywikibot.Page(self.site, self.your_page)
if page.exists():
- wikipedia.output(u'\nWikitable on \03{lightpurple}%s\03{default} will be completed with:\n' % self.your_page )
+ pywikibot.output(u'\nWikitable on \03{lightpurple}%s\03{default} will be completed with:\n' % self.your_page )
text = page.get()
newtext = self.newraw()
- wikipedia.output(newtext)
- choice = wikipedia.inputChoice(u'Do you want to add these on wikitable?', ['Yes', 'No'], ['y', 'N'], 'N')
+ pywikibot.output(newtext)
+ choice = pywikibot.inputChoice(u'Do you want to add these on wikitable?', ['Yes', 'No'], ['y', 'N'], 'N')
text = text[:-3] + newtext
- summ = wikipedia.translate(self.site, summary_update)
+ summ = pywikibot.translate(self.site, summary_update)
if choice == 'y':
try:
page.put(u''.join(text), summ)
except:
- wikipedia.output(u'Impossible to edit. It may be an edit conflict... Skipping...')
+ pywikibot.output(u'Impossible to edit. It may be an edit conflict... Skipping...')
else:
- wikipedia.output(u'\nWikitable on \03{lightpurple}%s\03{default} will be created with:\n' % self.your_page )
+ pywikibot.output(u'\nWikitable on \03{lightpurple}%s\03{default} will be created with:\n' % self.your_page )
newtext = self.newtable()+self.newraw()
- wikipedia.output(newtext)
- summ = wikipedia.translate(self.site, summary_creation)
- choice = wikipedia.inputChoice(u'Do you want to accept this page creation?', ['Yes', 'No'], ['y', 'N'], 'N')
+ pywikibot.output(newtext)
+ summ = pywikibot.translate(self.site, summary_creation)
+ choice = pywikibot.inputChoice(u'Do you want to accept this page creation?', ['Yes', 'No'], ['y', 'N'], 'N')
if choice == 'y':
try:
page.put(newtext, summ)
- except wikipedia.LockedPage:
- wikipedia.output(u"Page %s is locked; skipping." % title)
- except wikipedia.EditConflict:
- wikipedia.output(u'Skipping %s because of edit conflict' % title)
- except wikipedia.SpamfilterError, error:
- wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
+ except pywikibot.LockedPage:
+ pywikibot.output(u"Page %s is locked; skipping." % title)
+ except pywikibot.EditConflict:
+ pywikibot.output(u'Skipping %s because of edit conflict' % title)
+ except pywikibot.SpamfilterError, error:
+ pywikibot.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
def newraw(self):
newtext = ('\n|----\n!\'\''+ self.date() +'\'\'') # new raw for date and stats
@@ -124,22 +124,22 @@
list = self.dict.keys()
list.sort()
for name in self.dict:
- wikipedia.output("There are "+str(self.dict[name])+" "+name)
+ pywikibot.output("There are "+str(self.dict[name])+" "+name)
def idle(self, retry_idle_time):
time.sleep(retry_idle_time)
- wikipedia.output(u"Starting in %i second..." % retry_idle_time)
+ pywikibot.output(u"Starting in %i second..." % retry_idle_time)
time.sleep(retry_idle_time)
def main(your_page):
screen = False # If True it would not edit the wiki, only output statistics
_page = None
- wikipedia.output("\nBuilding the bot...")
- for arg in wikipedia.handleArgs(): # Parse command line arguments
+ pywikibot.output("\nBuilding the bot...")
+ for arg in pywikibot.handleArgs(): # Parse command line arguments
if arg.startswith('-page'):
if len(arg) == 5:
- _page = wikipedia.input(u'On what page do you want to add statistics?')
+ _page = pywikibot.input(u'On what page do you want to add statistics?')
else:
_page = arg[6:]
if arg.startswith("-screen"):
@@ -147,7 +147,7 @@
if not _page:
_page = your_page
if not screen:
- wikipedia.output("The bot will add statistics on %s.\n" % _page )
+ pywikibot.output("The bot will add statistics on %s.\n" % _page )
bot = StatisticsBot(screen, _page) # Launch the instance of a StatisticsBot
bot.run() # Execute the 'run' method
@@ -155,4 +155,4 @@
try:
main(your_page)
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
Modified: trunk/pywikipedia/table2wiki.py
===================================================================
--- trunk/pywikipedia/table2wiki.py 2010-10-09 19:32:57 UTC (rev 8630)
+++ trunk/pywikipedia/table2wiki.py 2010-10-09 21:01:00 UTC (rev 8631)
@@ -53,9 +53,11 @@
# Distributed under the terms of the MIT license.
#
__version__='$Id$'
+#
import re, sys, time
-import wikipedia, config, pagegenerators
+import wikipedia as pywikibot
+import config, pagegenerators
# This is required for the text that is shown when you run this script
# with the parameter -help.
@@ -133,7 +135,7 @@
tableTagR = re.compile('<table', re.IGNORECASE)
for entry in self.xmldump.parse():
if tableTagR.search(entry.text):
- yield wikipedia.Page(wikipedia.getSite(), entry.title)
+ yield pywikibot.Page(pywikibot.getSite(), entry.title)
class Table2WikiRobot:
def __init__(self, generator, quietMode = False):
@@ -161,7 +163,7 @@
num = 1
while num != 0:
newTable, num = re.subn("([^\r\n]{1})(<[tT]{1}[dDhHrR]{1})",
- r"\1\r\n\2", newTable)
+ r"\1\r\n\2", newTable)
##################
# every open-tag gets a new line.
@@ -170,25 +172,27 @@
##################
# Note that we added the ## characters in markActiveTables().
# <table> tag with attributes, with more text on the same line
- newTable = re.sub("(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>(?P<more>[\w\W]*?)[\r\n ]*",
- r"\r\n{| \g<attr>\r\n\g<more>", newTable)
+ newTable = re.sub(
+ "(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>(?P<more>[\w\W]*?)[\r\n ]*",
+ r"\r\n{| \g<attr>\r\n\g<more>", newTable)
# <table> tag without attributes, with more text on the same line
newTable = re.sub("(?i)[\r\n]*?<##table##>(?P<more>[\w\W]*?)[\r\n ]*",
- r"\r\n{|\n\g<more>\r\n", newTable)
+ r"\r\n{|\n\g<more>\r\n", newTable)
# <table> tag with attributes, without more text on the same line
newTable = re.sub("(?i)[\r\n]*?<##table## (?P<attr>[\w\W]*?)>[\r\n ]*",
- r"\r\n{| \g<attr>\r\n", newTable)
+ r"\r\n{| \g<attr>\r\n", newTable)
# <table> tag without attributes, without more text on the same line
newTable = re.sub("(?i)[\r\n]*?<##table##>[\r\n ]*",
- "\r\n{|\r\n", newTable)
+ "\r\n{|\r\n", newTable)
# end </table>
newTable = re.sub("(?i)[\s]*<\/##table##>",
"\r\n|}", newTable)
##################
# caption with attributes
- newTable = re.sub("(?i)<caption (?P<attr>[\w\W]*?)>(?P<caption>[\w\W]*?)<\/caption>",
- r"\r\n|+\g<attr> | \g<caption>", newTable)
+ newTable = re.sub(
+ "(?i)<caption (?P<attr>[\w\W]*?)>(?P<caption>[\w\W]*?)<\/caption>",
+ r"\r\n|+\g<attr> | \g<caption>", newTable)
# caption without attributes
newTable = re.sub("(?i)<caption>(?P<caption>[\w\W]*?)<\/caption>",
r"\r\n|+ \g<caption>", newTable)
@@ -196,56 +200,61 @@
##################
# <th> often people don't write them within <tr>, be warned!
# <th> with attributes
- newTable = re.sub("(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)<\/th>",
- r"\r\n!\g<attr> | \g<header>\r\n", newTable)
+ newTable = re.sub(
+ "(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)<\/th>",
+ r"\r\n!\g<attr> | \g<header>\r\n", newTable)
# <th> without attributes
newTable = re.sub("(?i)[\r\n]+<th>(?P<header>[\w\W]*?)<\/th>",
r"\r\n! \g<header>\r\n", newTable)
-
# fail save. sometimes people forget </th>
# <th> without attributes, without closing </th>
newTable, n = re.subn("(?i)[\r\n]+<th>(?P<header>[\w\W]*?)[\r\n]+",
r"\r\n! \g<header>\r\n", newTable)
if n>0:
- warning_messages.append(u'WARNING: found <th> without </th>. (%d occurences)\n' % n)
+ warning_messages.append(
+ u'WARNING: found <th> without </th>. (%d occurences)\n' % n)
warnings += n
# <th> with attributes, without closing </th>
- newTable, n = re.subn("(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)[\r\n]+",
- r"\n!\g<attr> | \g<header>\r\n", newTable)
+ newTable, n = re.subn(
+ "(?i)[\r\n]+<th(?P<attr> [^>]*?)>(?P<header>[\w\W]*?)[\r\n]+",
+ r"\n!\g<attr> | \g<header>\r\n", newTable)
if n>0:
- warning_messages.append(u'WARNING: found <th ...> without </th>. (%d occurences\n)' % n)
+ warning_messages.append(
+ u'WARNING: found <th ...> without </th>. (%d occurences\n)' % n)
warnings += n
##################
# <tr> with attributes
newTable = re.sub("(?i)[\r\n]*<tr(?P<attr> [^>]*?)>[\r\n]*",
- r"\r\n|-\g<attr>\r\n", newTable)
+ r"\r\n|-\g<attr>\r\n", newTable)
# <tr> without attributes
newTable = re.sub("(?i)[\r\n]*<tr>[\r\n]*",
- r"\r\n|-\r\n", newTable)
+ r"\r\n|-\r\n", newTable)
##################
# normal <td> without arguments
newTable = re.sub("(?i)[\r\n]+<td>(?P<cell>[\w\W]*?)<\/td>",
- r"\r\n| \g<cell>\r\n", newTable)
+ r"\r\n| \g<cell>\r\n", newTable)
##################
# normal <td> with arguments
- newTable = re.sub("(?i)[\r\n]+<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)<\/td>",
- r"\r\n|\g<attr> | \g<cell>", newTable)
+ newTable = re.sub(
+ "(?i)[\r\n]+<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)<\/td>",
+ r"\r\n|\g<attr> | \g<cell>", newTable)
# WARNING: this sub might eat cells of bad HTML, but most likely it
# will correct errors
# TODO: some more docu please
newTable, n = re.subn("(?i)[\r\n]+<td>(?P<cell>[^\r\n]*?)<td>",
- r"\r\n| \g<cell>\r\n", newTable)
+ r"\r\n| \g<cell>\r\n", newTable)
if n>0:
- warning_messages.append(u'<td> used where </td> was expected. (%d occurences)\n' % n)
+ warning_messages.append(
+ u'<td> used where </td> was expected. (%d occurences)\n' % n)
warnings += n
# fail save, sometimes it's a <td><td></tr>
@@ -261,20 +270,24 @@
newTable, n = re.subn("[\r\n]+<(td|TD)([^>]+?)>([^\r\n]*?)<\/(td|TD)>",
r"\r\n|\2 | \3\r\n", newTable)
if n>0:
- warning_messages.append(u'WARNING: (sorry, bot code unreadable (1). I don\'t know why this warning is given.) (%d occurences)\n' % n)
+ warning_messages.append(
+ u'WARNING: (sorry, bot code unreadable (1). I don\'t know why this warning is given.) (%d occurences)\n' % n)
# fail save. sometimes people forget </td>
# <td> without arguments, with missing </td>
newTable, n = re.subn("(?i)<td>(?P<cell>[^<]*?)[\r\n]+",
r"\r\n| \g<cell>\r\n", newTable)
if n>0:
- warning_messages.append(u'NOTE: Found <td> without </td>. This shouldn\'t cause problems.\n')
+ warning_messages.append(
+ u'NOTE: Found <td> without </td>. This shouldn\'t cause problems.\n')
# <td> with attributes, with missing </td>
- newTable, n = re.subn("(?i)[\r\n]*<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)[\r\n]+",
- r"\r\n|\g<attr> | \g<cell>\r\n", newTable)
+ newTable, n = re.subn(
+ "(?i)[\r\n]*<td(?P<attr> [^>]*?)>(?P<cell>[\w\W]*?)[\r\n]+",
+ r"\r\n|\g<attr> | \g<cell>\r\n", newTable)
if n > 0:
- warning_messages.append(u'NOTE: Found <td> without </td>. This shouldn\'t cause problems.\n')
+ warning_messages.append(
+ u'NOTE: Found <td> without </td>. This shouldn\'t cause problems.\n')
##################
@@ -309,7 +322,7 @@
num = 1
while num != 0:
newTable, num = re.subn("(\{\|[\w\W]*?)\n[ \t]+([\w\W]*?\|\})",
- r"\1\r\n\2", newTable)
+ r"\1\r\n\2", newTable)
##################
# kills additional spaces after | or ! or {|
@@ -344,8 +357,9 @@
# group 2 is the value of the attribute we want to fix here.
# We recognize it by searching for a string of non-whitespace characters
# - [^\s]+? - which is not embraced by quotation marks - [^"]
- newTable, num = re.subn(r'([\r\n]+(?:\|-|\{\|)[^\r\n\|]+) *= *([^"\s>]+)',
- r'\1="\2"', newTable, 1)
+ newTable, num = re.subn(
+ r'([\r\n]+(?:\|-|\{\|)[^\r\n\|]+) *= *([^"\s>]+)',
+ r'\1="\2"', newTable, 1)
num = 1
while num != 0:
@@ -354,16 +368,18 @@
# cell contents which accidentially contain an equal sign.
# Group 1 and 2 are anologously to the previous regular expression,
# group 3 are the remaining attribute key - value pairs.
- newTable, num = re.subn(r'([\r\n]+(?:!|\|)[^\r\n\|]+) *= *([^"\s>]+)([^\|\r\n]*)\|',
- r'\1="\2"\3|', newTable, 1)
+ newTable, num = re.subn(
+ r'([\r\n]+(?:!|\|)[^\r\n\|]+) *= *([^"\s>]+)([^\|\r\n]*)\|',
+ r'\1="\2"\3|', newTable, 1)
##################
# merge two short <td>s
num = 1
while num != 0:
- newTable, num = re.subn("[\r\n]+(\|[^\|\-\}]{1}[^\n\r]{0,35})" +
- "[\r\n]+(\|[^\|\-\}]{1}[^\r\n]{0,35})[\r\n]+",
- r"\r\n\1 |\2\r\n", newTable)
+ newTable, num = re.subn(
+ "[\r\n]+(\|[^\|\-\}]{1}[^\n\r]{0,35})" +
+ "[\r\n]+(\|[^\|\-\}]{1}[^\r\n]{0,35})[\r\n]+",
+ r"\r\n\1 |\2\r\n", newTable)
####
# add a new line if first is * or #
newTable = re.sub("[\r\n]+\| ([*#]{1})",
@@ -378,15 +394,17 @@
newTable = re.sub("([\r\n]+\! +)align\=\"center\" +\|",
r"\1", newTable)
# if there are other attributes, simply strip the align="center"
- newTable = re.sub("([\r\n]+\![^\r\n\|]+?)align\=\"center\"([^\n\r\|]+?\|)",
- r"\1 \2", newTable)
+ newTable = re.sub(
+ "([\r\n]+\![^\r\n\|]+?)align\=\"center\"([^\n\r\|]+?\|)",
+ r"\1 \2", newTable)
##################
# kill additional spaces within arguments
num = 1
while num != 0:
- newTable, num = re.subn("[\r\n]+(\||\!)([^|\r\n]*?)[ \t]{2,}([^\r\n]+?)",
- r"\r\n\1\2 \3", newTable)
+ newTable, num = re.subn(
+ "[\r\n]+(\||\!)([^|\r\n]*?)[ \t]{2,}([^\r\n]+?)",
+ r"\r\n\1\2 \3", newTable)
##################
# I hate those long lines because they make a wall of letters
@@ -396,8 +414,9 @@
while num != 0:
# TODO: how does this work? docu please.
# why are only äöüß used, but not other special characters?
- newTable, num = re.subn("(\r\n[A-Z]{1}[^\n\r]{200,}?[a-zäöüß]\.)\ ([A-ZÄÖÜ]{1}[^\n\r]{200,})",
- r"\1\r\n\2", newTable)
+ newTable, num = re.subn(
+ "(\r\n[A-Z]{1}[^\n\r]{200,}?[a-zäöüß]\.)\ ([A-ZÄÖÜ]{1}[^\n\r]{200,})",
+ r"\1\r\n\2", newTable)
return newTable, warnings, warning_messages
def markActiveTables(self, text):
@@ -410,15 +429,19 @@
tableStartTagR = re.compile("<table", re.IGNORECASE)
tableEndTagR = re.compile("</table>", re.IGNORECASE)
- text = wikipedia.replaceExcept(text, tableStartTagR, "<##table##", exceptions = ['comment', 'math', 'nowiki', 'pre', 'source'])
- text = wikipedia.replaceExcept(text, tableEndTagR, "</##table##>", exceptions = ['comment', 'math', 'nowiki', 'pre', 'source'])
+ text = pywikibot.replaceExcept(text, tableStartTagR, "<##table##",
+ exceptions=['comment', 'math',
+ 'nowiki', 'pre', 'source'])
+ text = pywikibot.replaceExcept(text, tableEndTagR, "</##table##>",
+ exceptions=['comment', 'math',
+ 'nowiki', 'pre', 'source'])
return text
def findTable(self, text):
"""
Finds the first HTML table (which can contain nested tables) inside a
- text.
- Returns the table and the start and end position inside the text.
+ text. Returns the table and the start and end position inside the text.
+
"""
# Note that we added the ## characters in markActiveTables().
markedTableStartTagR = re.compile("<##table##", re.IGNORECASE)
@@ -469,49 +492,51 @@
if not table:
# no more HTML tables left
break
- wikipedia.output(">> Table %i <<" % (convertedTables + 1))
+ pywikibot.output(">> Table %i <<" % (convertedTables + 1))
# convert the current table
newTable, warningsThisTable, warnMsgsThisTable = self.convertTable(table)
# show the changes for this table
if not self.quietMode:
- wikipedia.showDiff(table.replace('##table##', 'table'), newTable)
+ pywikibot.showDiff(table.replace('##table##', 'table'),
+ newTable)
warningSum += warningsThisTable
for msg in warnMsgsThisTable:
- warningMessages += 'In table %i: %s' % (convertedTables + 1, msg)
+ warningMessages += 'In table %i: %s' % (convertedTables + 1,
+ msg)
text = text[:start] + newTable + text[end:]
convertedTables += 1
- wikipedia.output(warningMessages)
+ pywikibot.output(warningMessages)
return text, convertedTables, warningSum
def treat(self, page):
'''
Loads a page, converts all HTML tables in its text to wiki syntax,
- and saves the converted text.
- Returns True if the converted table was successfully saved, otherwise
- returns False.
+ and saves the converted text. Returns True if the converted table was
+ successfully saved, otherwise returns False.
'''
- wikipedia.output(u'\n>>> %s <<<' % page.title())
+ pywikibot.output(u'\n>>> %s <<<' % page.title())
site = page.site()
try:
text = page.get()
- except wikipedia.NoPage:
- wikipedia.output(u"ERROR: couldn't find %s" % page.title())
+ except pywikibot.NoPage:
+ pywikibot.output(u"ERROR: couldn't find %s" % page.title())
return False
- except wikipedia.IsRedirectPage:
- wikipedia.output(u'Skipping redirect %s' % page.title())
+ except pywikibot.IsRedirectPage:
+ pywikibot.output(u'Skipping redirect %s' % page.title())
return False
newText, convertedTables, warningSum = self.convertAllHTMLTables(text)
# Check if there are any marked tags left
markedTableTagR = re.compile("<##table##|</##table##>", re.IGNORECASE)
if markedTableTagR.search(newText):
- wikipedia.output(u'ERROR: not all marked table start or end tags processed!')
+ pywikibot.output(
+ u'ERROR: not all marked table start or end tags processed!')
return
if convertedTables == 0:
- wikipedia.output(u"No changes were necessary.")
+ pywikibot.output(u"No changes were necessary.")
else:
if config.table2wikiAskOnlyWarnings and warningSum == 0:
doUpload = True
@@ -520,15 +545,21 @@
doUpload = True
else:
print "There were %i replacement(s) that might lead to bad output." % warningSum
- doUpload = (wikipedia.input(u'Do you want to change the page anyway? [y|N]') == "y")
+ doUpload = (pywikibot.input(
+ u'Do you want to change the page anyway? [y|N]') == "y")
if doUpload:
# get edit summary message
if warningSum == 0:
- wikipedia.setAction(wikipedia.translate(site.lang, msg_no_warnings))
+ pywikibot.setAction(
+ pywikibot.translate(site.lang, msg_no_warnings))
elif warningSum == 1:
- wikipedia.setAction(wikipedia.translate(site.lang, msg_one_warning) % warningSum)
+ pywikibot.setAction(
+ pywikibot.translate(site.lang, msg_one_warning)
+ % warningSum)
else:
- wikipedia.setAction(wikipedia.translate(site.lang, msg_multiple_warnings) % warningSum)
+ pywikibot.setAction(
+ pywikibot.translate(site.lang, msg_multiple_warnings)
+ % warningSum)
page.put_async(newText)
def run(self):
@@ -555,10 +586,11 @@
# to work on.
genFactory = pagegenerators.GeneratorFactory()
- for arg in wikipedia.handleArgs():
+ for arg in pywikibot.handleArgs():
if arg.startswith('-xml'):
if len(arg) == 4:
- xmlfilename = wikipedia.input(u'Please enter the XML dump\'s filename:')
+ xmlfilename = pywikibot.input(
+ u'Please enter the XML dump\'s filename:')
else:
xmlfilename = arg[5:]
gen = TableXmlDumpPageGenerator(xmlfilename)
@@ -590,7 +622,7 @@
# connect the title's parts with spaces
if page_title != []:
page_title = ' '.join(page_title)
- page = wikipedia.Page(wikipedia.getSite(), page_title)
+ page = pywikibot.Page(pywikibot.getSite(), page_title)
gen = iter([page])
if not gen:
@@ -603,10 +635,10 @@
bot = Table2WikiRobot(preloadingGen, quietMode)
bot.run()
else:
- wikipedia.showHelp('table2wiki')
+ pywikibot.showHelp('table2wiki')
if __name__ == "__main__":
try:
main()
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
Modified: trunk/pywikipedia/templatecount.py
===================================================================
--- trunk/pywikipedia/templatecount.py 2010-10-09 19:32:57 UTC (rev 8630)
+++ trunk/pywikipedia/templatecount.py 2010-10-09 21:01:00 UTC (rev 8631)
@@ -1,9 +1,9 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-This script will display the list of pages transcluding a given list of templates.
-It can also be used to simply count the number of pages (rather than listing each
-individually).
+This script will display the list of pages transcluding a given list of
+templates. It can also be used to simply count the number of pages (rather than
+listing each individually).
Syntax: python templatecount.py command [arguments]
@@ -34,10 +34,11 @@
#
__version__ = '$Id$'
-import wikipedia, config
-import replace, pagegenerators
import re, sys, string
import datetime
+import wikipedia as pywikibot
+import config
+import replace, pagegenerators
templates = ['ref', 'note', 'ref label', 'note label', 'reflist']
@@ -45,16 +46,20 @@
#def __init__(self):
#Nothing
def countTemplates(self, templates, namespaces):
- mysite = wikipedia.getSite()
+ mysite = pywikibot.getSite()
mytpl = mysite.template_namespace()+':'
- finalText = [u'Number of transclusions per template',u'------------------------------------']
+ finalText = [u'Number of transclusions per template', u'-' * 36]
total = 0
- # The names of the templates are the keys, and the numbers of transclusions are the values.
+ # The names of the templates are the keys, and the numbers of
+ # transclusions are the values.
templateDict = {}
for template in templates:
- gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mytpl + template), onlyTemplateInclusion = True)
+ gen = pagegenerators.ReferringPageGenerator(
+ pywikibot.Page(mysite, mytpl + template),
+ onlyTemplateInclusion = True)
if namespaces:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen,
+ namespaces)
count = 0
for page in gen:
count += 1
@@ -62,25 +67,32 @@
finalText.append(u'%s: %d' % (template, count))
total += count
for line in finalText:
- wikipedia.output(line, toStdout=True)
- wikipedia.output(u'TOTAL: %d' % total, toStdout=True)
- wikipedia.output(u'Report generated on %s' % datetime.datetime.utcnow().isoformat(), toStdout=True)
+ pywikibot.output(line, toStdout=True)
+ pywikibot.output(u'TOTAL: %d' % total, toStdout=True)
+ pywikibot.output(u'Report generated on %s'
+ % datetime.datetime.utcnow().isoformat(),
+ toStdout=True)
return templateDict
def listTemplates(self, templates, namespaces):
- mysite = wikipedia.getSite()
+ mysite = pywikibot.getSite()
count = 0
- # The names of the templates are the keys, and lists of pages transcluding templates are the values.
+ # The names of the templates are the keys, and lists of pages
+ # transcluding templates are the values.
templateDict = {}
finalText = [u'List of pages transcluding templates:']
for template in templates:
finalText.append(u'* %s' % template)
- finalText.append(u'------------------------------------')
+ finalText.append(u'-' * 36)
for template in templates:
transcludingArray = []
- gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mysite.template_namespace() + ':' + template), onlyTemplateInclusion = True)
+ gen = pagegenerators.ReferringPageGenerator(
+ pywikibot.Page(mysite,
+ mysite.template_namespace() + ':' + template),
+ onlyTemplateInclusion=True)
if namespaces:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen,
+ namespaces)
for page in gen:
finalText.append(u'%s' % page.title())
count += 1
@@ -88,8 +100,10 @@
templateDict[template] = transcludingArray;
finalText.append(u'Total page count: %d' % count)
for line in finalText:
- wikipedia.output(line, toStdout=True)
- wikipedia.output(u'Report generated on %s' % datetime.datetime.utcnow().isoformat(), toStdout=True)
+ pywikibot.output(line, toStdout=True)
+ pywikibot.output(u'Report generated on %s'
+ % datetime.datetime.utcnow().isoformat(),
+ toStdout=True)
return templateDict
def main():
@@ -97,7 +111,7 @@
argsList = []
namespaces = []
- for arg in wikipedia.handleArgs():
+ for arg in pywikibot.handleArgs():
if arg == '-count':
operation = "Count"
elif arg == '-list':
@@ -111,15 +125,17 @@
argsList.append(arg)
if operation == None:
- wikipedia.showHelp('templatecount')
+ pywikibot.showHelp('templatecount')
else:
robot = TemplateCountRobot()
if not argsList:
argsList = templates
choice = ''
if 'reflist' in argsList:
- wikipedia.output(u'NOTE: it will take a long time to count "reflist".')
- choice = wikipedia.inputChoice(u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y')
+ pywikibot.output(
+ u'NOTE: it will take a long time to count "reflist".')
+ choice = pywikibot.inputChoice(
+ u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y')
if choice == 's':
argsList.remove('reflist')
if choice == 'n':
@@ -133,4 +149,4 @@
try:
main()
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
Modified: trunk/pywikipedia/testfamily.py
===================================================================
--- trunk/pywikipedia/testfamily.py 2010-10-09 19:32:57 UTC (rev 8630)
+++ trunk/pywikipedia/testfamily.py 2010-10-09 21:01:00 UTC (rev 8631)
@@ -23,34 +23,34 @@
"""
#
# (C) Yuri Astrakhan, 2005
+# (C) Pywikipedia bot team, 2006-2010
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
-import sys, wikipedia, traceback
+import sys
+import wikipedia as pywikibot
+import traceback
-
-#===========
-
def testSite(site):
try:
- wikipedia.getall(site, [wikipedia.Page(site, 'Any page name')])
+ pywikibot.getall(site, [pywikibot.Page(site, 'Any page name')])
except KeyboardInterrupt:
raise
- except wikipedia.NoSuchSite:
- wikipedia.output( u'No such language %s' % site.lang )
+ except pywikibot.NoSuchSite:
+ pywikibot.output( u'No such language %s' % site.lang )
except:
- wikipedia.output( u'Error processing language %s' % site.lang )
- wikipedia.output( u''.join(traceback.format_exception(*sys.exc_info())))
+ pywikibot.output( u'Error processing language %s' % site.lang )
+ pywikibot.output( u''.join(traceback.format_exception(*sys.exc_info())))
def main():
all = False
language = None
fam = None
wikimedia = False
- for arg in wikipedia.handleArgs():
+ for arg in pywikibot.handleArgs():
if arg == '-all':
all = True
elif arg[0:7] == '-langs:':
@@ -60,11 +60,13 @@
elif arg[0:10] == '-wikimedia':
wikimedia = True
- mySite = wikipedia.getSite()
+ mySite = pywikibot.getSite()
if language is None:
language = mySite.lang
if wikimedia:
- families = ['wikipedia', 'wiktionary', 'wikiquote', 'wikisource', 'wikibooks', 'wikinews', 'wikiversity', 'meta', 'commons', 'mediawiki', 'species', 'incubator', 'test']
+ families = ['wikipedia', 'wiktionary', 'wikiquote', 'wikisource',
+ 'wikibooks', 'wikinews', 'wikiversity', 'meta', 'commons',
+ 'mediawiki', 'species', 'incubator', 'test']
elif fam is not None:
families = fam.split(',')
else:
@@ -72,23 +74,24 @@
for family in families:
try:
- fam = wikipedia.Family(family)
+ fam = pywikibot.Family(family)
except ValueError:
- wikipedia.output(u'No such family %s' % family)
+ pywikibot.output(u'No such family %s' % family)
continue
if all:
for lang in fam.langs.iterkeys():
- testSite(wikipedia.getSite(lang, family))
+ testSite(pywikibot.getSite(lang, family))
else:
languages = language.split(',')
for lang in languages:
try:
- testSite(wikipedia.getSite(lang, family))
- except wikipedia.NoSuchSite:
- wikipedia.output(u'No such language %s in family %s' % (lang, family))
+ testSite(pywikibot.getSite(lang, family))
+ except pywikibot.NoSuchSite:
+ pywikibot.output(u'No such language %s in family %s'
+ % (lang, family))
if __name__ == "__main__":
try:
main()
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
Modified: trunk/pywikipedia/unlink.py
===================================================================
--- trunk/pywikipedia/unlink.py 2010-10-09 19:32:57 UTC (rev 8630)
+++ trunk/pywikipedia/unlink.py 2010-10-09 21:01:00 UTC (rev 8631)
@@ -24,9 +24,10 @@
__version__='$Id$'
-import wikipedia, pagegenerators
+import re
+import wikipedia as pywikibot
+import pagegenerators
import editarticle
-import re
# Summary messages in different languages
msg = {
@@ -44,18 +45,16 @@
'pt':u'Bot: Retirando link para "%s"',
}
+
class UnlinkBot:
def __init__(self, pageToUnlink, namespaces, always):
self.pageToUnlink = pageToUnlink
-
gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
-
if namespaces != []:
gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
self.generator = pagegenerators.PreloadingGenerator(gen)
-
- linktrail = wikipedia.getSite().linktrail()
+ linktrail = pywikibot.getSite().linktrail()
# The regular expression which finds links. Results consist of four groups:
# group title is the target page title, that is, everything before | or ].
# group section is the page section. It'll include the # to make life easier for us.
@@ -65,6 +64,8 @@
self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
self.always = always
self.done = False
+ self.comment = pywikibot.translate(pywikibot.getSite(), msg) \
+ % self.pageToUnlink.title()
def handleNextLink(self, text, match, context = 100):
"""
@@ -80,8 +81,8 @@
or self.pageToUnlink.site().isInterwikiLink(match.group('title')) \
or match.group('section'):
return text, False
-
- linkedPage = wikipedia.Page(self.pageToUnlink.site(), match.group('title'))
+ linkedPage = pywikibot.Page(self.pageToUnlink.site(),
+ match.group('title'))
# Check whether the link found is to the current page itself.
if linkedPage != self.pageToUnlink:
# not a self-link
@@ -92,16 +93,16 @@
if self.always:
choice = 'a'
else:
- wikipedia.output(
+ pywikibot.output(
text[max(0, match.start() - context) : match.start()] \
+ '\03{lightred}' + text[match.start() : match.end()] \
+ '\03{default}' + text[match.end() : match.end() + context])
- choice = wikipedia.inputChoice(
+ choice = pywikibot.inputChoice(
u'\nWhat shall be done with this link?\n',
['unlink', 'skip', 'edit', 'more context',
'unlink all', 'quit'],
['U', 's', 'e', 'm', 'a', 'q'], 'u')
- wikipedia.output(u'')
+ pywikibot.output(u'')
if choice == 's':
# skip this link
@@ -116,7 +117,8 @@
return text, True
elif choice == 'm':
# show more context by recursive self-call
- return self.handleNextLink(text, match, context = context + 100)
+ return self.handleNextLink(text, match,
+ context=context + 100)
elif choice == 'a':
self.always = True
elif choice == 'q':
@@ -129,7 +131,8 @@
def treat(self, page):
# Show the title of the page we're working on.
# Highlight the title in purple.
- wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
try:
oldText = page.get()
text = oldText
@@ -138,32 +141,32 @@
match = self.linkR.search(text, pos = curpos)
if not match:
break
- # Make sure that next time around we will not find this same hit.
+ # Make sure that next time around we will not find this same
+ # hit.
curpos = match.start() + 1
text, jumpToBeginning = self.handleNextLink(text, match)
if jumpToBeginning:
curpos = 0
-
if oldText == text:
- wikipedia.output(u'No changes necessary.')
+ pywikibot.output(u'No changes necessary.')
else:
- wikipedia.showDiff(oldText, text)
- page.put(text)
- except wikipedia.NoPage:
- wikipedia.output(u"Page %s does not exist?!" % page.aslink())
- except wikipedia.IsRedirectPage:
- wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
- except wikipedia.LockedPage:
- wikipedia.output(u"Page %s is locked?!" % page.aslink())
+ pywikibot.showDiff(oldText, text)
+ page.put(text, self.comment)
+ except pywikibot.NoPage:
+ pywikibot.output(u"Page %s does not exist?!"
+ % page.title(asLink=True))
+ except pywikibot.IsRedirectPage:
+ pywikibot.output(u"Page %s is a redirect; skipping."
+ % page.title(asLink=True))
+ except pywikibot.LockedPage:
+ pywikibot.output(u"Page %s is locked?!" % page.title(asLink=True))
def run(self):
- comment = wikipedia.translate(wikipedia.getSite(), msg) % self.pageToUnlink.title()
- wikipedia.setAction(comment)
-
for page in self.generator:
if self.done: break
self.treat(page)
+
def main():
# This temporary array is used to read the page title if one single
# page that should be unlinked.
@@ -173,7 +176,7 @@
namespaces = []
always = False
- for arg in wikipedia.handleArgs():
+ for arg in pywikibot.handleArgs():
if arg.startswith('-namespace:'):
try:
namespaces.append(int(arg[11:]))
@@ -185,14 +188,14 @@
pageTitle.append(arg)
if pageTitle:
- page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
+ page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
bot = UnlinkBot(page, namespaces, always)
bot.run()
else:
- wikipedia.showHelp('unlink')
+ pywikibot.showHelp('unlink')
if __name__ == "__main__":
try:
main()
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
Modified: trunk/pywikipedia/us-states.py
===================================================================
--- trunk/pywikipedia/us-states.py 2010-10-09 19:32:57 UTC (rev 8630)
+++ trunk/pywikipedia/us-states.py 2010-10-09 21:01:00 UTC (rev 8631)
@@ -14,14 +14,17 @@
-force: Don't ask whether to create pages, just create them.
"""
-__version__ = '$Id$'
#
# (C) Andre Engels, 2004
+# (C) Pywikipedia bot team, 2004-2010
#
# Distributed under the terms of the MIT license.
#
+__version__ = '$Id$'
+#
-import re,wikipedia,sys
+import re, sys
+import wikipedia as pywikibot
def main():
start = '0'
@@ -84,48 +87,60 @@
'Wyoming': 'WY'
}
- for arg in wikipedia.handleArgs():
+ for arg in pywikibot.handleArgs():
if arg.startswith('-start:'):
start = arg[7:]
elif arg == '-force':
force = True
else:
- wikipedia.output(u'Warning: argument "%s" not understood; ignoring.'%arg)
+ pywikibot.output(
+ u'Warning: argument "%s" not understood; ignoring.' % arg)
- mysite = wikipedia.getSite()
+ mysite = pywikibot.getSite()
for p in mysite.allpages(start = start):
for sn in abbrev:
R=re.compile('[^[]]*' + '\%2C_' + sn)
for res in R.findall(p.title()):
- pl=wikipedia.Page(mysite, p.title().replace(sn,abbrev[sn]))
- # A bit hacking here - the real work is done in the 'except wikipedia.NoPage'
- # part rather than the 'try'.
+ pl=pywikibot.Page(mysite, p.title().replace(sn,abbrev[sn]))
+ # A bit hacking here - the real work is done in the
+ # 'except pywikibot.NoPage' part rather than the 'try'.
try:
goal = pl.getRedirectTarget().title()
- if wikipedia.Page(mysite, goal):
- wikipedia.output(u"Not creating %s - redirect already exists." % goal)
+ if pywikibot.Page(mysite, goal):
+ pywikibot.output(
+ u"Not creating %s - redirect already exists."
+ % goal)
else:
- wikipedia.output(u"WARNING!!! %s already exists but redirects elsewhere!" % goal)
- except wikipedia.IsNotRedirectPage:
- wikipedia.output(u"WARNING!!! Page %s already exists and is not a redirect. Please check page!" % goal)
- except wikipedia.NoPage:
+ pywikibot.output(
+ u"WARNING!!! %s already exists but redirects elsewhere!"
+ % goal)
+ except pywikibot.IsNotRedirectPage:
+ pywikibot.output(
+ u"WARNING!!! Page %s already exists and is not a redirect. Please check page!"
+ % goal)
+ except pywikibot.NoPage:
change=''
if p.isRedirectPage():
p2 = p.getRedirectTarget()
- wikipeda.ouput(u'Note: goal page is redirect. Creating redirect to "%s" to avoid double redirect.'%p2.title().replace("%2C",",").replace("_"," "))
+ pywikibot.output(
+ u'Note: goal page is redirect. Creating redirect to "%s" to avoid double redirect.'
+ % p2.title().replace("%2C",",").replace("_"," "))
else:
p2 = p
if force:
change='y'
else:
while not change in ['y','n']:
- wikipedia.output(u"Create redirect %s"%pl.title().replace("%2C",",").replace("_"," "))
+ pywikibot.output(
+ u"Create redirect %s" %
+ pl.title().replace("%2C",",").replace("_"," "))
change = raw_input("(y/n)? ")
if change=='y':
text = '#REDIRECT [['+p2.title().replace("%2C",",").replace("_"," ")+']]'
- pl.put(text, comment = wikipedia.translate(mysite, msg), minorEdit = '0')
+ pl.put(text, comment=pywikibot.translate(mysite, msg),
+ minorEdit = '0')
try:
main()
finally:
- wikipedia.stopme()
+ pywikibot.stopme()