jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I1129bf9d966dc6c13ac01296439eaf7c3fde31c1
---
M weblinkchecker.py
1 file changed, 106 insertions(+), 75 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/weblinkchecker.py b/weblinkchecker.py
index 1e61f99..1eaa96b 100644
--- a/weblinkchecker.py
+++ b/weblinkchecker.py
@@ -88,19 +88,30 @@
#
# (C) Daniel Herding, 2005
-# (C) Pywikipedia bot team, 2005-2012
+# (C) Pywikibot team, 2005-2013
#
# Distributed under the terms of the MIT license.
#
-__version__='$Id$'
+__version__ = '$Id$'
-import sys, re
-import codecs, pickle
-import httplib, socket, urlparse, urllib, urllib2
-import threading, time
+import sys
+import re
+import codecs
+import pickle
+import httplib
+import socket
+import urlparse
+import urllib
+import urllib2
+import threading
+import time
+import gzip
+import StringIO
+
import pywikibot
from pywikibot import i18n
-import config, pagegenerators
+import config
+import pagegenerators
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp
@@ -121,17 +132,18 @@
# Other special cases
re.compile('.*[\./@]gso\.gbv\.de(/.*)?'), # bot somehow can't handle their redirects
- re.compile('.*[\./@]berlinonline\.de(/.*)?'), # a de: user wants to fix them by hand and doesn't want them to be deleted, see [[de:Benutzer:BLueFiSH.as/BZ]].
- re.compile('.*[\./@]bodo\.kommune\.no(/.*)?'), # bot can't handle their redirects
- re.compile('.*[\./@]jpl\.nasa\.gov(/.*)?'), # bot rejected on the site
- re.compile('.*[\./@]itis\.gov(/.*)?'), # bot rejected on the site
- re.compile('.*[\./@]cev\.lu(/.*)?'), # bot rejected on the site
- re.compile('.*[\./@]science\.ksc\.nasa\.gov(/.*)?'), # very slow response resulting in bot error
- re.compile('.*[\./@]britannica\.com(/.*)?'), #HTTP redirect loop
- re.compile('.*[\./@]quickfacts\.census\.gov(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./@]berlinonline\.de(/.*)?'), # a de: user wants to fix them by hand and doesn't want them to be deleted, see [[de:Benutzer:BLueFiSH.as/BZ]].
+ re.compile('.*[\./@]bodo\.kommune\.no(/.*)?'), # bot can't handle their redirects
+ re.compile('.*[\./@]jpl\.nasa\.gov(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./@]itis\.gov(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./@]cev\.lu(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./@]science\.ksc\.nasa\.gov(/.*)?'), # very slow response resulting in bot error
+ re.compile('.*[\./@]britannica\.com(/.*)?'), # HTTP redirect loop
+ re.compile('.*[\./@]quickfacts\.census\.gov(/.*)?'), # bot rejected on the site
]
-def weblinksIn(text, withoutBracketed = False, onlyBracketed = False):
+
+def weblinksIn(text, withoutBracketed=False, onlyBracketed=False):
text = pywikibot.removeDisabledParts(text)
# MediaWiki parses templates before parsing external links. Thus, there
@@ -164,6 +176,7 @@
else:
yield m.group('urlb')
+
class InternetArchiveConsulter:
def __init__(self, url):
self.url = url
@@ -176,23 +189,21 @@
except urllib2.HTTPError:
# The Internet Archive yields a 403 error when the site was not
# archived due to robots.txt restrictions.
- return None
+ return
except UnicodeEncodeError:
- return None
+ return
data = f.read()
if f.headers.get('content-encoding', None) == 'gzip':
# Since 2008, the Internet Archive returns pages in GZIPed
# compression format. Unfortunately urllib2 doesn't handle
# the decompression for us, so we have to do it ourselves.
- import gzip, StringIO
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if "Search Results for " in data:
return archiveURL
- else:
- return None
+
class LinkChecker(object):
- '''
+ """
Given a HTTP URL, tries to load the page from the Internet and checks if it
is still online.
@@ -201,8 +212,10 @@
Warning: Also returns false if your Internet connection isn't working
correctly! (This will give a Socket Error)
- '''
- def __init__(self, url, redirectChain = [], serverEncoding=None, HTTPignore=[]):
+
+ """
+ def __init__(self, url, redirectChain=[], serverEncoding=None,
+ HTTPignore=[]):
"""
redirectChain is a list of redirects which were resolved by
resolveRedirect(). This is needed to detect redirect loops.
@@ -247,8 +260,8 @@
if not self.serverEncoding:
# TODO: We might also load a page, then check for an encoding
# definition in a HTML meta tag.
- pywikibot.output(
- u'Error retrieving server\'s default charset. Using ISO 8859-1.')
+ pywikibot.output(u'Error retrieving server\'s default charset. '
+ u'Using ISO 8859-1.')
# most browsers use ISO 8859-1 (Latin-1) as the default.
self.serverEncoding = 'iso8859-1'
return self.serverEncoding
@@ -266,7 +279,8 @@
def changeUrl(self, url):
self.url = url
# we ignore the fragment
- self.scheme, self.host, self.path, self.query, self.fragment = urlparse.urlsplit(self.url)
+ (self.scheme, self.host, self.path, self.query,
+ self.fragment) = urlparse.urlsplit(self.url)
if not self.path:
self.path = '/'
if self.query:
@@ -282,14 +296,15 @@
self.path = unicode(urllib.quote(self.path.encode(encoding)))
self.query = unicode(urllib.quote(self.query.encode(encoding), '=&'))
- def resolveRedirect(self, useHEAD = False):
- '''
+ def resolveRedirect(self, useHEAD=False):
+ """
Requests the header from the server. If the page is an HTTP redirect,
returns the redirect target URL as a string. Otherwise returns None.
If useHEAD is true, uses the HTTP HEAD method, which saves bandwidth
by not downloading the body. Otherwise, the HTTP GET method is used.
- '''
+
+ """
conn = self.getConnection()
try:
if useHEAD:
@@ -307,7 +322,7 @@
# Apache server. Using GET instead works on these (but it uses
# more bandwidth).
if useHEAD:
- return self.resolveRedirect(useHEAD = False)
+ return self.resolveRedirect(useHEAD=False)
else:
raise
if self.response.status >= 300 and self.response.status <= 399:
@@ -327,7 +342,7 @@
self.changeUrl(u'%s://%s%s'
% (self.protocol, self.host, redirTarget))
return True
- else: # redirect to relative position
+ else: # redirect to relative position
# cut off filename
directory = self.path[:self.path.rindex('/') + 1]
# handle redirect to parent directory
@@ -344,18 +359,18 @@
redirTarget))
return True
else:
- return False # not a redirect
+ return False # not a redirect
- def check(self, useHEAD = False):
+ def check(self, useHEAD=False):
"""
Returns True and the server status message if the page is alive.
Otherwise returns false
"""
try:
- wasRedirected = self.resolveRedirect(useHEAD = useHEAD)
+ wasRedirected = self.resolveRedirect(useHEAD=useHEAD)
except UnicodeError, error:
- return False, u'Encoding Error: %s (%s)' \
- % (error.__class__.__name__, unicode(error))
+ return False, u'Encoding Error: %s (%s)' % (
+ error.__class__.__name__, unicode(error))
except httplib.error, error:
return False, u'HTTP Error: %s' % error.__class__.__name__
except socket.error, error:
@@ -372,7 +387,7 @@
msg = error[1]
except IndexError:
print u'### DEBUG information for #2972249'
- raise IndexError, type(error)
+ raise IndexError(type(error))
# TODO: decode msg. On Linux, it's encoded in UTF-8.
# How is it encoded in Windows? Or can we somehow just
# get the English message?
@@ -384,31 +399,38 @@
# which leads to a cyclic list of redirects.
# We simply start from the beginning, but this time,
# we don't use HEAD, but GET requests.
- redirChecker = LinkChecker(self.redirectChain[0],
- serverEncoding=self.serverEncoding,
- HTTPignore=self.HTTPignore)
- return redirChecker.check(useHEAD = False)
+ redirChecker = LinkChecker(
+ self.redirectChain[0],
+ serverEncoding=self.serverEncoding,
+ HTTPignore=self.HTTPignore)
+ return redirChecker.check(useHEAD=False)
else:
- urlList = ['[%s]' % url for url in self.redirectChain + [self.url]]
- return False, u'HTTP Redirect Loop: %s' % ' -> '.join(urlList)
+ urlList = ['[%s]' % url
+ for url in self.redirectChain + [self.url]]
+ return (False,
+ u'HTTP Redirect Loop: %s' % ' -> '.join(urlList))
elif len(self.redirectChain) >= 19:
if useHEAD:
# Some servers don't seem to handle HEAD requests properly,
# which leads to a long (or infinite) list of redirects.
# We simply start from the beginning, but this time,
# we don't use HEAD, but GET requests.
- redirChecker = LinkChecker(self.redirectChain[0],
- serverEncoding=self.serverEncoding,
- HTTPignore = self.HTTPignore)
- return redirChecker.check(useHEAD = False)
+ redirChecker = LinkChecker(
+ self.redirectChain[0],
+ serverEncoding=self.serverEncoding,
+ HTTPignore=self.HTTPignore)
+ return redirChecker.check(useHEAD=False)
else:
- urlList = ['[%s]' % url for url in self.redirectChain + [self.url]]
- return False, u'Long Chain of Redirects: %s' % ' -> '.join(urlList)
+ urlList = ['[%s]' % url
+ for url in self.redirectChain + [self.url]]
+ return (False,
+ u'Long Chain of Redirects: %s'
+ % ' -> '.join(urlList))
else:
redirChecker = LinkChecker(self.url, self.redirectChain,
self.serverEncoding,
HTTPignore=self.HTTPignore)
- return redirChecker.check(useHEAD = useHEAD)
+ return redirChecker.check(useHEAD=useHEAD)
else:
try:
conn = self.getConnection()
@@ -431,22 +453,24 @@
alive = False
return alive, '%s %s' % (self.response.status, self.response.reason)
+
class LinkCheckThread(threading.Thread):
- '''
- A thread responsible for checking one URL. After checking the page, it
+ """ A thread responsible for checking one URL. After checking the page, it
will die.
- '''
+
+ """
def __init__(self, page, url, history, HTTPignore):
threading.Thread.__init__(self)
self.page = page
self.url = url
self.history = history
# identification for debugging purposes
- self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8', 'replace'))
+ self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8',
+ 'replace'))
self.HTTPignore = HTTPignore
def run(self):
- linkChecker = LinkChecker(self.url, HTTPignore = self.HTTPignore)
+ linkChecker = LinkChecker(self.url, HTTPignore=self.HTTPignore)
try:
ok, message = linkChecker.check()
except:
@@ -464,7 +488,7 @@
class History:
- ''' Stores previously found dead links. The URLs are dictionary keys, and
+ """ Stores previously found dead links. The URLs are dictionary keys, and
values are lists of tuples where each tuple represents one time the URL was
found dead. Tuples have the form (title, date, error) where title is the
wiki page where the URL was found, date is an instance of time, and error is
@@ -481,15 +505,14 @@
('WikiPageName2', DATE, '404: File not found'),
]
- '''
+ """
def __init__(self, reportThread):
self.reportThread = reportThread
site = pywikibot.getSite()
self.semaphore = threading.Semaphore()
- self.datfilename = pywikibot.config.datafilepath('deadlinks',
- 'deadlinks-%s-%s.dat'
- % (site.family.name, site.lang))
+ self.datfilename = pywikibot.config.datafilepath(
+ 'deadlinks', 'deadlinks-%s-%s.dat' % (site.family.name, site.lang))
# Count the number of logged links, so that we can insert captions
# from time to time
self.logCount = 0
@@ -532,7 +555,6 @@
self.reportThread.report(url, errorReport, containingPage,
archiveURL)
-
def setLinkDead(self, url, error, page, day):
"""
Adds the fact that the link was found dead to the .dat file.
@@ -541,7 +563,7 @@
now = time.time()
if url in self.historyDict:
timeSinceFirstFound = now - self.historyDict[url][0][1]
- timeSinceLastFound= now - self.historyDict[url][-1][1]
+ timeSinceLastFound = now - self.historyDict[url][-1][1]
# if the last time we found this dead link is less than an hour
# ago, we won't save it in the history this time.
if timeSinceLastFound > 60 * 60:
@@ -568,7 +590,7 @@
try:
del self.historyDict[url]
except KeyError:
- # Not sure why this can happen, but I guess we can ignore this...
+ # Not sure why this can happen, but I guess we can ignore this.
pass
self.semaphore.release()
return True
@@ -583,6 +605,7 @@
pickle.dump(self.historyDict, datfile)
datfile.close()
+
class DeadLinkReportThread(threading.Thread):
'''
A Thread that is responsible for posting error reports on talk pages. There
@@ -592,7 +615,7 @@
def __init__(self):
threading.Thread.__init__(self)
self.semaphore = threading.Semaphore()
- self.queue = [];
+ self.queue = []
self.finishing = False
self.killed = False
@@ -631,7 +654,8 @@
content = talkPage.get() + "\n\n"
if url in content:
pywikibot.output(
- u'\03{lightaqua}** Dead link seems to have already been reported on %s\03{default}'
+ u'\03{lightaqua}** Dead link seems to have already '
+ u'been reported on %s\03{default}'
% talkPage.title(asLink=True))
self.semaphore.release()
continue
@@ -672,18 +696,20 @@
talkPage.put(content, comment)
except pywikibot.SpamfilterError, error:
pywikibot.output(
- u'\03{lightaqua}** SpamfilterError while trying to change %s: %s\03{default}'
+ u'\03{lightaqua}** SpamfilterError while trying to '
+ u'change %s: %s\03{default}'
% (talkPage.title(asLink=True), error.url))
self.semaphore.release()
class WeblinkCheckerRobot:
- '''
+ """
Robot which will use several LinkCheckThreads at once to search for dead
weblinks on pages provided by the given generator.
- '''
- def __init__(self, generator, HTTPignore = []):
+
+ """
+ def __init__(self, generator, HTTPignore=[]):
self.generator = generator
if config.report_dead_links_on_talk:
#pywikibot.output("Starting talk page thread")
@@ -698,7 +724,7 @@
def run(self):
for page in self.generator:
- self.checkLinksIn(page)
+ self.checkLinksIn(page)
def checkLinksIn(self, page):
try:
@@ -737,6 +763,7 @@
page = pywikibot.Page(pywikibot.getSite(), pageTitle)
yield page
+
def countLinkCheckThreads():
i = 0
for thread in threading.enumerate():
@@ -744,10 +771,12 @@
i += 1
return i
+
def check(url):
"""Peform a check on URL"""
c = LinkChecker(url)
return c.check()
+
def main():
gen = None
@@ -791,11 +820,11 @@
gen = genFactory.getCombinedGenerator()
if gen:
if namespaces != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
# fetch at least 240 pages simultaneously from the wiki, but more if
# a high thread number is set.
pageNumber = max(240, config.max_external_links * 2)
- gen = pagegenerators.PreloadingGenerator(gen, pageNumber = pageNumber)
+ gen = pagegenerators.PreloadingGenerator(gen, pageNumber=pageNumber)
gen = pagegenerators.RedirectFilterPageGenerator(gen)
bot = WeblinkCheckerRobot(gen, HTTPignore)
try:
@@ -805,8 +834,9 @@
# Don't wait longer than 30 seconds for threads to finish.
while countLinkCheckThreads() > 0 and waitTime < 30:
try:
- pywikibot.output(
- u"Waiting for remaining %i threads to finish, please wait..." % countLinkCheckThreads())
+ pywikibot.output(u"Waiting for remaining %i threads to "
+ u"finish, please wait..."
+ % countLinkCheckThreads())
# wait 1 second
time.sleep(1)
waitTime += 1
@@ -832,6 +862,7 @@
else:
pywikibot.showHelp()
+
if __name__ == "__main__":
try:
main()
--
To view, visit https://gerrit.wikimedia.org/r/98268
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1129bf9d966dc6c13ac01296439eaf7c3fde31c1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
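The InternetArchiveConsulter hunk above decompresses the archive.org response by hand because, as its comment notes, urllib2 does not decompress gzip-encoded bodies on its own. Below is a minimal standalone sketch of that pattern in Python 2 (the interpreter these compat scripts target); fetch_url_text is a hypothetical helper written only for illustration and is not part of weblinkchecker.py.

# Sketch only: manual gzip decompression of a urllib2 response, as done in
# InternetArchiveConsulter.getArchiveURL() in the diff above.
# fetch_url_text is a hypothetical helper, not part of weblinkchecker.py.
import gzip
import StringIO
import urllib2

def fetch_url_text(url):
    request = urllib2.Request(url)
    # Advertise gzip support; some servers (the Internet Archive among them,
    # per the comment in the diff) answer with a gzip-compressed body.
    request.add_header('Accept-encoding', 'gzip')
    f = urllib2.urlopen(request)
    data = f.read()
    if f.headers.get('content-encoding', None) == 'gzip':
        # urllib2 hands back the raw compressed bytes, so decompress them
        # ourselves by wrapping them in a file-like StringIO buffer.
        data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
    return data

if __name__ == '__main__':
    print fetch_url_text('http://www.example.org/')[:200]

The StringIO wrapper is needed because gzip.GzipFile expects a file-like object rather than a byte string.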
Xqt has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I31d6c0b29bc099cd64254a0c31f5ea25929ac2b3
---
M sum_disc.py
1 file changed, 609 insertions(+), 489 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/sum_disc.py b/sum_disc.py
index a239300..bff8de6 100644
--- a/sum_disc.py
+++ b/sum_disc.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
This bot is used to summarize discussions spread over the whole wiki
-including all namespaces. It checks several users (at request), sequentially
+including all namespaces. It checks several users (at request), sequentially
(currently for the German wiki [de] only).
The bot will only change the user's discussion page by appending a summary
@@ -9,14 +9,15 @@
The bot's operating procedure:
-retrieve user list from [[Benutzer:DrTrigonBot/Diene_Mir!]]
- -check recent changes in [[Special:Contributions/<user>]] (Spezial:Beiträge/...)
+ -check recent changes in [[Special:Contributions/<user>]]
+ (Spezial:Beiträge/...)
-retrieve history from file
-checking each listed Discussion on time of latest change
- -checks relevancy by searching each heading with user signature in body, if any
- found, checks them on changes and finally if the user signature is the last one
- (or if there is any foreign signature behind it)
- -appending history to local user history file (same as summary report, can also
- be redirected on a page in the wiki if useful)
+ -checks relevancy by searching each heading with user signature in body, if
+ any found, checks them on changes and finally if the user signature is the
+ last one (or if there is any foreign signature behind it)
+ -appending history to local user history file (same as summary report, can
+ also be redirected on a page in the wiki if useful)
-appending summary report to [[Benutzer Diskussion:<user>]]
This bot code and 'wikipedaiAPI.py' work with UTC/GMT ONLY because of DST!! For
@@ -32,9 +33,9 @@
on the toolserver, at a very low speed, which should be fast enough to check
the small number of users.
-Entries can be changed (deleted for example when the discussion is finished) that
-will cause no problem for the bot, because the entries are also written to the
-history.
+Entries can be changed (deleted for example when the discussion is finished)
+that will cause no problem for the bot, because the entries are also written to
+the history.
All other parameters will be ignored.
@@ -57,6 +58,7 @@
# @brief Summarize Discussions Robot
#
# @copyright Dr. Trigon, 2008-2011
+# @copyright pywikibot team, 2013
#
# @section FRAMEWORK
#
@@ -73,152 +75,162 @@
#
-import re, time, codecs, os, calendar
-import copy #, zlib
-import string, datetime, hashlib, locale
+import re
+import time
+import codecs
+import os
+import calendar
+import copy
+import string
+import datetime
+import hashlib
+import locale
import logging
import urllib
from xml.etree.cElementTree import XML
# or if cElementTree not found, you can use BeautifulStoneSoup instead
-import config, pagegenerators, userlib
+import config
+import pagegenerators
+import userlib
import basic
# Splitting the bot into library parts
import wikipedia as pywikibot
from pywikibot import i18n, tools
-_PS_warning = 1 # serious or no classified warnings/errors that should be reported
-_PS_changed = 2 # changed page (if closed, will be removed)
-_PS_unchanged = 3 # unchanged page
-_PS_new = 4 # new page
-_PS_closed = 5 # closed page (remove it from history)
-_PS_maintmsg = 6 # maintenance message
-_PS_notify = 7 # global wiki notification
+_PS_warning = 1 # serious or no classified warnings/errors that should be reported
+_PS_changed = 2 # changed page (if closed, will be removed)
+_PS_unchanged = 3 # unchanged page
+_PS_new = 4 # new page
+_PS_closed = 5 # closed page (remove it from history)
+_PS_maintmsg = 6 # maintenance message
+_PS_notify = 7 # global wiki notification
-
-_REGEX_eol = re.compile('\n')
-
+_REGEX_eol = re.compile('\n')
bot_config = { # unicode values
- 'TemplateName': u'User:DrTrigonBot/SumDisc',
- 'userlist': u'User:DrTrigonBot/Diene_Mir!',
- 'maintenance_queue': u'User:DrTrigonBot/Maintenance',
- 'maintenance_page': u'User talk:DrTrigon#%s',
+ 'TemplateName': u'User:DrTrigonBot/SumDisc',
+ 'userlist': u'User:DrTrigonBot/Diene_Mir!',
+ 'maintenance_queue': u'User:DrTrigonBot/Maintenance',
+ 'maintenance_page': u'User talk:DrTrigon#%s',
- 'queue_security': ([u'DrTrigon'], u'Bot: exec'),
+ 'queue_security': ([u'DrTrigon'], u'Bot: exec'),
- # NON (!) unicode values
- 'data_path': '../data/sum_disc',
- 'logger_tmsp': True,
- 'backup_hist': True,
+ # NON (!) unicode values
+ 'data_path': '../data/sum_disc',
+ 'logger_tmsp': True,
+ 'backup_hist': True,
- # regex values
- 'tmpl_params_regex': re.compile('(.*?)data=(.*?)\|timestamp=(.*)', re.S),
- 'page_regex': re.compile(r'Page\{\[\[(.*?)\]\]\}'),
+ # regex values
+ 'tmpl_params_regex': re.compile('(.*?)data=(.*?)\|timestamp=(.*)', re.S),
+ 'page_regex': re.compile(r'Page\{\[\[(.*?)\]\]\}'),
- # numeric values
- 'timeout': 15.0, # timeout for progress info display
+ # numeric values
+ 'timeout': 15.0, # timeout for progress info display
- # list values
- # which lists are regex to compile ('backlinks_list' are no regex)
- #'regex_compile': [ 'checkedit_list', 'checksign_list', 'ignorepage_list', ],
- #'regex_compile': [ 'checkedit_list', 'ignorepage_list', 'ignorehead_list', 'matchhead_list', ],
- 'regex_compile': [ 'checkedit_list', 'ignorehead_list', 'matchhead_list', ],
- # which lists may contain variables to substitute
- #'vars_subst': [ 'checkedit_list', 'checksign_list', 'ignorepage_list', 'backlinks_list', 'altsign_list' ],
- 'vars_subst': [ 'checkedit_list', 'ignorepage_list', 'backlinks_list', 'altsign_list' ], # + 'ignorehead_list', 'matchhead_list' ?
- # which lists should preserve/keep their defaults (instead of getting it overwritten by user settings)
- 'default_keep': [ 'checkedit_list', 'altsign_list' ],
- # which lists should be translated according to site's lang
- 'translate': [ 'notify_msg', 'parse_msg' ],
+ # list values
+ # which lists are regex to compile ('backlinks_list' are no regex)
+ #'regex_compile': ['checkedit_list', 'checksign_list', 'ignorepage_list', ],
+ #'regex_compile': ['checkedit_list', 'ignorepage_list', 'ignorehead_list', 'matchhead_list', ],
+ 'regex_compile': ['checkedit_list', 'ignorehead_list', 'matchhead_list'],
+ # which lists may contain variables to substitute
+ #'vars_subst': ['checkedit_list', 'checksign_list', 'ignorepage_list', 'backlinks_list', 'altsign_list' ],
+ 'vars_subst': ['checkedit_list', 'ignorepage_list', 'backlinks_list',
+ 'altsign_list'], # + 'ignorehead_list', 'matchhead_list' ?
+ # which lists should preserve/keep their defaults (instead of getting it overwritten by user settings)
+ 'default_keep': ['checkedit_list', 'altsign_list'],
+ # which lists should be translated according to site's lang
+ 'translate': ['notify_msg', 'parse_msg'],
- # 'msg' moved to i18n (translatewiki.org), key names:
- #'thirdparty-drtrigonbot-sum_disc-summary-head'
- #'thirdparty-drtrigonbot-sum_disc-summary-add'
- #'thirdparty-drtrigonbot-sum_disc-summary-mod'
- #'thirdparty-drtrigonbot-sum_disc-summary-clean'
+ # 'msg' moved to i18n (translatewiki.org), key names:
+ #'thirdparty-drtrigonbot-sum_disc-summary-head'
+ #'thirdparty-drtrigonbot-sum_disc-summary-add'
+ #'thirdparty-drtrigonbot-sum_disc-summary-mod'
+ #'thirdparty-drtrigonbot-sum_disc-summary-clean'
- # bot parameter/options (modifiable by user)
- 'param_default': { 'checkedit_count': 500, # CHECK recent EDITs, a COUNT
- #'checkedit_count': 1000,
- 'reportchanged_switch': True, # REPORT CHANGED discussions, a SWITCH
- 'getbacklinks_switch': False, # GET BACKLINKS additionally, a SWITCH
- 'reportwarn_switch': True, # (not published yet)
- 'globwikinotify_switch': False, # GET OTHER WIKIS NOTIFICATIONS additionally, a SWITCH
- 'reportclosed_switch': True, # (not published yet)
- 'cleanup_count': -1, # DELETE/CLEAN-UP all older entries, a COUNT
- 'mainsignneeded_switch': True, # (not published yet; DRTRIGON-99)
- # LIST of talks/discussions to SEARCH, a LIST
- 'checkedit_list': [ '^(.*?Diskussion:.*)',
- u'^(Wikipedia:Löschkandidaten/.*)',
- u'^(Wikipedia:Qualitätssicherung/.*)',
- u'^(Wikipedia:Löschprüfung)',
- '^(Wikipedia:Fragen zur Wikipedia)',
- '^(Portal:.*)',
- '^(Wikipedia:WikiProjekt.*)',
- '^(Wikipedia:Redaktion.*)',
- '^(Wikipedia:Auskunft)',
- u'^(Wikipedia:Café)',
- u'^(Wikipedia:Verbesserungsvorschläge)', # may cause problems because of level 1 headings
- '^(Wikipedia:Tellerrand)',
- '^(Wikipedia:Urheberrechtsfragen)',
- '^(Wikipedia:Vandalismusmeldung)',
- u'^(Wikipedia:Administratoren/Anfragen)',
- u'^(Wikipedia:Administratoren/Notizen)',
- #u'^(Wikipedia:Administratoren/.*)',
- u'^(Wikipedia:Diskussionen über Bilder)',
- u'^(Wikipedia:Fotowerkstatt)', # [F46]
- u'^(Wikipedia:Bilderwünsche)', #
- u'^(Wikipedia:Grafikwerkstatt)', #
- u'^(Wikipedia:Grafikwerkstatt/Grafikwünsche)', #
- u'^(Wikipedia:Kartenwerkstatt)', #
- u'^(Wikipedia:Kartenwerkstatt/Kartenwünsche)', #
- u'^(Wikipedia:Bots/.*)', # DRTRIGON-11
- u'^(Wikipedia:Redundanz/.*)', ], # (requested by Flominator)
- # LIST of SIGNATUREs REGEX to USE, a LIST (DRTRIGON-89)
- # possibly even [\]\|/#] instead of just [\]\|/] ...?! then the signature may also contain links to subsections
- # unfortunately this also yields false positives... but pages that once drew attention
- # (were on the RecentChanges list) and contain links into the user namespace are interesting as well!!
- # (and rather rare, about 1 per user out of ca. 100 in the history)
- 'checksign_list': [ u'--\s?\[\[%(ns)s:%(usersig)s[\]\|/]', ],
- # LIST of SIGNATUREs to USE, a LIST
- 'altsign_list': [ u'%(username)s' ],
- # LIST of PAGEs to IGNORE, a LIST
- # Everything matching '.*/Archiv.*' could safely be excluded - nothing happens there anyway
- # and the bot does not have to wrestle with such large pages. -- Merlissimo 14:03, 31. Jan. 2009 (CET)
- # (as long as nothing is changed on those pages, they do not show up at all...)
- 'ignorepage_list': [ u'(.*?)/Archiv', ], # + more
- # LIST of SIGNATUREs to USE, a LIST
- 'backlinks_list': [ u'%(userdiscpage)s',
- u'%(userpage)s', ],
- # (hidden)
- #userResultPage: default is NOT DEFINED - this is a SPECIAL PARAM it is not
- # thought to be used explicit, it is defined by the page link (implicit).
- # (not officially published yet: LIST of HEADs to IGNORE, a LIST)
- 'ignorehead_list': [ u'(.*?) \(erl.\)', ],
- # (not published yet: LIST of HEADs to PROCESS ONLY, a LIST)
- 'matchhead_list': [],
+ # bot parameter/options (modifiable by user)
+ 'param_default': {
+ 'checkedit_count': 500, # CHECK recent EDITs, a COUNT
+## 'checkedit_count': 1000,
+ 'reportchanged_switch': True, # REPORT CHANGED discussions, a SWITCH
+ 'getbacklinks_switch': False, # GET BACKLINKS additionally, a SWITCH
+ 'reportwarn_switch': True, # (not published yet)
+ 'globwikinotify_switch': False, # GET OTHER WIKIS NOTIFICATIONS additionally, a SWITCH
+ 'reportclosed_switch': True, # (not published yet)
+ 'cleanup_count': -1, # DELETE/CLEAN-UP all older entries, a COUNT
+ 'mainsignneeded_switch': True, # (not published yet; DRTRIGON-99)
+ # LIST of talks/discussions to SEARCH, a LIST
+ 'checkedit_list': [
+ '^(.*?Diskussion:.*)',
+ u'^(Wikipedia:Löschkandidaten/.*)',
+ u'^(Wikipedia:Qualitätssicherung/.*)',
+ u'^(Wikipedia:Löschprüfung)',
+ '^(Wikipedia:Fragen zur Wikipedia)',
+ '^(Portal:.*)',
+ '^(Wikipedia:WikiProjekt.*)',
+ '^(Wikipedia:Redaktion.*)',
+ '^(Wikipedia:Auskunft)',
+ u'^(Wikipedia:Café)',
+ u'^(Wikipedia:Verbesserungsvorschläge)', # may cause problems because of level 1 headings
+ '^(Wikipedia:Tellerrand)',
+ '^(Wikipedia:Urheberrechtsfragen)',
+ '^(Wikipedia:Vandalismusmeldung)',
+ u'^(Wikipedia:Administratoren/Anfragen)',
+ u'^(Wikipedia:Administratoren/Notizen)',
+ #u'^(Wikipedia:Administratoren/.*)',
+ u'^(Wikipedia:Diskussionen über Bilder)',
+ u'^(Wikipedia:Fotowerkstatt)', # [F46]
+ u'^(Wikipedia:Bilderwünsche)',
+ u'^(Wikipedia:Grafikwerkstatt)',
+ u'^(Wikipedia:Grafikwerkstatt/Grafikwünsche)',
+ u'^(Wikipedia:Kartenwerkstatt)',
+ u'^(Wikipedia:Kartenwerkstatt/Kartenwünsche)',
+ u'^(Wikipedia:Bots/.*)', # DRTRIGON-11
+ u'^(Wikipedia:Redundanz/.*)', # (requested by Flominator)
+ ],
+ # LIST of SIGNATUREs REGEX to USE, a LIST (DRTRIGON-89)
+ # possibly even [\]\|/#] instead of just [\]\|/] ...?! then the signature may also contain links to subsections
+ # unfortunately this also yields false positives... but pages that once drew attention
+ # (were on the RecentChanges list) and contain links into the user namespace are interesting as well!!
+ # (and rather rare, about 1 per user out of ca. 100 in the history)
+ 'checksign_list': [u'--\s?\[\[%(ns)s:%(usersig)s[\]\|/]'],
+ # LIST of SIGNATUREs to USE, a LIST
+ 'altsign_list': [u'%(username)s'],
+ # LIST of PAGEs to IGNORE, a LIST
+ # Everything matching '.*/Archiv.*' could safely be excluded - nothing happens there anyway
+ # and the bot does not have to wrestle with such large pages. -- Merlissimo 14:03, 31. Jan. 2009 (CET)
+ # (as long as nothing is changed on those pages, they do not show up at all...)
+ 'ignorepage_list': [u'(.*?)/Archiv'], # + more
+ # LIST of SIGNATUREs to USE, a LIST
+ 'backlinks_list': [u'%(userdiscpage)s', u'%(userpage)s'],
+ # (hidden)
+ #userResultPage: default is NOT DEFINED - this is a SPECIAL PARAM it is not
+ # thought to be used explicit, it is defined by the page link (implicit).
+ # (not officially published yet: LIST of HEADs to IGNORE, a LIST)
+ 'ignorehead_list': [u'(.*?) \(erl.\)'],
+ # (not published yet: LIST of HEADs to PROCESS ONLY, a LIST)
+ 'matchhead_list': [],
- # (hidden)
- 'notify_msg': {
- _PS_changed: 'thirdparty-drtrigonbot-sum_disc-notify-changed',
- _PS_new: 'thirdparty-drtrigonbot-sum_disc-notify-new',
- _PS_closed: 'thirdparty-drtrigonbot-sum_disc-notify-closed',
- _PS_maintmsg: 'thirdparty-drtrigonbot-sum_disc-notify-maintmsg',
- _PS_notify: 'thirdparty-drtrigonbot-sum_disc-notify-notify',
- },
- # (hidden)
- 'parse_msg': {
- u'*': 'thirdparty-drtrigonbot-sum_disc-parse',
- _PS_closed: 'thirdparty-drtrigonbot-sum_disc-parse-closed',
- _PS_notify: 'thirdparty-drtrigonbot-sum_disc-parse-notify',
- _PS_warning: 'thirdparty-drtrigonbot-sum_disc-parse-warning',
- u'start': 'thirdparty-drtrigonbot-sum_disc-parse-start',
- u'end': 'thirdparty-drtrigonbot-sum_disc-parse-end', # possibly '~~~ um ~~~~~'
- u'nonhuman': 'thirdparty-drtrigonbot-sum_disc-parse-nonhuman',
- },
- },
+ # (hidden)
+ 'notify_msg': {
+ _PS_changed: 'thirdparty-drtrigonbot-sum_disc-notify-changed',
+ _PS_new: 'thirdparty-drtrigonbot-sum_disc-notify-new',
+ _PS_closed: 'thirdparty-drtrigonbot-sum_disc-notify-closed',
+ _PS_maintmsg: 'thirdparty-drtrigonbot-sum_disc-notify-maintmsg',
+ _PS_notify: 'thirdparty-drtrigonbot-sum_disc-notify-notify',
+ },
+ # (hidden)
+ 'parse_msg': {
+ u'*': 'thirdparty-drtrigonbot-sum_disc-parse',
+ _PS_closed: 'thirdparty-drtrigonbot-sum_disc-parse-closed',
+ _PS_notify: 'thirdparty-drtrigonbot-sum_disc-parse-notify',
+ _PS_warning: 'thirdparty-drtrigonbot-sum_disc-parse-warning',
+ u'start': 'thirdparty-drtrigonbot-sum_disc-parse-start',
+ u'end': 'thirdparty-drtrigonbot-sum_disc-parse-end', # possibly '~~~ um ~~~~~'
+ u'nonhuman': 'thirdparty-drtrigonbot-sum_disc-parse-nonhuman',
+ },
+ },
}
# debug switches
@@ -230,8 +242,8 @@
}
docuReplacements = {
-# '¶ms;': pagegenerators.parameterHelp
'¶ms;': u''
+ #'¶ms;': pagegenerators.parameterHelp
}
@@ -244,12 +256,9 @@
#http://de.wikipedia.org/w/index.php?limit=50&title=Spezial:Beiträge&contribs=user&target=DrTrigon&namespace=3&year=&month=-1
#http://de.wikipedia.org/wiki/Spezial:Beiträge/DrTrigon
- rollback = 0
-
- _param_default = bot_config['param_default'] # same ref, no copy
-
- _global_warn = []
-
+ rollback = 0
+ _param_default = bot_config['param_default'] # same ref, no copy
+ _global_warn = []
_REGEX_subster_tag = u'<!--SUBSTER-%(var)s-->'
def __init__(self):
@@ -268,10 +277,11 @@
self._bot_config = bot_config
# convert e.g. namespaces to correct language
self._bot_config['TemplateName'] = pywikibot.Page(self.site, self._bot_config['TemplateName']).title()
- self._template_regex = re.compile('\{\{' + self._bot_config['TemplateName'] + '(.*?)\}\}', re.S)
+ self._template_regex = re.compile(
+ '\{\{' + self._bot_config['TemplateName'] + '(.*?)\}\}', re.S)
-
- lang = locale.locale_alias.get(self.site.lang, locale.locale_alias['en']).split('.')[0]
+ lang = locale.locale_alias.get(self.site.lang,
+ locale.locale_alias['en']).split('.')[0]
# use e.g. 'de_DE.UTF-8', 'de_DE.utf8' (thus no decode('latin-1') anymore!)
locale.setlocale(locale.LC_TIME, lang + '.UTF-8')
@@ -292,16 +302,17 @@
# _userPage = _userPage.getRedirectTarget()
# self.loadMode(_userPage)
# if self._mode:
- # bot_user_ignorepage.append( self._tmpl_data )
+ # bot_user_ignorepage.append(self._tmpl_data)
# elif (u'userResultPage' in user.param):
- # bot_user_ignorepage.append( u'Benutzer:%s' % user.param['userResultPage'] )
+ # bot_user_ignorepage.append(u'Benutzer:%s' % user.param['userResultPage'])
#if 'ignorepage_list' in bot_config['regex_compile']:
# bot_user_ignorepage = [re.compile(item) for item in bot_user_ignorepage]
#self._param_default['ignorepage_list'] += bot_user_ignorepage
pywikibot.output(u'\03{lightred}** Receiving Job Queue (Maintenance Messages)\03{default}')
page = pywikibot.Page(self.site, bot_config['maintenance_queue'])
- self.maintenance_msg = self.loadJobQueue(page, bot_config['queue_security'],
+ self.maintenance_msg = self.loadJobQueue(page,
+ bot_config['queue_security'],
reset=(not pywikibot.simulate))
self._wday = time.gmtime().tm_wday
@@ -316,22 +327,26 @@
def run(self):
'''Run SumDiscBot().'''
- pywikibot.output(u'\03{lightgreen}* Processing User List (wishes):\03{default}')
+ pywikibot.output(
+ u'\03{lightgreen}* Processing User List (wishes):\03{default}')
- for user in self._user_list: # may be try with PreloadingGenerator?!
+ for user in self._user_list: # may be try with PreloadingGenerator?!
if _debug_switch['user'](user):
- pywikibot.warning(u'\03{lightyellow}=== ! USER WILL BE SKIPPED ! ===\03{default}')
+ pywikibot.warning(u'\03{lightyellow}=== ! '
+ u'USER WILL BE SKIPPED ! ===\03{default}')
continue
# set user and init params
self.setUser(user)
self.pages = SumDiscPages(self.site, self._param)
- pywikibot.output('\03{lightred}** Processing User: %s\03{default}' % self._user)
- #pywikibot.output(u'\03{lightred}** Processing User: %s\03{default}' % self._user.name())
+ pywikibot.output('\03{lightred}** Processing User: %s\03{default}'
+ % self._user)
# get operating mode
- self.loadMode(self._userPage, regex_compile=('ignorepage_list' in bot_config['regex_compile']))
+ self.loadMode(self._userPage,
+ regex_compile=('ignorepage_list' in
+ bot_config['regex_compile']))
# get history entries
self.loadHistory(rollback=self.rollback)
@@ -349,7 +364,7 @@
# SIGNED (have a Signature at the moment)
# UserBacklinksPageGenerator
- # get global wiki notifications (toolserver/merl)
+ # get global wiki notifications (toolserver/merl)
if self._param['globwikinotify_switch']:
self.AddGlobWikiNotify()
@@ -377,9 +392,9 @@
# warnings/exceptions are printed to log, can be read by panel.py from there!
# (separate and explicit warning handling not used anymore)
#for warning in self._global_warn: # output all warnings to log (what about a special wiki page?)
- # pywikibot.warning( "%s: %s" % warning )
+ # pywikibot.warning("%s: %s" % warning)
- def compressHistory(self, users = []):
+ def compressHistory(self, users=[]):
"""Read history, and re-write new history without any duplicates.
@param users: List of users supported by bot (and thus in history).
@@ -388,13 +403,15 @@
Load, truncate and re-write history in files.
"""
- if not users: users = [ item for item in self._user_list ]
+ if not users:
+ users = [item for item in self._user_list]
pywikibot.output(u'* Compressing of histories:')
if bot_config['backup_hist']:
timestmp = pywikibot.Timestamp.now().strftime(pywikibot.Timestamp.mediawikiTSFormat)[:-6]
- pathname = pywikibot.config.datafilepath(self._bot_config['data_path'], timestmp, '') # according to 'setUser'
+ pathname = pywikibot.config.datafilepath(
+ self._bot_config['data_path'], timestmp, '') # according to 'setUser'
import shutil
for user in users:
@@ -402,7 +419,7 @@
try:
begin = float(os.path.getsize(self._datfilename))
- except OSError: # OSError: [Errno 2] No such file or directory
+ except OSError: # OSError: [Errno 2] No such file or directory
continue
# backup old history
@@ -417,10 +434,10 @@
# write new history
os.remove(self._datfilename)
self.putHistory(self.pages.hist)
-
end = float(os.path.getsize(self._datfilename))
-
- pywikibot.output(u'\03{lightred}** History of %s compressed and written. (%s %%)\03{default}' % (user.name(), (end/begin)*100))
+ pywikibot.output(u'\03{lightred}** History of %s compressed and '
+ u'written. (%s %%)\03{default}'
+ % (user.name(), (end / begin) * 100))
## @since 10332
# @remarks common interface to bot user settings on wiki
@@ -432,38 +449,43 @@
Returns a list with entries: (user, param)
This list may be empty.
+
"""
#users = {}
final_users = []
- #for item in self._REGEX_eol.split(page.get()):
for item in self._REGEX_eol.split(self.load(page)):
item = re.split(u',', item, maxsplit=1)
- if (len(item) > 1): # for compatibility with 'subster.py' (if needed)
- #item[1] = re.compile((self._REGEX_subster_tag%{'var':'.*?','cont':'.*?'}), re.S | re.I).sub(u'', item[1])
- item[1] = re.compile((self._REGEX_subster_tag%{u'var':u'.*?'}), re.S | re.I).sub(u'', item[1])
- try: param = eval(item[1])
- except: param = {}
+ if (len(item) > 1): # for compatibility with 'subster.py' (if needed)
+ item[1] = re.compile(
+ (self._REGEX_subster_tag
+ % {u'var': u'.*?'}), re.S | re.I).sub(u'', item[1])
+ try:
+ param = eval(item[1])
+ except:
+ param = {}
item = item[0]
try:
- if not (item[0] == u'*'): continue
- except: continue
+ if not (item[0] == u'*'):
+ continue
+ except:
+ continue
item = item[1:]
item = re.sub(u'[\[\]]', u'', item)
- item = re.split(u':', item, maxsplit=1)[-1] # remove 'Benutzer(in)?:', 'User:', ...
- subitem = re.split(u'\/', item) # recognize extended user entries with ".../..."
- if len(subitem) > 1: # "
- param[u'userResultPage'] = item # save extended user info (without duplicates)
+ item = re.split(u':', item, maxsplit=1)[-1] # remove 'Benutzer(in)?:', 'User:', ...
+ subitem = re.split(u'\/', item) # recognize extended user entries with ".../..."
+ if len(subitem) > 1:
+ param[u'userResultPage'] = item # save extended user info (without duplicates)
item = subitem[0]
- #users[item] = param # drop duplicates directly
+ #users[item] = param # drop duplicates directly
user = userlib.User(self.site, item)
user.param = param
- final_users.append( user )
+ final_users.append(user)
return final_users
def setUser(self, user):
- '''
+ """
set all internal user info
input: user [tuple] (see _getUsers(...) for format)
@@ -475,13 +497,14 @@
keys are: parameters/options [string] (see 'sum_disc_conf.py')
format: (differs from param to param)
returns: self._datfilename [string]
- '''
+
+ """
# modified due: http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot/ToDo-Liste (id 30, 28, 17)
# defaults settings
# thanks to http://mail.python.org/pipermail/python-list/2005-September/339147.html
# and http://docs.python.org/library/copy.html
- self._user = user
+ self._user = user
self._userPage = self._user.getUserTalkPage()
userdiscpage = self._userPage.title()
#self._param = dict(self._param_default)
@@ -492,20 +515,25 @@
self._param.update(user.param)
# re-add defaults to lists in self._param else they are overwritten
for key in bot_config['default_keep']:
- if key in user.param: self._param[key] += copy.deepcopy(self._param_default[key])
- self._param['ignorepage_list'].append( self._userPage.title() ) # ALWAYS exclude the user's discussion page
- if 'userResultPage' in self._param: # user with extended info (extra page to use)
- self._userPage = pywikibot.Page(self.site, u'Benutzer:%s' % self._param['userResultPage'])
- self._param['ignorepage_list'].append( self._userPage.title() )
- self._datfilename = pywikibot.config.datafilepath(self._bot_config['data_path'], 'sum_disc-%s-%s-%s.dat' % (self.site.family.name, self.site.lang, self._user.name()))
+ if key in user.param:
+ self._param[key] += copy.deepcopy(self._param_default[key])
+ self._param['ignorepage_list'].append(self._userPage.title()) # ALWAYS exclude the user's discussion page
+ if 'userResultPage' in self._param: # user with extended info (extra page to use)
+ self._userPage = pywikibot.Page(self.site, u'Benutzer:%s'
+ % self._param['userResultPage'])
+ self._param['ignorepage_list'].append(self._userPage.title())
+ self._datfilename = pywikibot.config.datafilepath(
+ self._bot_config['data_path'], 'sum_disc-%s-%s-%s.dat'
+ % (self.site.family.name, self.site.lang, self._user.name()))
# substitute variables for use in user defined parameters/options
- param_vars = { 'username': self._user.name(),
- 'userpage': self._user.getUserPage().title(),
- 'userdiscpage': userdiscpage,
- }
+ param_vars = {'username': self._user.name(),
+ 'userpage': self._user.getUserPage().title(),
+ 'userdiscpage': userdiscpage,
+ }
for item in bot_config['vars_subst']:
- self._param[item] = [ subitem % param_vars for subitem in self._param[item] ]
+ self._param[item] = [subitem % param_vars
+ for subitem in self._param[item]]
# pre-compile regex
# (probably try to pre-compile 'self._param_default' once in __init__ and reuse the unchanged ones here)
@@ -520,27 +548,29 @@
#{})
# generate all possible signatures and drop duplicates
- ns_list = self.site.family.namespace(self.site.lang, 2, all=True)
+ ns_list = self.site.family.namespace(self.site.lang, 2, all=True)
ns_list += self.site.family.namespace(self.site.lang, 3, all=True)
signs = set()
#for user in self._param['altsign_list']:
for check in self._param['checksign_list']:
for ns in ns_list:
- signs.add( check % {'ns':ns, 'usersig':'%(usersig)s'} )
+ signs.add(check % {'ns': ns, 'usersig': '%(usersig)s'})
self._param['checksign_list'] = list(signs)
## @since 10332
# @remarks needed by sum_disc
def loadMode(self, page, regex_compile=False):
- """Get operating mode from user's disc page by searching for the template.
+ """Get operating mode from user's disc page by searching for the
+ template.
- @param page: The user (page) for which the data should be retrieved.
- @param regex_compile: If True the value added to the ignore_list will
- be compiled first.
+ @param page: The user (page) for which the data should be retrieved.
+ @param regex_compile: If True the value added to the ignore_list will
+ be compiled first.
- Sets self._mode and self._tmpl_data which represent the settings how
- to report news to the user. Sets self._content also which is the touched
- page content to notify the user. The self._param is modified too.
+ Sets self._mode and self._tmpl_data which represent the settings how to
+ report news to the user. Sets self._content also which is the touched
+ page content to notify the user. The self._param is modified too.
+
"""
templates = self.loadTemplates(page, self._bot_config['TemplateName'])
@@ -557,24 +587,28 @@
if regex_compile:
self._tmpl_data = re.compile(self._tmpl_data)
#if hasattr(self, '_param'): # [JIRA: DRTRIGON-8, DRTRIGON-32]
- self._param['ignorepage_list'].append( self._tmpl_data )
+ self._param['ignorepage_list'].append(self._tmpl_data)
# update template and content
tmpl[u'timestamp'] = u'--~~~~'
- tmpl_text = pywikibot.glue_template_and_params( (self._bot_config['TemplateName'], tmpl) )
- tmpl_pos = self._template_regex.search(self._content)
- self._content = self._content[:tmpl_pos.start()] + tmpl_text + self._content[tmpl_pos.end():]
+ tmpl_text = pywikibot.glue_template_and_params(
+ (self._bot_config['TemplateName'], tmpl))
+ tmpl_pos = self._template_regex.search(self._content)
+ self._content = (self._content[:tmpl_pos.start()] + tmpl_text +
+ self._content[tmpl_pos.end():])
## @todo the error correctors 'old history' and 'notify tag error' can be removed if
# they do not appear in bot logs anymore!
# \n[ JIRA: e.g. DRTRIGON-68 ]
- def loadHistory(self, rollback = 0):
+ def loadHistory(self, rollback=0):
"""Read history, and restore the page objects with sum_disc_data.
- @param rollback: Number of history entries to go back (re-use older history).
- @type rollback: int
+ @param rollback: Number of history entries to go back (re-use older
+ history).
+ @type rollback: int
- Returns nothing, but feeds to self.pages class instance.
+ Returns nothing, but feeds to self.pages class instance.
+
"""
buf = self._loadFile()
@@ -588,10 +622,11 @@
rollback_buf = []
hist = {}
for item in buf:
- if len(item.strip())==0: continue
+ if len(item.strip()) == 0:
+ continue
news_item = eval(item)
- #news.update( news_item )
+ #news.update(news_item)
# news.update BUT APPEND the heading data in the last tuple arg
for key in news_item.keys():
# old history format
@@ -600,27 +635,30 @@
news_item[key] += (_PS_unchanged,)
usage['old history'] = True
# notify tag error
- # (origin could be 'old history' but is not clear, but can be removed in the future, eg. after comp.)
- if (news_item[key][0] == u'Notification') and (news_item[key][5] <> _PS_notify):
+ # (origin could be 'old history' but is not clear, but can be
+ # removed in the future, eg. after comp.)
+ if (news_item[key][0] == u'Notification') and (
+ news_item[key][5] != _PS_notify):
news_item[key] = news_item[key][:-1] + (_PS_notify,)
usage['notify tag error'] = True
- if key in news: # APPEND the heading data in the last tuple arg
+ if key in news: # APPEND the heading data in the last tuple arg
if news_item[key][5] in [_PS_closed]:
del news[key]
else:
heads = news[key][4]
- heads.update( news_item[key][4] )
+ heads.update(news_item[key][4])
#news[key] = (news_item[key][0], news_item[key][1], news_item[key][2], news_item[key][3], heads, news_item[key][5])
- news[key] = news_item[key][:4] + (heads, news_item[key][5])
+ news[key] = news_item[key][:4] + (heads,
+ news_item[key][5])
else:
news[key] = news_item[key]
- rollback_buf.append( copy.deepcopy(news) )
+ rollback_buf.append(copy.deepcopy(news))
if rollback_buf:
rollback_buf.reverse()
- i = min([rollback, (len(rollback_buf)-1)])
+ i = min([rollback, (len(rollback_buf) - 1)])
hist = rollback_buf[i]
del rollback_buf
usage['rollback'] = i
@@ -628,12 +666,14 @@
# feed data to pages
self.pages.hist = hist
- pywikibot.output(u'\03{lightpurple}*** History received %s\03{default}' % str(usage))
+ pywikibot.output(
+ u'\03{lightpurple}*** History received %s\03{default}' % str(usage))
def putHistory(self, data_dict):
"""Write history.
- Returns nothing but the history file gets filled with archived entries.
+ Returns nothing but the history file gets filled with archived entries.
+
"""
# extract important data from page list
@@ -642,7 +682,7 @@
buf[key] = data_dict[key].sum_disc_data
# write new history
- self._appendFile( str(buf) )
+ self._appendFile(str(buf))
pywikibot.output(u'\03{lightpurple}*** History updated\03{default}')
@@ -656,12 +696,14 @@
# write unicode text, thanks to http://www.amk.ca/python/howto/unicode
try:
- datfile = codecs.open(self._datfilename, encoding=config.textfile_encoding, mode='r')
+ datfile = codecs.open(self._datfilename,
+ encoding=config.textfile_encoding, mode='r')
#datfile = open(self._datfilename, mode='rb')
buf = datfile.read()
datfile.close()
return buf
- except: return u''
+ except:
+ return u''
def _appendFile(self, data):
'''
@@ -674,7 +716,8 @@
# could store the history dict with pickle (http://www.thomas-guettler.de/vortraege/python/einfuehrung.html#link_12.2)
# but use the wiki format instead! (stays human readable and can be redirected to a wiki page if needed)
- datfile = codecs.open(self._datfilename, encoding=config.textfile_encoding, mode='a+')
+ datfile = codecs.open(self._datfilename,
+ encoding=config.textfile_encoding, mode='a+')
#datfile = codecs.open(self._datfilename, encoding='zlib', mode='a+b')
datfile.write(u'\n\n' + data)
#datfile.write(data)
@@ -691,7 +734,8 @@
# thanks to http://www.amk.ca/python/howto/regex/ and http://bytes.com/forum/thread24382.html
#usersumList = [p.title() for p in pagegenerators.UserContributionsGenerator(self._user.name(), number = count)]
- usersumList = [p[0].title() for p in self._user.contributions(limit = count)]
+ usersumList = [p[0].title() for p in
+ self._user.contributions(limit=count)]
work = {}
for item in usersumList:
@@ -704,9 +748,10 @@
page = pywikibot.Page(self.site, name)
page.sum_disc_data = ()
work[name] = page
- break # should only match one of the possibilities, anyway just add it once!
+ break # should only match one of the possibilities, anyway just add it once!
- pywikibot.output(u'\03{lightpurple}*** Latest %i Contributions checked\03{default}' % len(usersumList))
+ pywikibot.output(u'\03{lightpurple}*** Latest %i Contributions '
+ u'checked\03{default}' % len(usersumList))
# feed data to pages
self.pages.update_work(work)
@@ -723,17 +768,18 @@
userbacklinksList = []
for item in self._param['backlinks_list']:
- page = pywikibot.Page(self.site, item) # important for the generator to use the API
+ page = pywikibot.Page(self.site, item) # important for the generator to use the API
#userbacklinksList += [p.title() for p in pagegenerators.ReferringPageGenerator(page, withTemplateInclusion=False)]
- userbacklinksList += [p.title() for p in page.getReferences(withTemplateInclusion=False)]
- userbacklinksList = list(set(userbacklinksList)) # drop duplicates
+ userbacklinksList += [p.title() for p in
+ page.getReferences(
+ withTemplateInclusion=False)]
+ userbacklinksList = list(set(userbacklinksList)) # drop duplicates
work = {}
count = 0
# (some return >> 500 backlinks, thus check
# only once a week ALL/2000 those, else limit)
- #if (self._wday == 0):
- if (self._wday == 6): # So
+ if (self._wday == 6): # So
#max_count = len(userbacklinksList)
max_count = 2000
else:
@@ -754,9 +800,11 @@
page = pywikibot.Page(self.site, name)
page.sum_disc_data = ()
work[name] = page
- break # should only match one of the possibilities, anyway just add it once!
+ break # should only match one of the possibilities, anyway just add it once!
- pywikibot.output(u'\03{lightpurple}*** %i Backlinks to user checked (limited to %i)\03{default}' % (len(userbacklinksList), max_count))
+ pywikibot.output(u'\03{lightpurple}*** %i Backlinks to user checked '
+ u'(limited to %i)\03{default}'
+ % (len(userbacklinksList), max_count))
# feed data to pages
self.pages.update_work(work)
@@ -769,11 +817,12 @@
# crashes sometimes, u'Benutzer Diskussion:MerlBot' for example
# \n[ JIRA: ticket? ]
def getLatestRelevantNews(self):
- """Check latest contributions on recent news and check relevancy of page by
- splitting it into sections and searching each for specific users signature,
- this is all done by PageSections class.
+ """Check latest contributions on recent news and check relevancy of
+ page by splitting it into sections and searching each for specific
+ users signature, this is all done by PageSections class.
- Returns nothing, but feeds to self.pages class instance.
+ Returns nothing, but feeds to self.pages class instance.
+
"""
# check for news to report
@@ -781,22 +830,23 @@
work = self.pages.work
self.pages.start_promotion()
gen1 = pagegenerators.PagesFromTitlesGenerator(work.keys())
- gen2 = pagegenerators.RegexFilterPageGenerator(gen1,
- self._param['ignorepage_list'],
- inverse = True,
- ignore_namespace = False)
+ gen2 = pagegenerators.RegexFilterPageGenerator(
+ gen1,
+ self._param['ignorepage_list'],
+ inverse=True,
+ ignore_namespace=False)
# Preloads _contents and _versionhistory / [ JIRA: ticket? ]
- # WithoutInterwikiPageGenerator,
+ # WithoutInterwikiPageGenerator,
#gen3 = pagegenerators.PreloadingGenerator(gen2)
- gen3 = tools.ThreadedGenerator(target=pagegenerators.PreloadingGenerator,
- args=(gen2,),
- qsize=60)
+ gen3 = tools.ThreadedGenerator(
+ target=pagegenerators.PreloadingGenerator, args=(gen2, ), qsize=60)
self._th_gen = gen3
#gen4 = pagegenerators.RedirectFilterPageGenerator(gen3)
# lets hope that no generator loses pages... (since sometimes this may happen)
for page in gen3:
if _debug_switch['page'](page):
- pywikibot.warning(u'\03{lightyellow}=== ! PAGE WILL BE SKIPPED ! ===\03{default}')
+ pywikibot.warning(u'\03{lightyellow}=== ! PAGE WILL BE SKIPPED '
+ u'! ===\03{default}')
continue
name = page.title()
@@ -813,10 +863,14 @@
if hasattr(page, u'_getexception'):
raise page._getexception
except pywikibot.NoPage:
- pywikibot.output(u'Problem accessing not available (deleted) page at %s, skipping...' % page.title(asLink=True))
+ pywikibot.output(u'Problem accessing not available (deleted) '
+ u'page at %s, skipping...'
+ % page.title(asLink=True))
continue
except pywikibot.IsRedirectPage:
- pywikibot.output(u'Problem using redirect page at %s, skipping...' % page.title(asLink=True))
+ pywikibot.output(
+ u'Problem using redirect page at %s, skipping...'
+ % page.title(asLink=True))
continue
# actual/new status of page, has something changed?
@@ -824,29 +878,35 @@
if name in hist:
if (not (hist[name].sum_disc_data[3] == actual[0][1])):
# discussion has changed, some news?
- self.pages.edit_news(page, sum_disc_data=(self._param['notify_msg'][_PS_changed],
- None, # obsolete (and recursive)
- actual[0][2],
- actual[0][1],
- hist[name].sum_disc_data[4],
- _PS_changed ) )
+ self.pages.edit_news(page,
+ sum_disc_data=(
+ self._param['notify_msg'][_PS_changed],
+ None, # obsolete (and recursive)
+ actual[0][2],
+ actual[0][1],
+ hist[name].sum_disc_data[4],
+ _PS_changed))
news = True
else:
# nothing new to report (but keep for history and update it)
- self.pages.edit_oth(page, sum_disc_data=(hist[name].sum_disc_data[0],
- None, # obsolete (and recursive)
- actual[0][2],
- actual[0][1],
- hist[name].sum_disc_data[4],
- _PS_unchanged ) )
+ self.pages.edit_oth(page,
+ sum_disc_data=(
+ hist[name].sum_disc_data[0],
+ None, # obsolete (and recursive)
+ actual[0][2],
+ actual[0][1],
+ hist[name].sum_disc_data[4],
+ _PS_unchanged))
else:
# new discussion, some news?
- self.pages.edit_news(page, sum_disc_data=(self._param['notify_msg'][_PS_new],
- None, # obsolete (and recursive)
- actual[0][2],
- actual[0][1],
- {},
- _PS_new ) )
+ self.pages.edit_news(page,
+ sum_disc_data=(
+ self._param['notify_msg'][_PS_new],
+ None, # obsolete (and recursive)
+ actual[0][2],
+ actual[0][1],
+ {},
+ _PS_new))
news = True
# checkRelevancy: Check relevancy of page by splitting it into sections and searching
@@ -854,7 +914,7 @@
if not news:
continue
- self.pages.promote_page() # hist -> news
+ self.pages.promote_page() # hist -> news
try:
entries = PageSections(page, self._param, self._user)
@@ -874,26 +934,31 @@
except pywikibot.IsRedirectPage:
self.pages.promote_page_irrel(page, False)
- pywikibot.output(u'Problem using redirect page at %s, tagging irrelevant...' % page.title(asLink=True))
+ pywikibot.output(u'Problem using redirect page at %s, tagging '
+ u'irrelevant...' % page.title(asLink=True))
gen3.stop()
del self._th_gen
self.pages.end_promotion()
- pywikibot.output(u'\03{lightpurple}*** Latest News searched and relevancy of threads checked\03{default}')
+ pywikibot.output(u'\03{lightpurple}*** Latest News searched and '
+ u'relevancy of threads checked\03{default}')
def AddGlobWikiNotify(self):
- """Check if there are any global wiki notifications and add them to every users news.
+ """Check if there are any global wiki notifications and add them to
+ every users news.
- Returns nothing, but feeds to self.pages class instance.
+ Returns nothing, but feeds to self.pages class instance.
+
"""
hist = self.pages.hist
- # get global wiki notifications (toolserver/merl)
+ # get global wiki notifications (toolserver/merl)
if pywikibot.simulate:
- pywikibot.warning(u'\03{lightyellow}=== ! TOOLSERVER ACCESS WILL BE SKIPPED ! ===\03{default}')
+ pywikibot.warning(u'\03{lightyellow}=== ! TOOLSERVER ACCESS WILL '
+ u'BE SKIPPED ! ===\03{default}')
globalnotify = []
else:
globalnotify = self._globalnotifications()
@@ -908,55 +973,64 @@
for (page, data) in globalnotify:
count += 1
- # skip to local disc page, since this is the only page the user should watch itself
+ # skip to local disc page, since this is the only page the user
+ # should watch itself
if (page.site().language() == localinterwiki) and \
(page.site().family.name == u'wikipedia'):
pywikibot.warning(
- u'skipping global wiki notify to local wiki %s' %
- page.title(asLink=True) )
+ u'skipping global wiki notify to local wiki %s'
+ % page.title(asLink=True))
continue
# actual/new status of page, has something changed?
- if (data[u'link'] in hist.keys()) and \
- (data[u'timestamp'] == hist[data[u'link']].sum_disc_data[3]):
+ if (data[u'link'] in hist.keys()) and (
+ data[u'timestamp'] ==
+ hist[data[u'link']].sum_disc_data[3]):
continue
#data = page.globalwikinotify
- self.pages.edit_oth(page, sum_disc_data=(self._param['notify_msg'][_PS_notify],
- None, # obsolete (and recursive)
- data['user'],
- data['timestamp'],
- {u'':('',True,u'')},
- _PS_notify ),
- title=data[u'link'])
+ self.pages.edit_oth(page,
+ sum_disc_data=(
+ self._param['notify_msg'][_PS_notify],
+ None, # obsolete (and recursive)
+ data['user'],
+ data['timestamp'],
+ {u'': ('', True, u'')},
+ _PS_notify),
+ title=data[u'link'])
#self.pages.edit_hist(self._news_list[page.title()])
except pywikibot.MaxTriesExceededError:
- pywikibot.output(u'Problem MaxTriesExceededError occurred, thus skipping global wiki notify!')
- self._skip_globwikinotify = True # skip for all following users to speed-up (~30min)
+ pywikibot.output(u'Problem MaxTriesExceededError occurred, thus '
+ u'skipping global wiki notify!')
+ self._skip_globwikinotify = True # skip for all following users to speed-up (~30min)
except pywikibot.urllib2.HTTPError:
- pywikibot.output(u'Problem HTTPError occurred, thus skipping global wiki notify!')
+ pywikibot.output(u'Problem HTTPError occurred, thus skipping '
+ u'global wiki notify!')
if globalnotify:
- pywikibot.output(u'\03{lightpurple}*** %i Global wiki notifications checked\03{default}' % count)
+ pywikibot.output(u'\03{lightpurple}*** %i Global wiki '
+ u'notifications checked\03{default}' % count)
## @since 10332
# @remarks due to http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot/ToDo-Liste (id 38)
def _globalnotifications(self):
- """Provides a list of results using the toolserver Merlissimo API (can also
- be used for a Generator analog to UserContributionsGenerator).
+ """Provides a list of results using the toolserver Merlissimo API (can
+ also be used for a Generator analog to UserContributionsGenerator).
- Returns a tuple containing the page-object and an extradata dict.
+ Returns a tuple containing the page-object and an extradata dict.
+
"""
- request = 'http://toolserver.org/~merl/UserPages/query.php?user=%s&format=xml' %\
- urllib.quote(self._user.name().encode(self.site().encoding()))
+ request = 'http://toolserver.org/~merl/UserPages/query.php?user=%s&format=xml' \
+ % urllib.quote(self._user.name().encode(self.site().encoding()))
pywikibot.get_throttle()
- pywikibot.output(u"Reading global wiki notifications from toolserver (via 'API')...")
+ pywikibot.output(
+ u"Reading global wiki notifications from toolserver (via 'API')...")
- buf = self.site().getUrl( request, no_hostname = True )
+ buf = self.site().getUrl(request, no_hostname=True)
- tree = XML( buf.encode(self.site().encoding()) )
+ tree = XML(buf.encode(self.site().encoding()))
#import xml.etree.cElementTree
#print xml.etree.cElementTree.dump(tree)
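The toolserver query above is a plain HTTP GET whose only variable part is the URL-encoded user name. A short sketch of building such a request URL; the user name and the UTF-8 encoding here are illustrative assumptions:

    import urllib

    user = u'Example User'
    request = ('http://toolserver.org/~merl/UserPages/query.php'
               '?user=%s&format=xml' % urllib.quote(user.encode('utf-8')))
    # -> ...query.php?user=Example%20User&format=xml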
@@ -975,7 +1049,8 @@
continue
# process timestamp
- data[u'timestamp'] = str(pywikibot.Timestamp.fromtimestampformat(data[u'timestamp']))
+ data[u'timestamp'] = str(pywikibot.Timestamp.fromtimestampformat(
+ data[u'timestamp']))
# convert link to valid interwiki link
data[u'link'] = self._dblink2wikilink(self.site(), data[u'link'])
@@ -986,8 +1061,8 @@
page.globalwikinotify = data
yield (page, data)
except pywikibot.NoPage, e:
- pywikibot.output(u'%s' %e)
-
+ pywikibot.output(u'%s' % e)
+
## @since 10332
# @remarks needed by various bots
def _dblink2wikilink(self, site, dblink):
@@ -995,11 +1070,13 @@
You can use DB links like used on the toolserver and convert
them to valid interwiki links.
+
"""
link = dblink
for family in site.fam().get_known_families(site).values():
- title = link.replace(u'%s:' % family.decode('unicode_escape'), u':') # e.g. 'dewiki:...' --> 'de:...'
+ title = link.replace(u'%s:' % family.decode('unicode_escape'),
+ u':') # e.g. 'dewiki:...' --> 'de:...'
if not (title == link):
dblink = u'%s:%s' % (family, title)
# [ 'wiki' in framework/interwiki is not the same as in TS DB / JIRA: DRTRIGON-60 ]
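_dblink2wikilink() converts toolserver-style database names such as 'dewiki' into interwiki prefixes. A minimal sketch of the idea; the real code derives the known families from the framework, so the suffix handling below is only a simplification for illustration:

    def dblink2wikilink(dblink):
        # e.g. 'dewiki:Foo' -> 'de:Foo'
        db, sep, title = dblink.partition(u':')
        if sep and db.endswith(u'wiki'):
            return u'%s:%s' % (db[:-len(u'wiki')], title)
        return dblink

    assert dblink2wikilink(u'dewiki:Benutzer Diskussion:Example') == \
        u'de:Benutzer Diskussion:Example'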
@@ -1008,86 +1085,100 @@
return dblink
def AddMaintenanceMsg(self):
- """Check if there are any bot maintenance messages and add them to every users news.
+ """Check if there are any bot maintenance messages and add them to
+        every user's news.
- Returns nothing, but feeds to self.pages class instance.
+        Returns nothing, but feeds the self.pages class instance.
+
"""
- if (self.maintenance_msg == []): return
+ if self.maintenance_msg == []:
+ return
for item in self.maintenance_msg:
- page = pywikibot.Page(self.site, bot_config['maintenance_page'] % "")
+ page = pywikibot.Page(self.site, bot_config['maintenance_page']
+ % "")
tmst = time.strftime(pywikibot.Timestamp.ISO8601Format)
- page.sum_disc_data = ( self._param['notify_msg'][_PS_maintmsg],
- None,
- u'DrTrigon',
- tmst,
- { item:('',True,item) },
- _PS_maintmsg )
+ page.sum_disc_data = (self._param['notify_msg'][_PS_maintmsg],
+ None, u'DrTrigon', tmst,
+ {item: ('', True, item)},
+ _PS_maintmsg)
self.pages.edit_news(page)
self.pages.edit_hist(page)
- pywikibot.output(u'\03{lightpurple}*** Bot maintenance messages added\03{default}')
+ pywikibot.output(
+ u'\03{lightpurple}*** Bot maintenance messages added\03{default}')
def postDiscSum(self):
- """Post discussion summary of specific user to discussion page and write to histroy
- (history currently implemented as local file, but wiki page could also be used).
+ """Post discussion summary of specific user to discussion page and
+        write to history (history currently implemented as local file, but wiki
+ page could also be used).
- Returns nothing but dumps self.pages class instance to the history file and writes changes
- to the wiki page.
+ Returns nothing but dumps self.pages class instance to the history file
+ and writes changes to the wiki page.
+
"""
(buf, count) = self.pages.parse_news(self._param)
if (count > 0):
- pywikibot.output(u'===='*15 + u'\n' + buf + u'\n' + u'===='*15)
- pywikibot.output(u'[%i entries]' % count )
+ pywikibot.output(u'====' * 15 + u'\n' + buf + u'\n' + u'====' * 15)
+ pywikibot.output(u'[%i entries]' % count)
- head = i18n.twtranslate(self.site,
- 'thirdparty-drtrigonbot-sum_disc-summary-head') \
- + u' '
- add = i18n.twtranslate(self.site,
- 'thirdparty-drtrigonbot-sum_disc-summary-add')
- mod = i18n.twtranslate(self.site,
- 'thirdparty-drtrigonbot-sum_disc-summary-mod')
- clean = i18n.twtranslate(self.site,
- 'thirdparty-drtrigonbot-sum_disc-summary-clean')
+ head = i18n.twtranslate(
+ self.site,
+ 'thirdparty-drtrigonbot-sum_disc-summary-head') + u' '
+ add = i18n.twtranslate(
+ self.site, 'thirdparty-drtrigonbot-sum_disc-summary-add')
+ mod = i18n.twtranslate(
+ self.site, 'thirdparty-drtrigonbot-sum_disc-summary-mod')
+ clean = i18n.twtranslate(
+ self.site, 'thirdparty-drtrigonbot-sum_disc-summary-clean')
if not self._mode:
# default: write direct to user disc page
- comment = head + add % {'num':count}
+ comment = head + add % {'num': count}
#self.append(self._userPage, buf, comment=comment, minorEdit=False, force=True)
(page, text, minEd) = (self._userPage, buf, False)
else:
# enhanced (with template): update user disc page and write to user specified page
tmplsite = pywikibot.Page(self.site, self._tmpl_data)
- comment = head + mod % {'num':count, 'page':tmplsite.title(asLink=True)}
- self.save(self._userPage, self._content, comment=comment, minorEdit=False, force=True)
- comment = head + add % {'num':count}
+ comment = head + mod % {'num': count,
+ 'page': tmplsite.title(asLink=True)}
+ self.save(self._userPage, self._content, comment=comment,
+ minorEdit=False, force=True)
+ comment = head + add % {'num': count}
#self.append(tmplsite, buf, comment=comment, force=True)
- (page, text, minEd) = (tmplsite, buf, True) # 'True' is default
+ (page, text, minEd) = (tmplsite, buf, True) # 'True' is default
if (self._param['cleanup_count'] < 0):
# default mode, w/o cleanup
try:
- self.append(page, text, comment=comment, minorEdit=minEd, force=True)
+ self.append(page, text, comment=comment, minorEdit=minEd,
+ force=True)
except pywikibot.MaxTriesExceededError:
- pywikibot.warning(
- u'Problem MaxTriesExceededError occurred, thus skipping this user!')
+ pywikibot.warning(u'Problem MaxTriesExceededError '
+ u'occurred, thus skipping this user!')
return # skip history write
else:
# append with cleanup
- text = self.cleanupDiscSum( self.load(page) or u'',
- days=self._param['cleanup_count'] ) + u'\n\n' + text
- comment = head + clean % {'num':count}
- self.save(page, text, comment=comment, minorEdit=minEd, force=True)
+ text = self.cleanupDiscSum(
+ self.load(page) or u'',
+ days=self._param['cleanup_count']) + u'\n\n' + text
+ comment = head + clean % {'num': count}
+ self.save(page, text, comment=comment, minorEdit=minEd,
+ force=True)
purge = self._userPage.purgeCache()
- pywikibot.output(u'\03{lightpurple}*** Discussion updates added to: %s (purge: %s)\03{default}' % (self._userPage.title(asLink=True), purge))
+ pywikibot.output(u'\03{lightpurple}*** Discussion updates added '
+ u'to: %s (purge: %s)\03{default}'
+ % (self._userPage.title(asLink=True), purge))
if not pywikibot.simulate:
self.putHistory(self.pages.hist)
else:
- pywikibot.warning(u'\03{lightyellow}=== ! NOTHING WRITTEN TO HISTORY ! ===\03{default}')
+ pywikibot.warning(u'\03{lightyellow}=== ! NOTHING WRITTEN TO '
+ u'HISTORY ! ===\03{default}')
else:
- pywikibot.output(u'\03{lightpurple}*** Discussion up to date: NOTHING TO DO\03{default}')
+ pywikibot.output(u'\03{lightpurple}*** Discussion up to date: '
+ u'NOTHING TO DO\03{default}')
# JIRA: DRTRIGON-23
def cleanupDiscSum(self, text, days=7):
@@ -1099,13 +1190,12 @@
# drop entries older than 'days'
today = datetime.datetime.now()
- diff = 0
- buf = []
+ diff = 0
+ buf = []
for line in text.splitlines():
try:
- #date = time.strptime(u'abc', u'; %d. %B %Y')
- date = time.strptime(line.encode('utf-8'), str(self._param['parse_msg'][u'start']))
- #date = time.strptime(str(line), str(self._param['parse_msg'][u'start']))
+ date = time.strptime(line.encode('utf-8'),
+ str(self._param['parse_msg'][u'start']))
date = datetime.datetime.fromtimestamp(time.mktime(date))
diff = (today - date).days
except ValueError:
@@ -1113,11 +1203,11 @@
if (diff <= days):
buf.append(line)
buf = string.join(buf, u'\n')
-
- # remove bot signature and other 'footer'
- buf = re.sub(self._param['parse_msg'][u'end'] % {'sign':u'(.*?)'}, u'', buf)
- buf = buf.strip()
+ # remove bot signature and other 'footer'
+ buf = re.sub(self._param['parse_msg'][u'end']
+ % {'sign': u'(.*?)'}, u'', buf)
+ buf = buf.strip()
return buf
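The cleanup above keeps an entry only while the most recent date heading seen is at most 'days' old, and then strips the bot footer. A minimal standalone sketch of that cutoff logic; the format string mirrors the commented-out example in the hunk above, and the behaviour of the real 'parse_msg' values is an assumption here:

    import datetime
    import time

    def drop_old_entries(lines, days=7, date_format='; %d. %B %Y'):
        # A line that parses as a date heading updates the running age;
        # every line is kept only while that age is within 'days'.
        today = datetime.datetime.now()
        diff = 0
        kept = []
        for line in lines:
            try:
                parsed = time.strptime(line, date_format)
                date = datetime.datetime.fromtimestamp(time.mktime(parsed))
                diff = (today - date).days
            except ValueError:
                pass                # not a date heading, keep current age
            if diff <= days:
                kept.append(line)
        return kept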
@@ -1127,13 +1217,11 @@
"""
def __init__(self, site, param):
- self._hist_list = {} # archived pages from history
- self._work_list = {} # pages to check for news
- self._news_list = {} # news to check for relevancy and report afterwards
- self._oth_list = {} # ...?
-
+ self._hist_list = {} # archived pages from history
+ self._work_list = {} # pages to check for news
+ self._news_list = {} # news to check for relevancy and report afterwards
+ self._oth_list = {}
self.site = site
-
self.param = param
def set_hist(self, hist):
@@ -1146,7 +1234,7 @@
try:
page = pywikibot.Page(self.site, name)
except pywikibot.NoPage, e:
- pywikibot.output(u'%s' %e)
+ pywikibot.output(u'%s' % e)
del hist[name]
continue
@@ -1192,7 +1280,7 @@
# add news page to news page list
self._news_list[newspage.title()] = newspage
- self.newspage = newspage # for promote_page
+ self.newspage = newspage # for promote_page
def edit_oth(self, othpage, sum_disc_data=None, title=None):
# add sum_disc_data if present
@@ -1206,10 +1294,10 @@
def exists(self, page):
fulldict = {}
- fulldict.update( self._hist_list )
- fulldict.update( self._work_list )
- fulldict.update( self._news_list )
- fulldict.update( self._oth_list )
+ fulldict.update(self._hist_list)
+ fulldict.update(self._work_list)
+ fulldict.update(self._news_list)
+ fulldict.update(self._oth_list)
return (page.title() in fulldict.keys())
def start_promotion(self):
@@ -1243,7 +1331,7 @@
def promote_page_irrel(self, page, signed):
# page is not relevant, thus don't list discussion
- title = page.title()
+ title = page.title()
sum_disc_data = page.sum_disc_data
del self._news_list[title]
@@ -1252,28 +1340,34 @@
# discussion closed (no signature on page anymore)
if (not signed) and (sum_disc_data[5] == _PS_changed):
- page.sum_disc_data = ( self.param['notify_msg'][_PS_closed],
- None,
- sum_disc_data[2],
- sum_disc_data[3],
- {},
- _PS_closed )
+ page.sum_disc_data = (self.param['notify_msg'][_PS_closed],
+ None,
+ sum_disc_data[2],
+ sum_disc_data[3],
+ {},
+ _PS_closed)
self.edit_news(page)
#del self._hist_list[title]
self.edit_hist(page)
def parse_news(self, param):
- """Filter and parse all the info and rewrite in in wiki-syntax, to be put on page.
+        """Filter and parse all the info and rewrite it in wiki-syntax, to be
+ put on page.
- Returns a tuple (result wiki text, message count).
+ Returns a tuple (result wiki text, message count).
+
"""
- switch = param['reportchanged_switch']
+ switch = param['reportchanged_switch']
switch2 = param['reportclosed_switch']
- if not switch: ps_types = ( [_PS_new, _PS_maintmsg], )
- else: ps_types = ( [_PS_new, _PS_changed, _PS_maintmsg], )
- if not switch2: ps_types += ( [], )
- else: ps_types += ( [_PS_closed], )
+ if not switch:
+ ps_types = ([_PS_new, _PS_maintmsg], )
+ else:
+ ps_types = ([_PS_new, _PS_changed, _PS_maintmsg], )
+ if not switch2:
+ ps_types += ([], )
+ else:
+ ps_types += ([_PS_closed], )
buf = []
for name in self._news_list.keys():
@@ -1283,66 +1377,80 @@
if data[5] in ps_types[0]:
# new and changed
report = []
- for anchor in data[4].keys(): # iter over sections/checksum
+ for anchor in data[4].keys(): # iter over sections/checksum
(checksum_cur, rel, line) = data[4][anchor]
# is this section/heading relevant?
- if not rel: continue
+ if not rel:
+ continue
# were we able to divide the page into subsections?
- if not anchor: continue
+ if not anchor:
+ continue
# append relevant sections
- report.append( u'[[%s#%s|%s]]' % (page.title(), anchor, line) )
+ report.append(u'[[%s#%s|%s]]'
+ % (page.title(), anchor, line))
# default: if no subsections on page
item = page.title(asLink=True)
- hist = u'http://%s.%s.org%s?title=%s&action=history' % (self.site.language(), self.site.family.name, self.site.path(), page.urlname())
+ hist = u'http://%s.%s.org%s?title=%s&action=history' % (
+ self.site.language(), self.site.family.name,
+ self.site.path(), page.urlname())
if report:
# subsections on page
- item = u'%s → %s' % (page.title(asLink=True), string.join(report, u', '))
+ item = u'%s → %s' \
+ % (page.title(asLink=True),
+ string.join(report, u', '))
- data = { 'notify': data[0],
- 'page_sections': item,
- 'history_link': hist,
- 'page': page.title(), # backward compatibility (can be removed depending on TW/i18n)
- 'page_size': u'{{subst:PAGESIZE:%s}}' % page.title(),
- 'user': self._getLastEditor(page, data[2]),
- 'time': self._getTime(data[3]) }
+ data = {
+ 'notify': data[0],
+ 'page_sections': item,
+ 'history_link': hist,
+ 'page': page.title(), # backward compatibility (can be removed depending on TW/i18n)
+ 'page_size': u'{{subst:PAGESIZE:%s}}' % page.title(),
+ 'user': self._getLastEditor(page, data[2]),
+ 'time': self._getTime(data[3])
+ }
data = self.param['parse_msg'][u'*'] % data
elif data[5] in ps_types[1]:
# closed
- data = { 'notify': data[0],
- 'page': page.title(asLink=True),
- 'user': self._getLastEditor(page, data[2]),
- 'time': self._getTime(data[3]) }
+ data = {
+ 'notify': data[0],
+ 'page': page.title(asLink=True),
+ 'user': self._getLastEditor(page, data[2]),
+ 'time': self._getTime(data[3])
+ }
data = self.param['parse_msg'][_PS_closed] % data
#elif data[5] in [_PS_warning]:
# # warnings
# data = { 'page': page.title(asLink=True),
# 'warning': data[0] }
# data = self.param['parse_msg'][_PS_warning] % data
- # self._global_warn.append( (self._user.name(), data) )
+ # self._global_warn.append((self._user.name(), data))
# if not param['reportwarn_switch']: continue
elif data[5] in [_PS_notify]:
# global wiki notifications
- data = { 'notify': data[0],
- 'page_link': page.globalwikinotify['url'],
- 'page': page.title(),
- 'user': data[2],
- 'time': self._getTime(data[3]) }
+ data = {
+ 'notify': data[0],
+ 'page_link': page.globalwikinotify['url'],
+ 'page': page.title(),
+ 'user': data[2],
+ 'time': self._getTime(data[3])
+ }
data = self.param['parse_msg'][_PS_notify] % data
else:
- continue # skip append
- buf.append( data )
+ continue # skip append
+ buf.append(data)
count = len(buf)
if (count > 0):
- data = [ time.strftime( self.param['parse_msg'][u'start'].encode('utf-8'),
- time.gmtime() ).decode('utf-8') ]
+ data = [time.strftime(
+ self.param['parse_msg'][u'start'].encode('utf-8'),
+ time.gmtime()).decode('utf-8')]
data += buf
- buf = string.join(data, u'\n')
- buf += self.param['parse_msg'][u'end'] % {'sign':u'~~~~'}
+ buf = string.join(data, u'\n')
+ buf += self.param['parse_msg'][u'end'] % {'sign': u'~~~~'}
else:
buf = u''
@@ -1350,14 +1458,15 @@
def _getLastEditor(self, page, lastuser):
"""Search the last 500 edits/revisions for the most recent human editor
- and returns that one. (the non-human/bot).
+        and return that one (rather than the non-human/bot editor).
- @param page: Page to check.
- @param lastuser: User made the most recent edit to page.
- @type lastuser: string
+ @param page: Page to check.
+        @param lastuser: User who made the most recent edit to the page.
+ @type lastuser: string
- Returns a link with the most recent and most recent human editors of
- page.
+        Returns a link naming the most recent human editor and the most
+           recent editor of the page.
+
"""
humaneditor = page.userNameHuman()
@@ -1368,7 +1477,8 @@
return u'[[User:%s]]/[[User:%s]]' % (humaneditor, lastuser)
else:
# no human editor found; use last editor
- return (u'[[User:%s]] ' % lastuser) + self.param['parse_msg'][u'nonhuman']
+ return (u'[[User:%s]] '
+ % lastuser) + self.param['parse_msg'][u'nonhuman']
## @since r276 (MOVED from dtbext.date.getTime)
# @remarks need to convert wiki timestamp format to python
@@ -1379,87 +1489,91 @@
# http://www.w3.org/TR/NOTE-datetime
# http://pytz.sourceforge.net/
# use only UTC for internal timestamps
- # could also be used as given by the API, but is converted here for compatibility
+ # could also be used as given by the API, but is converted here for
+ # compatibility
timestamp = pywikibot.Timestamp.fromISOformat(timestamp)
if localized:
- # is localized to the actual date/time settings, cannot localize timestamps that are
- # half of a year in the past or future!
- timestamp = pywikibot.Timestamp.fromtimestamp( calendar.timegm(timestamp.timetuple()) )
- return timestamp.strftime(u'%H:%M, %d. %b. %Y'.encode('utf-8')).decode('utf-8')
+ # is localized to the actual date/time settings, cannot localize
+ # timestamps that are half of a year in the past or future!
+ timestamp = pywikibot.Timestamp.fromtimestamp(
+ calendar.timegm(timestamp.timetuple()))
+ return timestamp.strftime(
+ u'%H:%M, %d. %b. %Y'.encode('utf-8')).decode('utf-8')
class PageSections(object):
- """An object representing all sections on a page.
-
- """
+ """ An object representing all sections on a page. """
def __init__(self, page, param, user):
- """Retrieves the page content and splits it to headings and bodies ('check relevancy
- of page by searching specific users signature').
+        """Retrieves the page content and splits it into headings and bodies
+ ('check relevancy of page by searching specific users signature').
- @param page: Page to process.
- @param param: Additional parameters to use for processing.
- @type param: dict
- @param user: Actual user.
- @type user: user object
+ @param page: Page to process.
+ @param param: Additional parameters to use for processing.
+ @type param: dict
+ @param user: Actual user.
+ @type user: user object
- Returns a list of tuples containing the sections with info and wiki text.
+ Returns a list of tuples containing the sections with info and wiki
+ text.
+
"""
self._entries = []
self._full_resolve = True
-
- self._page = page
+ self._page = page
self._param = param
- self._user = user
+ self._user = user
# code debugging
pywikibot.debug(page.title())
# get content and sections (content was preloaded earlier)
- #buf = page.get(force=True)
buf = page.get()
try:
sections = page.getSections(minLevel=1)
except pywikibot.Error:
            # sections could not be resolved, process the whole page at once
- #sections = page.getSections(minLevel=1, force=True) # slow for pages with > 100 sections
- sections = []
- pywikibot.warning(
- u'Problem resolving section data, processing the whole page at once...')
+## # slow for pages with > 100 sections
+## sections = page.getSections(minLevel=1, force=True)
+ sections = []
+ pywikibot.warning(u'Problem resolving section data, processing '
+ u'the whole page at once...')
# drop from templates included headings (are None)
- sections = [ s for s in sections if s[0] ]
+ sections = [s for s in sections if s[0]]
# extract sections bodies
if not sections:
- self._entries = [ ((u'',u'',u''), buf) ]
+ self._entries = [((u'', u'', u''), buf)]
self._full_resolve = False
else:
# append 'EOF' to sections list
# (byteoffset, level, wikiline, line, anchor)
- sections.append( (len(buf) + 1, None, None, None, None) )
+ sections.append((len(buf) + 1, None, None, None, None))
for i, s in enumerate(sections[:-1]):
bo_start = s[0]
- bo_end = sections[i+1][0] - 1
-
- self._entries.append( (s[2:], buf[bo_start:bo_end]) )
+ bo_end = sections[i + 1][0] - 1
+ self._entries.append((s[2:], buf[bo_start:bo_end]))
def check_rel(self):
# iterate over all sections in page and check their relevancy
page = self._page
-
- page_rel = False
+ page_rel = False
page_signed = False
- try: checksum = page.sum_disc_data[4]
- except: checksum = None
+ try:
+ checksum = page.sum_disc_data[4]
+ except:
+ checksum = None
checksum_new = {}
- for i, (head, body) in enumerate(self._entries): # iterate over all headings/sub sections
- # wikiline is wiki text, line is parsed and anchor is the unique link label
+ # iterate over all headings/sub sections
+ for i, (head, body) in enumerate(self._entries):
+ # wikiline is wiki text, line is parsed and anchor is the unique
+ # link label
(wikiline, line, anchor) = head[:3]
# ignorelist for headings
@@ -1473,48 +1587,55 @@
if not check.search(wikiline):
skip = True
break
- if skip: continue
+ if skip:
+ continue
# check relevancy of section
- (rel, checksum_cur, checks) = self._check_sect_rel(body, checksum, anchor)
- if self._param['mainsignneeded_switch']: # DRTRIGON-99
+ (rel, checksum_cur, checks) = self._check_sect_rel(body, checksum,
+ anchor)
+ if self._param['mainsignneeded_switch']: # DRTRIGON-99
rel = rel and checks['mainsign']
# is page signed?
- page_signed = page_signed or checks['signed'] # signature check
+ page_signed = page_signed or checks['signed'] # signature check
# is page relevant?
- if not rel: continue
+ if not rel:
+ continue
# page IS relevant, update checksum
page_rel = True
checksum_new[anchor] = (checksum_cur, rel, line)
# update sum_disc_data in page (checksums, relevancies, ...)
- page.sum_disc_data = page.sum_disc_data[:4] + (checksum_new,) + page.sum_disc_data[5:]
+ page.sum_disc_data = (page.sum_disc_data[:4] + (checksum_new,) +
+ page.sum_disc_data[5:])
return (page, page_rel, page_signed)
def _check_sect_rel(self, data, checksum, anchor):
- """Checks the relevancy of single body data by performing different tests
- ('check relevancy of page by searching specific users signature').
+ """Checks the relevancy of single body data by performing different
+ tests ('check relevancy of page by searching specific users signature').
- @param data: Section wiki text to check.
- @type data: string
- @param checksum: Checksum given from history to compaire against.
- @type checksum: string
- @param anchor: Anchor of wiki text section heading given by mediawiki
- software.
- @type anchor: string
+ @param data: Section wiki text to check.
+ @type data: string
+        @param checksum: Checksum given from history to compare against.
+ @type checksum: string
+ @param anchor: Anchor of wiki text section heading given by mediawiki
+ software.
+ @type anchor: string
- Returns a tuple (True, checksum_cur, checks).
+ Returns a tuple (True, checksum_cur, checks).
+
"""
# per default assume relevancy
- checks = { 'changed': True,
- 'signed': True,
- 'mainsign': False,
- 'lasteditor': False, }
+ checks = {
+ 'changed': True,
+ 'signed': True,
+ 'mainsign': False,
+ 'lasteditor': False,
+ }
# check if thread has changed
checksum_cur = hashlib.md5(data.encode('utf-8').strip()).hexdigest()
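The relevancy check detects changed threads by fingerprinting the section body and comparing it with the checksum stored in the history. A short standalone sketch of that idea; the sample section texts are made up:

    import hashlib

    def section_checksum(body):
        # Same fingerprint as above: MD5 over the stripped UTF-8 body.
        return hashlib.md5(body.encode('utf-8').strip()).hexdigest()

    old = section_checksum(u'== Thread ==\nFirst comment. ~~~~')
    new = section_checksum(u'== Thread ==\nFirst comment. ~~~~\nA reply. ~~~~')
    changed = (old != new)          # True -> report the thread again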
@@ -1526,8 +1647,8 @@
# search for signature in section/thread
(signed, signs_pos, signs, main) = self._search_sign(data)
- checks['signed'] = signed # are signatures present
- checks['mainsign'] = main # is main signature present
+ checks['signed'] = signed # are signatures present
+ checks['mainsign'] = main # is main signature present
if not checks['signed']:
return (False, checksum_cur, checks)
@@ -1538,10 +1659,12 @@
# (small bug fix: DRTRIGON-82)
data = data[signs_pos[-1]:].strip()
(sign, data) = _REGEX_eol.split(data + u'\n', maxsplit=1)
- checks['lasteditor'] = not (len(data.strip()) > 0) # just check for add. text (more paranoid)
+ # just check for add. text (more paranoid)
+ checks['lasteditor'] = not (len(data.strip()) > 0)
else:
# JIRA: DRTRIGON-83
- checks['lasteditor'] = (self._page.sum_disc_data[2] == self._user.username)
+ checks['lasteditor'] = (self._page.sum_disc_data[2] ==
+ self._user.username)
if checks['lasteditor']:
return (False, checksum_cur, checks)
@@ -1550,30 +1673,28 @@
def _search_sign(self, text):
"""Check if there are (any or) a specific user signature resp. link to
- user page in text.
+ user page in text.
- @param text: Text content to search for signatures.
- @type text: string
+ @param text: Text content to search for signatures.
+ @type text: string
- Returns a tuple containing a list with byteoffsets and a dict with
- the according match object.
+ Returns a tuple containing a list with byteoffsets and a dict with
+        the corresponding match objects.
+
"""
- sign_list = self._param['altsign_list']
+ sign_list = self._param['altsign_list']
check_list = self._param['checksign_list']
-
mainsign = sign_list[-1] # last sign in list is main sign
-
signs = {}
- main = False
+ main = False
for user in sign_list:
for check in check_list:
- for m in re.finditer(check % {'usersig':user}, text):
+ for m in re.finditer(check % {'usersig': user}, text):
signs[m.start()] = m
main = (mainsign == user) or main
signs_pos = signs.keys()
signs_pos.sort()
-
return ((len(signs_pos) > 0), signs_pos, signs, main)
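_search_sign() records the byte offset of every signature (user-page link) it finds, so the caller can look at the last signature in the section. A standalone sketch of the same approach; the regex and the user names below are simplified placeholders, not the configured 'checksign_list':

    import re

    def find_signatures(text, users):
        # Map byte offset -> match object for every user-page link found.
        signs = {}
        for user in users:
            pattern = r'\[\[(?:User|Benutzer):%s[|\]]' % re.escape(user)
            for m in re.finditer(pattern, text):
                signs[m.start()] = m
        positions = sorted(signs.keys())
        return positions, signs

    text = u'Question? [[User:Alice|A]] 12:00 ... [[User:DrTrigon|DT]] 13:00'
    positions, signs = find_signatures(text, [u'Alice', u'DrTrigon'])
    # positions[-1] is the offset of the last signature in the section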
@@ -1585,13 +1706,13 @@
bot = SumDiscBot()
if len(pywikibot.handleArgs()) > 0:
for arg in pywikibot.handleArgs():
- if arg[:2] == "u'": arg = eval(arg) # for 'runbotrun.py' and unicode compatibility
- if arg[:17] == "-compress_history":
- #if arg[:17] == "-compress_history":
- bot.compressHistory( eval(arg[18:]) )
+ if arg[:2] == "u'":
+ arg = eval(arg) # for 'runbotrun.py' and unicode compatibility
+ if arg[:17] == "-compress_history":
+ bot.compressHistory(eval(arg[18:]))
return
elif (arg[:17] == "-rollback_history"):
- bot.rollback = int( arg[18:] )
+ bot.rollback = int(arg[18:])
else:
pywikibot.showHelp()
return
@@ -1608,4 +1729,3 @@
main()
finally:
pywikibot.stopme()
-
--
To view, visit https://gerrit.wikimedia.org/r/98260
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I31d6c0b29bc099cd64254a0c31f5ea25929ac2b3
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Change SVN -> Git
......................................................................
Change SVN -> Git
Change-Id: I3ad2a194e1a442d7777a6c3113301a45f8f99a32
---
M generate_family_file.py
1 file changed, 1 insertion(+), 1 deletion(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/generate_family_file.py b/generate_family_file.py
index 270c26e..cfba6f9 100644
--- a/generate_family_file.py
+++ b/generate_family_file.py
@@ -163,7 +163,7 @@
url = %(url)s
name = %(name)s
-Please do not commit this to the SVN repository!
+Please do not commit this to the Git repository!
\"\"\"
import family
--
To view, visit https://gerrit.wikimedia.org/r/98122
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3ad2a194e1a442d7777a6c3113301a45f8f99a32
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Lewis Cawte <lewis(a)lewiscawte.me>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I203871505f12d2e248d460b3c9848ed297a119b4
---
M spellcheck.py
1 file changed, 141 insertions(+), 113 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/spellcheck.py b/spellcheck.py
index 5bb524c..d0f1ad2 100644
--- a/spellcheck.py
+++ b/spellcheck.py
@@ -51,15 +51,17 @@
"""
#
# (C) Andre Engels, 2005
-# (C) Pywikipedia bot team, 2006-2012
+# (C) Pywikibot team, 2006-2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
-import re, sys
-import string, codecs
+import re
+import sys
+import string
+import codecs
import wikipedia as pywikibot
from pywikibot import i18n
import pagegenerators
@@ -69,7 +71,8 @@
def __init__(self, text):
self.style = text
-def distance(a,b):
+
+def distance(a, b):
# Calculates the Levenshtein distance between a and b.
# That is, the number of edits needed to change one into
# the other, where one edit is the addition, removal or
@@ -78,23 +81,25 @@
n, m = len(a), len(b)
if n > m:
# Make sure n <= m, to use O(min(n,m)) space
- a,b = b,a
- n,m = m,n
- current = range(n+1)
- for i in range(1,m+1):
- previous, current = current, [i]+[0]*m
- for j in range(1,n+1):
- add, delete = previous[j]+1, current[j-1]+1
- change = previous[j-1]
- if a[j-1] != b[i-1]:
+ a, b = b, a
+ n, m = m, n
+ current = range(n + 1)
+ for i in range(1, m + 1):
+ previous, current = current, [i] + [0] * m
+ for j in range(1, n + 1):
+ add, delete = previous[j] + 1, current[j - 1] + 1
+ change = previous[j - 1]
+ if a[j - 1] != b[i - 1]:
change = change + 1
current[j] = min(add, delete, change)
return current[n]
+
def getalternatives(string):
# Find possible correct words for the incorrect word string
basetext = pywikibot.input(
- u"Give a text that should occur in the words to be checked.\nYou can choose to give no text, but this will make searching slow:")
+ u"Give a text that should occur in the words to be checked.\n"
+ u"You can choose to give no text, but this will make searching slow:")
basetext = basetext.lower()
simwords = {}
for i in xrange(11):
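For reference, a minimal standalone sketch of the edit-distance computation reindented above; the sample word pairs are illustrative only and not part of the patch:

    def levenshtein(a, b):
        # Number of single-character additions, removals or substitutions
        # needed to turn a into b (same dynamic programming as distance()).
        n, m = len(a), len(b)
        if n > m:
            a, b = b, a             # keep the shorter word in 'a'
            n, m = m, n
        current = list(range(n + 1))
        for i in range(1, m + 1):
            previous, current = current, [i] + [0] * m
            for j in range(1, n + 1):
                add, delete = previous[j] + 1, current[j - 1] + 1
                change = previous[j - 1]
                if a[j - 1] != b[i - 1]:
                    change += 1
                current[j] = min(add, delete, change)
        return current[n]

    assert levenshtein('kitten', 'sitting') == 3   # k->s, e->i, +g
    assert levenshtein('flaw', 'lawn') == 2        # -f, +n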
@@ -102,7 +107,7 @@
for alt in knownwords.keys():
dothis = not basetext or basetext in alt.lower()
if dothis:
- diff = distance(string,alt)
+ diff = distance(string, alt)
if diff < 11:
if knownwords[alt] == alt:
simwords[diff] += [alt]
@@ -114,31 +119,35 @@
posswords += simwords[i]
return posswords[:30]
+
def uncap(string):
# uncapitalize the first word of the string
if len(string) > 1:
- return string[0].lower()+string[1:]
+ return string[0].lower() + string[1:]
else:
return string.lower()
+
def cap(string):
    # capitalize the first word of the string
- return string[0].upper()+string[1:]
+ return string[0].upper() + string[1:]
+
def askAlternative(word, context=None, title=''):
correct = None
- pywikibot.output(u"="*60)
+ pywikibot.output(u"=" * 60)
pywikibot.output(u"Found unknown word '%s' in '%s'" % (word, title))
if context:
pywikibot.output(u"Context:")
- pywikibot.output(u""+context)
- pywikibot.output(u"-"*60)
+ pywikibot.output(u"" + context)
+ pywikibot.output(u"-" * 60)
while not correct:
for i in xrange(len(Word(word).getAlternatives())):
pywikibot.output(u"%s: Replace by '%s'"
- % (i+1,
- Word(word).getAlternatives()[i].replace('_',' ')))
- pywikibot.output(u"a: Add '%s' as correct"%word)
+ % (i + 1,
+ Word(word).getAlternatives()[i].replace('_',
+ ' ')))
+ pywikibot.output(u"a: Add '%s' as correct" % word)
if word[0].isupper():
pywikibot.output(u"c: Add '%s' as correct" % (uncap(word)))
pywikibot.output(u"i: Ignore once (default)")
@@ -149,7 +158,8 @@
pywikibot.output(u"*: Edit by hand")
pywikibot.output(u"x: Do not check the rest of this page")
answer = pywikibot.input(u":")
- if answer == "": answer = "i"
+ if answer == "":
+ answer = "i"
if answer in "aAiIpP":
correct = word
if answer in "aA":
@@ -166,9 +176,9 @@
correct != uncap(word) and \
correct != word:
try:
- knownwords[word] += [correct.replace(' ','_')]
+ knownwords[word] += [correct.replace(' ', '_')]
except KeyError:
- knownwords[word] = [correct.replace(' ','_')]
+ knownwords[word] = [correct.replace(' ', '_')]
newwords.append(word)
knownwords[correct] = correct
newwords.append(correct)
@@ -181,69 +191,71 @@
if possible:
print "Found alternatives:"
for pos in possible:
- pywikibot.output(" %s"%pos)
+ pywikibot.output(" %s" % pos)
else:
print "No similar words found."
- elif answer=="*":
+ elif answer == "*":
correct = edit
- elif answer=="x":
+ elif answer == "x":
correct = endpage
else:
for i in xrange(len(Word(word).getAlternatives())):
- if answer == str(i+1):
- correct = Word(word).getAlternatives()[i].replace('_',' ')
+ if answer == str(i + 1):
+ correct = Word(word).getAlternatives()[i].replace('_', ' ')
return correct
+
def removeHTML(page):
# TODO: Consider removing this; this stuff can be done by
# cosmetic_changes.py
result = page
- result = result.replace('Ä',u'Ä')
- result = result.replace('ä',u'ä')
- result = result.replace('Ë',u'Ë')
- result = result.replace('ë',u'ë')
- result = result.replace('Ï',u'Ï')
- result = result.replace('ï',u'ï')
- result = result.replace('Ö',u'Ö')
- result = result.replace('ö',u'ö')
- result = result.replace('Ü',u'Ü')
- result = result.replace('ü',u'ü')
- result = result.replace('Á',u'Á')
- result = result.replace('á',u'á')
- result = result.replace('É',u'É')
- result = result.replace('é',u'é')
- result = result.replace('Í',u'Í')
- result = result.replace('í',u'í')
- result = result.replace('Ó',u'Ó')
- result = result.replace('ó',u'ó')
- result = result.replace('Ú',u'Ú')
- result = result.replace('ú',u'ú')
- result = result.replace('À',u'À')
- result = result.replace('à',u'à')
- result = result.replace('È',u'È')
- result = result.replace('è',u'è')
- result = result.replace('Ì',u'Ì')
- result = result.replace('ì',u'ì')
- result = result.replace('Ò',u'Ò')
- result = result.replace('ò',u'ò')
- result = result.replace('Ù',u'Ù')
- result = result.replace('ù',u'ù')
- result = result.replace('Â',u'Â')
- result = result.replace('â',u'â')
- result = result.replace('Ê',u'Ê')
- result = result.replace('ê',u'ê')
- result = result.replace('Î',u'Î')
- result = result.replace('î',u'î')
- result = result.replace('Ô',u'Ô')
- result = result.replace('ô',u'ô')
- result = result.replace('Û',u'Û')
- result = result.replace('û',u'û')
- result = result.replace('Å',u'Å')
- result = result.replace('å',u'å')
- result = result.replace('°',u'°')
+ result = result.replace('Ä', u'Ä')
+ result = result.replace('ä', u'ä')
+ result = result.replace('Ë', u'Ë')
+ result = result.replace('ë', u'ë')
+ result = result.replace('Ï', u'Ï')
+ result = result.replace('ï', u'ï')
+ result = result.replace('Ö', u'Ö')
+ result = result.replace('ö', u'ö')
+ result = result.replace('Ü', u'Ü')
+ result = result.replace('ü', u'ü')
+ result = result.replace('Á', u'Á')
+ result = result.replace('á', u'á')
+ result = result.replace('É', u'É')
+ result = result.replace('é', u'é')
+ result = result.replace('Í', u'Í')
+ result = result.replace('í', u'í')
+ result = result.replace('Ó', u'Ó')
+ result = result.replace('ó', u'ó')
+ result = result.replace('Ú', u'Ú')
+ result = result.replace('ú', u'ú')
+ result = result.replace('À', u'À')
+ result = result.replace('à', u'à')
+ result = result.replace('È', u'È')
+ result = result.replace('è', u'è')
+ result = result.replace('Ì', u'Ì')
+ result = result.replace('ì', u'ì')
+ result = result.replace('Ò', u'Ò')
+ result = result.replace('ò', u'ò')
+ result = result.replace('Ù', u'Ù')
+ result = result.replace('ù', u'ù')
+ result = result.replace('Â', u'Â')
+ result = result.replace('â', u'â')
+ result = result.replace('Ê', u'Ê')
+ result = result.replace('ê', u'ê')
+ result = result.replace('Î', u'Î')
+ result = result.replace('î', u'î')
+ result = result.replace('Ô', u'Ô')
+ result = result.replace('ô', u'ô')
+ result = result.replace('Û', u'Û')
+ result = result.replace('û', u'û')
+ result = result.replace('Å', u'Å')
+ result = result.replace('å', u'å')
+ result = result.replace('°', u'°')
return result
-def spellcheck(page, checknames = True, knownonly = False, title=''):
+
+def spellcheck(page, checknames=True, knownonly=False, title=''):
pageskip = []
text = page
if correct_html_codes:
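The entity table in removeHTML() above can also be expressed with the standard-library entity map; a hedged sketch of that alternative (not part of the patch, and limited to named entities):

    import re
    from htmlentitydefs import name2codepoint

    def unescape_entities(text):
        # Replace named HTML entities such as &auml; by their character.
        def _sub(m):
            name = m.group(1)
            if name in name2codepoint:
                return unichr(name2codepoint[name])
            return m.group(0)       # leave unknown entities untouched
        return re.sub(r'&(\w+);', _sub, text)

    assert unescape_entities(u'K&ouml;ln &amp; Z&uuml;rich') == \
        u'K\xf6ln & Z\xfcrich'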
@@ -251,30 +263,33 @@
loc = 0
while True:
wordsearch = re.compile(r'([\s\=\<\>\_]*)([^\s\=\<\>\_]+)')
- match = wordsearch.search(text,loc)
+ match = wordsearch.search(text, loc)
if not match:
# No more words on this page
break
loc += len(match.group(1))
bigword = Word(match.group(2))
smallword = bigword.derive()
- if not Word(smallword).isCorrect(checkalternative = knownonly) and \
+ if not Word(smallword).isCorrect(checkalternative=knownonly) and \
(checknames or not smallword[0].isupper()):
replacement = askAlternative(smallword,
- context=text[max(0,loc-40):loc + len(match.group(2))+40],
+ context=text[
+ max(0, loc - 40):loc + len(
+ match.group(2)) + 40],
title=title)
if replacement == edit:
import editarticle
editor = editarticle.TextEditor()
# TODO: Don't know to which index to jump
- newtxt = editor.edit(text, jumpIndex = 0, highlight=smallword)
+ newtxt = editor.edit(text, jumpIndex=0, highlight=smallword)
if newtxt:
text = newtxt
elif replacement == endpage:
loc = len(text)
else:
replacement = bigword.replace(replacement)
- text = text[:loc] + replacement + text[loc+len(match.group(2)):]
+ text = text[:loc] + replacement + text[
+ loc + len(match.group(2)):]
loc += len(replacement)
if knownonly == 'plus' and text != page:
knownonly = False
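The spellcheck loop walks through the page with a single regex that separates leading delimiters from the next word, so 'loc' can advance past whatever was matched. A standalone sketch of that scanning pattern; the sample text is made up:

    import re

    wordsearch = re.compile(r'([\s\=\<\>\_]*)([^\s\=\<\>\_]+)')

    def iter_words(text):
        loc = 0
        while True:
            match = wordsearch.search(text, loc)
            if not match:
                break                   # no more words on this page
            loc += len(match.group(1))  # skip the delimiters
            yield loc, match.group(2)
            loc += len(match.group(2))  # continue after the word

    words = [w for _, w in iter_words(u'== Heading ==\nSome text.')]
    # words == [u'Heading', u'Some', u'text.']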
@@ -288,13 +303,14 @@
class Word(object):
- def __init__(self,text):
+
+ def __init__(self, text):
self.word = text
def __str__(self):
return self.word
- def __cmp__(self,other):
+ def __cmp__(self, other):
return self.word.__cmp__(str(other))
def derive(self):
@@ -309,51 +325,56 @@
# Remove barred links
if shortword.rfind('|') != -1:
if -1 < shortword.rfind('[[') < shortword.rfind('|'):
- shortword = shortword[:shortword.rfind('[[')] + shortword[shortword.rfind('|')+1:]
+ shortword = shortword[:shortword.rfind('[[')] + shortword[
+ shortword.rfind('|') + 1:]
else:
- shortword = shortword[shortword.rfind('|')+1:]
- shortword = shortword.replace('[','')
- shortword = shortword.replace(']','')
+ shortword = shortword[shortword.rfind('|') + 1:]
+ shortword = shortword.replace('[', '')
+ shortword = shortword.replace(']', '')
# Remove non-alphanumerical characters at the start
try:
while shortword[0] in string.punctuation:
- shortword=shortword[1:]
+ shortword = shortword[1:]
except IndexError:
return ""
# Remove non-alphanumerical characters at the end; no need for the
# try here because if things go wrong here, they should have gone
# wrong before
while shortword[-1] in string.punctuation:
- shortword=shortword[:-1]
+ shortword = shortword[:-1]
# Do not check URLs
if shortword.startswith("http://"):
- shortword=""
+ shortword = ""
# Do not check 'words' with only numerical characters
number = True
for i in xrange(len(shortword)):
- if not (shortword[i] in string.punctuation or shortword[i] in string.digits):
+ if not (shortword[i] in string.punctuation or
+ shortword[i] in string.digits):
number = False
if number:
shortword = ""
return shortword
- def replace(self,rep):
- # Replace the short form by 'rep'. Keeping simple for now - if the
- # short form is part of the long form, replace it. If it is not, ask
- # the user
+ def replace(self, rep):
+        """Replace the short form by 'rep'. Keeping it simple for now - if the
+        short form is part of the long form, replace it. If it is not, ask the
+        user.
+
+ """
if rep == self.derive():
return self.word
if self.derive() not in self.word:
return pywikibot.input(
u"Please give the result of replacing %s by %s in %s:"
% (self.derive(), rep, self.word))
- return self.word.replace(self.derive(),rep)
+ return self.word.replace(self.derive(), rep)
- def isCorrect(self,checkalternative = False):
- # If checkalternative is True, the word will only be found incorrect if
- # it is on the spelling list as a spelling error. Otherwise it will
- # be found incorrect if it is not on the list as a correctly spelled
- # word.
+ def isCorrect(self, checkalternative=False):
+ """If checkalternative is True, the word will only be found incorrect
+ if it is on the spelling list as a spelling error. Otherwise it will be
+ found incorrect if it is not on the list as a correctly spelled word.
+
+ """
if self.word == "":
return True
if self.word in pageskip:
@@ -366,7 +387,8 @@
except KeyError:
pass
if self.word != uncap(self.word):
- return Word(uncap(self.word)).isCorrect(checkalternative=checkalternative)
+ return Word(uncap(self.word)).isCorrect(
+ checkalternative=checkalternative)
else:
if checkalternative:
if checklang == 'nl' and self.word.endswith("'s"):
@@ -401,11 +423,12 @@
def declare_correct(self):
knownwords[self.word] = self.word
- def declare_alternative(self,alt):
+ def declare_alternative(self, alt):
if not alt in knownwords[self.word]:
knownwords[self.word].append(word)
newwords.append(self.word)
return self.alternatives
+
def checkPage(page, checknames=True, knownonly=False):
try:
@@ -413,10 +436,12 @@
except pywikibot.Error:
pass
else:
- text = spellcheck(text, checknames=checknames, knownonly=knownonly, title=page.title())
+ text = spellcheck(text, checknames=checknames, knownonly=knownonly,
+ title=page.title())
if text != page.get():
summary = i18n.twtranslate(page.site, 'spellcheck-checking')
page.put(text, summary)
+
try:
pageskip = []
@@ -460,10 +485,10 @@
if not checklang:
checklang = mysite.language()
filename = pywikibot.config.datafilepath('externals/spelling',
- 'spelling-' + checklang + '.txt')
+ 'spelling-' + checklang + '.txt')
print "Getting wordlist"
try:
- f = codecs.open(filename, 'r', encoding = mysite.encoding())
+ f = codecs.open(filename, 'r', encoding=mysite.encoding())
for line in f.readlines():
# remove trailing newlines and carriage returns
try:
@@ -495,10 +520,13 @@
raise
try:
if newpages:
- for (page, date, length, loggedIn, user, comment) in pywikibot.getSite().newpages(1000):
+ for (page, date, length, loggedIn, user,
+ comment) in pywikibot.getSite().newpages(1000):
checkPage(page, checknames, knownonly)
elif start:
- for page in pagegenerators.PreloadingGenerator(pagegenerators.AllpagesPageGenerator(start=start,includeredirects=False)):
+ for page in pagegenerators.PreloadingGenerator(
+ pagegenerators.AllpagesPageGenerator(start=start,
+ includeredirects=False)):
checkPage(page, checknames, knownonly)
if longpages:
@@ -509,7 +537,7 @@
title = ' '.join(title)
while title != '':
try:
- page = pywikibot.Page(mysite,title)
+ page = pywikibot.Page(mysite, title)
text = page.get()
except pywikibot.NoPage:
print "Page does not exist."
@@ -521,14 +549,14 @@
finally:
pywikibot.stopme()
filename = pywikibot.config.datafilepath('externals/spelling',
- 'spelling-' + checklang + '.txt')
+ 'spelling-' + checklang + '.txt')
if rebuild:
list = knownwords.keys()
list.sort()
- f = codecs.open(filename, 'w', encoding = mysite.encoding())
+ f = codecs.open(filename, 'w', encoding=mysite.encoding())
else:
list = newwords
- f = codecs.open(filename, 'a', encoding = mysite.encoding())
+ f = codecs.open(filename, 'a', encoding=mysite.encoding())
for word in list:
if Word(word).isCorrect():
if word != uncap(word):
@@ -536,7 +564,7 @@
# Capitalized form of a word that is in the list
# uncapitalized
continue
- f.write("1 %s\n"%word)
+ f.write("1 %s\n" % word)
else:
- f.write("0 %s %s\n"%(word," ".join(knownwords[word])))
+ f.write("0 %s %s\n" % (word, " ".join(knownwords[word])))
f.close()
--
To view, visit https://gerrit.wikimedia.org/r/98109
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I203871505f12d2e248d460b3c9848ed297a119b4
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I74c9669c35bb064a2ca65193429bf6c059256e9e
---
M titletranslate.py
1 file changed, 31 insertions(+), 21 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/titletranslate.py b/titletranslate.py
index 8e1a2d5..1fcb892 100644
--- a/titletranslate.py
+++ b/titletranslate.py
@@ -13,12 +13,15 @@
import wikipedia as pywikibot
import date
+
def _join_to_(result, join):
for x in join:
if x not in result:
result.append(x)
-def translate(page, hints=None, auto=True, removebrackets=False, site=None, family=None):
+
+def translate(page, hints=None, auto=True, removebrackets=False, site=None,
+ family=None):
"""
Goes through all entries in 'hints'. Returns a list of pages.
@@ -50,7 +53,7 @@
# be a page in language xy with the same title as the page
# we're currently working on ...
if page is None:
- continue
+ continue
ns = page.namespace()
if ns:
newname = u'%s:%s' % (family.namespace('_default', ns),
@@ -60,35 +63,40 @@
newname = page.title()
# ... unless we do want brackets
if removebrackets:
- newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname)
+ newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?",
+ re.UNICODE), u" ", newname)
codesplit = codes.split(',')
codes = []
for code in codesplit:
try:
number = int(code)
- _join_to_(codes, family.languages_by_size[:number] )
+ _join_to_(codes, family.languages_by_size[:number])
except ValueError:
if code == 'all':
- _join_to_(codes, family.languages_by_size )
+ _join_to_(codes, family.languages_by_size)
elif code in family.language_groups:
- _join_to_(codes, family.language_groups[code] )
+ _join_to_(codes, family.language_groups[code])
elif code:
- _join_to_(codes, [ code ] )
+ _join_to_(codes, [code])
for newcode in codes:
x = None
if newcode in family.langs.keys():
if page is None or \
(newcode != sitelang and
- pywikibot.getSite().family.name
- not in family.interwiki_forwarded_from):
- x = pywikibot.Page(pywikibot.getSite(fam=family, code=newcode), newname)
+ pywikibot.getSite().family.name
+ not in family.interwiki_forwarded_from):
+ x = pywikibot.Page(pywikibot.getSite(fam=family,
+ code=newcode),
+ newname)
elif newcode in family.interwiki_forwarded_from:
- x = pywikibot.Page(pywikibot.getSite(fam=newcode, code=newcode), newname)
+ x = pywikibot.Page(pywikibot.getSite(fam=newcode,
+ code=newcode), newname)
else:
if pywikibot.verbose:
- pywikibot.output(u"Ignoring the unknown language code %s" % newcode)
+ pywikibot.output(
+ u"Ignoring the unknown language code %s" % newcode)
if x:
- _join_to_(result, [ x ] )
+ _join_to_(result, [x])
# Autotranslate dates into all other languages, the rest will come from
# existing interwiki links.
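The hint handling reindented above expands each code in a hint: an integer means "the N largest wikis", 'all' means every language, a group name expands to its members, and anything else is taken as a literal language code. A hedged sketch of that expansion with made-up data; the real lists come from the family object:

    def expand_hint_codes(codes, languages_by_size, language_groups):
        result = []

        def _join_to_(result, join):
            for x in join:
                if x not in result:
                    result.append(x)

        for code in codes.split(','):
            try:
                number = int(code)
                _join_to_(result, languages_by_size[:number])
            except ValueError:
                if code == 'all':
                    _join_to_(result, languages_by_size)
                elif code in language_groups:
                    _join_to_(result, language_groups[code])
                elif code:
                    _join_to_(result, [code])
        return result

    langs = ['en', 'de', 'fr', 'nl', 'it']
    groups = {'scand': ['da', 'no', 'sv']}
    assert expand_hint_codes('2,scand,eo', langs, groups) == \
        ['en', 'de', 'da', 'no', 'sv', 'eo']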
@@ -111,23 +119,25 @@
entryLang in date.maxyearBC and \
value > date.maxyearBC[entryLang]:
pass
- elif dictName == 'yearsAD' and \
- entryLang in date.maxyearAD and \
- value > date.maxyearAD[entryLang]:
+ elif (dictName == 'yearsAD' and
+ entryLang in date.maxyearAD and
+ value > date.maxyearAD[entryLang]):
pass
else:
newname = entry(value)
x = pywikibot.Page(
pywikibot.getSite(code=entryLang,
fam=family), newname)
- _join_to_(result, [ x ] )
+ _join_to_(result, [x])
return result
bcDateErrors = [u'[[ko:%d년]]']
-def appendFormatedDates( result, dictName, value ):
+
+def appendFormatedDates(result, dictName, value):
for code, func in date.formats[dictName].iteritems():
- result.append( u'[[%s:%s]]' % (code,func(value)) )
+ result.append(u'[[%s:%s]]' % (code, func(value)))
+
def getPoisonedLinks(pl):
"""Returns a list of known corrupted links that should be removed if seen
@@ -137,11 +147,11 @@
pywikibot.output(u'getting poisoned links for %s' % pl.title())
dictName, value = date.getAutoFormat(pl.site.language(), pl.title())
if dictName is not None:
- pywikibot.output( u'date found in %s' % dictName )
+ pywikibot.output(u'date found in %s' % dictName)
# errors in year BC
if dictName in date.bcFormats:
for fmt in bcDateErrors:
- result.append( fmt % value )
+ result.append(fmt % value)
# i guess this is like friday the 13th for the years
if value == 398 and dictName == 'yearsBC':
appendFormatedDates(result, dictName, 399)
--
To view, visit https://gerrit.wikimedia.org/r/98104
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I74c9669c35bb064a2ca65193429bf6c059256e9e
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot