jenkins-bot has submitted this change and it was merged.
Change subject: Ignore obsolete sites from being processed
......................................................................
Ignore obsolete sites from being processed
Wikidata may contain links to obsolete sites, which raises an error.
Now we catch this exception and ignore that site.
Change-Id: Ie0e7afbc2b17e4c56a6688203af03a4e5aa993a1
---
M scripts/featured.py
1 file changed, 6 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/featured.py b/scripts/featured.py
index bffea39..2a24c43 100644
--- a/scripts/featured.py
+++ b/scripts/featured.py
@@ -312,6 +312,9 @@
except KeyboardInterrupt:
pywikibot.output('\nQuitting featured treat...')
break
+ except pywikibot.NoSuchSite:
+ pywikibot.output('"%s" is not a valid site. Skipping...' % code)
+ continue
self.writecache()
# not implemented yet
@@ -349,6 +352,9 @@
except KeyboardInterrupt:
pywikibot.output('\nQuitting featured treat...')
break
+ except pywikibot.NoSuchSite:
+ pywikibot.output('"%s" is not a valid site. Skipping...' % code)
+ continue
self.writecache()
def treat(self, code, process):
--
To view, visit https://gerrit.wikimedia.org/r/98152
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie0e7afbc2b17e4c56a6688203af03a4e5aa993a1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [BUGFIX] Link._site is a site object and is not callable
......................................................................
[BUGFIX] Link._site is a site object and is not callable
Change-Id: I50f2d1f3b151919b1a3c4dabaa75d5a11b64966e
---
M pywikibot/page.py
1 file changed, 3 insertions(+), 2 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 1db43d3..02ae13f 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -1656,7 +1656,8 @@
in self.getFileVersionHistory():
lines.append(u'| %s || %s || %s || %s || <nowiki>%s</nowiki>'
% (datetime, username, resolution, size, comment))
- return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}'
+ return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + \
+ u'\n|----\n'.join(lines) + '\n|}'
def usingPages(self, step=None, total=None, content=False):
"""Yield Pages on which the image is displayed.
@@ -3208,7 +3209,7 @@
"""\
%s is not a local page on %s, and the %s family is
not supported by PyWikiBot!"""
- % (self._text, self._site(), familyName))
+ % (self._text, self._site, familyName))
# Redundant interwiki prefix to the local wiki
if newsite == self._site:
--
To view, visit https://gerrit.wikimedia.org/r/98151
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I50f2d1f3b151919b1a3c4dabaa75d5a11b64966e
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I40d16980fc53104246eeaf7a1c5743172bfc57a2
---
M unlink.py
1 file changed, 33 insertions(+), 17 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/unlink.py b/unlink.py
index 8448479..cfce090 100644
--- a/unlink.py
+++ b/unlink.py
@@ -21,8 +21,13 @@
Removes links to the page [[Foo bar]] in articles and image descriptions.
"""
-
-__version__='$Id$'
+#
+# (C) Pywikibot team, 2007-2013
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
import re
import wikipedia as pywikibot
@@ -37,22 +42,32 @@
self.pageToUnlink = pageToUnlink
gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
if namespaces != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
- self.generator = pagegenerators.PreloadingGenerator(gen)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+ self.generator = pagegenerators.PreloadingGenerator(gen)
linktrail = pywikibot.getSite().linktrail()
- # The regular expression which finds links. Results consist of four groups:
- # group title is the target page title, that is, everything before | or ].
- # group section is the page section. It'll include the # to make life easier for us.
- # group label is the alternative link title, that's everything between | and ].
- # group linktrail is the link trail, that's letters after ]] which are part of the word.
+ # The regular expression which finds links. Results consist of four
+ # groups:
+ #
+ # group title is the target page title, that is, everything
+ # before | or ].
+ #
+ # group section is the page section.
+ # It'll include the # to make life easier for us.
+ #
+ # group label is the alternative link title, that's everything
+ # between | and ].
+ #
+ # group linktrail is the link trail, that's letters after ]] which are
+ # part of the word.
# note that the definition of 'letter' varies from language to language.
- self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+ self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
+ % linktrail)
self.always = always
self.done = False
self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
self.pageToUnlink.title())
- def handleNextLink(self, text, match, context = 100):
+ def handleNextLink(self, text, match, context=100):
"""
Returns a tuple (text, jumpToBeginning).
text is the unicode string after the current link has been processed.
@@ -79,9 +94,9 @@
choice = 'a'
else:
pywikibot.output(
- text[max(0, match.start() - context) : match.start()] \
- + '\03{lightred}' + text[match.start() : match.end()] \
- + '\03{default}' + text[match.end() : match.end() + context])
+ text[max(0, match.start() - context):match.start()]
+ + '\03{lightred}' + text[match.start():match.end()]
+ + '\03{default}' + text[match.end():match.end() + context])
choice = pywikibot.inputChoice(
u'\nWhat shall be done with this link?\n',
['unlink', 'skip', 'edit', 'more context',
@@ -94,7 +109,7 @@
return text, False
elif choice == 'e':
editor = editarticle.TextEditor()
- newText = editor.edit(text, jumpIndex = match.start())
+ newText = editor.edit(text, jumpIndex=match.start())
# if user didn't press Cancel
if newText:
return newText, True
@@ -123,7 +138,7 @@
text = oldText
curpos = 0
while curpos < len(text):
- match = self.linkR.search(text, pos = curpos)
+ match = self.linkR.search(text, pos=curpos)
if not match:
break
# Make sure that next time around we will not find this same
@@ -148,7 +163,8 @@
def run(self):
for page in self.generator:
- if self.done: break
+ if self.done:
+ break
self.treat(page)
--
To view, visit https://gerrit.wikimedia.org/r/98262
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I40d16980fc53104246eeaf7a1c5743172bfc57a2
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I1129bf9d966dc6c13ac01296439eaf7c3fde31c1
---
M weblinkchecker.py
1 file changed, 106 insertions(+), 75 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/weblinkchecker.py b/weblinkchecker.py
index 1e61f99..1eaa96b 100644
--- a/weblinkchecker.py
+++ b/weblinkchecker.py
@@ -88,19 +88,30 @@
#
# (C) Daniel Herding, 2005
-# (C) Pywikipedia bot team, 2005-2012
+# (C) Pywikibot team, 2005-2013
#
# Distributed under the terms of the MIT license.
#
-__version__='$Id$'
+__version__ = '$Id$'
-import sys, re
-import codecs, pickle
-import httplib, socket, urlparse, urllib, urllib2
-import threading, time
+import sys
+import re
+import codecs
+import pickle
+import httplib
+import socket
+import urlparse
+import urllib
+import urllib2
+import threading
+import time
+import gzip
+import StringIO
+
import pywikibot
from pywikibot import i18n
-import config, pagegenerators
+import config
+import pagegenerators
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp
@@ -121,17 +132,18 @@
# Other special cases
re.compile('.*[\./(a)]gso\.gbv\.de(/.*)?'), # bot somehow can't handle their redirects
- re.compile('.*[\./(a)]berlinonline\.de(/.*)?'), # a de: user wants to fix them by hand and doesn't want them to be deleted, see [[de:Benutzer:BLueFiSH.as/BZ]].
- re.compile('.*[\./(a)]bodo\.kommune\.no(/.*)?'), # bot can't handle their redirects
- re.compile('.*[\./(a)]jpl\.nasa\.gov(/.*)?'), # bot rejected on the site
- re.compile('.*[\./(a)]itis\.gov(/.*)?'), # bot rejected on the site
- re.compile('.*[\./(a)]cev\.lu(/.*)?'), # bot rejected on the site
- re.compile('.*[\./(a)]science\.ksc\.nasa\.gov(/.*)?'), # very slow response resulting in bot error
- re.compile('.*[\./(a)]britannica\.com(/.*)?'), #HTTP redirect loop
- re.compile('.*[\./(a)]quickfacts\.census\.gov(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./(a)]berlinonline\.de(/.*)?'), # a de: user wants to fix them by hand and doesn't want them to be deleted, see [[de:Benutzer:BLueFiSH.as/BZ]].
+ re.compile('.*[\./(a)]bodo\.kommune\.no(/.*)?'), # bot can't handle their redirects
+ re.compile('.*[\./(a)]jpl\.nasa\.gov(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./(a)]itis\.gov(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./(a)]cev\.lu(/.*)?'), # bot rejected on the site
+ re.compile('.*[\./(a)]science\.ksc\.nasa\.gov(/.*)?'), # very slow response resulting in bot error
+ re.compile('.*[\./(a)]britannica\.com(/.*)?'), # HTTP redirect loop
+ re.compile('.*[\./(a)]quickfacts\.census\.gov(/.*)?'), # bot rejected on the site
]
-def weblinksIn(text, withoutBracketed = False, onlyBracketed = False):
+
+def weblinksIn(text, withoutBracketed=False, onlyBracketed=False):
text = pywikibot.removeDisabledParts(text)
# MediaWiki parses templates before parsing external links. Thus, there
@@ -164,6 +176,7 @@
else:
yield m.group('urlb')
+
class InternetArchiveConsulter:
def __init__(self, url):
self.url = url
@@ -176,23 +189,21 @@
except urllib2.HTTPError:
# The Internet Archive yields a 403 error when the site was not
# archived due to robots.txt restrictions.
- return None
+ return
except UnicodeEncodeError:
- return None
+ return
data = f.read()
if f.headers.get('content-encoding', None) == 'gzip':
# Since 2008, the Internet Archive returns pages in GZIPed
# compression format. Unfortunatelly urllib2 doesn't handle
# the decompression for us, so we have to do it ourselves.
- import gzip, StringIO
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
if "Search Results for " in data:
return archiveURL
- else:
- return None
+
class LinkChecker(object):
- '''
+ """
Given a HTTP URL, tries to load the page from the Internet and checks if it
is still online.
@@ -201,8 +212,10 @@
Warning: Also returns false if your Internet connection isn't working
correctly! (This will give a Socket Error)
- '''
- def __init__(self, url, redirectChain = [], serverEncoding=None, HTTPignore=[]):
+
+ """
+ def __init__(self, url, redirectChain=[], serverEncoding=None,
+ HTTPignore=[]):
"""
redirectChain is a list of redirects which were resolved by
resolveRedirect(). This is needed to detect redirect loops.
@@ -247,8 +260,8 @@
if not self.serverEncoding:
# TODO: We might also load a page, then check for an encoding
# definition in a HTML meta tag.
- pywikibot.output(
- u'Error retrieving server\'s default charset. Using ISO 8859-1.')
+ pywikibot.output(u'Error retrieving server\'s default charset. '
+ u'Using ISO 8859-1.')
# most browsers use ISO 8859-1 (Latin-1) as the default.
self.serverEncoding = 'iso8859-1'
return self.serverEncoding
@@ -266,7 +279,8 @@
def changeUrl(self, url):
self.url = url
# we ignore the fragment
- self.scheme, self.host, self.path, self.query, self.fragment = urlparse.urlsplit(self.url)
+ (self.scheme, self.host, self.path, self.query,
+ self.fragment) = urlparse.urlsplit(self.url)
if not self.path:
self.path = '/'
if self.query:
@@ -282,14 +296,15 @@
self.path = unicode(urllib.quote(self.path.encode(encoding)))
self.query = unicode(urllib.quote(self.query.encode(encoding), '=&'))
- def resolveRedirect(self, useHEAD = False):
- '''
+ def resolveRedirect(self, useHEAD=False):
+ """
Requests the header from the server. If the page is an HTTP redirect,
returns the redirect target URL as a string. Otherwise returns None.
If useHEAD is true, uses the HTTP HEAD method, which saves bandwidth
by not downloading the body. Otherwise, the HTTP GET method is used.
- '''
+
+ """
conn = self.getConnection()
try:
if useHEAD:
@@ -307,7 +322,7 @@
# Apache server. Using GET instead works on these (but it uses
# more bandwidth).
if useHEAD:
- return self.resolveRedirect(useHEAD = False)
+ return self.resolveRedirect(useHEAD=False)
else:
raise
if self.response.status >= 300 and self.response.status <= 399:
@@ -327,7 +342,7 @@
self.changeUrl(u'%s://%s%s'
% (self.protocol, self.host, redirTarget))
return True
- else: # redirect to relative position
+ else: # redirect to relative position
# cut off filename
directory = self.path[:self.path.rindex('/') + 1]
# handle redirect to parent directory
@@ -344,18 +359,18 @@
redirTarget))
return True
else:
- return False # not a redirect
+ return False # not a redirect
- def check(self, useHEAD = False):
+ def check(self, useHEAD=False):
"""
Returns True and the server status message if the page is alive.
Otherwise returns false
"""
try:
- wasRedirected = self.resolveRedirect(useHEAD = useHEAD)
+ wasRedirected = self.resolveRedirect(useHEAD=useHEAD)
except UnicodeError, error:
- return False, u'Encoding Error: %s (%s)' \
- % (error.__class__.__name__, unicode(error))
+ return False, u'Encoding Error: %s (%s)' % (
+ error.__class__.__name__, unicode(error))
except httplib.error, error:
return False, u'HTTP Error: %s' % error.__class__.__name__
except socket.error, error:
@@ -372,7 +387,7 @@
msg = error[1]
except IndexError:
print u'### DEBUG information for #2972249'
- raise IndexError, type(error)
+ raise IndexError(type(error))
# TODO: decode msg. On Linux, it's encoded in UTF-8.
# How is it encoded in Windows? Or can we somehow just
# get the English message?
@@ -384,31 +399,38 @@
# which leads to a cyclic list of redirects.
# We simply start from the beginning, but this time,
# we don't use HEAD, but GET requests.
- redirChecker = LinkChecker(self.redirectChain[0],
- serverEncoding=self.serverEncoding,
- HTTPignore=self.HTTPignore)
- return redirChecker.check(useHEAD = False)
+ redirChecker = LinkChecker(
+ self.redirectChain[0],
+ serverEncoding=self.serverEncoding,
+ HTTPignore=self.HTTPignore)
+ return redirChecker.check(useHEAD=False)
else:
- urlList = ['[%s]' % url for url in self.redirectChain + [self.url]]
- return False, u'HTTP Redirect Loop: %s' % ' -> '.join(urlList)
+ urlList = ['[%s]' % url
+ for url in self.redirectChain + [self.url]]
+ return (False,
+ u'HTTP Redirect Loop: %s' % ' -> '.join(urlList))
elif len(self.redirectChain) >= 19:
if useHEAD:
# Some servers don't seem to handle HEAD requests properly,
# which leads to a long (or infinite) list of redirects.
# We simply start from the beginning, but this time,
# we don't use HEAD, but GET requests.
- redirChecker = LinkChecker(self.redirectChain[0],
- serverEncoding=self.serverEncoding,
- HTTPignore = self.HTTPignore)
- return redirChecker.check(useHEAD = False)
+ redirChecker = LinkChecker(
+ self.redirectChain[0],
+ serverEncoding=self.serverEncoding,
+ HTTPignore=self.HTTPignore)
+ return redirChecker.check(useHEAD=False)
else:
- urlList = ['[%s]' % url for url in self.redirectChain + [self.url]]
- return False, u'Long Chain of Redirects: %s' % ' -> '.join(urlList)
+ urlList = ['[%s]' % url
+ for url in self.redirectChain + [self.url]]
+ return (False,
+ u'Long Chain of Redirects: %s'
+ % ' -> '.join(urlList))
else:
redirChecker = LinkChecker(self.url, self.redirectChain,
self.serverEncoding,
HTTPignore=self.HTTPignore)
- return redirChecker.check(useHEAD = useHEAD)
+ return redirChecker.check(useHEAD=useHEAD)
else:
try:
conn = self.getConnection()
@@ -431,22 +453,24 @@
alive = False
return alive, '%s %s' % (self.response.status, self.response.reason)
+
class LinkCheckThread(threading.Thread):
- '''
- A thread responsible for checking one URL. After checking the page, it
+ """ A thread responsible for checking one URL. After checking the page, it
will die.
- '''
+
+ """
def __init__(self, page, url, history, HTTPignore):
threading.Thread.__init__(self)
self.page = page
self.url = url
self.history = history
# identification for debugging purposes
- self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8', 'replace'))
+ self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8',
+ 'replace'))
self.HTTPignore = HTTPignore
def run(self):
- linkChecker = LinkChecker(self.url, HTTPignore = self.HTTPignore)
+ linkChecker = LinkChecker(self.url, HTTPignore=self.HTTPignore)
try:
ok, message = linkChecker.check()
except:
@@ -464,7 +488,7 @@
class History:
- ''' Stores previously found dead links. The URLs are dictionary keys, and
+ """ Stores previously found dead links. The URLs are dictionary keys, and
values are lists of tuples where each tuple represents one time the URL was
found dead. Tuples have the form (title, date, error) where title is the
wiki page where the URL was found, date is an instance of time, and error is
@@ -481,15 +505,14 @@
('WikiPageName2', DATE, '404: File not found'),
]
- '''
+ """
def __init__(self, reportThread):
self.reportThread = reportThread
site = pywikibot.getSite()
self.semaphore = threading.Semaphore()
- self.datfilename = pywikibot.config.datafilepath('deadlinks',
- 'deadlinks-%s-%s.dat'
- % (site.family.name, site.lang))
+ self.datfilename = pywikibot.config.datafilepath(
+ 'deadlinks', 'deadlinks-%s-%s.dat' % (site.family.name, site.lang))
# Count the number of logged links, so that we can insert captions
# from time to time
self.logCount = 0
@@ -532,7 +555,6 @@
self.reportThread.report(url, errorReport, containingPage,
archiveURL)
-
def setLinkDead(self, url, error, page, day):
"""
Adds the fact that the link was found dead to the .dat file.
@@ -541,7 +563,7 @@
now = time.time()
if url in self.historyDict:
timeSinceFirstFound = now - self.historyDict[url][0][1]
- timeSinceLastFound= now - self.historyDict[url][-1][1]
+ timeSinceLastFound = now - self.historyDict[url][-1][1]
# if the last time we found this dead link is less than an hour
# ago, we won't save it in the history this time.
if timeSinceLastFound > 60 * 60:
@@ -568,7 +590,7 @@
try:
del self.historyDict[url]
except KeyError:
- # Not sure why this can happen, but I guess we can ignore this...
+ # Not sure why this can happen, but I guess we can ignore this.
pass
self.semaphore.release()
return True
@@ -583,6 +605,7 @@
pickle.dump(self.historyDict, datfile)
datfile.close()
+
class DeadLinkReportThread(threading.Thread):
'''
A Thread that is responsible for posting error reports on talk pages. There
@@ -592,7 +615,7 @@
def __init__(self):
threading.Thread.__init__(self)
self.semaphore = threading.Semaphore()
- self.queue = [];
+ self.queue = []
self.finishing = False
self.killed = False
@@ -631,7 +654,8 @@
content = talkPage.get() + "\n\n"
if url in content:
pywikibot.output(
- u'\03{lightaqua}** Dead link seems to have already been reported on %s\03{default}'
+ u'\03{lightaqua}** Dead link seems to have already '
+ u'been reported on %s\03{default}'
% talkPage.title(asLink=True))
self.semaphore.release()
continue
@@ -672,18 +696,20 @@
talkPage.put(content, comment)
except pywikibot.SpamfilterError, error:
pywikibot.output(
- u'\03{lightaqua}** SpamfilterError while trying to change %s: %s\03{default}'
+ u'\03{lightaqua}** SpamfilterError while trying to '
+ u'change %s: %s\03{default}'
% (talkPage.title(asLink=True), error.url))
self.semaphore.release()
class WeblinkCheckerRobot:
- '''
+ """
Robot which will use several LinkCheckThreads at once to search for dead
weblinks on pages provided by the given generator.
- '''
- def __init__(self, generator, HTTPignore = []):
+
+ """
+ def __init__(self, generator, HTTPignore=[]):
self.generator = generator
if config.report_dead_links_on_talk:
#pywikibot.output("Starting talk page thread")
@@ -698,7 +724,7 @@
def run(self):
for page in self.generator:
- self.checkLinksIn(page)
+ self.checkLinksIn(page)
def checkLinksIn(self, page):
try:
@@ -737,6 +763,7 @@
page = pywikibot.Page(pywikibot.getSite(), pageTitle)
yield page
+
def countLinkCheckThreads():
i = 0
for thread in threading.enumerate():
@@ -744,10 +771,12 @@
i += 1
return i
+
def check(url):
"""Peform a check on URL"""
c = LinkChecker(url)
return c.check()
+
def main():
gen = None
@@ -791,11 +820,11 @@
gen = genFactory.getCombinedGenerator()
if gen:
if namespaces != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
# fetch at least 240 pages simultaneously from the wiki, but more if
# a high thread number is set.
pageNumber = max(240, config.max_external_links * 2)
- gen = pagegenerators.PreloadingGenerator(gen, pageNumber = pageNumber)
+ gen = pagegenerators.PreloadingGenerator(gen, pageNumber=pageNumber)
gen = pagegenerators.RedirectFilterPageGenerator(gen)
bot = WeblinkCheckerRobot(gen, HTTPignore)
try:
@@ -805,8 +834,9 @@
# Don't wait longer than 30 seconds for threads to finish.
while countLinkCheckThreads() > 0 and waitTime < 30:
try:
- pywikibot.output(
- u"Waiting for remaining %i threads to finish, please wait..." % countLinkCheckThreads())
+ pywikibot.output(u"Waiting for remaining %i threads to "
+ u"finish, please wait..."
+ % countLinkCheckThreads())
# wait 1 second
time.sleep(1)
waitTime += 1
@@ -832,6 +862,7 @@
else:
pywikibot.showHelp()
+
if __name__ == "__main__":
try:
main()
--
To view, visit https://gerrit.wikimedia.org/r/98268
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1129bf9d966dc6c13ac01296439eaf7c3fde31c1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot