Revision: 5971
Author:   a_engels
Date:     2008-10-15 07:18:52 +0000 (Wed, 15 Oct 2008)
Log Message:
-----------
1. Add a new option -back. When -back is given, the bot only works on pages that do not yet have any backlinks.
2. When using -autonomous, the bot now halts work on a page as soon as it finds a conflict, instead of needlessly loading more pages.
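For example, a run that combines both changes could be started roughly like this (an illustrative invocation only; -start: stands for whatever page generator argument you would normally pass to interwiki.py):

  python interwiki.py -autonomous -back -start:A

With -back, a page is abandoned as soon as any other language is found to link back to it; with -autonomous, a detected conflict now aborts the remaining work on that page rather than fetching further links.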
Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py	2008-10-15 07:16:05 UTC (rev 5970)
+++ trunk/pywikipedia/interwiki.py	2008-10-15 07:18:52 UTC (rev 5971)
@@ -169,6 +169,10 @@
                    you are sure you have first gotten the interwiki on the
                    starting page exactly right).
                    (note: without ending colon)
+
+    -back          only work on pages that have no backlink from any other
+                   language; if a backlink is found, all work on the page
+                   will be halted.
 The following arguments are only important for users who have accounts for
 multiple languages, and specify on which sites the bot should modify pages:
@@ -462,6 +466,7 @@
     rememberno = False
     followinterwiki = True
     minsubjects = config.interwiki_min_subjects
+    nobackonly = False
 class Subject(object):
     """
@@ -493,6 +498,7 @@
         self.problemfound = False
         self.untranslated = None
         self.hintsAsked = False
+        self.forcedStop = False
     def getFoundDisambig(self, site):
         """
@@ -575,6 +581,15 @@
         # If there are any, return them. Otherwise, nothing is in progress.
         return self.pending
+    def makeForcedStop(self,counter):
+        """
+        Ends work on the page before the normal end.
+        """
+        for page in self.todo:
+            counter.minus(page.site())
+        self.todo = []
+        self.forcedStop = True
+
     def addIfNew(self, page, counter, linkingPage):
         """
         Adds the pagelink given to the todo list, but only if we didn't know
@@ -585,6 +600,13 @@
         Returns True iff the page is new.
         """
+        if self.forcedStop:
+            return False
+        if globalvar.nobackonly:
+            if page == self.originPage:
+                wikipedia.output("%s has a backlink from %s."%(page,linkingPage))
+                self.makeForcedStop(counter)
+                return False
         if self.foundIn.has_key(page):
             # not new
             self.foundIn[page].append(linkingPage)
@@ -809,6 +831,11 @@
             if globalvar.untranslatedonly:
                 # Ignore the interwiki links.
                 iw = ()
+            elif globalvar.autonomous and page.site() in [p.site() for p in self.done if p != page and p.exists() and not p.isRedirectPage()]:
+                otherpage = [p for p in self.done if p.site() == page.site() and p != page and p.exists() and not p.isRedirectPage()][0]
+                wikipedia.output(u"Stopping work on %s because duplicate pages %s and %s are found"%(self.originPage.aslink(),otherpage.aslink(True),page.aslink(True)))
+                self.makeForcedStop(counter)
+                iw = ()
             elif page.isEmpty() and not page.isCategory():
                 wikipedia.output(u"NOTE: %s is empty; ignoring it and its interwiki links" % page.aslink(True))
                 # Ignore the interwiki links
@@ -979,6 +1006,9 @@
            be told to make another get request first."""
         if not self.isDone():
             raise "Bugcheck: finish called before done"
+        if self.forcedStop:
+            wikipedia.output("Stopping work on %s."%self.originPage)
+            return
         if self.originPage.isRedirectPage():
             return
         if not self.untranslated and globalvar.untranslatedonly:
@@ -1677,6 +1707,8 @@
             globalvar.minsubjects = int(arg[7:])
         elif arg.startswith('-query:'):
             globalvar.maxquerysize = int(arg[7:])
+        elif arg == '-back':
+            globalvar.nobackonly = True
         else:
             generator = genFactory.handleArg(arg)
             if generator:
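To make the interplay of the new pieces easier to follow, here is a minimal, self-contained sketch of the forced-stop pattern the diff introduces. Counter, Page and Subject below are simplified stand-ins for the real pywikipedia classes (and the plain nobackonly attribute stands in for globalvar.nobackonly); only the control flow is meant to mirror the changes above.

# Minimal sketch of the forced-stop pattern added in this revision.
# Counter, Page and Subject are simplified stand-ins, not the real
# pywikipedia classes; only the control flow mirrors the diff above.

class Counter(object):
    """Tracks how many queued pages remain per site."""
    def __init__(self):
        self.counts = {}

    def plus(self, site):
        self.counts[site] = self.counts.get(site, 0) + 1

    def minus(self, site):
        self.counts[site] = self.counts.get(site, 0) - 1


class Page(object):
    def __init__(self, site, title):
        self._site = site
        self.title = title

    def site(self):
        return self._site

    def __eq__(self, other):
        return (self._site, self.title) == (other._site, other.title)


class Subject(object):
    def __init__(self, originPage, nobackonly=False):
        self.originPage = originPage
        self.nobackonly = nobackonly   # stand-in for globalvar.nobackonly
        self.todo = []
        self.forcedStop = False

    def makeForcedStop(self, counter):
        # Hand all still-queued pages back to the per-site counter,
        # then mark the whole subject as aborted.
        for page in self.todo:
            counter.minus(page.site())
        self.todo = []
        self.forcedStop = True

    def addIfNew(self, page, counter, linkingPage):
        if self.forcedStop:
            # Work on this subject was already aborted; queue nothing more.
            return False
        if self.nobackonly and page == self.originPage:
            # Some other page links back to the origin page: abort the subject.
            print("%s has a backlink from %s." % (page.title, linkingPage.title))
            self.makeForcedStop(counter)
            return False
        self.todo.append(page)
        counter.plus(page.site())
        return True


# With nobackonly set, discovering a link back to the origin page aborts
# the whole subject instead of queuing further pages.
counter = Counter()
origin = Page("en", "Example")
subject = Subject(origin, nobackonly=True)
subject.addIfNew(Page("de", "Beispiel"), counter, origin)    # queued normally
subject.addIfNew(origin, counter, Page("de", "Beispiel"))    # backlink -> forced stop
assert subject.forcedStop and subject.todo == [] and counter.counts["de"] == 0

The key design point visible in both the diff and this sketch is that makeForcedStop hands every still-queued page back to the per-site counter before clearing the todo list, so the global bookkeeping stays consistent when a subject is abandoned early.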
pywikipedia-l@lists.wikimedia.org