[Pywikipedia-l] SVN: [5971] trunk/pywikipedia/interwiki.py

a_engels at svn.wikimedia.org a_engels at svn.wikimedia.org
Wed Oct 15 07:18:52 UTC 2008


Revision: 5971
Author:   a_engels
Date:     2008-10-15 07:18:52 +0000 (Wed, 15 Oct 2008)

Log Message:
-----------
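1. Add a new option -back. If -back is given, ONLY pages that do not yet have a backlink from any other language will be worked on; as soon as a backlink to the starting page is found, all work on that page is halted.
2. When using -autonomous, the bot will now halt work on a page as soon as it finds a conflict (two existing, non-redirect pages on the same site), and not needlessly load more pages.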

Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py

Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py	2008-10-15 07:16:05 UTC (rev 5970)
+++ trunk/pywikipedia/interwiki.py	2008-10-15 07:18:52 UTC (rev 5971)
@@ -169,6 +169,10 @@
                    you are sure you have first gotten the interwiki on the
                    starting page exactly right).
                    (note: without ending colon)
+                   
+    -back          only work on pages that have no backlink from any other
+                   language; if a backlink is found, all work on the page
+                   will be halted.
 
 The following arguments are only important for users who have accounts for
 multiple languages, and specify on which sites the bot should modify pages:
@@ -462,6 +466,7 @@
     rememberno = False
     followinterwiki = True
     minsubjects = config.interwiki_min_subjects
+    nobackonly = False
 
 class Subject(object):
     """
@@ -493,6 +498,7 @@
         self.problemfound = False
         self.untranslated = None
         self.hintsAsked = False
+        self.forcedStop = False
 
     def getFoundDisambig(self, site):
         """
@@ -575,6 +581,15 @@
         # If there are any, return them. Otherwise, nothing is in progress.
         return self.pending
 
+    def makeForcedStop(self,counter):
+        """
+        Ends work on the page before the normal end.
+        """
+        for page in self.todo:
+            counter.minus(page.site())
+        self.todo = []
+        self.forcedStop = True
+
     def addIfNew(self, page, counter, linkingPage):
         """
         Adds the pagelink given to the todo list, but only if we didn't know
@@ -585,6 +600,13 @@
 
         Returns True iff the page is new.
         """
+        if self.forcedStop:
+            return False
+        if globalvar.nobackonly:
+            if page == self.originPage:
+                wikipedia.output("%s has a backlink from %s."%(page,linkingPage))
+                self.makeForcedStop(counter)
+                return False
         if self.foundIn.has_key(page):
             # not new
             self.foundIn[page].append(linkingPage)
@@ -809,6 +831,11 @@
                         if globalvar.untranslatedonly:
                             # Ignore the interwiki links.
                             iw = ()
+                    elif globalvar.autonomous and page.site() in [p.site() for p in self.done if p != page and p.exists() and not p.isRedirectPage()]:
+                        otherpage = [p for p in self.done if p.site() == page.site() and p != page and p.exists() and not p.isRedirectPage()][0]
+                        wikipedia.output(u"Stopping work on %s because duplicate pages %s and %s are found"%(self.originPage.aslink(),otherpage.aslink(True),page.aslink(True)))
+                        self.makeForcedStop(counter)
+                        iw = ()
                     elif page.isEmpty() and not page.isCategory():
                         wikipedia.output(u"NOTE: %s is empty; ignoring it and its interwiki links" % page.aslink(True))
                         # Ignore the interwiki links
@@ -979,6 +1006,9 @@
            be told to make another get request first."""
         if not self.isDone():
             raise "Bugcheck: finish called before done"
+        if self.forcedStop:
+            wikipedia.output("Stopping work on %s."%self.originPage)
+            return
         if self.originPage.isRedirectPage():
             return
         if not self.untranslated and globalvar.untranslatedonly:
@@ -1677,6 +1707,8 @@
                 globalvar.minsubjects = int(arg[7:])
             elif arg.startswith('-query:'):
                 globalvar.maxquerysize = int(arg[7:])
+            elif arg == '-back':
+                globalvar.nobackonly = True
             else:
                 generator = genFactory.handleArg(arg)
                 if generator:





More information about the Pywikipedia-l mailing list