Revision: 6730
Author: nicdumz
Date: 2009-04-26 11:58:34 +0000 (Sun, 26 Apr 2009)
Log Message:
-----------
Do not generate a list twice when looking for duplicate links
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-26 09:58:10 UTC (rev 6729)
+++ trunk/pywikipedia/interwiki.py 2009-04-26 11:58:34 UTC (rev 6730)
@@ -958,22 +958,24 @@
                 # add the page that was entered by the user
                 self.addIfNew(alternativePage, counter, None)
 
+            duplicate = None
+            for p in self.done.filter(page.site()):
+                if p != page and p.exists() and not p.isRedirectPage():
+                    duplicate = p
+                    break
+
             if self.originPage == page:
                 self.untranslated = (len(iw) == 0)
                 if globalvar.untranslatedonly:
                     # Ignore the interwiki links.
                     iw = ()
-            # FIXME: the filtered list generated in the condition is
-            # re-generated the lign after.
-            # And we only use the first item of that list.
-            elif globalvar.autonomous and [p for p in self.done.filter(page.site()) if p != page and p.exists() and not p.isRedirectPage()]:
+
+            elif globalvar.autonomous and duplicate:
-                for p in self.done.filter(page.site()):
-                    if p != page and p.exists() and \
-                       not p.isRedirectPage():
-                        otherpage = p
-                        break
-                wikipedia.output(u"Stopping work on %s because duplicate pages %s and %s are found"%(self.originPage.aslink(),otherpage.aslink(True),page.aslink(True)))
+                wikipedia.output(u"Stopping work on %s because duplicate pages"\
+                                 " %s and %s are found" % (self.originPage.aslink(),
+                                                           duplicate.aslink(True),
+                                                           page.aslink(True)))
                 self.makeForcedStop(counter)
                 try:
                     f = codecs.open(
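The gain is a single pass: the old elif built the whole filtered list just to
test its truthiness, then scanned self.done.filter() again to pick the first
match. A standalone sketch of the pattern, with an invented FakePage stand-in
(only the exists()/isRedirectPage() names mirror the pywikipedia API):

class FakePage:
    # Hypothetical stand-in for wikipedia.Page, for illustration only.
    def __init__(self, title, exists=True, redirect=False):
        self.title = title
        self._exists = exists
        self._redirect = redirect
    def exists(self):
        return self._exists
    def isRedirectPage(self):
        return self._redirect

def first_duplicate(pages, page):
    # One pass: return the first existing, non-redirect page that is not
    # `page` itself, or None if there is no such duplicate.
    for p in pages:
        if p != page and p.exists() and not p.isRedirectPage():
            return p
    return None

pages = [FakePage("A"), FakePage("B", redirect=True), FakePage("C")]
duplicate = first_duplicate(pages, pages[0])
if duplicate:                             # mirrors `elif ... and duplicate:`
    print("duplicate found: %s" % duplicate.title)   # -> C

Returning None doubles as the "no duplicate" flag, which is exactly how the
new `elif globalvar.autonomous and duplicate:` test reads.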
Revision: 6729
Author: nicdumz
Date: 2009-04-26 09:58:10 +0000 (Sun, 26 Apr 2009)
Log Message:
-----------
Fixing the self.pending issue: add to self.todo instead,
and wait one more round
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-26 08:54:30 UTC (rev 6728)
+++ trunk/pywikipedia/interwiki.py 2009-04-26 09:58:10 UTC (rev 6729)
@@ -918,13 +918,7 @@
                 if page == self.originPage:
                     if globalvar.initialredirect:
                         self.originPage = redirectTargetPage
-                        #XXX might not work if page.site != redirTar.site:
-                        #    We are appending an item to
-                        #    self.pending[redirTar.site]
-                        #    but we are iterating on self.pending at the same
-                        #    time.
-                        #    On the other hand... crosslanguage redirects?
-                        self.pending.add(redirectTargetPage)
+                        self.todo.add(redirectTargetPage)
                         counter.plus(redirectTargetPage.site)
                 else:
                     # This is a redirect page to the origin. We don't need to
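The deleted XXX comment spells out the hazard: workDone() iterates over
self.pending, so self.pending.add() would mutate the collection being walked.
Queuing the redirect target on self.todo defers it by one round instead. A
minimal illustration with plain sets (the real pending/todo are PageTree-style
containers keyed by site, but the iteration hazard is the same):

pending = set(["Foo", "Bar"])
todo = set()

for title in pending:
    # pending.add(title + "/target")  # RuntimeError in CPython:
    #                                 # "Set changed size during iteration"
    todo.add(title + "/target")       # safe: queue it for the next round

pending = todo                        # the "wait one more round" step
print(sorted(pending))                # ['Bar/target', 'Foo/target']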
Revision: 6724
Author: nicdumz
Date: 2009-04-26 04:13:27 +0000 (Sun, 26 Apr 2009)
Log Message:
-----------
Fixing an IndexError in SearchPagesGenerator. (page[0] -> page)
Also changing other generators so they explicitly name the tuples
they receive from site methods, to avoid opaque "page[0]" indexing.
Modified Paths:
--------------
branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2009-04-26 04:06:33 UTC (rev 6723)
+++ branches/rewrite/pywikibot/pagegenerators.py 2009-04-26 04:13:27 UTC (rev 6724)
@@ -700,8 +700,9 @@
 def NewimagesPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
         site = pywikibot.Site()
-    for page in site.newimages(number, repeat=repeat):
-        yield page[0]
+    for tuple in site.newimages(number, repeat=repeat):
+        # tuple is ImagePage, date, user comment
+        yield tuple[0]
 
 def UnCategorizedPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
@@ -724,8 +725,8 @@
 def AncientPagesPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
         site = pywikibot.Site()
-    for page in site.ancientpages(number=number, repeat=repeat):
-        yield page[0]
+    for page, date in site.ancientpages(number=number, repeat=repeat):
+        yield page
 
 def DeadendPagesPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
@@ -736,14 +737,14 @@
 def LongPagesPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
         site = pywikibot.Site()
-    for page in site.longpages(number=number, repeat=repeat):
-        yield page[0]
+    for page, length in site.longpages(number=number, repeat=repeat):
+        yield page
 
 def ShortPagesPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
         site = pywikibot.Site()
-    for page in site.shortpages(number=number, repeat=repeat):
-        yield page[0]
+    for page, length in site.shortpages(number=number, repeat=repeat):
+        yield page
 
 def LinksearchPageGenerator(link, limit=None, site=None):
     """Yields all pages that include a specified link, according to
@@ -762,7 +763,7 @@
     if site is None:
         site = pywikibot.Site()
     for page in site.search(query, number=number, namespaces = namespaces):
-        yield page[0]
+        yield page
 
 
 # following classes just ported from version 1 without revision; not tested
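Most special-page listings on the rewrite branch yield tuples, while search()
yields pages directly, so the blanket `yield page[0]` both hid what the tuple
carried and broke on search results. A self-contained sketch of the convention
the commit adopts, with invented fake generators standing in for the
pywikibot.Site methods (the tuple shapes follow the diff; the rest is assumed):

def fake_longpages():
    # stands in for site.longpages(): yields (page, length) tuples
    yield ("PageA", 54321)
    yield ("PageB", 43210)

def fake_search(query):
    # stands in for site.search(): yields pages directly, no tuple
    yield "PageC"

def LongPagesPageGenerator(source=fake_longpages):
    for page, length in source():    # named unpacking, no opaque page[0]
        yield page

def SearchPagesGenerator(query, source=fake_search):
    for page in source(query):       # already a page: yield it as-is
        yield page

print(list(LongPagesPageGenerator()))          # ['PageA', 'PageB']
print(list(SearchPagesGenerator("example")))   # ['PageC']

Unpacking as `for page, length in ...` costs nothing and documents the payload
at the point of use.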
Revision: 6721
Author: nicdumz
Date: 2009-04-26 02:51:55 +0000 (Sun, 26 Apr 2009)
Log Message:
-----------
Fixing bug #2780256 "Removing of interwiki": this indeed came from r6992:
- if page in self.done: #XXX: Ugly bugfix - the following line has reportedly thrown "ValueError: list.remove(x): x not in list"
-     self.done.remove(page)
+ self.done = PageTree()
The correct replacement is self.done.remove(page), of course.
Thanks to Tieump for helping me to track this bug down, and for
providing the original patch :)
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-25 16:21:08 UTC (rev 6720)
+++ trunk/pywikipedia/interwiki.py 2009-04-26 02:51:55 UTC (rev 6721)
@@ -527,10 +527,17 @@
     def add(self, page):
         site = page.site()
         if not site in self.tree:
-            self.tree[site] = []
-        self.tree[site].append(page)
+            self.tree[site] = {}
+        self.tree[site][page] = True
         self.size += 1
 
+    def remove(self, page):
+        try:
+            del self.tree[page.site()][page]
+            self.size -= 1
+        except KeyError:
+            pass
+
     def removeSite(self, site):
         """
         Removes all pages from Site site
@@ -951,7 +958,7 @@
             (skip, alternativePage) = self.disambigMismatch(page, counter)
             if skip:
                 wikipedia.output(u"NOTE: ignoring %s and its interwiki links" % page.aslink(True))
-                self.done = PageTree()
+                self.done.remove(page)
                 iw = ()
             if alternativePage:
                 # add the page that was entered by the user
@@ -994,7 +1001,7 @@
             elif page.isEmpty() and not page.isCategory():
                 wikipedia.output(u"NOTE: %s is empty; ignoring it and its interwiki links" % page.aslink(True))
                 # Ignore the interwiki links
-                self.done = PageTree()
+                self.done.remove(page)
                 iw = ()
 
             for linkedPage in iw:
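The regression quoted in the log message replaced a targeted removal with
`self.done = PageTree()`, discarding every site's pages in order to drop one;
that is what silently emptied interwiki sets. Backing each site with a dict
used as a set makes remove() cheap and tolerant of absent pages. A runnable
sketch of the structure (FakePage is invented; unlike the committed add(),
this version also guards size against re-adding the same page):

class PageTree:
    def __init__(self):
        self.tree = {}                   # site -> {page: True}
        self.size = 0

    def add(self, page):
        bucket = self.tree.setdefault(page.site(), {})
        if page not in bucket:           # keep size honest on re-adds
            bucket[page] = True
            self.size += 1

    def remove(self, page):
        try:
            del self.tree[page.site()][page]
            self.size -= 1
        except KeyError:
            pass                         # unknown site or page: a no-op

class FakePage:
    def __init__(self, title, site="en"):
        self.title = title
        self._site = site
    def site(self):
        return self._site

done = PageTree()
p = FakePage("Foo")
done.add(p)
done.remove(p)                           # size back to 0
done.remove(p)                           # second removal silently ignored
print(done.size)                         # 0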