Revision: 4037
Author: wikipedian
Date: 2007-08-13 19:47:50 +0000 (Mon, 13 Aug 2007)
Log Message:
-----------
Sped up backlinks report generation.
Replacing the nested list scans with dictionary and set lookups reduces
the complexity of the report from O(n^3) to O(n^2).
For example, the backlinks report for
python interwiki.py -lang:de Indien -localonly
is now generated in 26 seconds, instead of the 190 seconds that were
needed before.
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2007-08-13 19:41:35 UTC (rev 4036)
+++ trunk/pywikipedia/interwiki.py 2007-08-13 19:47:50 UTC (rev 4037)
@@ -1076,34 +1076,33 @@
reporting of missing backlinks for pages we already fixed
"""
+ expectedPages = set(new.values())
+ expectedSites = set([page.site() for page in expectedPages])
try:
for site, page in new.iteritems():
if site not in updatedSites and not page.section():
- shouldlink = new.values()
try:
- linked = page.interwiki()
+ linkedPages = set(page.interwiki())
except wikipedia.NoPage:
wikipedia.output(u"WARNING: Page %s does no longer
exist?!" % page.title())
break
- for xpage in shouldlink:
- if xpage != page and not xpage in linked:
- for l in linked:
- if l.site() == xpage.site():
- wikipedia.output(u"WARNING: %s: %s does not link
to %s but to %s" % (page.site().family.name, page.aslink(True), xpage.aslink(True),
l.aslink(True)))
- break
- else:
- wikipedia.output(u"WARNING: %s: %s does not link to
%s" % (page.site().family.name, page.aslink(True), xpage.aslink(True)))
+ linkedPagesDict = {}
+ for linkedPage in linkedPages:
+ linkedPagesDict[linkedPage.site()] = linkedPage
+ for expectedPage in expectedPages:
+ if expectedPage != page and expectedPage not in linkedPages:
+ try:
+ linkedPage = linkedPagesDict[expectedPage.site()]
+ wikipedia.output(u"WARNING: %s: %s does not link to
%s but to %s" % (page.site().family.name, page.aslink(True),
expectedPage.aslink(True), linkedPage.aslink(True)))
+ except KeyError:
+ wikipedia.output(u"WARNING: %s: %s does not link to
%s" % (page.site().family.name, page.aslink(True), expectedPage.aslink(True)))
# Check for superfluous links
- for xpage in linked:
- if not xpage in shouldlink:
+ for linkedPage in linkedPages:
+ if linkedPage not in expectedPages:
# Check whether there is an alternative page on that
language.
- for l in shouldlink:
- if l.site() == xpage.site():
- # Already reported above.
- break
- else:
- # New warning
- wikipedia.output(u"WARNING: %s: %s links to
incorrect %s" % (page.site().family.name, page.aslink(True), xpage.aslink(True)))
+ # In this case, it was already reported above.
+ if linkedPage.site() not in expectedSites:
+ wikipedia.output(u"WARNING: %s: %s links to
incorrect %s" % (page.site().family.name, page.aslink(True),
linkedPage.aslink(True)))
except (socket.error, IOError):
wikipedia.output(u'ERROR: could not report backlinks')
Show replies by date