Revision: 6767
Author: nicdumz
Date: 2009-04-30 09:00:50 +0000 (Thu, 30 Apr 2009)
Log Message:
-----------
[ 2771272 ] 44 Error Dump Files :
print a message on site error, sleep and retry.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-04-30 08:54:39 UTC (rev 6766)
+++ trunk/pywikipedia/wikipedia.py 2009-04-30 09:00:50 UTC (rev 6767)
@@ -2974,6 +2974,13 @@
else:
if "<title>Wiki does not exist</title>" in data:
raise NoSuchSite(u'Wiki %s does not exist yet' % self.site)
+ elif "</mediawiki>" not in data:
+ # HTML error Page got thrown because of an internal
+ # error when fetching a revision.
+ output(u'Remote site has a problem, it probably ' \
+ 'exited our query with an internal Error. ' \
+ 'Sleeping for %d seconds...' % self.sleeptime)
+ self.sleep()
elif "<siteinfo>" not in data: # This probably means we got a 'temporary unaivalable'
output(u'Got incorrect export page. ' \
'Sleeping for %d seconds...' % self.sleeptime)
Revision: 6764
Author: nicdumz
Date: 2009-04-30 08:28:31 +0000 (Thu, 30 Apr 2009)
Log Message:
-----------
Not all pages have a _contents attribute
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-30 08:24:07 UTC (rev 6763)
+++ trunk/pywikipedia/interwiki.py 2009-04-30 08:28:31 UTC (rev 6764)
@@ -1432,7 +1432,8 @@
# foundIn can contain either Page or StoredPage objects
# calling the destructor on _contents will delete the
# disk records if necessary
- del page._contents
+ if hasattr(page, '_contents'):
+ del page._contents
def replaceLinks(self, page, newPages, bot):
"""
Revision: 6763
Author: nicdumz
Date: 2009-04-30 08:24:07 +0000 (Thu, 30 Apr 2009)
Log Message:
-----------
Convert to StoredPage only if the Page will eventually be loaded
(i.e., if the Page is added to Subject.todo)
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-30 08:13:06 UTC (rev 6762)
+++ trunk/pywikipedia/interwiki.py 2009-04-30 08:24:07 UTC (rev 6763)
@@ -860,14 +860,14 @@
self.makeForcedStop(counter)
return False
- if globalvar.contentsondisk:
- page = StoredPage(page)
if page in self.foundIn:
# not new
self.foundIn[page].append(linkingPage)
return False
else:
+ if globalvar.contentsondisk:
+ page = StoredPage(page)
self.foundIn[page] = [linkingPage]
self.todo.add(page)
counter.plus(page.site())
@@ -899,9 +899,6 @@
if globalvar.autonomous:
wikipedia.output(u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i." % (self.originPage.aslink(True), self.originPage.namespace(), linkedPage.aslink(True), linkedPage.namespace()))
# Fill up foundIn, so that we will not write this notice
- if globalvar.contentsondisk:
- linkedPage = StoredPage(linkedPage)
-
self.foundIn[linkedPage] = [linkingPage]
return True
else:
@@ -913,9 +910,6 @@
choice = wikipedia.inputChoice('WARNING: %s is in namespace %i, but %s is in namespace %i. Follow it anyway?' % (self.originPage.aslink(True), self.originPage.namespace(), linkedPage.aslink(True), linkedPage.namespace()), ['Yes', 'No', 'Add an alternative', 'give up'], ['y', 'n', 'a', 'g'])
if choice != 'y':
# Fill up foundIn, so that we will not ask again
- if globalvar.contentsondisk:
- linkedPage = StoredPage(linkedPage)
-
self.foundIn[linkedPage] = [linkingPage]
if choice == 'g':
self.makeForcedStop(counter)
@@ -1081,6 +1075,8 @@
wikipedia.output(u"NOTE: %s is redirect to %s" % (page.aslink(True), redirectTargetPage.aslink(True)))
if page == self.originPage:
if globalvar.initialredirect:
+ if globalvar.contentsondisk:
+ redirectTargetPage = StoredPage(redirectTargetPage)
self.originPage = redirectTargetPage
self.todo.add(redirectTargetPage)
counter.plus(redirectTargetPage.site)
@@ -1432,8 +1428,11 @@
whole storage file will be eventually removed.
"""
if globalvar.contentsondisk:
- for storedPage in self.foundIn:
- storedPage.SPdelContents()
+ for page in self.foundIn:
+ # foundIn can contain either Page or StoredPage objects
+ # calling the destructor on _contents will delete the
+ # disk records if necessary
+ del page._contents
def replaceLinks(self, page, newPages, bot):
"""
Revision: 6762
Author: nicdumz
Date: 2009-04-30 08:13:06 +0000 (Thu, 30 Apr 2009)
Log Message:
-----------
Converting to StoredPage everywhere
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-30 08:08:00 UTC (rev 6761)
+++ trunk/pywikipedia/interwiki.py 2009-04-30 08:13:06 UTC (rev 6762)
@@ -796,6 +796,8 @@
pages = titletranslate.translate(self.originPage, hints = hints, auto = globalvar.auto, removebrackets
= globalvar.hintnobracket)
for page in pages:
+ if globalvar.contentsondisk:
+ page = StoredPage(page)
self.todo.add(page)
self.foundIn[page] = [None]
if keephintedsites:
@@ -897,6 +899,9 @@
if globalvar.autonomous:
wikipedia.output(u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i." % (self.originPage.aslink(True), self.originPage.namespace(), linkedPage.aslink(True), linkedPage.namespace()))
# Fill up foundIn, so that we will not write this notice
+ if globalvar.contentsondisk:
+ linkedPage = StoredPage(linkedPage)
+
self.foundIn[linkedPage] = [linkingPage]
return True
else:
@@ -908,6 +913,9 @@
choice = wikipedia.inputChoice('WARNING: %s is in namespace %i, but %s is in namespace %i. Follow it anyway?' % (self.originPage.aslink(True), self.originPage.namespace(), linkedPage.aslink(True), linkedPage.namespace()), ['Yes', 'No', 'Add an alternative', 'give up'], ['y', 'n', 'a', 'g'])
if choice != 'y':
# Fill up foundIn, so that we will not ask again
+ if globalvar.contentsondisk:
+ linkedPage = StoredPage(linkedPage)
+
self.foundIn[linkedPage] = [linkingPage]
if choice == 'g':
self.makeForcedStop(counter)
Revision: 6761
Author: nicdumz
Date: 2009-04-30 08:08:00 +0000 (Thu, 30 Apr 2009)
Log Message:
-----------
Move the cleanup bits into a Subject.clean() function,
and make sure it is called after finish(), even if finish()
returns early.
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-30 04:52:44 UTC (rev 6760)
+++ trunk/pywikipedia/interwiki.py 2009-04-30 08:08:00 UTC (rev 6761)
@@ -1410,6 +1410,7 @@
if config.interwiki_backlink:
self.reportBacklinks(new, updatedSites)
+ def clean(self):
"""
Delete the contents that are stored on disk for this Subject.
@@ -1822,6 +1823,7 @@
subj = self.subjects[i]
if subj.isDone():
subj.finish(self)
+ subj.clean()
del self.subjects[i]
def isDone(self):