Revision: 7192
Author: alexsh
Date: 2009-08-31 20:00:38 +0000 (Mon, 31 Aug 2009)
Log Message:
-----------
Page().getVersionHistory():
* remove duplicated code
* use a temporary list to collect revisions, then store it in
self._versionhistory or self._versionhistoryearliest once processing is done.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-08-31 15:40:11 UTC (rev 7191)
+++ trunk/pywikipedia/wikipedia.py 2009-08-31 20:00:38 UTC (rev 7192)
@@ -2283,7 +2283,6 @@
unless getAll is True.
"""
- site = self.site()
# regular expression matching one edit in the version history.
# results will have 4 groups: oldid, edit date/time, user name, and edit
@@ -2297,6 +2296,7 @@
startFromPage = None
thisHistoryDone = False
skip = False # Used in determining whether we need to skip the first page
+ dataQuery = []
RLinkToNextPage = re.compile('&offset=(.*?)&')
@@ -2309,6 +2309,7 @@
# Cause a reload, or at least make the loop run
thisHistoryDone = False
skip = True
+ dataQuery = self._versionhistoryearliest
else:
thisHistoryDone = True
elif not hasattr(self, '_versionhistory') or forceReload:
@@ -2317,11 +2318,12 @@
# Cause a reload, or at least make the loop run
thisHistoryDone = False
skip = True
+ dataQuery = self._versionhistory
else:
thisHistoryDone = True
while not thisHistoryDone:
- path = site.family.version_history_address(self.site().language(),
self.urlname(), revCount)
+ path = self.site().family.version_history_address(self.site().language(),
self.urlname(), revCount)
if reverseOrder:
path += '&dir=prev'
@@ -2341,108 +2343,60 @@
else:
output(u'Getting version history of %s' %
self.aslink(forceInterwiki = True))
- txt = site.getUrl(path)
+ txt = self.site().getUrl(path)
# save a copy of the text
self_txt = txt
- if reverseOrder:
- # If we are getting all of the page history...
- if getAll:
- if len(self._versionhistoryearliest) == 0:
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
-
- edits = editR.findall(self_txt)
- edits.reverse()
- for edit in edits:
- self._versionhistoryearliest.append(edit)
- if len(edits) < revCount:
- thisHistoryDone = True
- else:
- if not skip:
- edits = editR.findall(self_txt)
- edits.reverse()
- for edit in edits:
- self._versionhistoryearliest.append(edit)
- if len(edits) < revCount:
- thisHistoryDone = True
-
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
-
- else:
- # Skip the first page only,
- skip = False
-
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
+ # If we are getting all of the page history...
+ if getAll:
+ #Find the nextPage link, if not exist, the page is last history page
+ matchObj = RLinkToNextPage.search(self_txt)
+ if matchObj:
+ startFromPage = matchObj.group(1)
else:
- # If we are not getting all, we stop on the first page.
- for edit in editR.findall(self_txt):
- self._versionhistoryearliest.append(edit)
- self._versionhistoryearliest.reverse()
-
thisHistoryDone = True
- else:
- # If we are getting all of the page history...
- if getAll:
- if len(self._versionhistory) == 0:
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
+ if len(dataQuery) == 0:
+ edits = editR.findall(self_txt)
+ if reverseOrder:
+ edits.reverse()
+ #for edit in edits:
+ dataQuery.extend([edit for edit in edits])
+ if len(edits) < revCount:
+ thisHistoryDone = True
+ else:
+ if not skip:
edits = editR.findall(self_txt)
- for edit in edits:
- self._versionhistory.append(edit)
+ if reverseOrder:
+ edits.reverse()
+ #for edit in edits:
+ dataQuery.extend([edit for edit in edits])
if len(edits) < revCount:
thisHistoryDone = True
else:
- if not skip:
- edits = editR.findall(self_txt)
- for edit in edits:
- self._versionhistory.append(edit)
- if len(edits) < revCount:
- thisHistoryDone = True
+ # Skip the first page only,
+ skip = False
+ else:
+ # If we are not getting all, we stop on the first page.
+ #for edit in editR.findall(self_txt):
+ dataQuery.extend([edit for edit in editR.findall(self_txt)] )
+ if reverseOrder:
+ dataQuery.reverse()
+ thisHistoryDone = True
- matchObj = RLinkToNextPage.findall(self_txt)
- if len(matchObj) >= 2:
- startFromPage = matchObj[1]
- else:
- thisHistoryDone = True
- else:
- # Skip the first page only,
- skip = False
-
- matchObj = RLinkToNextPage.search(self_txt)
- if matchObj:
- startFromPage = matchObj.group(1)
- else:
- thisHistoryDone = True
- else:
- # If we are not getting all, we stop on the first page.
- for edit in editR.findall(self_txt):
- self._versionhistory.append(edit)
-
- thisHistoryDone = True
-
if reverseOrder:
# Return only revCount edits, even if the version history is extensive
+ if dataQuery != []:
+ self._versionhistoryearliest = dataQuery
+ del dataQuery
if len(self._versionhistoryearliest) > revCount and not getAll:
return self._versionhistoryearliest[0:revCount]
return self._versionhistoryearliest
+ if dataQuery != []:
+ self._versionhistory = dataQuery
+ del dataQuery
# Return only revCount edits, even if the version history is extensive
if len(self._versionhistory) > revCount and not getAll:
return self._versionhistory[0:revCount]
Show replies by date