Revision: 5195
Author: filnik
Date: 2008-04-08 16:56:41 +0000 (Tue, 08 Apr 2008)
Log Message:
-----------
Fixing the regex according to the change of HTML
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-08 14:19:22 UTC (rev 5194)
+++ trunk/pywikipedia/wikipedia.py 2008-04-08 16:56:41 UTC (rev 5195)
@@ -828,6 +828,7 @@
def previousRevision(self):
"""Return the revision id for the previous revision of this Page."""
vh = self.getVersionHistory(revCount=2)
+ print vh
return vh[1][0]
def exists(self):
@@ -972,9 +973,20 @@
"""
if not hasattr(self, '_isDisambig'):
- foo = self.templates()
- return self._isDisambig
+ locdis = self.site().family.disambig( self._site.lang )
+ for tn in self.templates():
+ tn = tn[:1].upper() + tn[1:]
+ tn = tn.replace(u'_', u' ')
+ while u" " in tn:
+ tn = tn.replace(u" ", u" ")
+ if tn in locdis:
+ _isDisambig = True
+ break
+ else:
+ _isDisambig = False
+ return _isDisambig
+
def getReferences(self,
follow_redirects=True, withTemplateInclusion=True,
onlyTemplateInclusion=False, redirectsOnly=False):
@@ -1154,7 +1166,7 @@
force, callback))
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
- force=False):
+ force=False, deleted = True):
"""Save the page with the contents of the first argument as the text.
Optional parameters:
@@ -1207,10 +1219,11 @@
# of Bordeaux
if self.site().lang == 'eo':
newtext = encodeEsperantoX(newtext)
- return self._putPage(newtext, comment, watchArticle, minorEdit, newPage, self.site().getToken(sysop = sysop), sysop = sysop)
+ return self._putPage(newtext, comment, watchArticle, minorEdit,
+ newPage, self.site().getToken(sysop = sysop), sysop = sysop, deleted = deleted)
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
- newPage=False, token=None, newToken=False, sysop=False):
+ newPage=False, token=None, newToken=False, sysop=False, deleted=True):
"""Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead.
@@ -1297,7 +1310,7 @@
time.sleep(5)
continue
# A second text area means that an edit conflict has occured.
- if 'id=\'wpTextbox2\' name="wpTextbox2"' in data:
+ if 'id=\'wpTextbox2\' name="wpTextbox2"' in data and deleted == True:
raise EditConflict(u'An edit conflict has occured.')
if self.site().has_mediawiki_message("spamprotectiontitle")\
and self.site().mediawiki_message('spamprotectiontitle') in data:
@@ -1545,8 +1558,7 @@
try:
page = Page(self.site(), title)
except Error:
- if title.strip(" "):
- output(u"Page %s contains invalid link to [[%s]]."
+ output(u"Page %s contains invalid link to [[%s]]."
% (self.title(), title))
continue
if not withImageLinks and page.isImage():
@@ -1602,12 +1614,10 @@
If thistxt is set, it is used instead of current page content.
"""
- check_disambig = (thistxt is None)
if not thistxt:
try:
thistxt = self.get()
except (IsRedirectPage, NoPage):
- self._isDisambig = False
return []
# remove commented-out stuff etc.
@@ -1652,13 +1662,10 @@
try:
name = Page(self.site(), name).title()
except Error:
- if name.strip():
- output(u"Page %s contains invalid template name {{%s}}."
+ output(u"Page %s contains invalid template name {{%s}}."
% (self.title(), name.strip()))
continue
- if check_disambig and \
- name in self.site().family.disambig(self.site().lang):
- self._isDisambig = True
+
# Parameters
paramString = m.group('params')
params = []
@@ -1669,25 +1676,20 @@
for m2 in Rlink.finditer(paramString):
count2 += 1
text = m2.group()
- paramString = paramString.replace(text,
- '%s%d%s' % (marker2, count2, marker2))
+ paramString = paramString.replace(text, '%s%d%s' % (marker2, count2, marker2))
links[count2] = text
# Parse string
markedParams = paramString.split('|')
# Replace markers
for param in markedParams:
for m2 in Rmarker.finditer(param):
- param = param.replace(m2.group(),
- inside[int(m2.group(1))])
+ param = param.replace(m2.group(), inside[int(m2.group(1))])
for m2 in Rmarker2.finditer(param):
- param = param.replace(m2.group(),
- links[int(m2.group(1))])
+ param = param.replace(m2.group(), links[int(m2.group(1))])
params.append(param)
# Add it to the result
result.append((name, params))
- if check_disambig and not hasattr(self, "_isDisambig"):
- self._isDisambig = False
return result
def getRedirectTarget(self):
@@ -4639,7 +4641,7 @@
"""Yield ImagePages from Special:Log&type=upload"""
seen = set()
- regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+\(.+?</a>\).*?<a href=".*?"(?P<new> class="new")? title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE)
+ regexp = re.compile(r'(?:<li[^>]*>|<div class="mw-log-entry"[^>]*>)(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+\(.+?</a>\).*?<a href=".*?"(?P<new> class="new")? title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE)
while True:
path = self.log_address(number, mode = 'upload')