Revision: 5195 Author: filnik Date: 2008-04-08 16:56:41 +0000 (Tue, 08 Apr 2008)
Log Message: ----------- Fixing the upload-log regex (Special:Log, type=upload) to match the changed HTML of the log page
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-04-08 14:19:22 UTC (rev 5194) +++ trunk/pywikipedia/wikipedia.py 2008-04-08 16:56:41 UTC (rev 5195) @@ -828,6 +828,7 @@ def previousRevision(self): """Return the revision id for the previous revision of this Page.""" vh = self.getVersionHistory(revCount=2) + print vh return vh[1][0]
def exists(self): @@ -972,9 +973,20 @@
""" if not hasattr(self, '_isDisambig'): - foo = self.templates() - return self._isDisambig + locdis = self.site().family.disambig( self._site.lang )
+ for tn in self.templates(): + tn = tn[:1].upper() + tn[1:] + tn = tn.replace(u'_', u' ') + while u" " in tn: + tn = tn.replace(u" ", u" ") + if tn in locdis: + _isDisambig = True + break + else: + _isDisambig = False + return _isDisambig + def getReferences(self, follow_redirects=True, withTemplateInclusion=True, onlyTemplateInclusion=False, redirectsOnly=False): @@ -1154,7 +1166,7 @@ force, callback))
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True, - force=False): + force=False, deleted = True): """Save the page with the contents of the first argument as the text.
Optional parameters: @@ -1207,10 +1219,11 @@ # of Bordeaux if self.site().lang == 'eo': newtext = encodeEsperantoX(newtext) - return self._putPage(newtext, comment, watchArticle, minorEdit, newPage, self.site().getToken(sysop = sysop), sysop = sysop) + return self._putPage(newtext, comment, watchArticle, minorEdit, + newPage, self.site().getToken(sysop = sysop), sysop = sysop, deleted = deleted)
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True, - newPage=False, token=None, newToken=False, sysop=False): + newPage=False, token=None, newToken=False, sysop=False, deleted=True): """Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead. @@ -1297,7 +1310,7 @@ time.sleep(5) continue # A second text area means that an edit conflict has occured. - if 'id='wpTextbox2' name="wpTextbox2"' in data: + if 'id='wpTextbox2' name="wpTextbox2"' in data and deleted == True: raise EditConflict(u'An edit conflict has occured.') if self.site().has_mediawiki_message("spamprotectiontitle")\ and self.site().mediawiki_message('spamprotectiontitle') in data: @@ -1545,8 +1558,7 @@ try: page = Page(self.site(), title) except Error: - if title.strip(" "): - output(u"Page %s contains invalid link to [[%s]]." + output(u"Page %s contains invalid link to [[%s]]." % (self.title(), title)) continue if not withImageLinks and page.isImage(): @@ -1602,12 +1614,10 @@
If thistxt is set, it is used instead of current page content. """ - check_disambig = (thistxt is None) if not thistxt: try: thistxt = self.get() except (IsRedirectPage, NoPage): - self._isDisambig = False return []
# remove commented-out stuff etc. @@ -1652,13 +1662,10 @@ try: name = Page(self.site(), name).title() except Error: - if name.strip(): - output(u"Page %s contains invalid template name {{%s}}." + output(u"Page %s contains invalid template name {{%s}}." % (self.title(), name.strip())) continue - if check_disambig and \ - name in self.site().family.disambig(self.site().lang): - self._isDisambig = True + # Parameters paramString = m.group('params') params = [] @@ -1669,25 +1676,20 @@ for m2 in Rlink.finditer(paramString): count2 += 1 text = m2.group() - paramString = paramString.replace(text, - '%s%d%s' % (marker2, count2, marker2)) + paramString = paramString.replace(text, '%s%d%s' % (marker2, count2, marker2)) links[count2] = text # Parse string markedParams = paramString.split('|') # Replace markers for param in markedParams: for m2 in Rmarker.finditer(param): - param = param.replace(m2.group(), - inside[int(m2.group(1))]) + param = param.replace(m2.group(), inside[int(m2.group(1))]) for m2 in Rmarker2.finditer(param): - param = param.replace(m2.group(), - links[int(m2.group(1))]) + param = param.replace(m2.group(), links[int(m2.group(1))]) params.append(param)
# Add it to the result result.append((name, params)) - if check_disambig and not hasattr(self, "_isDisambig"): - self._isDisambig = False return result
def getRedirectTarget(self): @@ -4639,7 +4641,7 @@ """Yield ImagePages from Special:Log&type=upload"""
seen = set() - regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+(.+?</a>).*?<a href=".*?"(?P<new> class="new")? title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE) + regexp = re.compile(r'(?:<li[^>]*>|<div class="mw-log-entry"[^>]*>)(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+(.+?</a>).*?<a href=".*?"(?P<new> class="new")? title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?', re.UNICODE)
while True: path = self.log_address(number, mode = 'upload')
pywikipedia-l@lists.wikimedia.org