-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
If this is also an issue with section detection within pages, you could (if you like) also consider using the code given in 'getSections' [1]...
[1] https://fisheye.toolserver.org/browse/drtrigon/pywikipedia/dtbext/dtbext_wik...
Greetings DrTrigon
Am 03.09.2011 13:58, schrieb xqt@svn.wikimedia.org:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9494
Revision: 9494 Author: xqt Date: 2011-09-03 11:58:48 +0000 (Sat, 03 Sep 2011) Log Message: ----------- reverrevert r3147 due to bug #2989218; check for italic code in headings. TODO: use a better regex to find it.
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
- --- trunk/pywikipedia/wikipedia.py 2011-09-03 11:17:47 UTC (rev 9493)
+++ trunk/pywikipedia/wikipedia.py 2011-09-03 11:58:48 UTC (rev 9494) @@ -66,7 +66,6 @@ within a non-wiki-markup section of text decodeEsperantoX: decode Esperanto text using the x convention. encodeEsperantoX: convert wikitext to the Esperanto x-encoding. - sectionencode: encode text for use as a section title in wiki-links. findmarker(text, startwith, append): return a string which is not part of text expandmarker(text, marker, separator): return marker string expanded @@ -654,7 +653,7 @@ self._contents = contents hn = self.section() if hn: - m = re.search("=+ *%s *=+" % hn, self._contents) + m = re.search("=+[ ']*%s[ ']*=+" % hn, self._contents) if verbose and not m: output(u"WARNING: Section does not exist: %s" % self.aslink(forceInterwiki = True)) # Store any exceptions for later reference @@ -779,8 +778,8 @@ else: raise IsRedirectPage(redirtarget) if self.section(): - # TODO: What the hell is this? Docu please. - m = re.search(".3D_*(.27.27+)?(.5B.5B)?_*%s_*(.5B.5B)?(.27.27+)?_*.3D" % re.escape(self.section()), sectionencode(pageInfo['revisions'][0]['*'],self.site().encoding()))
+ m = re.search("=+[ ']*%s[ ']*=+" % re.escape(self.section()),
pageInfo['revisions'][0]['*']) if not
m: try: self._getexception @@ -920,8 +919,8 @@ else: raise IsRedirectPage(redirtarget) if self.section(): - # TODO: What the hell is this? Docu please. - m = re.search(".3D_*(.27.27+)?(.5B.5B)?_*%s_*(.5B.5B)?(.27.27+)?_*.3D" % re.escape(self.section()), sectionencode(text,self.site().encoding())) + m = re.search("=+[ ']*%s[ ']*=+" % re.escape(self.section()), + text) if not m: try: self._getexception @@ -4140,8 +4139,7 @@ page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if section: - m = re.search(".3D_*(.27.27+)?(.5B.5B)?_*%s_*(.5B.5B)?(.27.27+)?_*.3D"
- - % re.escape(section), sectionencode(text,page2.site().encoding()))
m = re.search("=+[ ']*%s[ ']*=+" %
re.escape(section), text) if not m: try: page2._getexception @@ -4302,7 +4300,7 @@ # Use the data loading time. page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if section: - m = re.search(".3D_*(.27.27+)?(.5B.5B)?_*%s_*(.5B.5B)?(.27.27+)?_*.3D" % re.escape(section), sectionencode(text,page2.site().encoding()))
m = re.search("=+[ ']*%s[ ']*=+" %
re.escape(section), text) if not m: try: page2._getexception @@ -4531,10 +4529,6 @@ break return text
-def sectionencode(text, encoding): - """Encode text so that it can be used as a section title in wiki-links.""" - return urllib.quote(text.replace(" ","_").encode(encoding)).replace("%",".") - ######## Unicode library functions ########
def UnicodeToAsciiHtml(s):
_______________________________________________ Pywikipedia-svn mailing list Pywikipedia-svn@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn