[Pywikipedia-l] [Pywikipedia-svn] SVN: [9494] trunk/pywikipedia/wikipedia.py
Dr. Trigon
dr.trigon at surfeu.ch
Sat Sep 3 20:25:03 UTC 2011
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
If this is also an issue with section detection within pages,
you could (if you like) also consider using the code given
in 'getSections' [1]...
[1]
https://fisheye.toolserver.org/browse/drtrigon/pywikipedia/dtbext/dtbext_wikipedia.py?hb=true
Greetings
DrTrigon
Am 03.09.2011 13:58, schrieb xqt at svn.wikimedia.org:
> http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9494
>
> Revision: 9494 Author: xqt Date: 2011-09-03 11:58:48 +0000
> (Sat, 03 Sep 2011) Log Message: ----------- reverrevert r3147 due
> to bug #2989218; check for italic code in headings. TODO: use a
> better regex to find it.
>
> Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
>
> Modified: trunk/pywikipedia/wikipedia.py
> ===================================================================
>
>
- --- trunk/pywikipedia/wikipedia.py 2011-09-03 11:17:47 UTC (rev 9493)
> +++ trunk/pywikipedia/wikipedia.py 2011-09-03 11:58:48 UTC (rev
> 9494) @@ -66,7 +66,6 @@ within a non-wiki-markup section of text
> decodeEsperantoX: decode Esperanto text using the x convention.
> encodeEsperantoX: convert wikitext to the Esperanto x-encoding. -
> sectionencode: encode text for use as a section title in
> wiki-links. findmarker(text, startwith, append): return a string
> which is not part of text expandmarker(text, marker, separator):
> return marker string expanded @@ -654,7 +653,7 @@ self._contents =
> contents hn = self.section() if hn: - m =
> re.search("=+ *%s *=+" % hn, self._contents) + m
> = re.search("=+[ ']*%s[ ']*=+" % hn, self._contents) if verbose and
> not m: output(u"WARNING: Section does not exist: %s" %
> self.aslink(forceInterwiki = True)) # Store any exceptions for
> later reference @@ -779,8 +778,8 @@ else: raise
> IsRedirectPage(redirtarget) if self.section(): - # TODO:
> What the hell is this? Docu please. - m =
> re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D"
> % re.escape(self.section()),
> sectionencode(pageInfo['revisions'][0]['*'],self.site().encoding()))
>
>
+ m = re.search("=+[ ']*%s[ ']*=+" % re.escape(self.section()),
> + pageInfo['revisions'][0]['*']) if not
> m: try: self._getexception @@ -920,8 +919,8 @@ else: raise
> IsRedirectPage(redirtarget) if self.section(): - # TODO:
> What the hell is this? Docu please. - m =
> re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D"
> % re.escape(self.section()),
> sectionencode(text,self.site().encoding())) + m =
> re.search("=+[ ']*%s[ ']*=+" % re.escape(self.section()), +
> text) if not m: try: self._getexception @@ -4140,8 +4139,7 @@
> page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if
> section: - m =
> re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D"
>
>
- - % re.escape(section),
sectionencode(text,page2.site().encoding()))
> + m = re.search("=+[ ']*%s[ ']*=+" %
> re.escape(section), text) if not m: try: page2._getexception @@
> -4302,7 +4300,7 @@ # Use the data loading time. page2._startTime =
> time.strftime('%Y%m%d%H%M%S', time.gmtime()) if section: -
> m =
> re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D"
> % re.escape(section), sectionencode(text,page2.site().encoding()))
> + m = re.search("=+[ ']*%s[ ']*=+" %
> re.escape(section), text) if not m: try: page2._getexception @@
> -4531,10 +4529,6 @@ break return text
>
> -def sectionencode(text, encoding): - """Encode text so that it
> can be used as a section title in wiki-links.""" - return
> urllib.quote(text.replace("
> ","_").encode(encoding)).replace("%",".") - ######## Unicode
> library functions ########
>
> def UnicodeToAsciiHtml(s):
>
>
> _______________________________________________ Pywikipedia-svn
> mailing list Pywikipedia-svn at lists.wikimedia.org
> https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn
>
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/
iEYEARECAAYFAk5ijR8ACgkQAXWvBxzBrDBNNQCgve2/z/SUa3bUNd625ibUKG/G
sEMAn2/LtRfr9kvdV1UX+aVKL9MQZwl8
=9anJ
-----END PGP SIGNATURE-----
More information about the Pywikipedia-l
mailing list