Revision: 5764
Author: valhallasw
Date: 2008-07-27 11:04:03 +0000 (Sun, 27 Jul 2008)
Log Message:
-----------
Added support for per-site post-get and pre-put conversions. Added automatic conversion for eowiki X-convention -> unicode
Modified Paths:
--------------
trunk/pywikipedia/families/wikipedia_family.py
trunk/pywikipedia/family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2008-07-26 12:22:14 UTC (rev 5763)
+++ trunk/pywikipedia/families/wikipedia_family.py 2008-07-27 11:04:03 UTC (rev 5764)
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
import urllib
-import family, config
+import family, config, wikipedia
__version__ = '$Id$'
@@ -954,3 +954,9 @@
def shared_image_repository(self, code):
return ('commons', 'commons')
+
+ def post_get_convert(self, site, getText):
+ if site.lang == 'eo':
+ return wikipedia.decodeEsperantoX(getText)
+ else:
+ return getText
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2008-07-26 12:22:14 UTC (rev 5763)
+++ trunk/pywikipedia/family.py 2008-07-27 11:04:03 UTC (rev 5764)
@@ -3183,3 +3183,13 @@
def isPublic(self):
"""Does the wiki require logging in before viewing it ?"""
return True
+
+ def post_get_convert(self, site, getText):
+ """Does a conversion on the retrieved text from the wiki
+ e.g. Esperanto X-conversion """
+ return getText
+
+ def pre_put_convert(self, site, putText):
+ """Does a conversion on the text to insert on the wiki
+ e.g. Esperanto X-conversion """
+ return putText
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-07-26 12:22:14 UTC (rev 5763)
+++ trunk/pywikipedia/wikipedia.py 2008-07-27 11:04:03 UTC (rev 5764)
@@ -782,7 +782,13 @@
else:
self._isWatched = False
# Now process the contents of the textarea
- m = self.site().redirectRegex().match(text[i1:i2])
+ # Unescape HTML characters, strip whitespace and postconvert
+ pagetext = text[i1:i2]
+ pagetext = unescape(pagetext)
+ pagetext = pagetext.strip()
+ pagetext = self.site().post_get_convert(pagetext)
+
+ m = self.site().redirectRegex().match(pagetext)
if m:
# page text matches the redirect pattern
if self.section() and not "#" in m.group(1):
@@ -801,13 +807,8 @@
self._getexception
except AttributeError:
raise SectionError # Page has no section by this name
- # TODO: Docu and rewrite also this as above.
- x = text[i1:i2]
- x = unescape(x)
- while x and x[-1] in '\n ':
- x = x[:-1]
- return x
+ return pagetext
def getOldVersion(self, oldid, force=False, get_redirect=False,
throttle=True, sysop=False, change_edit_time=True):
@@ -1277,6 +1278,7 @@
import watchlist
watchArticle = watchlist.isWatched(self.title(), site = self.site())
newPage = not self.exists()
+ newtext = self.site().pre_put_convert(newtext)
return self._putPage(newtext, comment, watchArticle, minorEdit,
newPage, self.site().getToken(sysop = sysop), sysop = sysop)
@@ -3954,6 +3956,11 @@
linktrail: Return regex for trailing chars displayed as part of a link.
disambcategory: Category in which disambiguation pages are listed.
+ post_get_convert: Converts text data from the site immediately after get
+ e.g. EsperantoX -> unicode
+ pre_put_convert: Converts text data for the site immediately before put
+ e.g. unicode -> EsperantoX
+
Methods that yield Page objects derived from a wiki's Special: pages
(note, some methods yield other information in a tuple along with the
Pages; see method docs for details) --
@@ -5739,7 +5746,13 @@
def linktrail(self):
"""Return regex for trailing chars displayed as part of a link."""
return self.family.linktrail(self.lang)
+
+ def post_get_convert(self, getText):
+ return self.family.post_get_convert(self, getText)
+ def pre_put_convert(self, putText):
+ return self.family.pre_put_convert(self, putText)
+
def language(self):
"""Return Site's language code."""
return self.lang