Revision: 5233
Author: russblau
Date: 2008-04-18 18:53:17 +0000 (Fri, 18 Apr 2008)
Log Message:
-----------
Use site.encoding() instead of hard-coded UTF-8, and try
to encode before calling api.py whenever possible. Also add ThreadedGenerator class for later use.
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/site.py
Added Paths:
-----------
branches/rewrite/pywikibot/tools.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-18 14:47:28 UTC (rev 5232)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-18 18:53:17 UTC (rev 5233)
@@ -62,18 +62,6 @@
from page import Page, ImagePage, Category
-##def Page(*args, **kwargs):
-## from page import Page as _Page
-## return _Page(*args, **kwargs)
-##
-##def ImagePage(*args, **kwargs):
-## from page import ImagePage as _ImagePage
-## return _ImagePage(*args, **kwargs)
-##
-##def Category(*args, **kwargs):
-## from page import Category as _Category
-## return _Category(*args, **kwargs)
-
# DEBUG
def output(text):
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-18 14:47:28 UTC (rev 5232)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-18 18:53:17 UTC (rev 5233)
@@ -142,8 +142,10 @@
raise TypeError("Query format '%s' cannot be parsed."
% self.params['format'])
uri = self.site.scriptpath() + "/api.php"
- params = urllib.urlencode([(k, v.encode("utf8"))
- for (k, v) in self.params.items()])
+ for key in self.params:
+ if isinstance(self.params[key], unicode):
+ self.params[key] = self.params[key].encode(self.site.encoding())
+ params = urllib.urlencode(self.params)
while True:
# TODO catch http errors
try:
@@ -330,6 +332,14 @@
"""
p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
+ if "pageid" in pagedata:
+ self._pageid = int(pagedata['pageid'])
+ elif "missing" in pagedata:
+ self._pageid = 0 # Non-existent page
+ else:
+ raise AssertionError(
+ "Page %s has neither 'pageid' nor 'missing' attribute"
+ % pagedata['title'])
if 'lastrevid' in pagedata:
p._revid = pagedata['lastrevid']
if 'touched' in pagedata:
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-18 14:47:28 UTC (rev 5232)
+++ branches/rewrite/pywikibot/site.py 2008-04-18 18:53:17 UTC (rev 5233)
@@ -484,6 +484,17 @@
return self._namespaces[num]
return self._namespaces[num][0]
+ def page_exists(self, page):
+ """Return True if and only if page is an existing page on site."""
+ if not hasattr(page, "_pageid"):
+ query = api.PropertyGenerator(
+ "info", inprop="protection|talkid|subjectid",
+ titles=page.title(withSection=False
+ ).encode(self.encoding()))
+ for item in query():
+ pass #FIXME
+ return page._pageid > 0
+
# following group of methods map more-or-less directly to API queries
def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
@@ -500,7 +511,7 @@
in this list.
"""
- bltitle = page.title(withSection=False)
+ bltitle = page.title(withSection=False).encode(self.encoding())
blgen = api.PageGenerator("backlinks", gbltitle=bltitle)
if namespaces is not None:
blgen.request["gblnamespace"] = u"|".join(unicode(ns)
@@ -523,7 +534,7 @@
in this list.
"""
- eititle = page.title(withSection=False)
+ eititle = page.title(withSection=False).encode(self.encoding())
eigen = api.PageGenerator("embeddedin", geititle=eititle)
if namespaces is not None:
eigen.request["geinamespace"] = u"|".join(unicode(ns)
@@ -548,8 +559,12 @@
def getlinks(self, page, namespaces=None):
"""Iterate internal wikilinks contained (or transcluded) on page."""
- pltitle = page.title(withSection=False)
- plgen = api.PageGenerator("links", titles=pltitle)
+ plgen = api.PageGenerator("links")
+ if hasattr(page, "_pageid"):
+ plgen.request['pageids'] = str(page._pageid)
+ else:
+ pltitle = page.title(withSection=False).encode(self.encoding())
+ plgen.request['titles'] = pltitle
if namespaces is not None:
plgen.request["gplnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
@@ -557,20 +572,24 @@
def getcategories(self, page, withSortKey=False):
"""Iterate categories to which page belongs."""
- # Sortkey doesn't seem to work with generator; FIXME
- cltitle = page.title(withSection=False)
- clgen = api.CategoryPageGenerator("categories", titles=cltitle)
+ # Sortkey doesn't work with generator; FIXME or deprecate
+ clgen = api.CategoryPageGenerator("categories")
+ if hasattr(page, "_pageid"):
+ clgen.request['pageids'] = str(page._pageid)
+ else:
+ cltitle = page.title(withSection=False).encode(self.encoding())
+ clgen.request['titles'] = cltitle
return clgen
def getimages(self, page):
"""Iterate images used (not just linked) on the page."""
- imtitle = page.title(withSection=False)
+ imtitle = page.title(withSection=False).encode(self.encoding())
imgen = api.ImagePageGenerator("images", titles=imtitle)
return imgen
def gettemplates(self, page, namespaces=None):
"""Iterate templates transcluded (not just linked) on the page."""
- tltitle = page.title(withSection=False)
+ tltitle = page.title(withSection=False).encode(self.encoding())
tlgen = api.PageGenerator("templates", titles=tltitle)
if namespaces is not None:
tlgen.request["gtlnamespace"] = u"|".join(unicode(ns)
@@ -593,7 +612,7 @@
raise ValueError(
"Cannot get category members of non-Category page '%s'"
% category.title())
- cmtitle = category.title(withSection=False)
+ cmtitle = category.title(withSection=False).encode(self.encoding())
cmgen = api.PageGenerator(u"categorymembers", gcmtitle=cmtitle,
gcmprop="ids|title|sortkey")
if namespaces is not None:
@@ -626,8 +645,8 @@
if page is None and revids is None:
raise ValueError(
"getrevisions needs either page or revids argument.")
- if page is not None:
- rvtitle = page.title(withSection=False)
+ if revids is None:
+ rvtitle = page.title(withSection=False).encode(self.encoding())
rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
else:
ids = u"|".join(unicode(r) for r in revids)
Added: branches/rewrite/pywikibot/tools.py
===================================================================
--- branches/rewrite/pywikibot/tools.py (rev 0)
+++ branches/rewrite/pywikibot/tools.py 2008-04-18 18:53:17 UTC (rev 5233)
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+"""Miscellaneous helper functions (not wiki-dependent)"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id: $'
+
+
+import threading
+import time
+import Queue
+
+
+class ThreadedGenerator(threading.Thread):
+ """Look-ahead generator class.
+
+ Runs a generator in a separate thread and queues the results; can
+ be called like a regular generator.
+
+ Subclasses should override self.generator, I{not} self.run
+
+ Important: the generator thread will stop itself if the generator's
+ internal queue is exhausted; but, if the calling program does not use
+ all the generated values, it must call the generator's stop() method to
+ stop the background thread. Example usage:
+
+ >>> gen = ThreadedGenerator(target=foo)
+ >>> try:
+ ... for data in gen:
+ ... do_work(data)
+ ... finally:
+ ... gen.stop()
+
+ """
+
+ def __init__(self, group=None, target=None, name="GeneratorThread",
+ args=(), kwargs=None, qsize=65536):
+ """Constructor. Takes same keyword arguments as threading.Thread.
+
+ target must be a generator function (or other callable that returns
+ an iterable object).
+
+ @param qsize: The size of the lookahead queue. The larger the qsize,
+ the more values will be computed in advance of use (which can eat
+ up memory and processor time).
+ @type qsize: int
+
+ """
+ if kwargs is None:
+ kwargs = {}
+ if target:
+ self.generator = target
+ if not hasattr(self, "generator"):
+ raise RuntimeError("No generator for ThreadedGenerator to run.")
+ self.args, self.kwargs = args, kwargs
+ threading.Thread.__init__(self, group=group, name=name)
+ self.queue = Queue.Queue(qsize)
+ self.finished = threading.Event()
+
+ def __iter__(self):
+ """Iterate results from the queue."""
+ if not self.isAlive() and not self.finished.isSet():
+ self.start()
+ # if there is an item in the queue, yield it, otherwise wait
+ while not self.finished.isSet():
+ try:
+ yield self.queue.get(True, 0.25)
+ except Queue.Empty:
+ pass
+ except KeyboardInterrupt:
+ self.stop()
+
+ def stop(self):
+ """Stop the background thread."""
+ self.finished.set()
+
+ def run(self):
+ """Run the generator and store the results on the queue."""
+ self.__gen = self.generator(*self.args, **self.kwargs)
+ for result in self.__gen:
+ while True:
+ if self.finished.isSet():
+ return
+ try:
+ self.queue.put_nowait(result)
+ except Queue.Full:
+ time.sleep(0.25)
+ continue
+ break
+ # wait for queue to be emptied, then kill the thread
+ while not self.finished.isSet() and not self.queue.empty():
+ time.sleep(0.25)
+ self.stop()
+
+