http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9246
Revision: 9246
Author: xqt
Date: 2011-05-15 15:42:46 +0000 (Sun, 15 May 2011)
Log Message:
-----------
replacements to remove the "\x1d" (group separator) and "\x0c" (form feed) characters requested by bug #3126855
Modified Paths:
--------------
trunk/pywikipedia/djvutext.py
Modified: trunk/pywikipedia/djvutext.py
===================================================================
--- trunk/pywikipedia/djvutext.py 2011-05-13 04:04:55 UTC (rev 9245)
+++ trunk/pywikipedia/djvutext.py 2011-05-15 15:42:46 UTC (rev 9246)
@@ -138,7 +138,9 @@
# convert to wikisyntax
# this adds a second line feed, which makes a new paragraph
- text = text.replace('', "\n")
+ text = text.replace('', "\n") # US /x1F
+ text = text.replace('', "\n") # GS /x1D
+ text = text.replace('', "\n") # FF /x0C
# only save if something was changed
# automatically ask if overwriting an existing page
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9244
Revision: 9244
Author: xqt
Date: 2011-05-12 14:56:49 +0000 (Thu, 12 May 2011)
Log Message:
-----------
move ThreadedGenerator from pagegenerators to pywikibot/tools from rewrite r9067
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Added Paths:
-----------
trunk/pywikipedia/pywikibot/tools.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2011-05-12 14:18:20 UTC (rev 9243)
+++ trunk/pywikipedia/pagegenerators.py 2011-05-12 14:56:49 UTC (rev 9244)
@@ -24,10 +24,8 @@
import config
import traceback
-import Queue
import re
import sys
-import threading
import codecs
import urllib, urllib2, time
@@ -208,91 +206,6 @@
class GeneratorExit(Exception): pass
-class ThreadedGenerator(threading.Thread):
- """Look-ahead generator class.
-
- Runs a generator in a separate thread and queues the results; can
- be called like a regular generator.
-
- Subclasses should override self.generator, _not_ self.run
-
- Important: the generator thread will stop itself if the generator's
- internal queue is exhausted; but, if the calling program does not use
- all the generated values, it must call the generator's stop() method to
- stop the background thread. Example usage:
-
- >>> gen = ThreadedGenerator(target=foo)
- >>> try:
- ... for data in gen:
- ... do_work(data)
- ... finally:
- ... gen.stop()
-
- """
-
- def __init__(self, group=None, target=None, name="GeneratorThread",
- args=(), kwargs=None, qsize=65536):
- """Constructor. Takes same keyword arguments as threading.Thread.
-
- target must be a generator function (or other callable that returns
- an iterable object).
-
- @param qsize: The size of the lookahead queue. The larger the qsize,
- the more values will be computed in advance of use (which can eat
- up memory and processor time).
- @type qsize: int
-
- """
- if kwargs is None:
- kwargs = {}
- if target:
- self.generator = target
- if not hasattr(self, "generator"):
- raise RuntimeError("No generator for ThreadedGenerator to run.")
- self.args, self.kwargs = args, kwargs
- threading.Thread.__init__(self, group=group, name=name)
- self.queue = Queue.Queue(qsize)
- self.finished = threading.Event()
-
- def __iter__(self):
- """Iterate results from the queue."""
- if not self.isAlive() and not self.finished.isSet():
- self.start()
- # if there is an item in the queue, yield it, otherwise wait
- while not self.finished.isSet():
- try:
- yield self.queue.get(True, 0.25)
- except Queue.Empty:
- pass
- except KeyboardInterrupt:
- self.stop()
-
- def stop(self):
- """Stop the background thread."""
-## if not self.finished.isSet():
-## pywikibot.output("DEBUG: signalling %s to stop." % self)
- self.finished.set()
-
- def run(self):
- """Run the generator and store the results on the queue."""
- self.__gen = self.generator(*self.args, **self.kwargs)
- for result in self.__gen:
- while True:
- if self.finished.isSet():
-## pywikibot.output("DEBUG: %s received stop signal." % self)
- return
- try:
- self.queue.put_nowait(result)
- except Queue.Full:
- time.sleep(0.25)
- continue
- break
- # wait for queue to be emptied, then kill the thread
- while not self.finished.isSet() and not self.queue.empty():
- time.sleep(0.25)
- self.stop()
-## pywikibot.output("DEBUG: %s stopped because generator exhausted." % self)
-
class GeneratorFactory(object):
"""Process command line arguments and return appropriate page generator.
This factory is responsible for processing command line arguments
Copied: trunk/pywikipedia/pywikibot/tools.py (from rev 9242, branches/rewrite/pywikibot/tools.py)
===================================================================
--- trunk/pywikipedia/pywikibot/tools.py (rev 0)
+++ trunk/pywikipedia/pywikibot/tools.py 2011-05-12 14:56:49 UTC (rev 9244)
@@ -0,0 +1,174 @@
+# -*- coding: utf-8 -*-
+"""Miscellaneous helper functions (not wiki-dependent)"""
+#
+# (C) Pywikipedia bot team, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+
+import sys
+import threading
+import time
+import Queue
+
+
+class ThreadedGenerator(threading.Thread):
+ """Look-ahead generator class.
+
+ Runs a generator in a separate thread and queues the results; can
+ be called like a regular generator.
+
+ Subclasses should override self.generator, I{not} self.run
+
+ Important: the generator thread will stop itself if the generator's
+ internal queue is exhausted; but, if the calling program does not use
+ all the generated values, it must call the generator's stop() method to
+ stop the background thread. Example usage:
+
+ >>> gen = ThreadedGenerator(target=xrange, args=(20,))
+ >>> try:
+ ... for data in gen:
+ ... print data,
+ ... finally:
+ ... gen.stop()
+ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+
+ """
+
+ def __init__(self, group=None, target=None, name="GeneratorThread",
+ args=(), kwargs=None, qsize=65536):
+ """Constructor. Takes same keyword arguments as threading.Thread.
+
+ target must be a generator function (or other callable that returns
+ an iterable object).
+
+ @param qsize: The size of the lookahead queue. The larger the qsize,
+ the more values will be computed in advance of use (which can eat
+ up memory and processor time).
+ @type qsize: int
+
+ """
+ if kwargs is None:
+ kwargs = {}
+ if target:
+ self.generator = target
+ if not hasattr(self, "generator"):
+ raise RuntimeError("No generator for ThreadedGenerator to run.")
+ self.args, self.kwargs = args, kwargs
+ threading.Thread.__init__(self, group=group, name=name)
+ self.queue = Queue.Queue(qsize)
+ self.finished = threading.Event()
+
+ def __iter__(self):
+ """Iterate results from the queue."""
+ if not self.isAlive() and not self.finished.isSet():
+ self.start()
+ # if there is an item in the queue, yield it, otherwise wait
+ while not self.finished.isSet():
+ try:
+ yield self.queue.get(True, 0.25)
+ except Queue.Empty:
+ pass
+ except KeyboardInterrupt:
+ self.stop()
+
+ def stop(self):
+ """Stop the background thread."""
+ self.finished.set()
+
+ def run(self):
+ """Run the generator and store the results on the queue."""
+ self.__gen = self.generator(*self.args, **self.kwargs)
+ for result in self.__gen:
+ while True:
+ if self.finished.isSet():
+ return
+ try:
+ self.queue.put_nowait(result)
+ except Queue.Full:
+ time.sleep(0.25)
+ continue
+ break
+ # wait for queue to be emptied, then kill the thread
+ while not self.finished.isSet() and not self.queue.empty():
+ time.sleep(0.25)
+ self.stop()
+
+
+def itergroup(iterable, size):
+ """Make an iterator that returns lists of (up to) size items from iterable.
+
+ Example:
+
+ >>> i = itergroup(xrange(25), 10)
+ >>> print i.next()
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ >>> print i.next()
+ [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ >>> print i.next()
+ [20, 21, 22, 23, 24]
+ >>> print i.next()
+ Traceback (most recent call last):
+ ...
+ StopIteration
+
+ """
+ group = []
+ for item in iterable:
+ group.append(item)
+ if len(group) == size:
+ yield group
+ group = []
+ if group:
+ yield group
+
+
+class ThreadList(list):
+ """A simple threadpool class to limit the number of simultaneous threads.
+
+ Any threading.Thread object can be added to the pool using the append()
+ method. If the maximum number of simultaneous threads has not been reached,
+ the Thread object will be started immediately; if not, the append() call
+ will block until the thread is able to start.
+
+ >>> pool = ThreadList(limit=10)
+ >>> def work():
+ ... time.sleep(1)
+ ...
+ >>> for x in xrange(20):
+ ... pool.append(threading.Thread(target=work))
+ ...
+
+ """
+ def __init__(self, limit=sys.maxint, *args):
+ self.limit = limit
+ list.__init__(self, *args)
+ for item in list(self):
+ if not isinstance(threading.Thread, item):
+ raise TypeError("Cannot add '%s' to ThreadList" % type(item))
+
+ def active_count(self):
+ """Return the number of alive threads, and delete all non-alive ones."""
+ count = 0
+ for item in list(self):
+ if item.isAlive():
+ count += 1
+ else:
+ self.remove(item)
+ return count
+
+ def append(self, thd):
+ if not isinstance(thd, threading.Thread):
+ raise TypeError("Cannot append '%s' to ThreadList" % type(thd))
+ while self.active_count() >= self.limit:
+ time.sleep(2)
+ list.append(self, thd)
+ thd.start()
+
+
+if __name__ == "__main__":
+ def _test():
+ import doctest
+ doctest.testmod()
+ _test()