Revision: 3953
Author: valhallasw
Date: 2007-08-02 15:18:17 +0000 (Thu, 02 Aug 2007)
Log Message:
-----------
New version, now tests for <div id="mw-subcategories"> and, if not found, for <div id="mw-pages">.
*** THIS MAY BREAK SUPPORT FOR OLDER VERSIONS OF MEDIAWIKI ***
Modified Paths:
--------------
trunk/pywikipedia/catlib.py
Modified: trunk/pywikipedia/catlib.py
===================================================================
--- trunk/pywikipedia/catlib.py 2007-08-02 15:07:16 UTC (rev 3952)
+++ trunk/pywikipedia/catlib.py 2007-08-02 15:18:17 UTC (rev 3953)
@@ -195,8 +195,14 @@
# save a copy of this text to find out self's supercategory.
self_txt = txt
# index where subcategory listing begins
- # this only works for the current version of the MonoBook skin
- ibegin = txt.index('Saved in parser cache')
+ try:
+ ibegin = txt.index('<div id="mw-subcategories">')
+ except ValueError:
+ try:
+ ibegin = txt.index('<div id="mw-pages">')
+ except ValueError:
+ wikipedia.output("\nCategory page detection is not bug free. Please report this error!")
+ raise
# index where article listing ends
try:
iend = txt.index('<div class="printfooter">')
Revision: 3951
Author: wikipedian
Date: 2007-08-02 14:56:28 +0000 (Thu, 02 Aug 2007)
Log Message:
-----------
heavily simplified Page.replaceImage()
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-08-02 12:11:29 UTC (rev 3950)
+++ trunk/pywikipedia/wikipedia.py 2007-08-02 14:56:28 UTC (rev 3951)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+## -*- coding: utf-8 -*-
"""
Library to get and put pages on a MediaWiki.
@@ -2003,62 +2003,32 @@
return ur'(?:[%s%s]%s)' % (s[0].upper(), s[0].lower(), s[1:])
def create_regex_i(s):
return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(), c.lower()) for c in s])
-
+
namespaces = ('Image', 'Media') + site.namespace(6, all = True) + site.namespace(-2, all = True)
+ # note that the colon is already included here
r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(create_regex_i, namespaces))
r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')
- def simple_replacer(match):
+ def simple_replacer(match, groupNumber = 1):
if replacement == None:
return u''
else:
groups = list(match.groups())
- groups[1] = replacement
+ groups[groupNumber] = replacement
return u''.join(groups)
-
- # Previously links in image descriptions will cause
- # unexpected behaviour: [[Image:image.jpg|thumb|[[link]] in description]]
- # will truncate at the first occurence of ]]. This cannot be
- # fixed using one regular expression.
- # This means that all ]] after the start of the image
- # must be located. If it then does not have an associated
- # [[, this one is the closure of the image.
-
- r_simple_s = u'(\[\[%s)%s' % (r_namespace, r_image)
- r_s = '\[\['
- r_e = '\]\]'
- # First determine where wikilinks start and end
- image_starts = [match.start() for match in re.finditer(r_simple_s, text)]
- link_starts = [match.start() for match in re.finditer(r_s, text)]
- link_ends = [match.end() for match in re.finditer(r_e, text)]
-
- r_simple = u'(\[\[%s)%s(.*)' % (r_namespace, r_image)
- replacements = []
- for image_start in image_starts:
- current_link_starts = [link_start for link_start in link_starts
- if link_start > image_start]
- current_link_ends = [link_end for link_end in link_ends
- if link_end > image_start]
- end = image_start
- if current_link_ends: end = current_link_ends[0]
-
- while current_link_starts and current_link_ends:
- start = current_link_starts.pop(0)
- end = current_link_ends.pop(0)
- if end <= start and end > image_start:
- # Found the end of the image
- break
-
- # Add the replacement to the todo list. Doing the
- # replacement right know would alter the indices.
- replacements.append((new_text[image_start:end],
- re.sub(r_simple, simple_replacer,
- new_text[image_start:end])))
-
- # Perform the replacements
- for old, new in replacements:
- if old: new_text = new_text.replace(old, new)
-
+
+ # The group params contains parameters such as thumb and 200px, as well
+ # as the image caption. The caption can contain wiki links, but each
+ # link has to be closed properly.
+ r_param = r'(?:\|(?:(?!\[\[).|\[\[.*?\]\])*?)'
+ rImage = re.compile(ur'(\[\[)(?P<namespace>%s)%s(?P<params>%s*?)(\]\])' % (r_namespace, r_image, r_param))
+
+ while True:
+ m = rImage.search(new_text)
+ if not m:
+ break
+ new_text = new_text[:m.start()] + simple_replacer(m, 2) + new_text[m.end():]
+
# Remove the image from galleries
r_galleries = ur'(?s)(\<%s\>)(?s)(.*?)(\<\/%s\>)' % (create_regex_i('gallery'),
create_regex_i('gallery'))
Revision: 3948
Author: valhallasw
Date: 2007-08-02 10:25:38 +0000 (Thu, 02 Aug 2007)
Log Message:
-----------
bugfix: category.articles(startFrom) now passes startFrom to the correct parameter of _getContentsAndSupercats
Modified Paths:
--------------
trunk/pywikipedia/catlib.py
Modified: trunk/pywikipedia/catlib.py
===================================================================
--- trunk/pywikipedia/catlib.py 2007-08-02 01:15:39 UTC (rev 3947)
+++ trunk/pywikipedia/catlib.py 2007-08-02 10:25:38 UTC (rev 3948)
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Library to work with category pages on Wikipedia
@@ -295,7 +295,7 @@
Results are unsorted (except as sorted by MediaWiki), and need not
be unique.
"""
- for tag, page in self._getContentsAndSupercats(recurse, startFrom):
+ for tag, page in self._getContentsAndSupercats(recurse, startFrom=startFrom):
if tag == ARTICLE:
yield page
Revision: 3945
Author: valhallasw
Date: 2007-08-02 00:41:50 +0000 (Thu, 02 Aug 2007)
Log Message:
-----------
Updated async put: added estimate of remaining time when quitting; KeyboardInterrupts are caught by joining the thread only one second at a time (instead of indefinitely)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-08-01 23:41:57 UTC (rev 3944)
+++ trunk/pywikipedia/wikipedia.py 2007-08-02 00:41:50 UTC (rev 3945)
@@ -4539,7 +4539,17 @@
Returns a one-letter string in lowercase.
"""
- return ui.inputChoice(question, answers, hotkeys, default).lower()
+ input_lock.acquire()
+ try:
+ data = ui.inputChoice(question, answers, hotkeys, default).lower()
+ finally:
+ for output in output_cache:
+ ui.output(*output[0], **output[1])
+ input_lock.release()
+ for output in output_cache: #for output added between the start of the for loop and the lock release
+ ui.output(*output[0], **output[1])
+
+ return data
def showHelp(moduleName = None):
# the parameter moduleName is deprecated and should be left out.
@@ -4581,7 +4591,6 @@
output(u'Sorry, no help available for %s' % moduleName)
page_put_queue = Queue.Queue()
-
def async_put():
'''
Daemon that takes pages from the queue and tries to save them on the wiki.
@@ -4622,8 +4631,22 @@
'''Wait for the page-putter to flush its queue;
called automatically upon exiting from Python.
'''
+ if page_put_queue.qsize() > 0:
+ import datetime
+ remaining = datetime.timedelta(seconds=(page_put_queue.qsize()+1) * config.put_throttle)
+ output('Waiting for %i pages to be put. Estimated time remaining: %s' % (page_put_queue.qsize()+1, remaining))
+
page_put_queue.put((None, None, None, None, None))
- _putthread.join()
+
+ while(_putthread.isAlive()):
+ try:
+ _putthread.join(1)
+ except KeyboardInterrupt:
+ answer = inputChoice(u'There are %i pages remaining in the queue. Estimated time remaining: %s\nReally exit?'
+ % (page_put_queue.qsize(), datetime.timedelta(seconds=(page_put_queue.qsize()) * config.put_throttle)),
+ ['yes', 'no'], ['y', 'N'], 'N')
+ if answer in ['y', 'Y']:
+ break
import atexit
atexit.register(_flush)
---------------------------- Original Message ----------------------------
Subject: Re: [Pywikipedia-l] Subversion
From: "Merlijn van Deen" <valhallasw(a)arctus.nl>
Date: Thu, August 2, 2007 1:57 am
To: "Andre Engels" <andreengels(a)gmail.com>
--------------------------------------------------------------------------
linux:
make sure your private key is in .ssh/id_rsa
svn co svn+ssh://svn.mediawiki.org/svnroot/pywikipedia/trunk/pywikipedia
pywikipedia
windows:
download TortoiseSVN and putty.
Run putty, create a new session with these settings:
host name: svn.mediawiki.org
session name: svn.mediawiki.org
connection/data/username: a_engels
connection/data/SSH/auth/private key: (select your private key)
save this session
Then use tortoiseSVN to checkout
svn+ssh://svn.mediawiki.org/svnroot/pywikipedia/trunk/pywikipedia
--valhallasw
Note to self: learn the 'reply to all' button?