Revision: 6445
Author: russblau
Date: 2009-02-26 21:30:24 +0000 (Thu, 26 Feb 2009)
Log Message:
-----------
Implement the (relatively) new "prop=categoryinfo" query
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2009-02-26 16:03:34 UTC (rev 6444)
+++ branches/rewrite/pywikibot/data/api.py 2009-02-26 21:30:24 UTC (rev 6445)
@@ -493,9 +493,9 @@
QueryGenerator.__init__(self, generator=generator, **kwargs)
# get some basic information about every page generated
if 'prop' in self.request:
- self.request['prop'] += "|info|imageinfo"
+ self.request['prop'] += "|info|imageinfo|categoryinfo"
else:
- self.request['prop'] = 'info|imageinfo'
+ self.request['prop'] = 'info|imageinfo|categoryinfo'
if "inprop" in self.request:
if "protection" not in self.request["inprop"]:
self.request["inprop"] += "|protection"
@@ -671,6 +671,8 @@
page._revid = pagedict['lastrevid']
if page._revid in page._revisions:
page._text = page._revisions[page._revid].text
+ if "categoryinfo" in pagedict:
+ page._catinfo = pagedict["categoryinfo"]
if __name__ == "__main__":
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2009-02-26 16:03:34 UTC (rev 6444)
+++ branches/rewrite/pywikibot/page.py 2009-02-26 21:30:24 UTC (rev 6445)
@@ -1169,6 +1169,20 @@
pywikibot.output(u"Saving page %s failed: %s"
% (self.title(asLink=True), error.message))
+ @property
+ def categoryinfo(self):
+ """If supported, return a dict containing category content values:
+
+ Numbers of pages, subcategories, files, and total contents.
+
+ """
+ if not self.isCategory():
+ return None # should this raise an exception??
+ try:
+ return self.site().categoryinfo(self)
+ except NotImplementedError:
+ return None
+
######## DEPRECATED METHODS ########
def encoding(self):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2009-02-26 16:03:34 UTC (rev 6444)
+++ branches/rewrite/pywikibot/site.py 2009-02-26 21:30:24 UTC (rev 6445)
@@ -1384,6 +1384,25 @@
for linkdata in pageitem['extlinks']:
yield linkdata['*']
+ # TODO: implement a method to retrieve categoryinfo
+ def getcategoryinfo(self, category):
+ """Retrieve data on contents of category."""
+ cititle = category.title(withSection=False)
+ ciquery = api.PropertyGenerator("categoryinfo",
+ titles=cititle.encode(self.encoding()),
+ site=self)
+ for pageitem in ciquery:
+ if pageitem['title'] != cititle:
+ raise Error(
+ u"categoryinfo: Query on %s returned data on '%s'"
+ % (category, pageitem['title']))
+ api.update_page(category, pageitem)
+
+ def categoryinfo(self, category):
+ if not hasattr(category, "_catinfo"):
+ self.getcategoryinfo(category)
+ return category._catinfo
+
@deprecate_arg("throttle", None)
@deprecate_arg("includeredirects", "filterredir")
def allpages(self, start="!", prefix="", namespace=0, filterredir=None,
@@ -2175,7 +2194,7 @@
self.unlock_page(page)
if "nochange" in result["edit"]:
# null edit, page not changed
- # TODO: do we want to notify the user of this?
+ #TODO: do we want to notify the user of this?
return True
page._revid = result["edit"]["newrevid"]
# see http://www.mediawiki.org/wiki/API:Wikimania_2006_API_discussion#Notes
@@ -2302,7 +2321,7 @@
if "move" not in result:
pywikibot.output(u"movepage: %s" % result, level=pywikibot.ERROR)
raise Error("movepage: unexpected response")
- # TODO: Check for talkmove-error messages
+ #TODO: Check for talkmove-error messages
if "talkmove-error-code" in result["move"]:
pywikibot.output(u"movepage: Talk page %s not moved"
% (page.toggleTalkPage().title(asLink=True)),
@@ -2417,9 +2436,9 @@
finally:
self.unlock_page(page)
- # TODO: implement undelete
+ #TODO: implement undelete
- # TODO: implement patrol
+ #TODO: implement patrol
def linksearch(self, siteurl, limit=None):
"""Backwards-compatible interface to exturlusage()"""
@@ -2452,7 +2471,7 @@
#### METHODS NOT IMPLEMENTED YET ####
class NotImplementedYet:
- # TODO: is this needed any more? can it be obtained from the http module?
+ #TODO: is this needed any more? can it be obtained from the http module?
def cookies(self, sysop = False):
"""Return a string containing the user's current cookies."""
self._loadCookies(sysop = sysop)
@@ -2494,7 +2513,7 @@
f.close()
# THESE ARE FUNCTIONS NOT YET IMPLEMENTED IN THE API
- # TODO: avoid code duplication for the following methods
+ #TODO: avoid code duplication for the following methods
def newpages(self, number = 10, get_redirect = False, repeat = False):
"""Yield new articles (as Page objects) from Special:Newpages.
Revision: 6444
Author: russblau
Date: 2009-02-26 16:03:34 +0000 (Thu, 26 Feb 2009)
Log Message:
-----------
Trying again to fix output issues
Modified Paths:
--------------
branches/rewrite/pywikibot/bot.py
branches/rewrite/pywikibot/pagegenerators.py
branches/rewrite/pywikibot/userinterfaces/terminal_interface.py
Modified: branches/rewrite/pywikibot/bot.py
===================================================================
--- branches/rewrite/pywikibot/bot.py 2009-02-26 00:01:56 UTC (rev 6443)
+++ branches/rewrite/pywikibot/bot.py 2009-02-26 16:03:34 UTC (rev 6444)
@@ -60,8 +60,12 @@
class RotatingFileHandler(logging.handlers.RotatingFileHandler):
"""Strip trailing newlines before outputting text to file"""
def emit(self, record):
- record.msg = record.msg.rstrip("\r\n")
- logging.handlers.RotatingFileHandler.emit(self, record)
+ newrecord = logging.LogRecord(record.name, record.levelno,
+ record.pathname, record.lineno,
+ record.msg, record.args,
+ record.exc_info, record.funcName)
+ newrecord.msg = newrecord.msg.rstrip("\r\n")
+ logging.handlers.RotatingFileHandler.emit(self, newrecord)
def output(text, decoder=None, newline=True, toStdout=False, level=INFO):
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2009-02-26 00:01:56 UTC (rev 6443)
+++ branches/rewrite/pywikibot/pagegenerators.py 2009-02-26 16:03:34 UTC (rev 6444)
@@ -955,7 +955,7 @@
gen = genFactory.getCombinedGenerator()
if gen:
for page in gen:
- pywikibot.output(page.title(), toStdout = True)
+ pywikibot.output(page.title(), level=pywikibot.STDOUT)
else:
pywikibot.showHelp()
except Exception:
Modified: branches/rewrite/pywikibot/userinterfaces/terminal_interface.py
===================================================================
--- branches/rewrite/pywikibot/userinterfaces/terminal_interface.py 2009-02-26 00:01:56 UTC (rev 6443)
+++ branches/rewrite/pywikibot/userinterfaces/terminal_interface.py 2009-02-26 16:03:34 UTC (rev 6444)
@@ -125,48 +125,6 @@
terminal, it will be replaced with a question mark or by a
transliteration.
"""
- if config.transliterate:
- # Encode unicode string in the encoding used by the user's console,
- # and decode it back to unicode. Then we can see which characters
- # can't be represented in the console encoding.
- codecedText = text.encode(config.console_encoding, 'replace'
- ).decode(config.console_encoding)
- transliteratedText = list(codecedText)
- # Note: A transliteration replacement might be longer than the
- # original character; e.g., ч is transliterated to ch.
- # the resulting list will have as many elements as there are
- # characters in the original text, but some list elements may
- # contain multiple characters
- prev = "-"
- prevchar = -1
- cursor = 0
- while cursor < len(codecedText):
- char = codecedText.find(u"?", cursor)
- if char == -1:
- break
- cursor = char + 1
- # work on characters that couldn't be encoded, but not on
- # original question marks.
- if text[char] != u"?":
- if char > 0 and prevchar != char - 1:
- prev = transliteratedText[char-1]
- try:
- transliterated = transliteration.trans(
- text[char], default='?',
- prev=prev, next=text[char+1])
- except IndexError:
- transliterated = transliteration.trans(
- text[char], default='?',
- prev=prev, next=' ')
- # transliteration was successful. The replacement
- # could consist of multiple letters.
- # mark the transliterated letters in yellow.
- transliteratedText[char] = u'\03{lightyellow}%s\03{default}' \
- % transliterated
- # save the last transliterated character
- prev = transliterated[-1:]
- prevchar = char
- text = u"".join(transliteratedText)
self.writelock.acquire()
try:
logging.log(level, text)
@@ -377,11 +335,53 @@
self.emit_raw(record, line)
def emit(self, record):
- msg = self.format(record)
+ text = self.format(record)
+ if config.transliterate:
+ # Encode unicode string in the encoding used by the user's console,
+ # and decode it back to unicode. Then we can see which characters
+ # can't be represented in the console encoding.
+ codecedText = text.encode(config.console_encoding, 'replace'
+ ).decode(config.console_encoding)
+ transliteratedText = list(codecedText)
+ # Note: A transliteration replacement might be longer than the
+ # original character; e.g., ч is transliterated to ch.
+ # the resulting list will have as many elements as there are
+ # characters in the original text, but some list elements may
+ # contain multiple characters
+ prev = "-"
+ prevchar = -1
+ cursor = 0
+ while cursor < len(codecedText):
+ char = codecedText.find(u"?", cursor)
+ if char == -1:
+ break
+ cursor = char + 1
+ # work on characters that couldn't be encoded, but not on
+ # original question marks.
+ if text[char] != u"?":
+ if char > 0 and prevchar != char - 1:
+ prev = transliteratedText[char-1]
+ try:
+ transliterated = transliteration.trans(
+ text[char], default='?',
+ prev=prev, next=text[char+1])
+ except IndexError:
+ transliterated = transliteration.trans(
+ text[char], default='?',
+ prev=prev, next=' ')
+ # transliteration was successful. The replacement
+ # could consist of multiple letters.
+ # mark the transliterated letters in yellow.
+ transliteratedText[char] = u'\03{lightyellow}%s\03{default}' \
+ % transliterated
+ # save the last transliterated character
+ prev = transliterated[-1:]
+ prevchar = char
+ text = u"".join(transliteratedText)
if config.colorized_output:
if sys.platform == 'win32':
- self.emitColorizedInWindows(record, msg)
+ self.emitColorizedInWindows(record, text)
else:
- self.emitColorizedInUnix(record, msg)
+ self.emitColorizedInUnix(record, text)
else:
- self.emit_raw(record, msg)
+ self.emit_raw(record, text)
Bugs item #2618858, was opened at 2009-02-20 07:39
Message generated for change (Comment added) made by nicdumz
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2618858&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: rewrite
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: NicDumZ — Nicolas Dumazet (nicdumz)
Assigned to: Russell Blau (russblau)
Summary: logfiles format
Initial Comment:
output created in pagegenerators log after running "python pywikibot/pagegenerators.py -cat:1918 -debug" contains control characters :
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Grippe de 1918
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT 1918 en bande dessinée
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Armistice de Moudros
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Déclaration d'indépendance de la Lituanie
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Guerre d'indépendance lettone
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT 1918
[0m
and so on...
Also, I find it strange to have terminal_interface logging STDOUT, when in fact pagegenerators is calling output() =)
----------------------------------------------------------------------
>Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2009-02-26 11:27
Message:
oh, are you developing under XP ?
For me (and I suppose, Unix users...) r6441 didn't change a thing:
* python pywikibot/pagegenerators.py -cat:1918 -debug -> no new
lines
* python pywikibot/pagegenerators.py -cat:1918 -> output
is okay
----------------------------------------------------------------------
Comment By: Russell Blau (russblau)
Date: 2009-02-25 22:37
Message:
> Well, the logs are okay since that rev, but the terminal output is
broken
> now :P
OK, this was really weird. At least on Windows XP, a line written to
sys.stdout that ends with "\n" instead of "\r\n" doesn't print a newline on
the terminal (in cmd.exe), but the same line written to sys.stderr *does*
print a newline. Hopefully fixed in r6441, but needs to be tested on other
platforms.
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2009-02-25 05:06
Message:
To clarify my unclear "I find it strange to have terminal_interface logging
STDOUT, when in
fact pagegenerators is calling output()":
Is it possible to have, instead of :
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT
2009-02-20 15:28:59 pagegenerators.py, xxx: STDOUT ? I believe that the
file and line calling output() would be more relevant here than the line
calling logging.log !
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2009-02-25 05:01
Message:
Well, the logs are okay since that rev, but the terminal output is broken
now :P
$python pywikibot/pagegenerators.py -cat:1918 -debug
Found 1 wikipedia:fr processes running, including this one.
Password for user DumZiBoT on wikipedia:fr:
Logging in to wikipedia:fr as DumZiBoT
Should be logged in now
Grippe de 19181918 en bande dessinéeArmistice de MoudrosDéclaration
d'indépendance de la LituanieGuerre d'indépendance lettone19181918 en
Suisse1918 dans les parcs d'attractions1918 en littérature1918 en musique
classiqueInsurrection de Grande-Pologne (1918-1919)Guerres d'indépendance
lituaniennesRéforme d'OctobreGrève générale de 1918 en Suisse1918 dans
les chemins de fer1918 (Chronologie de Dada et du surréalisme)1918 au
théâtreQuébec, Printemps 19181918 en architectureRévolution
allemande1918 en France1918 en science1918 en aéronautiqueIl tritticoSuor
AngelicaTraité de Brest-LitovskAmerican Expeditionary Force1918 en
musiqueTraité de Bucarest (1918)Guerre polono-ukrainienne1918 au Québec$
It seems that the line endings are stripped for terminal output :)
----------------------------------------------------------------------
Comment By: Russell Blau (russblau)
Date: 2009-02-23 21:32
Message:
This should be fixed after r6431; please advise if the problem is resolved
on your machine.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2618858&group_…
Revision: 6443
Author: kim
Date: 2009-02-26 00:01:56 +0000 (Thu, 26 Feb 2009)
Log Message:
-----------
Add configuration directive retry_on_fail (default True, which gives the original
behaviour).
If False, pywikipedia will not retry connecting if the first attempt fails.
(while testing, if the wiki to be tested was broken, the test script would hang forever)
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2009-02-25 23:37:32 UTC (rev 6442)
+++ trunk/pywikipedia/config.py 2009-02-26 00:01:56 UTC (rev 6443)
@@ -401,6 +401,10 @@
# Use the experimental disk cache to prevent huge memory usage
use_diskcache = False
+# Retry loading a page on failure (back off 1 minute, 2 minutes, 4 minutes
+# up to 30 minutes)
+retry_on_fail = True
+
# End of configuration section
# ============================
# System-level and User-level changes.
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-02-25 23:37:32 UTC (rev 6442)
+++ trunk/pywikipedia/wikipedia.py 2009-02-26 00:01:56 UTC (rev 6443)
@@ -759,7 +759,7 @@
elif text.find(self.site().mediawiki_message('badaccess')) != -1 or \
text.find("<div class=\"permissions-errors\">") != -1:
raise NoPage(self.site(), self.aslink(forceInterwiki = True))
- else:
+ elif config.retry_on_fail:
if text.find( "<title>Wikimedia Error</title>") > -1:
output( u"Wikimedia has technical problems; will retry in %i minutes." % retry_idle_time)
else:
@@ -771,6 +771,9 @@
retry_idle_time *= 2
if retry_idle_time > 30:
retry_idle_time = 30
+ else:
+ output( u"Failed to access wiki")
+ sys.exit(1)
# Check for restrictions
m = re.search('var wgRestrictionEdit = \\["(\w+)"\\]', text)
if m:
@@ -4635,7 +4638,7 @@
return response, data
- def getUrl(self, path, retry = True, sysop = False, data = None,
+ def getUrl(self, path, retry = None, sysop = False, data = None,
compress = True, no_hostname = False, cookie_only=False):
"""
Low-level routine to get a URL from the wiki.
@@ -4651,6 +4654,10 @@
Returns the HTML text of the page converted to unicode.
"""
+
+ if retry==None:
+ retry=config.retry_on_fail
+
if False: #self.persistent_http and not data:
self.conn.putrequest('GET', path)
self.conn.putheader('User-agent', useragent)
Bugs item #2618858, was opened at 2009-02-20 01:39
Message generated for change (Comment added) made by russblau
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2618858&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: rewrite
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: NicDumZ — Nicolas Dumazet (nicdumz)
Assigned to: Russell Blau (russblau)
Summary: logfiles format
Initial Comment:
output created in pagegenerators log after running "python pywikibot/pagegenerators.py -cat:1918 -debug" contains control characters :
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Grippe de 1918
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT 1918 en bande dessinée
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Armistice de Moudros
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Déclaration d'indépendance de la Lituanie
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT Guerre d'indépendance lettone
[0m
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT 1918
[0m
and so on...
Also, I find it strange to have terminal_interface logging STDOUT, when in fact pagegenerators is calling output() =)
----------------------------------------------------------------------
>Comment By: Russell Blau (russblau)
Date: 2009-02-25 16:37
Message:
> Well, the logs are okay since that rev, but the terminal output is
broken
> now :P
OK, this was really weird. At least on Windows XP, a line written to
sys.stdout that ends with "\n" instead of "\r\n" doesn't print a newline on
the terminal (in cmd.exe), but the same line written to sys.stderr *does*
print a newline. Hopefully fixed in r6441, but needs to be tested on other
platforms.
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2009-02-24 23:06
Message:
To clarify my unclear "I find it strange to have terminal_interface logging
STDOUT, when in
fact pagegenerators is calling output()":
Is it possible to have, instead of :
2009-02-20 15:28:59 terminal_interface.py, 126: STDOUT
2009-02-20 15:28:59 pagegenerators.py, xxx: STDOUT ? I believe that the
file and line calling output() would be more relevant here than the line
calling logging.log !
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2009-02-24 23:01
Message:
Well, the logs are okay since that rev, but the terminal output is broken
now :P
$python pywikibot/pagegenerators.py -cat:1918 -debug
Found 1 wikipedia:fr processes running, including this one.
Password for user DumZiBoT on wikipedia:fr:
Logging in to wikipedia:fr as DumZiBoT
Should be logged in now
Grippe de 19181918 en bande dessinéeArmistice de MoudrosDéclaration
d'indépendance de la LituanieGuerre d'indépendance lettone19181918 en
Suisse1918 dans les parcs d'attractions1918 en littérature1918 en musique
classiqueInsurrection de Grande-Pologne (1918-1919)Guerres d'indépendance
lituaniennesRéforme d'OctobreGrève générale de 1918 en Suisse1918 dans
les chemins de fer1918 (Chronologie de Dada et du surréalisme)1918 au
théâtreQuébec, Printemps 19181918 en architectureRévolution
allemande1918 en France1918 en science1918 en aéronautiqueIl tritticoSuor
AngelicaTraité de Brest-LitovskAmerican Expeditionary Force1918 en
musiqueTraité de Bucarest (1918)Guerre polono-ukrainienne1918 au Québec$
It seems that the line endings are stripped for terminal output :)
----------------------------------------------------------------------
Comment By: Russell Blau (russblau)
Date: 2009-02-23 15:32
Message:
This should be fixed after r6431; please advise if the problem is resolved
on your machine.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2618858&group_…
Bugs item #2636014, was opened at 2009-02-25 06:47
Message generated for change (Comment added) made by nobody
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2636014&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: family.py - outdated namespaces for gan-wiki
Initial Comment:
There are outdated namespaces -2 to 15 in the gan wikipedia.
(my family.py r6421)
<w:de:User:Xqt>
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2009-02-25 19:41
Message:
outdated family.py for all namespaces of the wuu-wiki too.
<w:de:user:xqt>
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2636014&group_…