pywikibot

pywikibot@lists.wikimedia.org

3 participants
6830 discussions

[Pywikipedia-l] SVN: [3957] trunk/pywikipedia/weblinkchecker.py
by wikipedian＠svn.wikimedia.org 03 Aug '07

03 Aug '07

Revision: 3957 Author: wikipedian Date: 2007-08-03 00:24:24 +0000 (Fri, 03 Aug 2007) Log Message: ----------- removed debug output Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-08-02 23:11:32 UTC (rev 3956) +++ trunk/pywikipedia/weblinkchecker.py 2007-08-03 00:24:24 UTC (rev 3957) @@ -230,7 +230,6 @@ pass def changeUrl(self, url): - print url self.url = url # we ignore the fragment self.scheme, self.host, self.path, self.query, self.fragment = urlparse.urlsplit(self.url) @@ -537,7 +536,6 @@ def run(self): while not self.killed: - # print 'RUN, queue length: %i' % len(self.queue) if len(self.queue) == 0: if self.finishing: break @@ -635,7 +633,6 @@ def countLinkCheckThreads(): i = 0 for thread in threading.enumerate(): - # print thread if isinstance(thread, LinkCheckThread): i += 1 return i

1 0

[Pywikipedia-l] SVN: [3956] trunk/pywikipedia/weblinkchecker.py
by wikipedian＠svn.wikimedia.org 03 Aug '07

03 Aug '07

Revision: 3956 Author: wikipedian Date: 2007-08-02 23:11:32 +0000 (Thu, 02 Aug 2007) Log Message: ----------- bugfix: only the BadStatusLine exception has a 'line' attribute, the other httplib errors don't. Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-08-02 16:47:03 UTC (rev 3955) +++ trunk/pywikipedia/weblinkchecker.py 2007-08-02 23:11:32 UTC (rev 3956) @@ -230,6 +230,7 @@ pass def changeUrl(self, url): + print url self.url = url # we ignore the fragment self.scheme, self.host, self.path, self.query, self.fragment = urlparse.urlsplit(self.url) @@ -248,7 +249,7 @@ self.path = unicode(urllib.quote(self.path.encode(encoding))) self.query = unicode(urllib.quote(self.query.encode(encoding), '=&')) - def resolveRedirect(self, useHEAD = True): + def resolveRedirect(self, useHEAD = False): ''' Requests the header from the server. If the page is an HTTP redirect, returns the redirect target URL as a string. Otherwise returns None. @@ -305,24 +306,22 @@ else: return False # not a redirect - def check(self, useHEAD = True): + def check(self, useHEAD = False): """ Returns True and the server status message if the page is alive. Otherwise returns false """ try: wasRedirected = self.resolveRedirect(useHEAD = useHEAD) - except UnicodeError, arg: - return False, u'Encoding Error: %s (%s)' % (arg.__class__.__name__, unicode(arg)) - except httplib.error, arg: - return False, u'HTTP Error: %s (%s)' % (arg.__class__.__name__, arg.line) - except socket.error, arg: - # TODO: decode arg[1]. On Linux, it's encoded in UTF-8. + except UnicodeError, error: + return False, u'Encoding Error: %s (%s)' % (error.__class__.__name__, unicode(error)) + except httplib.error, error: + return False, u'HTTP Error: %s' % error.__class__.__name__ + except socket.error, error: + # TODO: decode error[1]. 
On Linux, it's encoded in UTF-8. # How is it encoded in Windows? Or can we somehow just # get the English message? - return False, u'Socket Error: %s' % repr(arg[1]) - #except UnicodeEncodeError, arg: - # return False, u'Non-ASCII Characters in URL: %s' % arg + return False, u'Socket Error: %s' % repr(error[1]) if wasRedirected: if self.url in self.redirectChain: if useHEAD: @@ -352,18 +351,16 @@ else: try: conn = self.getConnection() - except httplib.error, arg: - return False, u'HTTP Error: %s (%s)' % (arg.__class__.__name__, arg.line) + except httplib.error, error: + return False, u'HTTP Error: %s' % error.__class__.__name__ try: conn.request('GET', '%s%s' % (self.path, self.query), None, self.header) - except socket.error, arg: - return False, u'Socket Error: %s' % repr(arg[1]) - #except UnicodeEncodeError, arg: - # return False, u'Non-ASCII Characters in URL: %s' % arg + except socket.error, error: + return False, u'Socket Error: %s' % repr(error[1]) try: response = conn.getresponse() - except Exception, arg: - return False, u'Error: %s' % arg + except Exception, error: + return False, u'Error: %s' % error # read the server's encoding, in case we need it later self.readEncodingFromResponse(response) # site down if the server status is between 400 and 499

1 0

[Pywikipedia-l] SVN: [3955] trunk/pywikipedia/weblinkchecker.py
by wikipedian＠svn.wikimedia.org 03 Aug '07

03 Aug '07

Revision: 3955 Author: wikipedian Date: 2007-08-02 16:47:03 +0000 (Thu, 02 Aug 2007) Log Message: ----------- bugfix Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-08-02 16:27:50 UTC (rev 3954) +++ trunk/pywikipedia/weblinkchecker.py 2007-08-02 16:47:03 UTC (rev 3955) @@ -203,12 +203,12 @@ def getEncodingUsedByServer(self): if not self.serverEncoding: try: - wikipedia.output(u'Contacting server %s to find out its default encoding...' % self.conn) + wikipedia.output(u'Contacting server %s to find out its default encoding...' % self.host) conn = self.getConnection() conn.request('HEAD', '/', None, self.header) response = conn.getresponse() - self.readEncodingFromResponse() + self.readEncodingFromResponse(response) except: pass if not self.serverEncoding:

1 0

[Pywikipedia-l] SVN: [3954] trunk/pywikipedia/weblinkchecker.py
by wikipedian＠svn.wikimedia.org 03 Aug '07

03 Aug '07

Revision: 3954 Author: wikipedian Date: 2007-08-02 16:27:50 +0000 (Thu, 02 Aug 2007) Log Message: ----------- encoding bug workaround Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-08-02 15:18:17 UTC (rev 3953) +++ trunk/pywikipedia/weblinkchecker.py 2007-08-02 16:27:50 UTC (rev 3954) @@ -320,7 +320,7 @@ # TODO: decode arg[1]. On Linux, it's encoded in UTF-8. # How is it encoded in Windows? Or can we somehow just # get the English message? - return False, u'Socket Error: %s' % arg[1] + return False, u'Socket Error: %s' % repr(arg[1]) #except UnicodeEncodeError, arg: # return False, u'Non-ASCII Characters in URL: %s' % arg if wasRedirected: @@ -357,7 +357,7 @@ try: conn.request('GET', '%s%s' % (self.path, self.query), None, self.header) except socket.error, arg: - return False, u'Socket Error: %s' % arg[1] + return False, u'Socket Error: %s' % repr(arg[1]) #except UnicodeEncodeError, arg: # return False, u'Non-ASCII Characters in URL: %s' % arg try:

1 0

[Pywikipedia-l] SVN: [3953] trunk/pywikipedia/catlib.py
by valhallasw＠svn.wikimedia.org 02 Aug '07

02 Aug '07

Revision: 3953 Author: valhallasw Date: 2007-08-02 15:18:17 +0000 (Thu, 02 Aug 2007) Log Message: ----------- New version, now tests for <div id="mw-subcategories"> and, if not found, for <div id="mw-pages">. *** THIS MAY BREAK SUPPORT FOR OLDER VERSIONS OF MEDIAWIKI *** Modified Paths: -------------- trunk/pywikipedia/catlib.py Modified: trunk/pywikipedia/catlib.py =================================================================== --- trunk/pywikipedia/catlib.py 2007-08-02 15:07:16 UTC (rev 3952) +++ trunk/pywikipedia/catlib.py 2007-08-02 15:18:17 UTC (rev 3953) @@ -195,8 +195,14 @@ # save a copy of this text to find out self's supercategory. self_txt = txt # index where subcategory listing begins - # this only works for the current version of the MonoBook skin - ibegin = txt.index('Saved in parser cache') + try: + ibegin = txt.index('<div id="mw-subcategories">') + except ValueError: + try: + ibegin = txt.index('<div id="mw-pages">') + except ValueError: + wikipedia.output("\nCategory page detection is not bug free. Please report this error!") + raise # index where article listing ends try: iend = txt.index('<div class="printfooter">')

1 0

[Pywikipedia-l] SVN: [3952] trunk/pywikipedia/families/wikipedia_family.py
by valhallasw＠svn.wikimedia.org 02 Aug '07

02 Aug '07

Revision: 3952 Author: valhallasw Date: 2007-08-02 15:07:16 +0000 (Thu, 02 Aug 2007) Log Message: ----------- disambcatname['ca'] updated to Viquipèdia-fake namespace. Byte order marker added Modified Paths: -------------- trunk/pywikipedia/families/wikipedia_family.py Modified: trunk/pywikipedia/families/wikipedia_family.py =================================================================== --- trunk/pywikipedia/families/wikipedia_family.py 2007-08-02 14:56:28 UTC (rev 3951) +++ trunk/pywikipedia/families/wikipedia_family.py 2007-08-02 15:07:16 UTC (rev 3952) @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- import urllib import family, config @@ -456,7 +456,7 @@ 'be': u'Disambig', 'be-x-old': u'Вікіпэдыя:Неадназначнасьці', 'bg': u'Пояснителни страници', - 'ca': u'Registre de pàginas de desambiguació', + 'ca': u'Viquipèdia:Registre de pàgines de desambiguació', 'cs': u'Rozcestníky', 'cy': u'Gwahaniaethu', 'da': u'Flertdig',

1 0

[Pywikipedia-l] SVN: [3951] trunk/pywikipedia/wikipedia.py
by wikipedian＠svn.wikimedia.org 02 Aug '07

02 Aug '07

Revision: 3951 Author: wikipedian Date: 2007-08-02 14:56:28 +0000 (Thu, 02 Aug 2007) Log Message: ----------- heavily simplified Page.replaceImage() Modified Paths: -------------- trunk/pywikipedia/wikipedia.py Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-08-02 12:11:29 UTC (rev 3950) +++ trunk/pywikipedia/wikipedia.py 2007-08-02 14:56:28 UTC (rev 3951) @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +## -*- coding: utf-8 -*- """ Library to get and put pages on a MediaWiki. @@ -2003,62 +2003,32 @@ return ur'(?:[%s%s]%s)' % (s[0].upper(), s[0].lower(), s[1:]) def create_regex_i(s): return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(), c.lower()) for c in s]) - + namespaces = ('Image', 'Media') + site.namespace(6, all = True) + site.namespace(-2, all = True) + # note that the colon is already included here r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(create_regex_i, namespaces)) r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]') - def simple_replacer(match): + def simple_replacer(match, groupNumber = 1): if replacement == None: return u'' else: groups = list(match.groups()) - groups[1] = replacement + groups[groupNumber] = replacement return u''.join(groups) - - # Previously links in image descriptions will cause - # unexpected behaviour: [[Image:image.jpg|thumb|[[link]] in description]] - # will truncate at the first occurence of ]]. This cannot be - # fixed using one regular expression. - # This means that all ]] after the start of the image - # must be located. If it then does not have an associated - # [[, this one is the closure of the image. 
- - r_simple_s = u'(\[\[%s)%s' % (r_namespace, r_image) - r_s = '\[\[' - r_e = '\]\]' - # First determine where wikilinks start and end - image_starts = [match.start() for match in re.finditer(r_simple_s, text)] - link_starts = [match.start() for match in re.finditer(r_s, text)] - link_ends = [match.end() for match in re.finditer(r_e, text)] - - r_simple = u'(\[\[%s)%s(.*)' % (r_namespace, r_image) - replacements = [] - for image_start in image_starts: - current_link_starts = [link_start for link_start in link_starts - if link_start > image_start] - current_link_ends = [link_end for link_end in link_ends - if link_end > image_start] - end = image_start - if current_link_ends: end = current_link_ends[0] - - while current_link_starts and current_link_ends: - start = current_link_starts.pop(0) - end = current_link_ends.pop(0) - if end <= start and end > image_start: - # Found the end of the image - break - - # Add the replacement to the todo list. Doing the - # replacement right know would alter the indices. - replacements.append((new_text[image_start:end], - re.sub(r_simple, simple_replacer, - new_text[image_start:end]))) - - # Perform the replacements - for old, new in replacements: - if old: new_text = new_text.replace(old, new) - + + # The group params contains parameters such as thumb and 200px, as well + # as the image caption. The caption can contain wiki links, but each + # link has to be closed properly. + r_param = r'(?:\|(?:(?!\[\[).|\[\[.*?\]\])*?)' + rImage = re.compile(ur'(\[\[)(?P<namespace>%s)%s(?P<params>%s*?)(\]\])' % (r_namespace, r_image, r_param)) + + while True: + m = rImage.search(new_text) + if not m: + break + new_text = new_text[:m.start()] + simple_replacer(m, 2) + new_text[m.end():] + # Remove the image from galleries r_galleries = ur'(?s)(\<%s\>)(?s)(.*?)(\<\/%s\>)' % (create_regex_i('gallery'), create_regex_i('gallery'))

1 0

[Pywikipedia-l] SVN: [3950] trunk/pywikipedia/wikipedia.py
by valhallasw＠svn.wikimedia.org 02 Aug '07

02 Aug '07

Revision: 3950 Author: valhallasw Date: 2007-08-02 12:11:29 +0000 (Thu, 02 Aug 2007) Log Message: ----------- bugfix: the output buffer now actually gets cleared. Changed break to return in the put_async waiting routine ^c-handler. Modified Paths: -------------- trunk/pywikipedia/wikipedia.py Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-08-02 10:26:31 UTC (rev 3949) +++ trunk/pywikipedia/wikipedia.py 2007-08-02 12:11:29 UTC (rev 3950) @@ -4491,12 +4491,20 @@ logfile.write(text + '\n') logfile.flush() if input_lock.locked(): - output_cache.append(((text,), {'colors': colors, 'newline': newline, 'toStdout': toStdout})) + cache_output(text, colors = colors, newline = newline, toStdout = toStdout) else: ui.output(text, colors = colors, newline = newline, toStdout = toStdout) finally: output_lock.release() +def cache_output(*args, **kwargs): + output_cache.append((args, kwargs)) + +def flush_output_cache(): + while(output_cache): + (args, kwargs) = output_cache.pop(0) + ui.output(*args, **kwargs) + def input(question, colors = None, password = False): """ Asks the user a question, then returns the user's answer. 
@@ -4513,15 +4521,12 @@ input_lock.acquire() try: data = ui.input(question, colors, password) - finally: - for output in output_cache: - ui.output(*output[0], **output[1]) + finally: + flush_output_cache() input_lock.release() - for output in output_cache: #for output added between the start of the for loop and the lock release - ui.output(*output[0], **output[1]) - + return data - + def inputChoice(question, answers, hotkeys, default = None): """ Asks the user a question and offers several options, then returns the @@ -4543,12 +4548,9 @@ try: data = ui.inputChoice(question, answers, hotkeys, default).lower() finally: - for output in output_cache: - ui.output(*output[0], **output[1]) + flush_output_cache() input_lock.release() - for output in output_cache: #for output added between the start of the for loop and the lock release - ui.output(*output[0], **output[1]) - + return data def showHelp(moduleName = None): @@ -4646,7 +4648,7 @@ % (page_put_queue.qsize(), datetime.timedelta(seconds=(page_put_queue.qsize()) * config.put_throttle)), ['yes', 'no'], ['y', 'N'], 'N') if answer in ['y', 'Y']: - break + return import atexit atexit.register(_flush)

1 0

[Pywikipedia-l] SVN: [3949] trunk/pywikipedia/solve_disambiguation.py
by valhallasw＠svn.wikimedia.org 02 Aug '07

02 Aug '07

Revision: 3949 Author: valhallasw Date: 2007-08-02 10:26:31 +0000 (Thu, 02 Aug 2007) Log Message: ----------- Updated: no more finally with 'please wait' message needed; this is now handled by wikipedia.py Modified Paths: -------------- trunk/pywikipedia/solve_disambiguation.py Modified: trunk/pywikipedia/solve_disambiguation.py =================================================================== --- trunk/pywikipedia/solve_disambiguation.py 2007-08-02 10:25:38 UTC (rev 3948) +++ trunk/pywikipedia/solve_disambiguation.py 2007-08-02 10:26:31 UTC (rev 3949) @@ -899,11 +899,10 @@ generator = iter([page]) bot = DisambiguationRobot(always, alternatives, getAlternatives, generator, primary, main_only) - try: - bot.run() - finally: - wikipedia.output(u'\n\nPlease wait for the asynchronous page edits to finish...') + bot.run() + + if __name__ == "__main__": try: main()

1 0

[Pywikipedia-l] SVN: [3948] trunk/pywikipedia/catlib.py
by valhallasw＠svn.wikimedia.org 02 Aug '07

02 Aug '07

Revision: 3948 Author: valhallasw Date: 2007-08-02 10:25:38 +0000 (Thu, 02 Aug 2007) Log Message: ----------- bugfix: category.articles(startFrom) now passes startFrom to the correct parameter of _getContentsAndSupercats Modified Paths: -------------- trunk/pywikipedia/catlib.py Modified: trunk/pywikipedia/catlib.py =================================================================== --- trunk/pywikipedia/catlib.py 2007-08-02 01:15:39 UTC (rev 3947) +++ trunk/pywikipedia/catlib.py 2007-08-02 10:25:38 UTC (rev 3948) @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python # -*- coding: utf-8 -*- """ Library to work with category pages on Wikipedia @@ -295,7 +295,7 @@ Results are unsorted (except as sorted by MediaWiki), and need not be unique. """ - for tag, page in self._getContentsAndSupercats(recurse, startFrom): + for tag, page in self._getContentsAndSupercats(recurse, startFrom=startFrom): if tag == ARTICLE: yield page

1 0

Jump to page:

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

2008

2007

pywikibot