http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10021
Revision: 10021
Author: xqt
Date: 2012-03-17 17:29:58 +0000 (Sat, 17 Mar 2012)
Log Message:
-----------
Format doc due to PEP 8 (max 80 chars)
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2012-03-15 11:23:42 UTC (rev 10020)
+++ trunk/pywikipedia/interwiki.py 2012-03-17 17:29:58 UTC (rev 10021)
@@ -133,25 +133,25 @@
There are some special hints, trying a number of languages
at once:
- * all: All languages with at least ca. 100 articles.
- * 10: The 10 largest languages (sites with most
- articles). Analogous for any other natural
- number.
- * arab: All languages using the Arabic alphabet.
- * cyril: All languages that use the Cyrillic alphabet.
- * chinese: All Chinese dialects.
- * latin: All languages using the Latin script.
- * scand: All Scandinavian languages.
+ * all: All languages with at least ca. 100 articles.
+ * 10: The 10 largest languages (sites with most
+ articles). Analogous for any other natural
+ number.
+ * arab: All languages using the Arabic alphabet.
+ * cyril: All languages that use the Cyrillic alphabet.
+ * chinese: All Chinese dialects.
+ * latin: All languages using the Latin script.
+ * scand: All Scandinavian languages.
Names of families that forward their interlanguage links
- to the wiki family being worked upon can be used, they are:
- with -family=wikipedia only:
- * commons: Interlanguage links of Mediawiki Commons.
- * incubator: Links in pages on the Mediawiki Incubator.
- * meta: Interlanguage links of named pages on Meta.
- * species: Interlanguage links of the wikispecies wiki.
- * strategy: Links in pages on Wikimedias strategy wiki.
- * test: Take interwiki links from Test Wikipedia
+ to the wiki family being worked upon can be used (with
+ -family=wikipedia only), they are:
+ * commons: Interlanguage links of Wikimedia Commons.
+ * incubator: Links in pages on the Wikimedia Incubator.
+ * meta: Interlanguage links of named pages on Meta.
+ * species: Interlanguage links of the Wikispecies wiki.
+ * strategy: Links in pages on Wikimedia's strategy wiki.
+ * test: Take interwiki links from Test Wikipedia
Languages, groups and families having the same page title
can be combined, as -hint:5,scand,sr,pt,commons:New_York
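For context, a hint value such as `-hint:5,scand,sr,pt,commons:New_York` combines numbers, group names, language codes and family codes, with an optional page title after the last colon. A hypothetical parsing sketch (the `GROUPS` table and `parse_hint` function are illustrative only, not pywikipedia's actual tables or API):

```python
# Hypothetical sketch: split a -hint value into site codes and an optional
# page title. The group expansion below is illustrative, not the real
# pywikipedia language tables.
GROUPS = {
    'scand': ['da', 'no', 'nn', 'sv', 'fo', 'is'],
}

def parse_hint(hint):
    """Split a hint into (codes, title); the title follows the first ':'."""
    if ':' in hint:
        codes_part, title = hint.split(':', 1)
    else:
        codes_part, title = hint, None
    codes = []
    for token in codes_part.split(','):
        token = token.strip()
        if token.isdigit():
            codes.append(int(token))      # "the N largest languages"
        elif token in GROUPS:
            codes.extend(GROUPS[token])   # expand a language group
        else:
            codes.append(token)           # a single language or family code
    return codes, title
```

With the example above, `parse_hint("5,scand,sr,pt,commons:New_York")` yields the number 5, the expanded Scandinavian codes, `sr`, `pt` and `commons`, plus the title `New_York`.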
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10019
Revision: 10019
Author: binbot
Date: 2012-03-15 08:11:55 +0000 (Thu, 15 Mar 2012)
Log Message:
-----------
Modified help (placing of quotation marks was misleading)
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2012-03-12 14:18:53 UTC (rev 10018)
+++ trunk/pywikipedia/pagegenerators.py 2012-03-15 08:11:55 UTC (rev 10019)
@@ -73,8 +73,10 @@
across all namespaces.
-namespace Filter the page generator to only yield pages in the
--ns specified namespaces. Separate multiple namespace
- numbers with commas. Example "-ns:0,2,4"
+-ns specified namespaces. Separate multiple namespace numbers
+ with commas. Example: -ns:"0,2,4" (Take care of quotation
+ marks as comma may qualify as command line separator.)
+ Will ask for namespaces if you write just -namespace or -ns.
-interwiki Work on the given page and all equivalent pages in other
languages. This can, for example, be used to fight
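The revised `-ns` help above can be illustrated with a small parsing sketch. This is an assumption about how such an argument could be handled, not the actual `pagegenerators.py` implementation:

```python
# Illustrative sketch of parsing a -ns / -namespace argument as described
# in the help text above; not the real pagegenerators code.
def parse_namespace_arg(arg):
    """Return namespace numbers from e.g. -ns:"0,2,4" or -ns:0,2,4.

    A bare -ns / -namespace would trigger an interactive prompt in the
    real bot; here we return None to signal that case.
    """
    _, sep, value = arg.partition(':')
    if not sep or not value:
        return None  # bare flag: the real script asks for namespaces
    value = value.strip('"\'')  # tolerate shell quoting, e.g. -ns:"0,2,4"
    return [int(ns) for ns in value.split(',')]
```

The quoting matters because, as the help notes, an unquoted comma may be treated as a command-line separator by some shells.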
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10018
Revision: 10018
Author: xqt
Date: 2012-03-12 14:18:53 +0000 (Mon, 12 Mar 2012)
Log Message:
-----------
strip trailing whitespace
Modified Paths:
--------------
trunk/pywikipedia/replace.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2012-03-12 14:14:03 UTC (rev 10017)
+++ trunk/pywikipedia/replace.py 2012-03-12 14:18:53 UTC (rev 10018)
@@ -96,7 +96,7 @@
-fix:XYZ Perform one of the predefined replacements tasks, which are
given in the dictionary 'fixes' defined inside the files
fixes.py and user-fixes.py.
- The -regex, -recursive and -nocase argument and given
+ The -regex, -recursive and -nocase argument and given
replacements and exceptions will be ignored if you use -fix
and they are present in the 'fixes' dictionary.
Currently available predefined fixes are:
@@ -345,7 +345,7 @@
# is on, and to display the number of edited articles otherwise.
self.editcounter = 0
# A counter for saved exceptions
- self.exceptcounter = 0
+ self.exceptcounter = 0
def isTitleExcepted(self, title):
"""
@@ -478,7 +478,7 @@
if self.exctitles:
choice = pywikibot.inputChoice(
u'Do you want to accept these changes?',
- ['Yes', 'No', 'no+eXcept', 'Edit',
+ ['Yes', 'No', 'no+eXcept', 'Edit',
'open in Browser', 'All', 'Quit'],
['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N')
else:
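The `-fix:XYZ` help in the hunk above refers to a `fixes` dictionary defined in fixes.py and user-fixes.py. A loose sketch of how such a predefined fix might be applied (the dictionary layout and `apply_fix` helper are hypothetical, modeled only roughly on fixes.py):

```python
import re

# Hypothetical 'fixes'-style table: a regex flag plus (old, new) pairs.
# This is an illustrative stand-in, not the real fixes.py content.
fixes = {
    'example': {
        'regex': True,
        'replacements': [
            (r'\bteh\b', 'the'),   # common typo
            (r'  +', ' '),         # collapse runs of spaces
        ],
    },
}

def apply_fix(text, name):
    """Apply every replacement pair of the named fix, in order."""
    fix = fixes[name]
    for old, new in fix['replacements']:
        if fix['regex']:
            text = re.sub(old, new, text)
        else:
            text = text.replace(old, new)
    return text
```

This also shows why, as the help says, explicitly given `-regex`/`-nocase` arguments are ignored under `-fix`: each fix entry carries its own flags.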
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-03-12 14:14:03 UTC (rev 10017)
+++ trunk/pywikipedia/wikipedia.py 2012-03-12 14:18:53 UTC (rev 10018)
@@ -7615,7 +7615,7 @@
r = result[u'parse'][u'text'][u'*']
# disable/remove comments
- r = pywikibot.removeDisabledParts(r, tags = ['comments']).strip()
+ r = pywikibot.removeDisabledParts(r, tags = ['comments']).strip()
# disable/remove ALL tags
if not (keeptags == [u'*']):
@@ -7768,7 +7768,7 @@
debug = True
config.special_page_limit = 500
elif arg == '-simulate':
- config.actions_to_block = ['edit', 'watch', 'move', 'delete',
+ config.actions_to_block = ['edit', 'watch', 'move', 'delete',
'undelete', 'protect']
else:
# the argument is not global. Let the specific bot script care
@@ -7835,7 +7835,7 @@
-verbose Have the bot provide additional output that may be
-v useful in debugging.
--debug
+-debug
-cosmeticchanges Toggles the cosmetic_changes setting made in config.py or
-cc user_config.py to its inverse and overrules it. All other
@@ -8245,8 +8245,8 @@
Pywikibot has detected that you use this outdated version of Python:
%s.
We would like to hear your voice before ceasing support of this version.
-Please update to \03{lightyellow}Python 2.7.2\03{default} if possible or visit
-http://www.mediawiki.org/wiki/Pywikipediabot/Survey2012 to tell us why we
+Please update to \03{lightyellow}Python 2.7.2\03{default} if possible or visit
+http://www.mediawiki.org/wiki/Pywikipediabot/Survey2012 to tell us why we
should support your version and to learn how to hide this message.
After collecting opinions for a time we will decide and announce the deadline
of deprecating use of old Python versions for Pywikipedia.
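The `-simulate` hunk above fills `config.actions_to_block` with the write actions to suppress. A minimal sketch of how such a list can gate actions (the `Config` class and `perform` helper are illustrative, not the real wikipedia.py API):

```python
# Minimal sketch of a -simulate style switch gating write actions,
# modeled on the actions_to_block list in the diff above.
class Config:
    actions_to_block = []

config = Config()

def handle_arg(arg):
    if arg == '-simulate':
        config.actions_to_block = ['edit', 'watch', 'move', 'delete',
                                   'undelete', 'protect']

def perform(action):
    """Return True if the action would really run, False if simulated."""
    return action not in config.actions_to_block
```

Read-only actions pass through unchanged, so a simulated run still fetches and diffs pages but never writes.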
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10016
Revision: 10016
Author: xqt
Date: 2012-03-12 14:12:02 +0000 (Mon, 12 Mar 2012)
Log Message:
-----------
strip trailing whitespace, keywords:Id
Modified Paths:
--------------
trunk/threadedhttp/threadedhttp/__init__.py
trunk/threadedhttp/threadedhttp/connectionpool.py
trunk/threadedhttp/threadedhttp/cookiejar.py
trunk/threadedhttp/threadedhttp/dummy.py
trunk/threadedhttp/threadedhttp/http.py
trunk/threadedhttp/threadedhttp/threadedhttp.py
Property Changed:
----------------
trunk/threadedhttp/threadedhttp/cookiejar.py
trunk/threadedhttp/threadedhttp/dummy.py
trunk/threadedhttp/threadedhttp/http.py
trunk/threadedhttp/threadedhttp/threadedhttp.py
Modified: trunk/threadedhttp/threadedhttp/__init__.py
===================================================================
--- trunk/threadedhttp/threadedhttp/__init__.py 2012-03-12 14:08:55 UTC (rev 10015)
+++ trunk/threadedhttp/threadedhttp/__init__.py 2012-03-12 14:12:02 UTC (rev 10016)
@@ -5,4 +5,4 @@
"""
from http import Http
-from threadedhttp import HttpRequest, HttpProcessor
\ No newline at end of file
+from threadedhttp import HttpRequest, HttpProcessor
Modified: trunk/threadedhttp/threadedhttp/connectionpool.py
===================================================================
--- trunk/threadedhttp/threadedhttp/connectionpool.py 2012-03-12 14:08:55 UTC (rev 10015)
+++ trunk/threadedhttp/threadedhttp/connectionpool.py 2012-03-12 14:12:02 UTC (rev 10016)
@@ -31,7 +31,7 @@
self.connections = [None] * max_connections # fill known connections witn Nones
self.clists = {} # 'id': (semaphore, lock, [connection1, connection2])
logging.log(1,'<%r>: initialized' % self)
-
+
def __del__(self):
""" Destructor to close all connections in the pool.
Not completely thread-safe, as connections *could* return just
@@ -46,7 +46,7 @@
del self.clists
finally:
self.lock.release()
-
+
def pop_connection(self, identifier):
""" Gets a connection from identifiers connection pool
@param identifier The pool identifier
@@ -93,13 +93,13 @@
self.lock.release()
except Exception, e:
logging.log(20,'<%r>: Exception raised level 2 | %r' % (self, e))
- clist.max.release()
+ clist.max.release()
raise
except Exception, e:
logging.log(20,'<%r>: Exception raised level 1 | %r' % (self, e))
self.global_max.release()
raise
-
+
def push_connection(self, identifier, connection):
""" Gets a connection from identifiers connection pool
@param identifier The pool identifier
@@ -126,20 +126,20 @@
self.connections = {}
self.lock = threading.Lock()
self.maxnum = maxnum
-
+
def __del__(self):
""" Destructor to close all connections in the pool """
self.lock.acquire()
try:
for connection in self.connections:
connection.close()
-
+
finally:
self.lock.release()
-
+
def __repr__(self):
return self.connections.__repr__()
-
+
def pop_connection(self, identifier):
""" Gets a connection from identifiers connection pool
@param identifier: The pool identifier
@@ -153,7 +153,7 @@
return None
finally:
self.lock.release()
-
+
def push_connection(self, identifier, connection):
""" Adds a connection to identifiers connection pool
@param identifier: The pool identifier
@@ -163,7 +163,7 @@
try:
if identifier not in self.connections:
self.connections[identifier] = []
-
+
if len(self.connections[identifier]) == self.maxnum:
logging.debug('closing %s connection %r' % (identifier, connection))
connection.close()
@@ -171,4 +171,4 @@
else:
self.connections[identifier].append(connection)
finally:
- self.lock.release()
\ No newline at end of file
+ self.lock.release()
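The second pool class touched by this diff keeps a lock-guarded dict mapping an identifier (scheme plus authority) to a list of idle connections, capped at `maxnum` per identifier. A simplified re-sketch of that pattern, with placeholder objects standing in for real connections:

```python
import threading

# Simplified sketch of the per-host connection pool pattern shown above;
# not the actual connectionpool.py classes.
class SimplePool:
    def __init__(self, maxnum=6):
        self.connections = {}          # identifier -> [idle connections]
        self.lock = threading.Lock()
        self.maxnum = maxnum

    def pop_connection(self, identifier):
        """Take an idle connection for this identifier, or None."""
        with self.lock:
            conns = self.connections.get(identifier)
            if conns:
                return conns.pop()
            return None

    def push_connection(self, identifier, connection):
        """Return a connection to the pool, dropping it if the pool is full."""
        with self.lock:
            conns = self.connections.setdefault(identifier, [])
            if len(conns) < self.maxnum:
                conns.append(connection)
            # else: a real pool would close() the surplus connection here
```

The `with self.lock:` blocks play the role of the explicit `acquire()`/`try`/`finally: release()` sequences in the diff.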
Modified: trunk/threadedhttp/threadedhttp/cookiejar.py
===================================================================
--- trunk/threadedhttp/threadedhttp/cookiejar.py 2012-03-12 14:08:55 UTC (rev 10015)
+++ trunk/threadedhttp/threadedhttp/cookiejar.py 2012-03-12 14:12:02 UTC (rev 10016)
@@ -19,7 +19,7 @@
def __init__(self, *args, **kwargs):
cookielib.CookieJar.__init__(self, *args, **kwargs)
self.lock = threading.Lock()
-
+
class LockableFileCookieJar(cookielib.FileCookieJar):
""" CookieJar with integrated Lock object """
def __init__(self, *args, **kwargs):
Property changes on: trunk/threadedhttp/threadedhttp/cookiejar.py
___________________________________________________________________
Added: svn:keywords
+ Id
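The `LockableCookieJar` pattern in this diff is small enough to transcribe whole. Here it is under Python 3 module names (`http.cookiejar` instead of the `cookielib` used in the code above): a standard cookie jar plus an integrated lock so multiple HTTP threads can read and write cookies safely.

```python
import threading
import http.cookiejar  # 'cookielib' in the Python 2 code above

class LockableCookieJar(http.cookiejar.CookieJar):
    """CookieJar with an integrated Lock object."""
    def __init__(self, *args, **kwargs):
        http.cookiejar.CookieJar.__init__(self, *args, **kwargs)
        self.lock = threading.Lock()
```

Callers bracket jar access with `jar.lock.acquire()` / `jar.lock.release()`, exactly as the `Http.request` code later in this commit does around `add_cookie_header` and `extract_cookies`.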
Modified: trunk/threadedhttp/threadedhttp/dummy.py
===================================================================
--- trunk/threadedhttp/threadedhttp/dummy.py 2012-03-12 14:08:55 UTC (rev 10015)
+++ trunk/threadedhttp/threadedhttp/dummy.py 2012-03-12 14:12:02 UTC (rev 10016)
@@ -23,7 +23,7 @@
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
-#
+#
# THIS SOFTWARE IS PROVIDED BY METAWEB TECHNOLOGIES AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
@@ -113,4 +113,4 @@
# as part of the expires= date format. so we have
# to split carefully here - header.split(',') won't do it.
HEADERVAL= re.compile(r'\s*(([^,]|(,\s*\d))+)')
- return [h[0] for h in HEADERVAL.findall(self.response[k])]
\ No newline at end of file
+ return [h[0] for h in HEADERVAL.findall(self.response[k])]
Property changes on: trunk/threadedhttp/threadedhttp/dummy.py
___________________________________________________________________
Added: svn:keywords
+ Id
Modified: trunk/threadedhttp/threadedhttp/http.py
===================================================================
--- trunk/threadedhttp/threadedhttp/http.py 2012-03-12 14:08:55 UTC (rev 10015)
+++ trunk/threadedhttp/threadedhttp/http.py 2012-03-12 14:12:02 UTC (rev 10016)
@@ -4,7 +4,7 @@
#
# (C) Merlijn van Deen, 2007
#
-# Indicated parts (C) Joe Gregorio et al, 2006
+# Indicated parts (C) Joe Gregorio et al, 2006
# Distributed under the terms of the MIT license
#
__version__ = '$Id$'
@@ -24,7 +24,7 @@
class Http(httplib2.Http):
""" Subclass of httplib2.Http that uses a `LockableCookieJar` to store cookies.
- Overrides httplib2s internal redirect support to prevent cookies
+ Overrides httplib2s internal redirect support to prevent cookies
being eaten by the wrong sites.
"""
def __init__(self, *args, **kwargs):
@@ -42,13 +42,13 @@
@param uri: The uri to retrieve
@param method: (optional) The HTTP method to use. Default is 'GET'
@param body: (optional) The request body. Default is no body.
- @param headers: (optional) Additional headers to send. Defaults include
+ @param headers: (optional) Additional headers to send. Defaults include
C{connection: keep-alive}, C{user-agent} and C{content-type}.
@param max_redirects: (optional) The maximum number of redirects to use for this request.
The class instances max_redirects is default
@param connection_type: (optional) ?
@returns: (response, content) tuple
- """
+ """
if max_redirects is None:
max_redirects = self.max_redirects
if headers is None:
@@ -62,18 +62,18 @@
finally:
self.cookiejar.lock.release()
headers = req.headers
-
+
# Wikimedia squids: add connection: keep-alive to request headers unless overridden
headers['connection'] = headers.pop('connection', 'keep-alive')
-
+
# determine connection pool key and fetch connection
(scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
conn_key = scheme+":"+authority
-
+
connection = self.connection_pool.pop_connection(conn_key)
if connection is not None:
self.connections[conn_key] = connection
-
+
# Redirect hack: we want to regulate redirects
follow_redirects = self.follow_redirects
#print 'follow_redirects: %r %r' % (self.follow_redirects, follow_redirects)
@@ -84,30 +84,30 @@
#print 'follow_redirects: %r %r' % (self.follow_redirects, follow_redirects)
self.follow_redirects = follow_redirects
#print 'follow_redirects: %r %r' % (self.follow_redirects, follow_redirects)
-
-
+
+
# return connection to pool
self.connection_pool.push_connection(conn_key, self.connections[conn_key])
del self.connections[conn_key]
-
- # First write cookies
+
+ # First write cookies
self.cookiejar.lock.acquire()
- try:
+ try:
self.cookiejar.extract_cookies(DummyResponse(response), req)
finally:
self.cookiejar.lock.release()
-
+
# Check for possible redirects
redirectable_response = ((response.status == 303) or
(response.status in [300, 301, 302, 307] and method in ["GET", "HEAD"]))
if self.follow_redirects and (max_redirects > 0) and redirectable_response:
(response, content) = self._follow_redirect(uri, method, body, headers, response, content, max_redirects)
return (response, content)
-
+
# The _follow_redirect function is based on the redirect handling in the
# _request function of httplib2. The original function is (C) Joe Gregorio et al, 2006
# and licensed under the MIT license. Other contributers include
- # Thomas Broyer (t.broyer(a)ltgt.net), James Antill, Xavier Verges Farrero,
+ # Thomas Broyer (t.broyer(a)ltgt.net), James Antill, Xavier Verges Farrero,
# Jonathan Feinberg, Blair Zajac, Sam Ruby and Louis Nyffenegger (httplib2.__contributers__)
def _follow_redirect(self, uri, method, body, headers, response, content, max_redirects):
""" Internal function to follow a redirect recieved by L{request} """
@@ -131,15 +131,15 @@
if response.status == 301 and method in ["GET", "HEAD"]:
response['-x-permanent-redirect-url'] = response['location']
if not response.has_key('content-location'):
- response['content-location'] = absolute_uri
+ response['content-location'] = absolute_uri
httplib2._updateCache(headers, response, content, self.cache, cachekey)
-
+
headers.pop('if-none-match', None)
headers.pop('if-modified-since', None)
-
+
if response.has_key('location'):
location = response['location']
redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
return self.request(location, redirect_method, body=body, headers = headers, max_redirects = max_redirects - 1)
else:
- raise httplib2.RedirectLimit("Redirected more times than redirection_limit allows.", response, content)
\ No newline at end of file
+ raise httplib2.RedirectLimit("Redirected more times than redirection_limit allows.", response, content)
Property changes on: trunk/threadedhttp/threadedhttp/http.py
___________________________________________________________________
Added: svn:keywords
+ Id
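The redirect logic in `Http.request` above boils down to two small decisions that are easy to isolate: whether a response is redirectable at all, and which method to use when following. These helpers restate that logic on its own (the function names are mine, not httplib2's or threadedhttp's):

```python
# The redirect tests from Http.request / _follow_redirect above, isolated:
# 303 is always redirectable; 300/301/302/307 only for GET and HEAD.
def is_redirectable(status, method):
    return (status == 303 or
            (status in (300, 301, 302, 307) and method in ("GET", "HEAD")))

def redirect_method(status, method):
    """Method for the follow-up request: a 303 of a non-GET/HEAD becomes GET."""
    if status == 303 and method not in ("GET", "HEAD"):
        return "GET"
    return method
```

This is the behaviour mandated by the HTTP semantics for 303 See Other, and the reason the class overrides httplib2's internal redirect handling is unrelated: it needs cookies extracted per hop so they are not "eaten by the wrong sites".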
Modified: trunk/threadedhttp/threadedhttp/threadedhttp.py
===================================================================
--- trunk/threadedhttp/threadedhttp/threadedhttp.py 2012-03-12 14:08:55 UTC (rev 10015)
+++ trunk/threadedhttp/threadedhttp/threadedhttp.py 2012-03-12 14:12:02 UTC (rev 10016)
@@ -21,7 +21,7 @@
>>> queue.put(request)
>>> request.lock.acquire()
>>> print request.data
-
+
C{request.lock.acquire()} will block until the data is available.
"""
def __init__(self, *args, **kwargs):
@@ -40,7 +40,7 @@
threading.Thread.__init__(self)
self.queue = queue
self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool)
-
+
def run(self):
# The Queue item is expected to either an HttpRequest object
# or None (to shut down the thread)
@@ -54,4 +54,4 @@
item.data = self.http.request(*item.args, **item.kwargs)
finally:
if item.lock:
- item.lock.release()
\ No newline at end of file
+ item.lock.release()
Property changes on: trunk/threadedhttp/threadedhttp/threadedhttp.py
___________________________________________________________________
Added: svn:keywords
+ Id
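The `HttpProcessor` docstring quoted in this diff describes the whole producer/consumer shape: the caller puts a request object on a queue, then blocks on the request's lock; a worker thread performs the fetch, stores the result in `request.data`, and releases the lock. A self-contained sketch of that pattern (the `Request` class, sentinel handling and fake fetch are illustrative stand-ins; a semaphore replaces the bare lock for clarity):

```python
import threading
import queue  # 'Queue' in the Python 2 code above

class Request:
    """Stand-in for threadedhttp's HttpRequest: args plus a result slot."""
    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        self.data = None
        self.lock = threading.Semaphore(0)  # released once data is ready

def worker(q):
    while True:
        item = q.get()
        if item is None:           # shutdown sentinel, as in HttpProcessor.run
            return
        try:
            # stand-in for the real self.http.request(*item.args, **item.kwargs)
            item.data = ('response', item.args)
        finally:
            item.lock.release()    # wake the producer even on failure

q = queue.Queue()
t = threading.Thread(target=worker, args=(q,))
t.start()

req = Request('http://example.org')
q.put(req)
req.lock.acquire()                 # blocks until the worker releases it
q.put(None)                        # shut the worker down
t.join()
```

The `finally: item.lock.release()` mirrors the hunk at the end of this diff: the producer must be unblocked even when the fetch raises.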