Bugs item #1860244, was opened at 2007-12-29 01:37
Message generated for change (Settings changed) made by wikipedian
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1860244&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
>Resolution: Fixed
Priority: 5
Private: No
Submitted By: Bernhard Mayr (falk_steinhauer)
Assigned to: Nobody/Anonymous (nobody)
Summary: Deadlock in wikipedia.mediawiki_message()
Initial Comment:
Every attempt to run a script that implicitly needs to call wikipedia.mediawiki_message() fails with the following error message:
"WARNING: No text area found on www.wiki-aventurica.de/index.php?title=MediaWiki:whatlinkshere-next&action=….
Maybe the server is down. Retrying in 1 minutes..."
This message is repeated indefinitely with ever longer wait periods.
The failure is due to the missing textarea in "http://www.wiki-aventurica.de/index.php?title=MediaWiki:Whatlinkshere-next&…".
So the condition on line 3992 of wikipedia.py (if tree.textarea is None:) is True every time, which causes the endless retry loop.
Because of this problem I can't use the global option "-ref" (ReferringPageGenerator), since it inevitably triggers a call to mediawiki_message().
I hope you can find a solution for wikis that do not have text on this special page.
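For illustration, a minimal standalone sketch (not pywikipedia code; the URL is reconstructed from the report and the edit-page pattern used in wikipedia.py) of the check that never succeeds on such wikis:

import urllib2
from BeautifulSoup import BeautifulSoup, SoupStrainer

# Fetch the edit form of a MediaWiki-namespace message and look for its
# textarea, as wikipedia.mediawiki_message() does internally.
url = ('http://www.wiki-aventurica.de/index.php'
       '?title=MediaWiki:Whatlinkshere-next&action=edit')
mwpage = urllib2.urlopen(url).read()
tree = BeautifulSoup(mwpage, parseOnlyThese=SoupStrainer('textarea'))
if tree.textarea is None:
    # On wikis where this page has no edit box, the old code assumed a
    # server outage and retried forever.
    print 'no textarea found -> endless retry loop in the old code'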
----------------------------------------------------------------------
Comment By: Rotem Liss (rotemliss)
Date: 2007-12-29 08:49
Message:
Logged In: YES
user_id=1327030
Originator: NO
Fixed in r4776.
----------------------------------------------------------------------
Revision: 4776
Author: rotem
Date: 2007-12-29 07:47:42 +0000 (Sat, 29 Dec 2007)
Log Message:
-----------
Move retries to Special:Allmessages, to avoid problems when the MediaWiki messages don't exist and don't have a textarea.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-12-29 00:56:31 UTC (rev 4775)
+++ trunk/pywikipedia/wikipedia.py 2007-12-29 07:47:42 UTC (rev 4776)
@@ -3978,33 +3978,19 @@
global mwpage, tree
if key not in self._mediawiki_messages.keys() \
and not hasattr(self, "_phploaded"):
- retry_idle_time = 1
- while True:
- get_throttle()
- mwpage = self.getUrl("%s?title=%s:%s&action=edit"
- % (self.path(), urllib.quote(
- self.namespace(8).replace(' ', '_').encode(
- self.encoding())),
- key))
- tree = BeautifulSoup(mwpage,
- convertEntities=BeautifulSoup.HTML_ENTITIES,
- parseOnlyThese=SoupStrainer("textarea"))
- if tree.textarea is None:
- # We assume that the server is down.
- # Wait some time, then try again.
- output(
-u"""WARNING: No text area found on %s%s?title=MediaWiki:%s&action=edit.
-Maybe the server is down. Retrying in %i minutes..."""
- % (self.hostname(), self.path(), key, retry_idle_time)
- )
- time.sleep(retry_idle_time * 60)
- # Next time wait longer, but not longer than half an hour
- retry_idle_time *= 2
- if retry_idle_time > 30:
- retry_idle_time = 30
- continue
- break
- value = tree.textarea.string.strip()
+ get_throttle()
+ mwpage = self.getUrl("%s?title=%s:%s&action=edit"
+ % (self.path(), urllib.quote(
+ self.namespace(8).replace(' ', '_').encode(
+ self.encoding())),
+ key))
+ tree = BeautifulSoup(mwpage,
+ convertEntities=BeautifulSoup.HTML_ENTITIES,
+ parseOnlyThese=SoupStrainer("textarea"))
+ if tree.textarea is not None:
+ value = tree.textarea.string.strip()
+ else:
+ value = None
if value:
self._mediawiki_messages[key] = value
else:
@@ -4013,12 +3999,28 @@
if verbose:
output(
u"Retrieving mediawiki messages from Special:Allmessages")
- get_throttle()
- phppage = self.getUrl(self.get_address("Special:Allmessages")
+ retry_idle_time = 1
+ while True:
+ get_throttle()
+ phppage = self.getUrl(self.get_address("Special:Allmessages")
+ "&ot=php")
- Rphpvals = re.compile(r"(?ms)'([^']*)' => '(.*?[^\\])',")
- for (phpkey, phpval) in Rphpvals.findall(phppage):
- self._mediawiki_messages[str(phpkey)] = phpval
+ Rphpvals = re.compile(r"(?ms)'([^']*)' => '(.*?[^\\])',")
+ count = 0
+ for (phpkey, phpval) in Rphpvals.findall(phppage):
+ count += 1
+ self._mediawiki_messages[str(phpkey)] = phpval
+ if count == 0:
+ # No messages could be added.
+ # We assume that the server is down.
+ # Wait some time, then try again.
+ output('WARNING: No messages found in Special:Allmessages. Maybe the server is down. Retrying in %i minutes...' % retry_idle_time)
+ time.sleep(retry_idle_time * 60)
+ # Next time wait longer, but not longer than half an hour
+ retry_idle_time *= 2
+ if retry_idle_time > 30:
+ retry_idle_time = 30
+ continue
+ break
self._phploaded = True
if self._mediawiki_messages[key] is None:
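The essence of the change is that the capped exponential backoff now guards the Special:Allmessages fetch instead of the per-message edit page. As a standalone sketch of that pattern (fetch_messages is a stand-in for the actual getUrl-plus-regex logic, not a pywikipedia function):

import time

def retry_with_backoff(fetch_messages):
    # Retry until at least one message is parsed, doubling the wait
    # each round but never sleeping longer than half an hour.
    retry_idle_time = 1
    while True:
        messages = fetch_messages()
        if messages:
            return messages
        print ('WARNING: no messages found. Maybe the server is down. '
               'Retrying in %i minutes...' % retry_idle_time)
        time.sleep(retry_idle_time * 60)
        retry_idle_time = min(retry_idle_time * 2, 30)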
On Sat, December 29, 2007 1:08 am, valhallasw(a)svn.wikimedia.org wrote:
> Revision: 4774
> Author: valhallasw
> Date: 2007-12-29 00:08:32 +0000 (Sat, 29 Dec 2007)
>
> Log Message:
> -----------
> Added epydoc configuration
> Added threaded cookie-eating HTTP library, based on httplib2
>
> Added Paths:
> -----------
> branches/rewrite/pywikibot/data/threadedhttp.py
> branches/rewrite/pywikibot/epydoc.cfg
epydoc.cfg contains a simple configuration for creating epydoc documentation.
threadedhttp.py is a wrapper around httplib2 that implements a threading
layer and cookie support.
Example code:
import logging
logging.basicConfig(level=logging.DEBUG,
                    format='%(levelname)s [%(threadName)s] %(message)s')
import threadedhttp
cj = threadedhttp.LockableCookieJar()
cp = threadedhttp.ConnectionPool()
import Queue
q = Queue.Queue()
for i in range(10):
    threadedhttp.HttpProcessor(q, cj, cp).start()
for i in range(10):
    q.put(threadedhttp.HttpRequest('http://google.com'))
    q.put(threadedhttp.HttpRequest('http://en.wikipedia.org'))
for i in range(10):
    q.put(None)
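To actually read a result back, keep a reference to the request object; as the HttpRequest docstring in the commit below explains, its semaphore blocks until a worker thread has filled in the data:

request = threadedhttp.HttpRequest('http://en.wikipedia.org')
q.put(request)
request.lock.acquire()            # blocks until the worker calls release()
response, content = request.data  # (httplib2 response, body) tuple
print response.status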
I have not yet come up with a good set of unit tests as most of the
functions are hard to unit-test... any suggestions on that part are very
welcome. I have implemented epydoc-style comments for all external
functions.
For the ConnectionPool: I have now implemented a connection pool with a
maximum of 5 saved concurrent connections to each host. This still means it
is possible to have up to about N(threads) open connections to one host.
We may want to change this to a hard maximum of 5 concurrent connections,
blocking when a connection is requested but none is available at that time;
a sketch of that variant follows after this message.
Secondly, no timeouts are implemented, although they probably should be:
keeping connections to rarely-used servers open is useless.
Thirdly, we may want to limit the total number of connections for those
who do not have the luxury of unlimited outbound connections.
Any suggestions are, as always, very welcome!
--valhallasw
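A sketch of the blocking variant suggested above — not part of any commit, and BlockingConnectionPool is a hypothetical name — could bound each host's checkouts with a semaphore:

import threading

class BlockingConnectionPool(object):
    """Hypothetical ConnectionPool variant: at most maxnum connections
    per host may be checked out at once; further requests block."""
    def __init__(self, maxnum=5):
        self.maxnum = maxnum
        self.lock = threading.Lock()
        self.semaphores = {}   # host -> Semaphore(maxnum)
        self.connections = {}  # host -> list of idle connections

    def _sem(self, identifier):
        # Lazily create the per-host semaphore and idle list.
        self.lock.acquire()
        try:
            if identifier not in self.semaphores:
                self.semaphores[identifier] = threading.Semaphore(self.maxnum)
                self.connections[identifier] = []
            return self.semaphores[identifier]
        finally:
            self.lock.release()

    def pop_connection(self, identifier):
        # Blocks until one of the maxnum slots for this host is free.
        self._sem(identifier).acquire()
        self.lock.acquire()
        try:
            if self.connections[identifier]:
                return self.connections[identifier].pop()
            return None  # no idle connection; the caller opens a fresh one
        finally:
            self.lock.release()

    def push_connection(self, identifier, connection):
        # Return a connection (and its slot) to the pool.
        self.lock.acquire()
        try:
            self.connections[identifier].append(connection)
        finally:
            self.lock.release()
        self._sem(identifier).release()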
Revision: 4774
Author: valhallasw
Date: 2007-12-29 00:08:32 +0000 (Sat, 29 Dec 2007)
Log Message:
-----------
Added epydoc configuration
Added threaded cookie-eating HTTP library, based on httplib2
Added Paths:
-----------
branches/rewrite/pywikibot/data/threadedhttp.py
branches/rewrite/pywikibot/epydoc.cfg
Added: branches/rewrite/pywikibot/data/threadedhttp.py
===================================================================
--- branches/rewrite/pywikibot/data/threadedhttp.py (rev 0)
+++ branches/rewrite/pywikibot/data/threadedhttp.py 2007-12-29 00:08:32 UTC (rev 4774)
@@ -0,0 +1,354 @@
+# -*- coding: utf-8 -*-
+
+""" Httplib2 threaded cookie layer
+ This class extends Httplib2, adding support for:
+ * Cookies, guarded for cross-site redirects
+ * Thread safe ConnectionPool and LockableCookieJar classes
+ * HttpProcessor thread class
+ * HttpRequest object
+"""
+
+# (C) 2007 Pywikipedia bot team, 2007
+# (C) 2006 Httplib 2 team, 2006
+# (C) 2007 Metaweb Technologies, Inc.
+#
+# Partially distributed under the MIT license
+# Partially distributed under Metaweb Technologies, Incs license
+# which is compatible with the MIT license
+
+__version__ = '$Id$'
+__docformat__ = 'epytext'
+
+# standard python libraries
+import re
+import threading
+import time
+import logging
+
+import urllib
+import cookielib
+
+# easy_install safeguarded dependencies
+import pkg_resources
+pkg_resources.require("httplib2")
+import httplib2
+
+
+class ConnectionPool(object):
+ """ A thread-safe connection pool """
+ def __init__(self, maxnum=5):
+ """ @param maxnum: Maximum number of connections per identifier.
+ The pool drops excessive connections added.
+ """
+ self.connections = {}
+ self.lock = threading.Lock()
+ self.maxnum = maxnum
+
+ def __del__(self):
+ """ Destructor to close all connections in the pool """
+ self.lock.acquire()
+ try:
+ for key in self.connections:
+ for connection in self.connections[key]:
+ connection.close()
+ finally:
+ self.lock.release()
+
+ def __repr__(self):
+ return self.connections.__repr__()
+
+ def pop_connection(self, identifier):
+ """ Gets a connection from identifiers connection pool
+ @param identifier The pool identifier
+ @returns A connection object if found, None otherwise
+ """
+ self.lock.acquire()
+ try:
+ if identifier in self.connections:
+ if len(self.connections[identifier]) > 0:
+ return self.connections[identifier].pop()
+ return None
+ finally:
+ self.lock.release()
+
+ def push_connection(self, identifier, connection):
+ """ Adds a connection to identifiers connection pool
+ @param identifier The pool identifier
+ @param connection The connection to add to the pool
+ """
+ self.lock.acquire()
+ try:
+ if identifier not in self.connections:
+ self.connections[identifier] = []
+
+ if len(self.connections[identifier]) == self.maxnum:
+ logging.debug('closing %s connection %r' % (identifier, connection))
+ connection.close()
+ del connection
+ else:
+ self.connections[identifier].append(connection)
+ finally:
+ self.lock.release()
+
+class LockableCookieJar(cookielib.CookieJar):
+ """ CookieJar with integrated Lock object """
+ def __init__(self, *args, **kwargs):
+ cookielib.CookieJar.__init__(self, *args, **kwargs)
+ self.lock = threading.Lock()
+
+class Http(httplib2.Http):
+ """ Subclass of httplib2.Http that uses a `LockableCookieJar` to store cookies.
+ Overrides httplib2s internal redirect support to prevent cookies
+ being eaten by the wrong sites.
+ """
+ def __init__(self, *args, **kwargs):
+ """ @param cookiejar: (optional) CookieJar to use. A new one will be used when not supplied.
+ @param connection_pool: (optional) Connection pool to use. A new one will be used when not supplied.
+ @param max_redirects: (optional) The maximum number of redirects to follow. 5 is default.
+ """
+ self.cookiejar = kwargs.pop('cookiejar', LockableCookieJar())
+ self.connection_pool = kwargs.pop('connection_pool', ConnectionPool())
+ self.max_redirects = kwargs.pop('max_redirects', 5)
+ httplib2.Http.__init__(self, *args, **kwargs)
+
+ def request(self, uri, method="GET", body=None, headers=None, max_redirects=None, connection_type=None):
+ """ Starts an HTTP request.
+ @param uri: The uri to retrieve
+ @param method: (optional) The HTTP method to use. Default is 'GET'
+ @param body: (optional) The request body. Default is no body.
+ @param headers: (optional) Additional headers to send. Defaults include
+ C{connection: keep-alive}, C{user-agent} and C{content-type}.
+ @param max_redirects: (optional) The maximum number of redirects to use for this request.
+ The class instances max_redirects is default
+ @param connection_type: (optional) ?
+ @returns: (response, content) tuple
+ """
+ if max_redirects is None:
+ max_redirects = self.max_redirects
+ if headers is None:
+ headers = {}
+ # Prepare headers
+ headers.pop('cookie', None)
+ req = DummyRequest(uri, headers)
+ self.cookiejar.lock.acquire()
+ try:
+ self.cookiejar.add_cookie_header(req)
+ finally:
+ self.cookiejar.lock.release()
+ headers = req.headers
+
+ # Wikimedia squids: add connection: keep-alive to request headers unless overridden
+ headers['connection'] = headers.pop('connection', 'keep-alive')
+
+ # determine connection pool key and fetch connection
+ (scheme, authority, request_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
+ conn_key = scheme+":"+authority
+
+ connection = self.connection_pool.pop_connection(conn_key)
+ if connection is not None:
+ self.connections[conn_key] = connection
+
+ # Redirect hack: we want to regulate redirects
+ follow_redirects = self.follow_redirects
+ self.follow_redirects = False
+ logging.debug('%r' % ((uri, method, headers, max_redirects, connection_type),))
+ (response, content) = httplib2.Http.request(self, uri, method, body, headers, max_redirects, connection_type)
+ self.follow_redirects = follow_redirects
+
+ # return connection to pool
+ self.connection_pool.push_connection(conn_key, self.connections[conn_key])
+ del self.connections[conn_key]
+
+ # First write cookies
+ self.cookiejar.lock.acquire()
+ try:
+ self.cookiejar.extract_cookies(DummyResponse(response), req)
+ finally:
+ self.cookiejar.lock.release()
+
+ # Check for possible redirects
+ redirectable_response = ((response.status == 303) or
+ (response.status in [300, 301, 302, 307] and method in ["GET", "HEAD"]))
+ if self.follow_redirects and (max_redirects > 0) and redirectable_response:
+ (response, content) = self._follow_redirect(uri, method, body, headers, response, content, max_redirects)
+
+ return (response, content)
+
+ def _follow_redirect(self, uri, method, body, headers, response, content, max_redirects):
+ """ Internal function to follow a redirect recieved by L{request} """
+ (scheme, authority, absolute_uri, defrag_uri) = httplib2.urlnorm(httplib2.iri2uri(uri))
+ if self.cache:
+ cachekey = defrag_uri
+ else:
+ cachekey = None
+
+ # Pick out the location header and basically start from the beginning
+ # remembering first to strip the ETag header and decrement our 'depth'
+ if not response.has_key('location') and response.status != 300:
+ raise httplib2.RedirectMissingLocation("Redirected but the response is missing a Location: header.", response, content)
+ # Fix-up relative redirects (which violate an RFC 2616 MUST)
+ if response.has_key('location'):
+ location = response['location']
+ (scheme, authority, path, query, fragment) = httplib2.parse_uri(location)
+ if authority == None:
+ response['location'] = httplib2.urlparse.urljoin(uri, location)
+ logging.debug('Relative redirect: changed [%s] to [%s]' % (location, response['location']))
+ if response.status == 301 and method in ["GET", "HEAD"]:
+ response['-x-permanent-redirect-url'] = response['location']
+ if not response.has_key('content-location'):
+ response['content-location'] = absolute_uri
+ httplib2._updateCache(headers, response, content, self.cache, cachekey)
+
+ headers.pop('if-none-match', None)
+ headers.pop('if-modified-since', None)
+
+ if response.has_key('location'):
+ location = response['location']
+ redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method
+ return self.request(location, redirect_method, body=body, headers = headers, max_redirects = max_redirects - 1)
+ else:
+ raise RedirectLimit("Redirected more times than redirection_limit allows.", response, content)
+
+class HttpRequest(object):
+ """ Object wrapper for HTTP requests that need to block the requesters thread.
+ Usage:
+ >>> request = HttpRequest('http://www.google.com')
+ >>> queue.put(request)
+ >>> request.lock.acquire()
+ >>> print request.data
+
+ C{request.lock.acquire()} will block until the data is available.
+ """
+ def __init__(self, *args, **kwargs):
+ self.args = args
+ self.kwargs = kwargs
+ self.data = None
+ self.lock = threading.Semaphore(0)
+
+class HttpProcessor(threading.Thread):
+ """ Thread object to spawn multiple HTTP connection threads """
+ def __init__(self, queue, cookiejar, connection_pool):
+ """ @param queue: The C{Queue.Queue} object that contains L{HttpRequest} objects.
+ @param cookiejar: The C{LockableCookieJar} cookie object to share among requests.
+ @param connection_pool: The C{ConnectionPool} object which contains connections to share among requests.
+ """
+ threading.Thread.__init__(self)
+ self.queue = queue
+ self.http = Http(cookiejar=cookiejar, connection_pool=connection_pool)
+
+ def run(self):
+ # The Queue item is expected to be either an HttpRequest object
+ # or None (to shut down the thread)
+ while (True):
+ item = self.queue.get()
+ if item is None:
+ return
+ try:
+ item.data = self.http.request(*item.args, **item.kwargs)
+ finally:
+ if item.lock:
+ item.lock.release()
+
+
+# Metaweb Technologies, Inc. License:
+ # ========================================================================
+ # The following dummy classes are:
+ # ========================================================================
+ # Copyright (c) 2007, Metaweb Technologies, Inc.
+ # All rights reserved.
+ #
+ # Redistribution and use in source and binary forms, with or without
+ # modification, are permitted provided that the following conditions
+ # are met:
+ # * Redistributions of source code must retain the above copyright
+ # notice, this list of conditions and the following disclaimer.
+ # * Redistributions in binary form must reproduce the above
+ # copyright notice, this list of conditions and the following
+ # disclaimer in the documentation and/or other materials provided
+ # with the distribution.
+ #
+ # THIS SOFTWARE IS PROVIDED BY METAWEB TECHNOLOGIES AND CONTRIBUTORS
+ # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL METAWEB
+ # TECHNOLOGIES OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ # POSSIBILITY OF SUCH DAMAGE.
+ # ========================================================================
+
+class DummyRequest(object):
+ """Simulated urllib2.Request object for httplib2
+ implements only what's necessary for cookielib.CookieJar to work
+ """
+ def __init__(self, url, headers=None):
+ self.url = url
+ self.headers = headers
+ self.origin_req_host = cookielib.request_host(self)
+ self.type, r = urllib.splittype(url)
+ self.host, r = urllib.splithost(r)
+ if self.host:
+ self.host = urllib.unquote(self.host)
+
+ def get_full_url(self):
+ return self.url
+
+ def get_origin_req_host(self):
+ # TODO to match urllib2 this should be different for redirects
+ return self.origin_req_host
+
+ def get_type(self):
+ return self.type
+
+ def get_host(self):
+ return self.host
+
+ def get_header(self, key, default=None):
+ return self.headers.get(key.lower(), default)
+
+ def has_header(self, key):
+ return key in self.headers
+
+ def add_unredirected_header(self, key, val):
+ # TODO this header should not be sent on redirect
+ self.headers[key.lower()] = val
+
+ def is_unverifiable(self):
+ # TODO to match urllib2, this should be set to True when the
+ # request is the result of a redirect
+ return False
+
+class DummyResponse(object):
+ """Simulated urllib2.Request object for httplib2
+ implements only what's necessary for cookielib.CookieJar to work
+ """
+ def __init__(self, response):
+ self.response = response
+
+ def info(self):
+ return DummyMessage(self.response)
+
+class DummyMessage(object):
+ """Simulated mimetools.Message object for httplib2
+ implements only what's necessary for cookielib.CookieJar to work
+ """
+ def __init__(self, response):
+ self.response = response
+
+ def getheaders(self, k):
+ k = k.lower()
+ v = self.response.get(k.lower(), None)
+ if k not in self.response:
+ return []
+ #return self.response[k].split(re.compile(',\\s*'))
+
+ # httplib2 joins multiple values for the same header
+ # using ','. but the netscape cookie format uses ','
+ # as part of the expires= date format. so we have
+ # to split carefully here - header.split(',') won't do it.
+ HEADERVAL= re.compile(r'\s*(([^,]|(,\s*\d))+)')
+ return [h[0] for h in HEADERVAL.findall(self.response[k])]
\ No newline at end of file
Property changes on: branches/rewrite/pywikibot/data/threadedhttp.py
___________________________________________________________________
Name: svn:keywords
+ Id
Added: branches/rewrite/pywikibot/epydoc.cfg
===================================================================
--- branches/rewrite/pywikibot/epydoc.cfg (rev 0)
+++ branches/rewrite/pywikibot/epydoc.cfg 2007-12-29 00:08:32 UTC (rev 4774)
@@ -0,0 +1,84 @@
+[epydoc] # Epydoc section marker (required by ConfigParser)
+
+# modules
+# The list of objects to document. Objects can be named using
+# dotted names, module filenames, or package directory names.
+# Aliases for this option include "objects" and "values".
+modules: data.threadedhttp
+
+# output
+# The type of output that should be generated. Should be one
+# of: html, text, latex, dvi, ps, pdf.
+output: html
+
+# target
+# The path to the output directory. May be relative or absolute.
+target: doc/
+
+# css
+# The CSS stylesheet for HTML output. Can be the name of a builtin
+# stylesheet, or the name of a file.
+css: white
+
+# name
+# The documented project's name.
+name: Python Mediawiki Framework
+
+# url
+# The documented project's URL.
+url: http://pywikipediabot.sourceforge.net
+
+# frames
+# Whether or not to include a frames-based table of contents.
+frames: yes
+
+# private
+# Whether or not to include private variables. (Even if included,
+# private variables will be hidden by default.)
+private: yes
+
+# imports
+# Whether or not to list each module's imports.
+imports: yes
+
+# verbosity
+# An integer indicating how verbose epydoc should be. The default
+# value is 0; negative values will suppress warnings and errors;
+# positive values will give more verbose output.
+verbosity: 0
+
+# parse
+# Whether or not parsing should be used to examine objects.
+parse: yes
+
+# introspect
+# Whether or not introspection should be used to examine objects.
+introspect: yes
+
+# graph
+# The list of graph types that should be automatically included
+# in the output. Graphs are generated using the Graphviz "dot"
+# executable. Graph types include: "classtree", "callgraph",
+# "umlclass". Use "all" to include all graph types
+graph: all
+
+# dotpath
+# The path to the Graphviz "dot" executable, used to generate
+# graphs.
+dotpath: /usr/bin/dot
+
+# sourcecode
+# Whether or not to include syntax highlighted source code in
+# the output (HTML only).
+sourcecode: no
+
+# pstat
+# The name of one or more pstat files (generated by the profile
+# or hotshot module). These are used to generate call graphs.
+pstat: profile.out
+
+# separate-classes
+# Whether each class should be listed in its own section when
+# generating LaTeX or PDF output.
+separate-classes: no
+
Rotem Liss wrote:
> Carlos Thompson wrote:
> > On my site, I have some rewrite rules, however a request such as
> > /index.php?title=Test&action=raw works fine. (As does /Test?action=raw
> > and
> > /w-raw/Test ).
>
> This is action=edit. You can override the function "path" in the family
> file.
For that matter /index.php?title=Test&action=edit , /Test?action=edit , and
/w-edit/Test have the same result.
> > The wiki is also using a skin designed for that site.
>
> If this skin changes the regular "history" tab, you should override the
> function
> "RversionTab" in the family file, with a regexp that can be used to check
> if the page exists or not.
Okay. After many attempts and no clear how-to, I added:
def RversionTab(self, code):
    return '<li id="ca-history"><a href="/w-history/.*?"[^>]*>'
into my Family class. It is now working.
-- Carlos Th
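For reference, a sketch of how both overrides discussed here might sit together in a custom family file (the family name and URL are illustrative placeholders, not the actual site):

import family

class Family(family.Family):
    def __init__(self):
        family.Family.__init__(self)
        self.name = 'mywiki'                    # illustrative
        self.langs = {'en': 'www.example.org'}  # illustrative

    def path(self, code):
        # The script path the site's rewrite rules accept for action= URLs.
        return '/index.php'

    def RversionTab(self, code):
        # Regex matching the skin's customised history tab.
        return '<li id="ca-history"><a href="/w-history/.*?"[^>]*>'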
Revision: 4773
Author: filnik
Date: 2007-12-28 18:06:00 +0000 (Fri, 28 Dec 2007)
Log Message:
-----------
Forgot to delete a useless variable (no longer used after the rewrite)
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2007-12-28 17:56:09 UTC (rev 4772)
+++ trunk/pywikipedia/add_text.py 2007-12-28 18:06:00 UTC (rev 4773)
@@ -113,7 +113,7 @@
yield wikipedia.Page(self.site, result)
def add_text(generator = None, addText = None, summary = None, regexSkip = None, regexSkipUrl = None,
- always = False, exceptUrl = False, up = False):
+ always = False, up = False):
# When a page is tagged as "really well written" it has a star in the interwiki links.
# This is a list of all the templates used (in regex format) to make the stars appear.
starsList = ['link[ _]fa', 'link[ _]adq', 'enllaç[ _]ad',
@@ -249,7 +249,7 @@
def main():
# If none, the var is setted only for check purpose.
summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
- generator = None; always = False; exceptUrl = False
+ generator = None; always = False
# Load a lot of default generators
genFactory = pagegenerators.GeneratorFactory()
# Put the text above or below the text?
@@ -293,7 +293,7 @@
always = True
else:
generator = genFactory.handleArg(arg)
- add_text(generator, addText, summary, regexSkip, regexSkipUrl, always, exceptUrl, up)
+ add_text(generator, addText, summary, regexSkip, regexSkipUrl, always, up)
if __name__ == "__main__":
try:
Revision: 4772
Author: filnik
Date: 2007-12-28 17:56:09 +0000 (Fri, 28 Dec 2007)
Log Message:
-----------
Rewrote the main function so the script can be used as a module by other scripts (I'll write something using this script)
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
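In principle, another script can now drive add_text directly; a rough sketch of such a caller under the r4772 signature (the page title and template are placeholders):

import wikipedia
import add_text

site = wikipedia.getSite()
generator = [wikipedia.Page(site, u'Sandbox')]  # placeholder page
# Keyword arguments per the r4772 signature of add_text().
add_text.add_text(generator=generator,
                  addText=u'{{Something}}',
                  summary=u'Bot: Adding a template',
                  always=True)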
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2007-12-28 17:16:52 UTC (rev 4771)
+++ trunk/pywikipedia/add_text.py 2007-12-28 17:56:09 UTC (rev 4772)
@@ -37,6 +37,10 @@
python add_text.py -start:! -summary:"Bot: Adding a template" -text:"{{Something}}" -except:"\{\{(?:[Tt]emplate:|)[Ss]omething" -up
+# Command used on it.wikipedia to put the template in the page without any category.
+python add_text.py -excepturl:"<p class='catlinks'>" -uncat -text:"{{Categorizzare}}"
+-except:"\{\{(?:[Tt]emplate:|)[Cc]ategorizzare" -summary:"Bot: Aggiungo template Categorizzare"
+
--- Credits and Help ---
This script has been written by Botwiki's stuff, if you want to help us
or you need some help regarding this script, you can find us here:
@@ -108,62 +112,15 @@
for result in results:
yield wikipedia.Page(self.site, result)
-def main():
+def add_text(generator = None, addText = None, summary = None, regexSkip = None, regexSkipUrl = None,
+ always = False, exceptUrl = False, up = False):
# When a page is tagged as "really well written" it has a star in the interwiki links.
# This is a list of all the templates used (in regex format) to make the stars appear.
starsList = ['link[ _]fa', 'link[ _]adq', 'enllaç[ _]ad',
'link[ _]ua', 'legătură[ _]af', 'destacado',
'ua', 'liên k[ _]t[ _]chọn[ _]lọc']
- # If none, the var is setted only for check purpose.
- summary = None; addText = None; regexSkip = None
- generator = None; always = False; exceptUrl = False
- # Load a lot of default generators
- genFactory = pagegenerators.GeneratorFactory()
+
errorCount = 0
- # Put the text above or below the text?
- up = False
-
- # Loading the arguments
- for arg in wikipedia.handleArgs():
- if arg.startswith('-text'):
- if len(arg) == 5:
- addText = wikipedia.input(u'What text do you want to add?')
- else:
- addText = arg[6:]
- elif arg.startswith('-summary'):
- if len(arg) == 8:
- summary = wikipedia.input(u'What summary do you want to use?')
- else:
- summary = arg[9:]
- elif arg.startswith('-page'):
- if len(arg) == 5:
- generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
- else:
- generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
- elif arg.startswith('-excepturl'):
- exceptUrl = True
- if len(arg) == 10:
- regexSkip = wikipedia.input(u'What text should I skip?')
- else:
- regexSkip = arg[11:]
- elif arg.startswith('-except'):
- if len(arg) == 7:
- regexSkip = wikipedia.input(u'What text should I skip?')
- else:
- regexSkip = arg[8:]
- elif arg.startswith('-untagged'):
- if len(arg) == 9:
- untaggedProject = wikipedia.input(u'What project do you want to use?')
- else:
- untaggedProject = arg[10:]
- generator = untaggedGenerator(untaggedProject)
- elif arg == '-up':
- up = True
- elif arg == '-always':
- always = True
- else:
- generator = genFactory.handleArg(arg)
-
site = wikipedia.getSite()
# /wiki/ is not always the right path in non-wiki projects
pathWiki = site.family.nicepath(site.lang)
@@ -174,6 +131,7 @@
raise NoEnoughData('You have to specify what text you want to add!')
if not summary:
summary = wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg) % addText)
+
# Main Loop
for page in generator:
wikipedia.output(u'Loading %s...' % page.title())
@@ -186,16 +144,18 @@
wikipedia.output(u"%s is a redirect, skip!" % page.title())
continue
# Understand if the bot has to skip the page or not
- if regexSkip and exceptUrl:
+ # In this way you can use both -except and -excepturl
+ if regexSkipUrl != None:
url = '%s%s' % (pathWiki, page.urlname())
- result = re.findall(regexSkip, site.getUrl(url))
- elif regexSkip:
+ result = re.findall(regexSkipUrl, site.getUrl(url))
+ if result != []:
+ wikipedia.output(u'Exception! regex (or word) used with -exceptUrl is in the page. Skip!')
+ continue
+ if regexSkip != None:
result = re.findall(regexSkip, text)
- else:
- result = []
- if result != []:
- wikipedia.output(u'Exception! regex (or word) use with -except is in the page. Skip!')
- continue
+ if result != []:
+ wikipedia.output(u'Exception! regex (or word) used with -except is in the page. Skip!')
+ continue
# If not up, text put below
if not up:
newtext = text
@@ -286,6 +246,55 @@
# Break only if the errors are one after the other...
errorCount = 0
break
+def main():
+ # If none, the var is setted only for check purpose.
+ summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
+ generator = None; always = False; exceptUrl = False
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
+ # Put the text above or below the text?
+ up = False
+ # Loading the arguments
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-text'):
+ if len(arg) == 5:
+ addText = wikipedia.input(u'What text do you want to add?')
+ else:
+ addText = arg[6:]
+ elif arg.startswith('-summary'):
+ if len(arg) == 8:
+ summary = wikipedia.input(u'What summary do you want to use?')
+ else:
+ summary = arg[9:]
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg.startswith('-excepturl'):
+ if len(arg) == 10:
+ regexSkipUrl = wikipedia.input(u'What text should I skip?')
+ else:
+ regexSkipUrl = arg[11:]
+ elif arg.startswith('-except'):
+ if len(arg) == 7:
+ regexSkip = wikipedia.input(u'What text should I skip?')
+ else:
+ regexSkip = arg[8:]
+ elif arg.startswith('-untagged'):
+ if len(arg) == 9:
+ untaggedProject = wikipedia.input(u'What project do you want to use?')
+ else:
+ untaggedProject = arg[10:]
+ generator = untaggedGenerator(untaggedProject)
+ elif arg == '-up':
+ up = True
+ elif arg == '-always':
+ always = True
+ else:
+ generator = genFactory.handleArg(arg)
+ add_text(generator, addText, summary, regexSkip, regexSkipUrl, always, exceptUrl, up)
+
if __name__ == "__main__":
try:
main()