Revision: 4441
Author: russblau
Date: 2007-10-10 20:31:14 +0000 (Wed, 10 Oct 2007)
Log Message:
-----------
More docstring cleanup in wikipedia.py; fix logic error
in family.py normalizeNamespace() method.
Modified Paths:
--------------
trunk/pywikipedia/family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2007-10-10 10:37:18 UTC (rev 4440)
+++ trunk/pywikipedia/family.py 2007-10-10 20:31:14 UTC (rev 4441)
@@ -2307,25 +2307,31 @@
return self.namespaces[ns_number].has_key(code)
def normalizeNamespace(self, code, value):
- """Given a value, attempt to match it with all available namespaces, with default and localized versions.
- Sites may have more than one way to write the same namespace - choose the first one in the list.
+ """Given a value, attempt to match it with all available namespaces,
+ with default and localized versions. Sites may have more than one
+ way to write the same namespace - choose the first one in the list.
If nothing can be normalized, return the original value.
"""
for ns, items in self.namespaces.iteritems():
if items.has_key(code):
v = items[code]
- if type(v) == type([]):
- if value in v: return v[0]
- else:
- if value == v: return v
+ elif items.has_key('_default'):
+ v = items['_default']
+ else:
+ continue
+ if type(v) is list:
+ if value in v: return v[0]
+ else:
+ if value == v: return v
if value == self.namespace('_default', ns):
return self.namespace(code, ns)
return value
def getNamespaceIndex(self, lang, namespace):
- """Given a namespace, attempt to match it with all available namespaces.
- Sites may have more than one way to write the same namespace - choose the first one in the list.
- Returns namespace index or None
+ """Given a namespace, attempt to match it with all available
+ namespaces. Sites may have more than one way to write the same
+ namespace - choose the first one in the list. Returns namespace
+ index or None.
"""
namespace = namespace.lower()
for n in self.namespaces.keys():
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-10-10 10:37:18 UTC (rev 4440)
+++ trunk/pywikipedia/wikipedia.py 2007-10-10 20:31:14 UTC (rev 4441)
@@ -203,6 +203,11 @@
# Pre-compile re expressions
reNamespace = re.compile("^(.+?) *: *(.*)$")
+Rwatch = re.compile(
+ r"<input type='hidden' value=\"(.*?)\" name=\"wpEditToken\"")
+Rwatchlist = re.compile(r"<input tabindex='[\d]+' type='checkbox' "
+ r"name='wpWatchthis' checked='checked'")
+Rlink = re.compile(r'\[\[(?P<title>[^\]\|]*)(\|[^\]]*)?\]\]')
class Page(object):
@@ -686,7 +691,6 @@
retry_idle_time = 30
# We now know that there is a textarea.
# Look for the edit token
- Rwatch = re.compile(r"\<input type='hidden' value=\"(.*?)\" name=\"wpEditToken\"")
tokenloc = Rwatch.search(text)
if tokenloc:
self.site().putToken(tokenloc.group(1), sysop = sysop)
@@ -716,8 +720,7 @@
if not matchVersionTab:
raise NoPage(self.site(), self.aslink(forceInterwiki = True))
# Look if the page is on our watchlist
- R = re.compile(r"\<input tabindex='[\d]+' type='checkbox' name='wpWatchthis' checked='checked'")
- matchWatching = R.search(text)
+ matchWatching = Rwatchlist.search(text)
if matchWatching:
isWatched = True
# Now process the contents of the textarea
@@ -1459,7 +1462,6 @@
# from text before processing
thistxt = removeDisabledParts(thistxt)
- Rlink = re.compile(r'\[\[(?P<title>[^\]\|]*)(\|[^\]]*)?\]\]')
for match in Rlink.finditer(thistxt):
title = match.group('title')
if title.strip().startswith("#"):
@@ -3380,7 +3382,7 @@
class Site(object):
"""A MediaWiki site. Do not instantiate directly; use getSite() function.
- Constructor takes four arguments; only site is mandatory:
+ Constructor takes four arguments; only code is mandatory:
code language code for Site
fam Wikimedia family (optional: defaults to configured).
@@ -3395,78 +3397,126 @@
loggedInAs: return current username, or None if not logged in.
forceLogin: require the user to log in to the site
- messages: return True if there are new messages on the site
- cookies: return user's cookies as a string
+ messages: return True if there are new messages on the site
+ cookies: return user's cookies as a string
- getUrl: retrieve an URL from the site
- urlEncode: Encode a query to be sent using an http POST request.
- postForm: Post form data to an address at this site.
- postData: Post encoded form data to an http address at this site.
+ getUrl: retrieve an URL from the site
+ urlEncode: Encode a query to be sent using an http POST request.
+ postForm: Post form data to an address at this site.
+ postData: Post encoded form data to an http address at this site.
redirect: Return the localized redirect tag for the site.
- redirectRegex: Return compiled regular expression matching on redirect pages.
+ redirectRegex: Return compiled regular expression matching on redirect
+ pages.
mediawiki_message: Retrieve the text of a specified MediaWiki message
has_mediawiki_message: True if this site defines specified MediaWiki
message
+ shared_image_repository: Return tuple of image repositories used by this
+ site.
+ category_on_one_line: Return True if this site wants all category links
+ on one line.
+ interwiki_putfirst: Return list of language codes for ordering of
+ interwiki links.
linkto(title): Return string in the form of a wikilink to 'title'
- isInterwikiLink(s): Return True if s is in the form of an interwiki link.
+ isInterwikiLink(s): Return True if 's' is in the form of an interwiki
+ link.
+ version: Return MediaWiki version string from Family file.
+ versionnumber: Return int identifying the MediaWiki version.
+ live_version: Return version number read from Special:Version.
+ checkCharset(charset): Warn if charset doesn't match family file.
-
Methods that yield Page objects derived from a wiki's Special: pages
(note, some methods yield other information in a tuple along with the
Pages; see method docs for details) --
- search(query): query results from Special:Search
- allpages(): Special:Allpages
- newpages(): Special:Newpages
- longpages(): Special:Longpages
- shortpages(): Special:Shortpages
- categories(): Special:Categories (yields Category objects)
- deadendpages(): Special:Deadendpages
- ancientpages(): Special:Ancientpages
- lonelypages(): Special:Lonelypages
- unwatchedpages(): Special:Unwatchedpages (sysop accounts only)
- uncategorizedcategories(): Special:Uncategorizedcategories (yields Category)
- uncategorizedpages(): Special:Uncategorizedpages
- uncategorizedimages(): Special:Uncategorizedimages (yields ImagePage)
- unusedcategories(): Special:Unusuedcategories (yields Category)
- unusedfiles(): Special:Unusedimages (yields ImagePage)
- withoutinterwiki: Special:Withoutinterwiki
- linksearch: Special:Linksearch
+ search(query): query results from Special:Search
+ allpages(): Special:Allpages
+ newpages(): Special:Newpages
+ longpages(): Special:Longpages
+ shortpages(): Special:Shortpages
+ categories(): Special:Categories (yields Category objects)
+ deadendpages(): Special:Deadendpages
+ ancientpages(): Special:Ancientpages
+ lonelypages(): Special:Lonelypages
+ unwatchedpages(): Special:Unwatchedpages (sysop accounts only)
+ uncategorizedcategories(): Special:Uncategorizedcategories (yields
+ Category objects)
+ uncategorizedpages(): Special:Uncategorizedpages
+ uncategorizedimages(): Special:Uncategorizedimages (yields
+ ImagePage objects)
+ unusedcategories(): Special:Unusedcategories (yields Category)
+ unusedfiles(): Special:Unusedimages (yields ImagePage)
+ withoutinterwiki: Special:Withoutinterwiki
+ linksearch: Special:Linksearch
Convenience methods that provide access to properties of the wiki Family
object; all of these are read-only and return a unicode string unless
noted --
- encoding: The current encoding for this site.
- encodings: List of all historical encodings for this site.
- category_namespace: Canonical name of the Category namespace on this
- site.
- category_namespaces: List of all valid names for the Category namespace.
- image_namespace: Canonical name of the Image namespace on this site.
- template_namespace: Canonical name of the Template namespace on this
- site.
- export_address: URL path for Special:Export.
- query_address: URL path + '?' for query.php
- api_address: Return URL path + '?' for api.php
- apipath: URL path for api.php
- protocol: Protocol ('http' or 'https') for access to this site.
- hostname: Host portion of site URL.
- path: URL path for index.php on this Site.
- dbName: MySQL database name.
- move_address: URL path for Special:Movepage.
- delete_address(s): URL path to delete title 's'.
- undelete_view_address(s): URL path to view Special:Undelete for title 's'
- undelete_address: Return URL path to Special:Undelete.
- protect_address(s): Return URL path to protect title 's'.
- unprotect_address(s): Return URL path to unprotect title 's'.
- put_address(s): Return URL path to submit revision to page titled 's'.
- get_address(s): Return URL path to retrieve page titled 's'.
- nice_get_address(s): Return shorter URL path to retrieve page titled 's'.
- edit_address(s): Return URL path for edit form for page titled 's'.
- purge_address(s): Return URL path to purge cache and retrieve page 's'.
- block_address: Return path to block an IP address.
+ encoding: The current encoding for this site.
+ encodings: List of all historical encodings for this site.
+ category_namespace: Canonical name of the Category namespace on this
+ site.
+ category_namespaces: List of all valid names for the Category
+ namespace.
+ image_namespace: Canonical name of the Image namespace on this site.
+ template_namespace: Canonical name of the Template namespace on this
+ site.
+ protocol: Protocol ('http' or 'https') for access to this site.
+ hostname: Host portion of site URL.
+ path: URL path for index.php on this Site.
+ dbName: MySQL database name.
+ Methods that return addresses to pages on this site (usually in
+ Special: namespace); these methods only return URL paths, they do not
+ interact with the wiki --
+
+ export_address: Special:Export.
+ query_address: URL path + '?' for query.php
+ api_address: URL path + '?' for api.php
+ apipath: URL path for api.php
+ move_address: Special:Movepage.
+ delete_address(s): Delete title 's'.
+ undelete_view_address(s): Special:Undelete for title 's'
+ undelete_address: Special:Undelete.
+ protect_address(s): Protect title 's'.
+ unprotect_address(s): Unprotect title 's'.
+ put_address(s): Submit revision to page titled 's'.
+ get_address(s): Retrieve page titled 's'.
+ nice_get_address(s): Short URL path to retrieve page titled 's'.
+ edit_address(s): Edit form for page titled 's'.
+ purge_address(s): Purge cache and retrieve page 's'.
+ block_address: Block an IP address.
+ unblock_address: Unblock an IP address.
+ blocksearch_address(s): Search for blocks on IP address 's'.
+ linksearch_address(s): Special:Linksearch for target 's'.
+ search_address(q): Special:Search for query 'q'.
+ allpages_address(s): Special:Allpages.
+ newpages_address: Special:Newpages.
+ longpages_address: Special:Longpages.
+ shortpages_address: Special:Shortpages.
+ unusedfiles_address: Special:Unusedimages.
+ categories_address: Special:Categories.
+ deadendpages_address: Special:Deadendpages.
+ ancientpages_address: Special:Ancientpages.
+ lonelypages_address: Special:Lonelypages.
+ unwatchedpages_address: Special:Unwatchedpages.
+ uncategorizedcategories_address: Special:Uncategorizedcategories.
+ uncategorizedimages_address: Special:Uncategorizedimages.
+ uncategorizedpages_address: Special:Uncategorizedpages.
+ unusedcategories_address: Special:Unusedcategories.
+ withoutinterwiki_address: Special:Withoutinterwiki.
+ references_address(s): Special:Whatlinkshere for page 's'.
+ allmessages_address: Special:Allmessages.
+ upload_address: Special:Upload.
+ maintenance_address(sub): Special:Maintenance for subfunction 'sub'.
+ double_redirects_address: Special:Doubleredirects.
+ broken_redirects_address: Special:Brokenredirects.
+ login_address: Special:Userlogin.
+ captcha_image_address(id): Special:Captcha for image 'id'.
+ watchlist_address: Special:Watchlist editor.
+ contribs_address(target): Special:Contributions for user 'target'.
+
"""
def __init__(self, code, fam=None, user=None, persistent_http = None):
self.lang = code.lower()
@@ -4508,89 +4558,131 @@
return self.family.block_address(self.lang)
def unblock_address(self):
+ """Return path to unblock an IP address."""
return self.family.unblock_address(self.lang)
def blocksearch_address(self, s):
+ """Return path to search for blocks on IP address 's'."""
return self.family.blocksearch_address(self.lang, s)
def linksearch_address(self, s, limit=500, offset=0):
+ """Return path to Special:Linksearch for target 's'."""
return self.family.linksearch_address(self.lang, s, limit=limit, offset=offset)
- def search_address(self, q, n=50, ns = 0):
+ def search_address(self, q, n=50, ns=0):
+ """Return path to Special:Search for query 'q'."""
return self.family.search_address(self.lang, q, n, ns)
def allpages_address(self, s, ns = 0):
- return self.family.allpages_address(self.lang, start = s, namespace = ns)
+ """Return path to Special:Allpages."""
+ return self.family.allpages_address(self.lang, start=s, namespace = ns)
def newpages_address(self, n=50):
+ """Return path to Special:Newpages."""
return self.family.newpages_address(self.lang, n)
def longpages_address(self, n=500):
+ """Return path to Special:Longpages."""
return self.family.longpages_address(self.lang, n)
def shortpages_address(self, n=500):
+ """Return path to Special:Shortpages."""
return self.family.shortpages_address(self.lang, n)
def unusedfiles_address(self, n=500):
+ """Return path to Special:Unusedimages."""
return self.family.unusedfiles_address(self.lang, n)
def categories_address(self, n=500):
+ """Return path to Special:Categories."""
return self.family.categories_address(self.lang, n)
def deadendpages_address(self, n=500):
+ """Return path to Special:Deadendpages."""
return self.family.deadendpages_address(self.lang, n)
def ancientpages_address(self, n=500):
+ """Return path to Special:Ancientpages."""
return self.family.ancientpages_address(self.lang, n)
def lonelypages_address(self, n=500):
+ """Return path to Special:Lonelypages."""
return self.family.lonelypages_address(self.lang, n)
def unwatchedpages_address(self, n=500):
+ """Return path to Special:Unwatchedpages."""
return self.family.unwatchedpages_address(self.lang, n)
def uncategorizedcategories_address(self, n=500):
+ """Return path to Special:Uncategorizedcategories."""
return self.family.uncategorizedcategories_address(self.lang, n)
def uncategorizedimages_address(self, n=500):
+ """Return path to Special:Uncategorizedimages."""
return self.family.uncategorizedimages_address(self.lang, n)
def uncategorizedpages_address(self, n=500):
+ """Return path to Special:Uncategorizedpages."""
return self.family.uncategorizedpages_address(self.lang, n)
def unusedcategories_address(self, n=500):
+ """Return path to Special:Unusedcategories."""
return self.family.unusedcategories_address(self.lang, n)
def withoutinterwiki_address(self, n=500):
+ """Return path to Special:Withoutinterwiki."""
return self.family.withoutinterwiki_address(self.lang, n)
def references_address(self, s):
+ """Return path to Special:Whatlinkshere for page 's'."""
return self.family.references_address(self.lang, s)
def allmessages_address(self):
+ """Return path to Special:Allmessages."""
return self.family.allmessages_address(self.lang)
def upload_address(self):
+ """Return path to Special:Upload."""
return self.family.upload_address(self.lang)
def maintenance_address(self, sub, default_limit = True):
+ """Return path to Special:Maintenance for subfunction 'sub'."""
+ #TODO: this address seems to be non-functioning on Wikimedia projects
return self.family.maintenance_address(self.lang, sub, default_limit)
def double_redirects_address(self, default_limit = True):
+ """Return path to Special:Doubleredirects."""
return self.family.double_redirects_address(self.lang, default_limit)
def broken_redirects_address(self, default_limit = True):
+ """Return path to Special:Brokenredirects."""
return self.family.broken_redirects_address(self.lang, default_limit)
+ def login_address(self):
+ """Return path to Special:Userlogin."""
+ return self.family.login_address(self.lang)
+
+ def captcha_image_address(self, id):
+ """Return path to Special:Captcha for image 'id'."""
+ return self.family.captcha_image_address(self.lang, id)
+
+ def watchlist_address(self):
+ """Return path to Special:Watchlist editor."""
+ return self.family.watchlist_address(self.lang)
+
+ def contribs_address(self, target, limit=500, offset=''):
+ """Return path to Special:Contributions for user 'target'."""
+ return self.family.contribs_address(self.lang,target,limit,offset)
+
def __hash__(self):
return hash(repr(self))
def version(self):
- """Returns MediaWiki version number as a string."""
+ """Return MediaWiki version number as a string."""
return self.family.version(self.lang)
def versionnumber(self):
- """Returns an int identifying MediaWiki version.
+ """Return an int identifying MediaWiki version.
Currently this is implemented as returning the minor version
number; i.e., 'X' in version '1.X.Y'
@@ -4599,7 +4691,7 @@
return self.family.versionnumber(self.lang)
def live_version(self):
- """Return the 'real' version number found on [[Special:Versions]]
+ """Return the 'real' version number found on [[Special:Version]]
Return value is a tuple (int, int, str) of the major and minor
version numbers and any other text contained in the version.
@@ -4632,6 +4724,7 @@
% (repr(self), charset, self.encoding()))
def shared_image_repository(self):
+ """Return a tuple of image repositories used by this site."""
return self.family.shared_image_repository(self.lang)
def __cmp__(self, other):
@@ -4643,12 +4736,16 @@
return cmp(self.family.name, other.family.name)
def category_on_one_line(self):
+ """Return True if this site wants all category links on one line."""
return self.lang in self.family.category_on_one_line
def interwiki_putfirst(self):
- return self.family.interwiki_putfirst.get(self.lang,None)
+ """Return list of language codes for ordering of interwiki links."""
+ return self.family.interwiki_putfirst.get(self.lang, None)
- def interwiki_putfirst_doubled(self,list_of_links):
+ def interwiki_putfirst_doubled(self, list_of_links):
+ # TODO: is this even needed? No family in the framework has this
+ # dictionary defined!
if self.family.interwiki_putfirst_doubled.has_key(self.lang):
if len(list_of_links) >= self.family.interwiki_putfirst_doubled[self.lang][0]:
list_of_links2 = []
@@ -4666,28 +4763,35 @@
else:
return False
- def login_address(self):
- return self.family.login_address(self.lang)
-
- def captcha_image_address(self, id):
- return self.family.captcha_image_address(self.lang, id)
-
- def watchlist_address(self):
- return self.family.watchlist_address(self.lang)
-
- def contribs_address(self, target, limit=500, offset=''):
- return self.family.contribs_address(self.lang,target,limit,offset)
-
def getSite(self, code):
+ """Return Site object for language 'code' in this Family."""
return getSite(code = code, fam = self.family, user=self.user)
def namespace(self, num, all = False):
+ """Return string containing local name of namespace 'num'.
+
+ If optional argument 'all' is true, return a tuple of all recognized
+ values for this namespace.
+
+ """
return self.family.namespace(self.lang, num, all = all)
def normalizeNamespace(self, value):
+ """Return canonical name for namespace 'value' in this Site's language.
+
+ If no match, return 'value' unmodified.
+
+ """
return self.family.normalizeNamespace(self.lang, value)
def namespaces(self):
+ """Return list of canonical namespace names for this Site."""
+
+ # n.b.: this does not return namespace numbers; to determine which
+ # numeric namespaces the framework recognizes for this Site (which
+ # may or may not actually exist on the wiki), use
+ # self.family.namespaces.keys()
+
if _namespaceCache.has_key(self):
return _namespaceCache[self]
else:
@@ -4727,7 +4831,8 @@
def disambcategory(self):
import catlib
try:
- return catlib.Category(self,self.namespace(14)+':'+self.family.disambcatname[self.lang])
+ return catlib.Category(self,
+ self.namespace(14)+':'+self.family.disambcatname[self.lang])
except KeyError:
raise NoPage
@@ -4779,12 +4884,12 @@
default_family = site.family
def calledModuleName():
+ """Return the name of the module calling this function.
+
+ This is required because the -help option loads the module's docstring
+ and because the module name will be used for the filename of the log.
+
"""
- Gets the name of the module calling this function. This is
- required because the -help option loads the module's docstring
- and because the module name will be used for the filename of the
- log.
- """
# get commandline arguments
args = sys.argv
try:
@@ -4794,12 +4899,14 @@
return args[0]
def handleArgs():
- '''
+ """Handle standard command line arguments, return the rest as a list.
+
Takes the commandline arguments, converts them to Unicode, processes all
global parameters such as -lang or -log. Returns a list of all arguments
that are not global. This makes sure that global arguments are applied
first, regardless of the order in which the arguments were given.
- '''
+
+ """
global default_code, default_family, verbose
# get commandline arguments
args = sys.argv
@@ -4846,8 +4953,7 @@
return nonGlobalArgs
def makepath(path):
- """ creates missing directories for the given path and
- returns a normalized absolute version of the path.
+ """Return a normalized absolute version of the path argument.
- if the given path already exists in the filesystem
the filesystem is not modified.
@@ -4857,31 +4963,30 @@
a '/' to the path if you want it to be a directory path.
from holger(a)trillke.net 2002/03/18
+
"""
from os import makedirs
- from os.path import normpath,dirname,exists,abspath
+ from os.path import normpath, dirname, exists, abspath
dpath = normpath(dirname(path))
if not exists(dpath): makedirs(dpath)
return normpath(abspath(path))
def datafilepath(*filename):
- """Returns an absolute path to a data file, offset from the bot's
- base directory.
- Argument(s) are zero or more directory names, followed by a data file
- name.
- Any directories in the path that do not already exist are created.
+ """Return an absolute path to a data file in a standard location.
+
+ Argument(s) are zero or more directory names, optionally followed by a
+ data file name. The returned path is offset from config.base_dir. Any
+ directories in the path that do not already exist are created.
+
"""
return makepath(os.path.join(config.base_dir, *filename))
def shortpath(path):
- """
- Short an absolute file path removing bot's base directory part if exists.
- """
- shortpath = path
+ """Return a file path relative to config.base_dir."""
if path.startswith(config.base_dir):
- shortpath = path[len(config.base_dir) + len(os.path.sep) : ]
- return shortpath
+ return path[len(config.base_dir) + len(os.path.sep) : ]
+ return path
#########################
# Interpret configuration