Revision: 4441 Author: russblau Date: 2007-10-10 20:31:14 +0000 (Wed, 10 Oct 2007)
Log Message: ----------- More docstring cleanup in wikipedia.py; fix logic error in family.py normalizeNamespace() method.
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2007-10-10 10:37:18 UTC (rev 4440) +++ trunk/pywikipedia/family.py 2007-10-10 20:31:14 UTC (rev 4441) @@ -2307,25 +2307,31 @@ return self.namespaces[ns_number].has_key(code)
def normalizeNamespace(self, code, value): - """Given a value, attempt to match it with all available namespaces, with default and localized versions. - Sites may have more than one way to write the same namespace - choose the first one in the list. + """Given a value, attempt to match it with all available namespaces, + with default and localized versions. Sites may have more than one + way to write the same namespace - choose the first one in the list. If nothing can be normalized, return the original value. """ for ns, items in self.namespaces.iteritems(): if items.has_key(code): v = items[code] - if type(v) == type([]): - if value in v: return v[0] - else: - if value == v: return v + elif items.has_key('_default'): + v = items['_default'] + else: + continue + if type(v) is list: + if value in v: return v[0] + else: + if value == v: return v if value == self.namespace('_default', ns): return self.namespace(code, ns) return value
def getNamespaceIndex(self, lang, namespace): - """Given a namespace, attempt to match it with all available namespaces. - Sites may have more than one way to write the same namespace - choose the first one in the list. - Returns namespace index or None + """Given a namespace, attempt to match it with all available + namespaces. Sites may have more than one way to write the same + namespace - choose the first one in the list. Returns namespace + index or None. """ namespace = namespace.lower() for n in self.namespaces.keys():
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-10-10 10:37:18 UTC (rev 4440) +++ trunk/pywikipedia/wikipedia.py 2007-10-10 20:31:14 UTC (rev 4441) @@ -203,6 +203,11 @@
# Pre-compile re expressions reNamespace = re.compile("^(.+?) *: *(.*)$") +Rwatch = re.compile( + r"<input type='hidden' value=\"(.*?)\" name=\"wpEditToken\"") +Rwatchlist = re.compile(r"<input tabindex='[\d]+' type='checkbox' " + r"name='wpWatchthis' checked='checked'") +Rlink = re.compile(r'\[\[(?P<title>[^\]\|]*)(\|[^\]]*)?\]\]')
class Page(object): @@ -686,7 +691,6 @@ retry_idle_time = 30 # We now know that there is a textarea. # Look for the edit token - Rwatch = re.compile(r"<input type='hidden' value=\"(.*?)\" name=\"wpEditToken\"") tokenloc = Rwatch.search(text) if tokenloc: self.site().putToken(tokenloc.group(1), sysop = sysop) @@ -716,8 +720,7 @@ if not matchVersionTab: raise NoPage(self.site(), self.aslink(forceInterwiki = True)) # Look if the page is on our watchlist - R = re.compile(r"<input tabindex='[\d]+' type='checkbox' name='wpWatchthis' checked='checked'") - matchWatching = R.search(text) + matchWatching = Rwatchlist.search(text) if matchWatching: isWatched = True # Now process the contents of the textarea @@ -1459,7 +1462,6 @@ # from text before processing thistxt = removeDisabledParts(thistxt)
- Rlink = re.compile(r'\[\[(?P<title>[^\]\|]*)(\|[^\]]*)?\]\]') for match in Rlink.finditer(thistxt): title = match.group('title') if title.strip().startswith("#"): @@ -3380,7 +3382,7 @@ class Site(object): """A MediaWiki site. Do not instantiate directly; use getSite() function.
- Constructor takes four arguments; only site is mandatory: + Constructor takes four arguments; only code is mandatory:
code language code for Site fam Wikimedia family (optional: defaults to configured). @@ -3395,78 +3397,126 @@
loggedInAs: return current username, or None if not logged in. forceLogin: require the user to log in to the site - messages: return True if there are new messages on the site - cookies: return user's cookies as a string + messages: return True if there are new messages on the site + cookies: return user's cookies as a string
- getUrl: retrieve an URL from the site - urlEncode: Encode a query to be sent using an http POST request. - postForm: Post form data to an address at this site. - postData: Post encoded form data to an http address at this site. + getUrl: retrieve an URL from the site + urlEncode: Encode a query to be sent using an http POST request. + postForm: Post form data to an address at this site. + postData: Post encoded form data to an http address at this site.
redirect: Return the localized redirect tag for the site. - redirectRegex: Return compiled regular expression matching on redirect pages. + redirectRegex: Return compiled regular expression matching on redirect + pages. mediawiki_message: Retrieve the text of a specified MediaWiki message has_mediawiki_message: True if this site defines specified MediaWiki message + shared_image_repository: Return tuple of image repositories used by this + site. + category_on_one_line: Return True if this site wants all category links + on one line. + interwiki_putfirst: Return list of language codes for ordering of + interwiki links. linkto(title): Return string in the form of a wikilink to 'title' - isInterwikiLink(s): Return True if s is in the form of an interwiki link. + isInterwikiLink(s): Return True if 's' is in the form of an interwiki + link. + version: Return MediaWiki version string from Family file. + versionnumber: Return int identifying the MediaWiki version. + live_version: Return version number read from Special:Version. + checkCharset(charset): Warn if charset doesn't match family file.
- Methods that yield Page objects derived from a wiki's Special: pages (note, some methods yield other information in a tuple along with the Pages; see method docs for details) --
- search(query): query results from Special:Search - allpages(): Special:Allpages - newpages(): Special:Newpages - longpages(): Special:Longpages - shortpages(): Special:Shortpages - categories(): Special:Categories (yields Category objects) - deadendpages(): Special:Deadendpages - ancientpages(): Special:Ancientpages - lonelypages(): Special:Lonelypages - unwatchedpages(): Special:Unwatchedpages (sysop accounts only) - uncategorizedcategories(): Special:Uncategorizedcategories (yields Category) - uncategorizedpages(): Special:Uncategorizedpages - uncategorizedimages(): Special:Uncategorizedimages (yields ImagePage) - unusedcategories(): Special:Unusuedcategories (yields Category) - unusedfiles(): Special:Unusedimages (yields ImagePage) - withoutinterwiki: Special:Withoutinterwiki - linksearch: Special:Linksearch + search(query): query results from Special:Search + allpages(): Special:Allpages + newpages(): Special:Newpages + longpages(): Special:Longpages + shortpages(): Special:Shortpages + categories(): Special:Categories (yields Category objects) + deadendpages(): Special:Deadendpages + ancientpages(): Special:Ancientpages + lonelypages(): Special:Lonelypages + unwatchedpages(): Special:Unwatchedpages (sysop accounts only) + uncategorizedcategories(): Special:Uncategorizedcategories (yields + Category objects) + uncategorizedpages(): Special:Uncategorizedpages + uncategorizedimages(): Special:Uncategorizedimages (yields + ImagePage objects) + unusedcategories(): Special:Unusedcategories (yields Category) + unusedfiles(): Special:Unusedimages (yields ImagePage) + withoutinterwiki: Special:Withoutinterwiki + linksearch: Special:Linksearch
Convenience methods that provide access to properties of the wiki Family object; all of these are read-only and return a unicode string unless noted --
- encoding: The current encoding for this site. - encodings: List of all historical encodings for this site. - category_namespace: Canonical name of the Category namespace on this - site. - category_namespaces: List of all valid names for the Category namespace. - image_namespace: Canonical name of the Image namespace on this site. - template_namespace: Canonical name of the Template namespace on this - site. - export_address: URL path for Special:Export. - query_address: URL path + '?' for query.php - api_address: Return URL path + '?' for api.php - apipath: URL path for api.php - protocol: Protocol ('http' or 'https') for access to this site. - hostname: Host portion of site URL. - path: URL path for index.php on this Site. - dbName: MySQL database name. - move_address: URL path for Special:Movepage. - delete_address(s): URL path to delete title 's'. - undelete_view_address(s): URL path to view Special:Undelete for title 's' - undelete_address: Return URL path to Special:Undelete. - protect_address(s): Return URL path to protect title 's'. - unprotect_address(s): Return URL path to unprotect title 's'. - put_address(s): Return URL path to submit revision to page titled 's'. - get_address(s): Return URL path to retrieve page titled 's'. - nice_get_address(s): Return shorter URL path to retrieve page titled 's'. - edit_address(s): Return URL path for edit form for page titled 's'. - purge_address(s): Return URL path to purge cache and retrieve page 's'. - block_address: Return path to block an IP address. + encoding: The current encoding for this site. + encodings: List of all historical encodings for this site. + category_namespace: Canonical name of the Category namespace on this + site. + category_namespaces: List of all valid names for the Category + namespace. + image_namespace: Canonical name of the Image namespace on this site. + template_namespace: Canonical name of the Template namespace on this + site. 
+ protocol: Protocol ('http' or 'https') for access to this site. + hostname: Host portion of site URL. + path: URL path for index.php on this Site. + dbName: MySQL database name.
+ Methods that return addresses to pages on this site (usually in + Special: namespace); these methods only return URL paths, they do not + interact with the wiki -- + + export_address: Special:Export. + query_address: URL path + '?' for query.php + api_address: URL path + '?' for api.php + apipath: URL path for api.php + move_address: Special:Movepage. + delete_address(s): Delete title 's'. + undelete_view_address(s): Special:Undelete for title 's' + undelete_address: Special:Undelete. + protect_address(s): Protect title 's'. + unprotect_address(s): Unprotect title 's'. + put_address(s): Submit revision to page titled 's'. + get_address(s): Retrieve page titled 's'. + nice_get_address(s): Short URL path to retrieve page titled 's'. + edit_address(s): Edit form for page titled 's'. + purge_address(s): Purge cache and retrieve page 's'. + block_address: Block an IP address. + unblock_address: Unblock an IP address. + blocksearch_address(s): Search for blocks on IP address 's'. + linksearch_address(s): Special:Linksearch for target 's'. + search_address(q): Special:Search for query 'q'. + allpages_address(s): Special:Allpages. + newpages_address: Special:Newpages. + longpages_address: Special:Longpages. + shortpages_address: Special:Shortpages. + unusedfiles_address: Special:Unusedimages. + categories_address: Special:Categories. + deadendpages_address: Special:Deadendpages. + ancientpages_address: Special:Ancientpages. + lonelypages_address: Special:Lonelypages. + unwatchedpages_address: Special:Unwatchedpages. + uncategorizedcategories_address: Special:Uncategorizedcategories. + uncategorizedimages_address: Special:Uncategorizedimages. + uncategorizedpages_address: Special:Uncategorizedpages. + unusedcategories_address: Special:Unusedcategories. + withoutinterwiki_address: Special:Withoutinterwiki. + references_address(s): Special:Whatlinkshere for page 's'. + allmessages_address: Special:Allmessages. + upload_address: Special:Upload.
+ maintenance_address(sub): Special:Maintenance for subfunction 'sub'. + double_redirects_address: Special:Doubleredirects. + broken_redirects_address: Special:Brokenredirects. + login_address: Special:Userlogin. + captcha_image_address(id): Special:Captcha for image 'id'. + watchlist_address: Special:Watchlist editor. + contribs_address(target): Special:Contributions for user 'target'. + """ def __init__(self, code, fam=None, user=None, persistent_http = None): self.lang = code.lower() @@ -4508,89 +4558,131 @@ return self.family.block_address(self.lang)
def unblock_address(self): + """Return path to unblock an IP address.""" return self.family.unblock_address(self.lang)
def blocksearch_address(self, s): + """Return path to search for blocks on IP address 's'.""" return self.family.blocksearch_address(self.lang, s)
def linksearch_address(self, s, limit=500, offset=0): + """Return path to Special:Linksearch for target 's'.""" return self.family.linksearch_address(self.lang, s, limit=limit, offset=offset)
- def search_address(self, q, n=50, ns = 0): + def search_address(self, q, n=50, ns=0): + """Return path to Special:Search for query 'q'.""" return self.family.search_address(self.lang, q, n, ns)
def allpages_address(self, s, ns = 0): - return self.family.allpages_address(self.lang, start = s, namespace = ns) + """Return path to Special:Allpages.""" + return self.family.allpages_address(self.lang, start=s, namespace = ns)
def newpages_address(self, n=50): + """Return path to Special:Newpages.""" return self.family.newpages_address(self.lang, n)
def longpages_address(self, n=500): + """Return path to Special:Longpages.""" return self.family.longpages_address(self.lang, n)
def shortpages_address(self, n=500): + """Return path to Special:Shortpages.""" return self.family.shortpages_address(self.lang, n)
def unusedfiles_address(self, n=500): + """Return path to Special:Unusedimages.""" return self.family.unusedfiles_address(self.lang, n)
def categories_address(self, n=500): + """Return path to Special:Categories.""" return self.family.categories_address(self.lang, n)
def deadendpages_address(self, n=500): + """Return path to Special:Deadendpages.""" return self.family.deadendpages_address(self.lang, n)
def ancientpages_address(self, n=500): + """Return path to Special:Ancientpages.""" return self.family.ancientpages_address(self.lang, n)
def lonelypages_address(self, n=500): + """Return path to Special:Lonelypages.""" return self.family.lonelypages_address(self.lang, n)
def unwatchedpages_address(self, n=500): + """Return path to Special:Unwatchedpages.""" return self.family.unwatchedpages_address(self.lang, n)
def uncategorizedcategories_address(self, n=500): + """Return path to Special:Uncategorizedcategories.""" return self.family.uncategorizedcategories_address(self.lang, n)
def uncategorizedimages_address(self, n=500): + """Return path to Special:Uncategorizedimages.""" return self.family.uncategorizedimages_address(self.lang, n)
def uncategorizedpages_address(self, n=500): + """Return path to Special:Uncategorizedpages.""" return self.family.uncategorizedpages_address(self.lang, n)
def unusedcategories_address(self, n=500): + """Return path to Special:Unusedcategories.""" return self.family.unusedcategories_address(self.lang, n)
def withoutinterwiki_address(self, n=500): + """Return path to Special:Withoutinterwiki.""" return self.family.withoutinterwiki_address(self.lang, n)
def references_address(self, s): + """Return path to Special:Whatlinkshere for page 's'.""" return self.family.references_address(self.lang, s)
def allmessages_address(self): + """Return path to Special:Allmessages.""" return self.family.allmessages_address(self.lang)
def upload_address(self): + """Return path to Special:Upload.""" return self.family.upload_address(self.lang)
def maintenance_address(self, sub, default_limit = True): + """Return path to Special:Maintenance for subfunction 'sub'.""" + #TODO: this address seems to be non-functioning on Wikimedia projects return self.family.maintenance_address(self.lang, sub, default_limit)
def double_redirects_address(self, default_limit = True): + """Return path to Special:Doubleredirects.""" return self.family.double_redirects_address(self.lang, default_limit)
def broken_redirects_address(self, default_limit = True): + """Return path to Special:Brokenredirects.""" return self.family.broken_redirects_address(self.lang, default_limit)
+ def login_address(self): + """Return path to Special:Userlogin.""" + return self.family.login_address(self.lang) + + def captcha_image_address(self, id): + """Return path to Special:Captcha for image 'id'.""" + return self.family.captcha_image_address(self.lang, id) + + def watchlist_address(self): + """Return path to Special:Watchlist editor.""" + return self.family.watchlist_address(self.lang) + + def contribs_address(self, target, limit=500, offset=''): + """Return path to Special:Contributions for user 'target'.""" + return self.family.contribs_address(self.lang,target,limit,offset) + def __hash__(self): return hash(repr(self))
def version(self): - """Returns MediaWiki version number as a string.""" + """Return MediaWiki version number as a string.""" return self.family.version(self.lang)
def versionnumber(self): - """Returns an int identifying MediaWiki version. + """Return an int identifying MediaWiki version.
Currently this is implemented as returning the minor version number; i.e., 'X' in version '1.X.Y' @@ -4599,7 +4691,7 @@ return self.family.versionnumber(self.lang)
def live_version(self): - """Return the 'real' version number found on [[Special:Versions]] + """Return the 'real' version number found on [[Special:Version]]
Return value is a tuple (int, int, str) of the major and minor version numbers and any other text contained in the version. @@ -4632,6 +4724,7 @@ % (repr(self), charset, self.encoding()))
def shared_image_repository(self): + """Return a tuple of image repositories used by this site.""" return self.family.shared_image_repository(self.lang)
def __cmp__(self, other): @@ -4643,12 +4736,16 @@ return cmp(self.family.name, other.family.name)
def category_on_one_line(self): + """Return True if this site wants all category links on one line.""" return self.lang in self.family.category_on_one_line
def interwiki_putfirst(self): - return self.family.interwiki_putfirst.get(self.lang,None) + """Return list of language codes for ordering of interwiki links.""" + return self.family.interwiki_putfirst.get(self.lang, None)
- def interwiki_putfirst_doubled(self,list_of_links): + def interwiki_putfirst_doubled(self, list_of_links): + # TODO: is this even needed? No family in the framework has this + # dictionary defined! if self.family.interwiki_putfirst_doubled.has_key(self.lang): if len(list_of_links) >= self.family.interwiki_putfirst_doubled[self.lang][0]: list_of_links2 = [] @@ -4666,28 +4763,35 @@ else: return False
- def login_address(self): - return self.family.login_address(self.lang) - - def captcha_image_address(self, id): - return self.family.captcha_image_address(self.lang, id) - - def watchlist_address(self): - return self.family.watchlist_address(self.lang) - - def contribs_address(self, target, limit=500, offset=''): - return self.family.contribs_address(self.lang,target,limit,offset) - def getSite(self, code): + """Return Site object for language 'code' in this Family.""" return getSite(code = code, fam = self.family, user=self.user)
def namespace(self, num, all = False): + """Return string containing local name of namespace 'num'. + + If optional argument 'all' is true, return a tuple of all recognized + values for this namespace. + + """ return self.family.namespace(self.lang, num, all = all)
def normalizeNamespace(self, value): + """Return canonical name for namespace 'value' in this Site's language. + + If no match, return 'value' unmodified. + + """ return self.family.normalizeNamespace(self.lang, value)
def namespaces(self): + """Return list of canonical namespace names for this Site.""" + + # n.b.: this does not return namespace numbers; to determine which + # numeric namespaces the framework recognizes for this Site (which + # may or may not actually exist on the wiki), use + # self.family.namespaces.keys() + if _namespaceCache.has_key(self): return _namespaceCache[self] else: @@ -4727,7 +4831,8 @@ def disambcategory(self): import catlib try: - return catlib.Category(self,self.namespace(14)+':'+self.family.disambcatname[self.lang]) + return catlib.Category(self, + self.namespace(14)+':'+self.family.disambcatname[self.lang]) except KeyError: raise NoPage
@@ -4779,12 +4884,12 @@ default_family = site.family
def calledModuleName(): + """Return the name of the module calling this function. + + This is required because the -help option loads the module's docstring + and because the module name will be used for the filename of the log. + """ - Gets the name of the module calling this function. This is - required because the -help option loads the module's docstring - and because the module name will be used for the filename of the - log. - """ # get commandline arguments args = sys.argv try: @@ -4794,12 +4899,14 @@ return args[0]
def handleArgs(): - ''' + """Handle standard command line arguments, return the rest as a list. + Takes the commandline arguments, converts them to Unicode, processes all global parameters such as -lang or -log. Returns a list of all arguments that are not global. This makes sure that global arguments are applied first, regardless of the order in which the arguments were given. - ''' + + """ global default_code, default_family, verbose # get commandline arguments args = sys.argv @@ -4846,8 +4953,7 @@ return nonGlobalArgs
def makepath(path): - """ creates missing directories for the given path and - returns a normalized absolute version of the path. + """Return a normalized absolute version of the path argument.
- if the given path already exists in the filesystem the filesystem is not modified. @@ -4857,31 +4963,30 @@ a '/' to the path if you want it to be a directory path.
from holger@trillke.net 2002/03/18 + """ from os import makedirs - from os.path import normpath,dirname,exists,abspath + from os.path import normpath, dirname, exists, abspath
dpath = normpath(dirname(path)) if not exists(dpath): makedirs(dpath) return normpath(abspath(path))
def datafilepath(*filename): - """Returns an absolute path to a data file, offset from the bot's - base directory. - Argument(s) are zero or more directory names, followed by a data file - name. - Any directories in the path that do not already exist are created. + """Return an absolute path to a data file in a standard location. + + Argument(s) are zero or more directory names, optionally followed by a + data file name. The return path is offset to config.base_dir. Any + directories in the path that do not already exist are created. + """ return makepath(os.path.join(config.base_dir, *filename))
def shortpath(path): - """ - Short an absolute file path removing bot's base directory part if exists. - """ - shortpath = path + """Return a file path relative to config.base_dir.""" if path.startswith(config.base_dir): - shortpath = path[len(config.base_dir) + len(os.path.sep) : ] - return shortpath + return path[len(config.base_dir) + len(os.path.sep) : ] + return path
######################### # Interpret configuration