Revision: 8324 Author: russblau Date: 2010-06-24 19:09:47 +0000 (Thu, 24 Jun 2010)
Log Message: ----------- Add "content" option for PageGenerator and all methods that use it, to allow preloading of page content without a separate ".preloadpages()" call.
Modified Paths: -------------- branches/rewrite/pywikibot/data/api.py branches/rewrite/pywikibot/page.py branches/rewrite/pywikibot/pagegenerators.py branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/data/api.py =================================================================== --- branches/rewrite/pywikibot/data/api.py 2010-06-24 19:00:35 UTC (rev 8323) +++ branches/rewrite/pywikibot/data/api.py 2010-06-24 19:09:47 UTC (rev 8324) @@ -409,12 +409,13 @@ if name not in _modules: self.get_module() break + self.request = Request(**kwargs) self.prefix = None self.update_limit() # sets self.prefix - if self.query_limit is not None and "generator" in kwargs: + if self.api_limit is not None and "generator" in kwargs: self.prefix = "g" + self.prefix - self.request = Request(**kwargs) self.limit = None + self.query_limit = self.api_limit if "generator" in kwargs: self.resultkey = "pages" # name of the "query" subelement key else: # to look for when iterating @@ -448,8 +449,10 @@ limit = int(value) # don't update if limit is greater than maximum allowed by API self.update_limit() - if self.query_limit is None or limit < self.query_limit: - self.query_limit = int(limit) + if self.api_limit is None: + self.query_limit = limit + else: + self.query_limit = min(self.api_limit, limit)
def set_maximum_items(self, value): """Set the maximum number of items to be retrieved from the wiki. @@ -466,23 +469,23 @@ self.limit = int(value)
def update_limit(self): - """Set query_limit for self.module based on api response""" + """Set query limit for self.module based on api response"""
- self.query_limit = None + self.api_limit = None for mod in self.module.split('|'): for param in _modules[mod].get("parameters", []): if param["name"] == "limit": if (self.site.logged_in() and "apihighlimits" in self.site.getuserinfo()["rights"]): - self.query_limit = int(param["highmax"]) + self.api_limit = int(param["highmax"]) else: - self.query_limit = int(param["max"]) + self.api_limit = int(param["max"]) if self.prefix is None: self.prefix = _modules[mod]["prefix"] pywikibot.debug(u"%s: Set query_limit to %i." % (self.__class__.__name__, - self.query_limit), + self.api_limit), _logger) return
@@ -517,6 +520,13 @@ new_limit = min(self.query_limit, self.limit - count) else: new_limit = None + if "rvprop" in self.request \ + and "content" in self.request["rvprop"]: + # queries that retrieve page content have lower limits + # Note: although API allows up to 500 pages for content + # queries, these sometimes result in server-side errors + # so use 250 as a safer limit + new_limit = min(new_limit, self.api_limit // 10, 250) if new_limit is not None: self.request[self.prefix+"limit"] = str(new_limit) try: @@ -596,30 +606,39 @@ this class iterate Page objects.
""" - def __init__(self, generator, **kwargs): + def __init__(self, generator, g_content=False, **kwargs): """ Required and optional parameters are as for C{Request}, except that action=query is assumed and generator is required.
@param generator: the "generator=" type from api.php @type generator: str + @param g_content: if True, retrieve the contents of the current + version of each Page (default False)
""" - QueryGenerator.__init__(self, generator=generator, **kwargs) # get some basic information about every page generated - if 'prop' in self.request: - self.request['prop'] += "|info|imageinfo|categoryinfo" + if 'prop' in kwargs: + kwargs['prop'] += "|info|imageinfo|categoryinfo" else: - self.request['prop'] = 'info|imageinfo|categoryinfo' - if "inprop" in self.request: - if "protection" not in self.request["inprop"]: - self.request["inprop"] += "|protection" + kwargs['prop'] = 'info|imageinfo|categoryinfo' + if g_content: + # retrieve the current revision + kwargs['prop'] += "|revisions" + if "rvprop" in kwargs: + kwargs["rvprop"] += "ids|timestamp|flags|comment|user|content" + else: + kwargs["rvprop"] = "ids|timestamp|flags|comment|user|content" + if "inprop" in kwargs: + if "protection" not in kwargs["inprop"]: + kwargs["inprop"] += "|protection" else: - self.request['inprop'] = 'protection' - if "iiprop" in self.request: - self.request["iiprop"] += 'timestamp|user|comment|url|size|sha1|metadata' + kwargs['inprop'] = 'protection' + if "iiprop" in kwargs: + kwargs["iiprop"] += 'timestamp|user|comment|url|size|sha1|metadata' else: - self.request['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata' + kwargs['iiprop'] = 'timestamp|user|comment|url|size|sha1|metadata' + QueryGenerator.__init__(self, generator=generator, **kwargs) self.resultkey = "pages" # element to look for in result
def result(self, pagedata):
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2010-06-24 19:00:35 UTC (rev 8323) +++ branches/rewrite/pywikibot/page.py 2010-06-24 19:09:47 UTC (rev 8324) @@ -74,72 +74,20 @@ """ if isinstance(source, pywikibot.site.BaseSite): self._link = Link(title, source=source, defaultNamespace=ns) -## self._site = source -## if ns not in source.namespaces(): -## raise pywikibot.Error( -## "Invalid namespace '%i' for site %s." -## % (ns, source.sitename())) -## self._ns = ns -## if ns and not title.startswith(source.namespace(ns)+u":"): -## title = source.namespace(ns) + u":" + title -## elif not ns and u":" in title: -## pos = title.index(u':') -## nsindex = source.ns_index(title[ :pos]) -## if nsindex: -## self._ns = nsindex -## # normalize namespace, in case an alias was used -## title = source.namespace(nsindex) + title[pos: ] -## if u"#" in title: -## title, self._section = title.split(u"#", 1) -## else: -## self._section = None -## if not title: -## raise pywikibot.Error( -## "Page object cannot be created from Site without title.") -## self._title = title elif isinstance(source, Page): # copy all of source's attributes to this object self.__dict__ = source.__dict__ if title: # overwrite title self._link = Link(title, source=source.site, defaultNamespace=ns) -## if ":" in title: -## prefix = title[ :title.index(":")] -## self._ns = self._site.ns_index(prefix) -## if self._ns is None: -## self._ns = 0 -## else: -## title = title[title.index(":")+1 : ].strip(" _") -## self._title = "%s:%s" % ( -## self.site.namespace(self._ns), -## self._title) -## else: -## self._ns = 0 -## if "#" in title: -## self._section = title[title.index("#") + 1 : ].strip(" _") -## title = title[ : title.index("#")].strip(" _") -## self._title = title elif isinstance(source, Link): self._link = source -## self._site = source.site -## self._section = source.section -## self._ns = 
source.namespace -## self._title = source.title -## # reassemble the canonical title from components -## if self._ns: -## self._title = "%s:%s" % (self.site.namespace(self._ns), -## self._title) else: raise pywikibot.Error( "Invalid argument type '%s' in Page constructor: %s" % (type(source), source)) -## if self._section is not None: -## self._title = self._title + "#" + self._section self._revisions = {}
-## # Always capitalize the first letter -## self._title = self._title[:1].upper() + self._title[1:] - @property def site(self): """Return the Site object for the wiki on which this Page resides.""" @@ -560,7 +508,7 @@
def getReferences(self, follow_redirects=True, withTemplateInclusion=True, onlyTemplateInclusion=False, redirectsOnly=False, - namespaces=None, step=None, total=None): + namespaces=None, step=None, total=None, content=False): """Return an iterator all pages that refer to or embed the page.
If you need a full list of referring pages, use @@ -576,6 +524,8 @@ @param namespaces: only iterate pages in these namespaces @param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each referring page (default False)
""" # N.B.: this method intentionally overlaps with backlinks() and @@ -590,10 +540,10 @@ withTemplateInclusion=withTemplateInclusion, onlyTemplateInclusion=onlyTemplateInclusion, namespaces=namespaces, step=step, - total=total) + total=total, content=content)
def backlinks(self, followRedirects=True, filterRedirects=None, - namespaces=None, step=None, total=None): + namespaces=None, step=None, total=None, content=False): """Return an iterator for pages that link to this page.
@param followRedirects: if True, also iterate pages that link to a @@ -603,16 +553,18 @@ @param namespaces: only iterate pages in these namespaces @param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each referring page (default False)
""" return self.site.pagebacklinks(self, followRedirects=followRedirects, filterRedirects=filterRedirects, namespaces=namespaces, step=step, - total=total) + total=total, content=content)
def embeddedin(self, filter_redirects=None, namespaces=None, step=None, - total=None): + total=None, content=False): """Return an iterator for pages that embed this page as a template.
@param filterRedirects: if True, only iterate redirects; if False, @@ -620,12 +572,15 @@ @param namespaces: only iterate pages in these namespaces @param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each embedding page (default False)
""" return self.site.page_embeddedin(self, filterRedirects=filter_redirects, namespaces=namespaces, - step=step, total=total) + step=step, total=total, + content=content)
def canBeEdited(self): """Return bool indicating whether this page can be edited. @@ -790,7 +745,8 @@ """ return self.site.watchpage(self, unwatch)
- def linkedPages(self, namespaces=None, step=None, total=None): + def linkedPages(self, namespaces=None, step=None, total=None, + content=False): """Iterate Pages that this Page links to.
Only returns pages from "normal" internal links. Image and category @@ -801,11 +757,13 @@ @param namespaces: only iterate links in these namespaces @param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each linked page (default False) @return: a generator that yields Page objects.
""" return self.site.pagelinks(self, namespaces=namespaces, step=step, - total=total) + total=total, content=content)
def interwiki(self, expand=True): """Iterate interwiki links in the page text, excluding language links. @@ -866,21 +824,24 @@ # iterated upon. return self.site.pagelanglinks(self, step=step, total=total)
- def templates(self): + def templates(self, content=False): """Return a list of Page objects for templates used on this Page.
Template parameters are ignored. This method only returns embedded templates, not template pages that happen to be referenced through a normal link.
+ @param content: if True, retrieve the content of the current version + of each template (default False) + """ # Data might have been preloaded if not hasattr(self, '_templates'): - self._templates = list(self.itertemplates()) + self._templates = list(self.itertemplates(content=content))
return self._templates
- def itertemplates(self, step=None, total=None): + def itertemplates(self, step=None, total=None, content=False): """Iterate Page objects for templates used on this Page.
Template parameters are ignored. This method only returns embedded @@ -889,23 +850,29 @@
@param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each template (default False)
""" if hasattr(self, '_templates'): return iter(self._templates) - return self.site.pagetemplates(self, step=step, total=total) + return self.site.pagetemplates(self, step=step, total=total, + content=content)
@deprecate_arg("followRedirects", None) @deprecate_arg("loose", None) - def imagelinks(self, step=None, total=None): + def imagelinks(self, step=None, total=None, content=False): """Iterate ImagePage objects for images displayed on this Page.
@param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each image description page (default False) @return: a generator that yields ImagePage objects.
""" - return self.site.pageimages(self, step=step, total=total) + return self.site.pageimages(self, step=step, total=total, + content=content)
def templatesWithParams(self): """Iterate templates used on this Page. @@ -948,17 +915,20 @@
@deprecate_arg("nofollow_redirects", None) @deprecate_arg("get_redirect", None) - def categories(self, withSortKey=False, step=None, total=None): + def categories(self, withSortKey=False, step=None, total=None, + content=False): """Iterate categories that the article is in.
@param withSortKey: if True, include the sort key in each Category. @param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, retrieve the content of the current version + of each category description page (default False) @return: a generator that yields Category objects.
""" return self.site.pagecategories(self, withSortKey=withSortKey, - step=step, total=total) + step=step, total=total, content=content)
def extlinks(self, step=None, total=None): """Iterate all external URLs (not interwiki links) from this page. @@ -992,8 +962,7 @@ Return value is a list of tuples, where each tuple represents one edit and is built of revision id, edit date/time, user name, and edit summary. Starts with the most current revision, unless - reverseOrder is True. Defaults to getting the first revCount edits, - unless getAll is True. + reverseOrder is True.
@param step: limit each API call to this number of revisions @param total: iterate no more than this number of revisions in total @@ -1490,14 +1459,17 @@ % (datetime, username, resolution, size, comment)) return u'{| border="1"\n! date/time || username || resolution || size || edit summary\n|----\n' + u'\n|----\n'.join(lines) + '\n|}'
- def usingPages(self, step=None, total=None): + def usingPages(self, step=None, total=None, content=False): """Yield Pages on which the image is displayed.
@param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total + @param content: if True, load the current content of each iterated page + (default False)
""" - return self.site.imageusage(self, step=step, total=total) + return self.site.imageusage(self, + step=step, total=total, content=content)
class Category(Page): @@ -1537,7 +1509,8 @@
@deprecate_arg("startFrom", None) @deprecate_arg("cacheResults", None) - def subcategories(self, recurse=False, step=None, total=None): + def subcategories(self, recurse=False, step=None, total=None, + content=False): """Iterate all subcategories of the current category.
@param recurse: if not False or 0, also iterate subcategories of @@ -1548,14 +1521,17 @@ @param step: limit each API call to this number of categories @param total: iterate no more than this number of subcategories in total (at all levels) + @param content: if True, retrieve the content of the current version + of each category description page (default False)
""" if not isinstance(recurse, bool) and recurse: recurse = recurse - 1 if not hasattr(self, "_subcats"): self._subcats = [] - for member in self.site.categorymembers(self, namespaces=[14], - step=step, total=total): + for member in self.site.categorymembers(self, + namespaces=[14], step=step, total=total, + content=content): subcat = Category(self.site, member.title()) self._subcats.append(subcat) yield subcat @@ -1564,8 +1540,9 @@ if not total: return if recurse: - for item in subcat.subcategories(recurse, - step=step, total=total): + for item in subcat.subcategories( + recurse, step=step, total=total, + content=content): yield item if total is not None: total -= 1 @@ -1579,8 +1556,9 @@ if not total: return if recurse: - for item in subcat.subcategories(recurse, - step=step, total=total): + for item in subcat.subcategories( + recurse, step=step, total=total, + content=content): yield item if total is not None: total -= 1 @@ -1588,7 +1566,7 @@ return
@deprecate_arg("startFrom", None) - def articles(self, recurse=False, step=None, total=None): + def articles(self, recurse=False, step=None, total=None, content=False): """ Yields all articles in the current category.
@@ -1600,13 +1578,16 @@ @param step: limit each API call to this number of pages @param total: iterate no more than this number of pages in total (at all levels) + @param content: if True, retrieve the content of the current version + of each page (default False)
""" namespaces = [x for x in self.site.namespaces() if x>=0 and x!=14] for member in self.site.categorymembers(self, - namespaces=namespaces, - step=step, total=total): + namespaces=namespaces, + step=step, total=total, + content=content): yield member if total is not None: total -= 1 @@ -1616,18 +1597,22 @@ if not isinstance(recurse, bool) and recurse: recurse = recurse - 1 for subcat in self.subcategories(step=step): - for article in subcat.articles(recurse, step=step, total=total): + for article in subcat.articles( + recurse, step=step, total=total, + content=content): yield article if total is not None: total -= 1 if not total: return
- def members(self, recurse=False, namespaces=None, step=None, total=None): + def members(self, recurse=False, namespaces=None, step=None, total=None, + content=False): """Yield all category contents (subcats, pages, and files)."""
- for member in self.site.categorymembers(self, namespaces, - step=step, total=total): + for member in self.site.categorymembers(self, + namespaces, step=step, total=total, + content=content): yield member if total is not None: total -= 1 @@ -1637,8 +1622,9 @@ if not isinstance(recurse, bool) and recurse: recurse = recurse - 1 for subcat in self.subcategories(step=step): - for article in subcat.members(recurse, namespaces, step=step, - total=total): + for article in subcat.members( + recurse, namespaces, step=step, + total=total, content=content): yield article if total is not None: total -= 1
Modified: branches/rewrite/pywikibot/pagegenerators.py =================================================================== --- branches/rewrite/pywikibot/pagegenerators.py 2010-06-24 19:00:35 UTC (rev 8323) +++ branches/rewrite/pywikibot/pagegenerators.py 2010-06-24 19:09:47 UTC (rev 8324) @@ -199,7 +199,7 @@ gensList = CombinedPageGenerator(self.gens) return DuplicateFilterPageGenerator(gensList)
- def getCategoryGen(self, arg, length, recurse = False): + def getCategoryGen(self, arg, length, recurse=False, content=False): if len(arg) == length: categoryname = pywikibot.input(u'Please enter the category name:') else: @@ -215,9 +215,10 @@ defaultNamespace=14)) # Link constructor automatically prepends localized namespace # if not included in user's input - return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse) + return CategorizedPageGenerator(cat, + start=startfrom, recurse=recurse, content=content)
- def setSubCategoriesGen(self, arg, length, recurse=False): + def setSubCategoriesGen(self, arg, length, recurse=False, content=False): if len(arg) == length: categoryname = pywikibot.input(u'Please enter the category name:') else: @@ -232,7 +233,8 @@
cat = pywikibot.Category(pywikibot.Link(categoryname, defaultNamespace=14)) - return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse) + return SubCategoriesPageGenerator(cat, + start=startfrom, recurse=recurse, content=content)
def handleArg(self, arg): """Parse one argument at a time. @@ -447,7 +449,7 @@
def AllpagesPageGenerator(start='!', namespace=0, includeredirects=True, - site=None, step=None, total=None): + site=None, step=None, total=None, content=False): """ Iterate Page objects for all titles in a single namespace.
@@ -456,6 +458,7 @@
@param step: Maximum number of pages to retrieve per API query @param total: Maximum number of pages to retrieve in total + @param content: If True, load current version of each page (default False)
""" if site is None: @@ -468,11 +471,12 @@ else: filterredir = False return site.allpages(start=start, namespace=namespace, - filterredir=filterredir, step=step, total=total) + filterredir=filterredir, step=step, total=total, + content=content)
def PrefixingPageGenerator(prefix, namespace=None, includeredirects=True, - site=None, step=None, total=None): + site=None, step=None, total=None, content=False): if site is None: site = pywikibot.Site() prefixlink = pywikibot.Link(prefix, site) @@ -487,8 +491,10 @@ else: filterredir = False return site.allpages(prefix=title, namespace=namespace, - filterredir=filterredir, step=step, total=total) + filterredir=filterredir, step=step, total=total, + content=content)
+ @deprecate_arg("number", "total") @deprecate_arg("namespace", "namespaces") @deprecate_arg("repeat", None) @@ -507,6 +513,7 @@ step=step, total=total): yield pywikibot.Page(pywikibot.Link(item["title"], site))
+ def RecentChangesPageGenerator(start=None, end=None, reverse=False, namespaces=None, pagelist=None, changetype=None, showMinor=None, @@ -547,14 +554,15 @@ step=step, total=total): yield pywikibot.Page(pywikibot.Link(item["title"], site))
-def FileLinksGenerator(referredImagePage, step=None, total=None): - return referredImagePage.usingPages(step=step, total=total)
+def FileLinksGenerator(referredImagePage, step=None, total=None, content=False): + return referredImagePage.usingPages(step=step, total=total, content=content)
-def ImagesPageGenerator(pageWithImages, step=None, total=None): - return pageWithImages.imagelinks(step=step, total=total)
+def ImagesPageGenerator(pageWithImages, step=None, total=None, content=False): + return pageWithImages.imagelinks(step=step, total=total, content=content)
+ def InterwikiPageGenerator(page): """Iterator over all interwiki (non-language) links on a page.""" for link in page.interwiki(): @@ -570,54 +578,66 @@ def ReferringPageGenerator(referredPage, followRedirects=False, withTemplateInclusion=True, onlyTemplateInclusion=False, - step=None, total=None): + step=None, total=None, content=False): '''Yields all pages referring to a specific page.''' return referredPage.getReferences( follow_redirects=followRedirects, withTemplateInclusion=withTemplateInclusion, onlyTemplateInclusion=onlyTemplateInclusion, - step=step, total=total) + step=step, total=total, content=content)
def CategorizedPageGenerator(category, recurse=False, start=None, - step=None, total=None): - ''' - Yields all pages in a specific category. + step=None, total=None, content=False): + """Yield all pages in a specific category.
If recurse is True, pages in subcategories are included as well; if recurse is an int, only subcategories to that depth will be included (e.g., recurse=2 will get pages in subcats and sub-subcats, but will not go any further). + If start is a string value, only pages whose sortkey comes after start alphabetically are included. - ''' + + If content is True (default is False), the current page text of each + retrieved page will be downloaded. + + """ # TODO: page generator could be modified to use cmstartsortkey ... - for a in category.articles(recurse=recurse, step=step, total=total): + for a in category.articles( + recurse=recurse, step=step, total=total, content=content): if start is None or a.title(withNamespace=False) >= start: yield a
+ def SubCategoriesPageGenerator(category, recurse=False, start=None, - step=None, total=None): - ''' - Yields all subcategories in a specific category. + step=None, total=None, content=False): + """Yield all subcategories in a specific category.
If recurse is True, pages in subcategories are included as well; if recurse is an int, only subcategories to that depth will be included (e.g., recurse=2 will get pages in subcats and sub-subcats, but will not go any further). + If start is a string value, only categories whose sortkey comes after start alphabetically are included. - ''' + + If content is True (default is False), the current page text of each + category description page will be downloaded. + + """ # TODO: page generator could be modified to use cmstartsortkey ... - for s in category.subcategories(recurse=recurse, step=step, total=total): + for s in category.subcategories( + recurse=recurse, step=step, total=total, content=content): if start is None or s.title(withNamespace=False) >= start: yield s
-def LinkedPageGenerator(linkingPage, step=None, total=None): - """Yields all pages linked from a specific page.""" - return linkingPage.linkedPages(step=step, total=total)
+def LinkedPageGenerator(linkingPage, step=None, total=None, content=False): + """Yield all pages linked from a specific page.""" + return linkingPage.linkedPages(step=step, total=total, content=content)
+ def TextfilePageGenerator(filename=None, site=None): """Iterate pages from a list in a text file.
@@ -643,6 +663,7 @@ yield pywikibot.Page(pywikibot.Link(linkmatch.groups("title"), site)) f.close()
+ def PagesFromTitlesGenerator(iterable, site=None): """Generate pages from the titles (unicode strings) yielded by iterable.""" if site is None: @@ -744,11 +765,12 @@
def PageWithTalkPageGenerator(generator): + """Yield pages and associated talk pages from another generator. + + Only yields talk pages if the original generator yields a non-talk page, + and does not check if the talk page in fact exists. + """ - Wraps around another generator. Yields the same pages, but for non-talk - pages, it also includes associated talk pages. - This generator does not check if the talk page in fact exists. - """ for page in generator: yield page if not page.isTalkPage():
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2010-06-24 19:00:35 UTC (rev 8323) +++ branches/rewrite/pywikibot/site.py 2010-06-24 19:09:47 UTC (rev 8324) @@ -143,8 +143,12 @@
@property def code(self): - """The identifying code for this Site.""" + """The identifying code for this Site.
+ By convention, this is usually an ISO language code, but it does + not have to be. + + """ return self.__code
@property @@ -194,7 +198,7 @@ % (self.__class__.__name__, attr) )
def sitename(self): - """Return string representing this Site's name and language.""" + """Return string representing this Site's name and code."""
return self.family.name+':'+self.code
@@ -270,10 +274,12 @@
def pagenamecodes(self, default=True): """Return list of localized PAGENAME tags for the site.""" + return [u"PAGENAME"]
def pagename2codes(self, default=True): """Return list of localized PAGENAMEE tags for the site.""" + return [u"PAGENAMEE"]
def lock_page(self, page, block=True): @@ -330,14 +336,14 @@ """ return pywikibot.Link(title, self).astext(othersite)
- def isInterwikiLink(self, s): - """Return True if s is in the form of an interwiki link. + def isInterwikiLink(self, text): + """Return True if text is in the form of an interwiki link.
- If a link object constructed using "s" as the link text parses as + If a link object constructed using "text" as the link text parses as belonging to a different site, this method returns True.
""" - linkfam, linkcode = pywikibot.Link(s, self).parse_site() + linkfam, linkcode = pywikibot.Link(text, self).parse_site() return (linkfam != self.family.name or linkcode != self.code)
def redirectRegex(self, pattern=None): @@ -413,6 +419,7 @@
def nice_get_address(self, title): """Return shorter URL path to retrieve page titled 'title'.""" + return self.family.nice_get_address(self.lang, title)
# deprecated methods for backwards-compatibility @@ -420,6 +427,7 @@ @deprecated("family attribute") def fam(self): """Return Family object for this Site.""" + return self.family
@deprecated("urllib.urlencode()") @@ -630,14 +638,6 @@ self._loginstatus = -3 return
- # ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__() - # Calculating valid languages took quite long, so we calculate it once - # in initialization instead of each time it is used. - self._validlanguages = [] - for language in self.languages(): - if not language[:1].upper() + language[1:] in self.namespaces(): - self._validlanguages.append(language) - def _generator(self, gen_class, type_arg=None, namespaces=None, step=None, total=None, **args): """Convenience method that returns an API generator. @@ -1048,16 +1048,16 @@ @param history: if true, return the image's version history
""" title = page.title(withSection=False) + args = {"title": title} + if history: + args["iilimit"] = "max" query = self._generator(api.PropertyGenerator, type_arg="imageinfo", - titles=title.encode(self.encoding()), iiprop=["timestamp", "user", "comment", "url", "size", "sha1", "mime", - "metadata", "archivename"] - ) - if history: - query.request["iilimit"] = "max" + "metadata", "archivename"], + **args) for pageitem in query: if pageitem['title'] != title: raise Error( @@ -1220,7 +1219,7 @@ # following group of methods map more-or-less directly to API queries
def pagebacklinks(self, page, followRedirects=False, filterRedirects=None, - namespaces=None, step=None, total=None): + namespaces=None, step=None, total=None, content=False): """Iterate all pages that link to the given page.
@param page: The Page to get links to. @@ -1233,15 +1232,18 @@ in this list. @param step: Limit on number of pages to retrieve per API query. @param total: Maximum number of pages to retrieve in total. + @param content: if True, load the current content of each iterated page + (default False)
""" bltitle = page.title(withSection=False).encode(self.encoding()) + blargs = {"gbltitle": bltitle} + if filterRedirects is not None: + blargs["gblfilterredir"] = filterRedirects and "redirects" \ + or "nonredirects" blgen = self._generator(api.PageGenerator, type_arg="backlinks", - gbltitle=bltitle, namespaces=namespaces, - step=step, total=total) - if filterRedirects is not None: - blgen.request["gblfilterredir"] = filterRedirects and "redirects"\ - or "nonredirects" + namespaces=namespaces, step=step, total=total, + g_content=content, **blargs) if followRedirects: # bug: see http://bugzilla.wikimedia.org/show_bug.cgi?id=7304 # links identified by MediaWiki as redirects may not really be, @@ -1265,13 +1267,14 @@ genlist[redir.title()] = self.pagebacklinks( redir, followRedirects=True, filterRedirects=filterRedirects, - namespaces=namespaces) + namespaces=namespaces, + content=content) import itertools return itertools.chain(*genlist.values()) return blgen
def page_embeddedin(self, page, filterRedirects=None, namespaces=None, - step=None, total=None): + step=None, total=None, content=False): """Iterate all pages that embedded the given page as a template.
@param page: The Page to get inclusions for. @@ -1280,93 +1283,121 @@ None, return both (no filtering). @param namespaces: If present, only return links from the namespaces in this list. + @param content: if True, load the current content of each iterated page + (default False)
""" - eititle = page.title(withSection=False).encode(self.encoding()) + eiargs = {"geititle": + page.title(withSection=False).encode(self.encoding())} + if filterRedirects is not None: + eiargs["geifilterredir"] = filterRedirects and "redirects"\ + or "nonredirects" eigen = self._generator(api.PageGenerator, type_arg="embeddedin", - geititle=eititle, namespaces=namespaces, - step=step, total=total) - if filterRedirects is not None: - eigen.request["geifilterredir"] = filterRedirects and "redirects"\ - or "nonredirects" + namespaces=namespaces, step=step, total=total, + g_content=content, **eiargs) return eigen
def pagereferences(self, page, followRedirects=False, filterRedirects=None, withTemplateInclusion=True, onlyTemplateInclusion=False, - namespaces=None, step=None, total=None): + namespaces=None, step=None, total=None, content=False): """Convenience method combining pagebacklinks and page_embeddedin."""
if onlyTemplateInclusion: return self.page_embeddedin(page, namespaces=namespaces, filterRedirects=filterRedirects, - step=step, total=total) + step=step, total=total, content=content) if not withTemplateInclusion: return self.pagebacklinks(page, followRedirects=followRedirects, - filterRedirects=filterRedirects, namespaces=namespaces, - step=step, total=total) + filterRedirects=filterRedirects, + namespaces=namespaces, + step=step, total=total, content=content) import itertools return itertools.islice( itertools.chain( self.pagebacklinks( page, followRedirects, filterRedirects, - namespaces=namespaces, step=step), + namespaces=namespaces, step=step, content=content), self.page_embeddedin( page, filterRedirects, namespaces=namespaces, - step=step) + step=step, content=content) ), total)
def pagelinks(self, page, namespaces=None, follow_redirects=False, - step=None, total=None): + step=None, total=None, content=False): """Iterate internal wikilinks contained (or transcluded) on page.
@param namespaces: Only iterate pages in these namespaces (default: all) @type namespaces: list of ints @param follow_redirects: if True, yields the target of any redirects, rather than the redirect page + @param content: if True, load the current content of each iterated page + (default False)
""" - plgen = self._generator(api.PageGenerator, type_arg="links", - namespaces=namespaces, step=step, total=total) + plargs = {} if hasattr(page, "_pageid"): - plgen.request['pageids'] = str(page._pageid) + plargs['pageids'] = str(page._pageid) else: pltitle = page.title(withSection=False).encode(self.encoding()) - plgen.request['titles'] = pltitle + plargs['titles'] = pltitle if follow_redirects: - plgen.request['redirects'] = '' + plargs['redirects'] = '' + plgen = self._generator(api.PageGenerator, type_arg="links", + namespaces=namespaces, step=step, total=total, + g_content=content, **plargs) return plgen
@deprecate_arg("withSortKey", None) # Sortkey doesn't work with generator - def pagecategories(self, page, step=None, total=None): - """Iterate categories to which page belongs.""" + def pagecategories(self, page, step=None, total=None, content=False): + """Iterate categories to which page belongs.
- clgen = self._generator(api.CategoryPageGenerator, - type_arg="categories", step=step, total=total) + @param content: if True, load the current content of each iterated page + (default False); note that this means the contents of the + category description page, not the pages contained in the category + + """ + clargs = {} if hasattr(page, "_pageid"): - clgen.request['pageids'] = str(page._pageid) + clargs['pageids'] = str(page._pageid) else: - cltitle = page.title(withSection=False).encode(self.encoding()) - clgen.request['titles'] = cltitle + clargs['titles'] = page.title(withSection=False + ).encode(self.encoding()) + clgen = self._generator(api.CategoryPageGenerator, + type_arg="categories", step=step, total=total, + g_content=content, **clargs) return clgen
- def pageimages(self, page, step=None, total=None): - """Iterate images used (not just linked) on the page.""" + def pageimages(self, page, step=None, total=None, content=False): + """Iterate images used (not just linked) on the page.
+ @param content: if True, load the current content of each iterated page + (default False); note that this means the content of the image + description page, not the image itself + + """ + imtitle = page.title(withSection=False).encode(self.encoding()) imgen = self._generator(api.ImagePageGenerator, type_arg="images", - titles=imtitle, step=step, total=total) + titles=imtitle, step=step, total=total, + g_content=content) return imgen
- def pagetemplates(self, page, namespaces=None, step=None, total=None): - """Iterate templates transcluded (not just linked) on the page.""" + def pagetemplates(self, page, namespaces=None, step=None, total=None, + content=False): + """Iterate templates transcluded (not just linked) on the page.
+ @param content: if True, load the current content of each iterated page + (default False) + + """ tltitle = page.title(withSection=False).encode(self.encoding()) tlgen = self._generator(api.PageGenerator, type_arg="templates", titles=tltitle, namespaces=namespaces, - step=step, total=total) + step=step, total=total, g_content=content) return tlgen
- def categorymembers(self, category, namespaces=None, step=None, total=None): + def categorymembers(self, category, namespaces=None, step=None, total=None, + content=False): """Iterate members of specified category.
@param category: The Category to iterate. @@ -1376,6 +1407,8 @@ however, that the iterated values are always Page objects, even if in the Category or Image namespace. @type namespaces: list of ints + @param content: if True, load the current content of each iterated page + (default False)
""" if category.namespace() != 14: @@ -1387,9 +1420,10 @@ type_arg="categorymembers", gcmtitle=cmtitle, gcmprop="ids|title|sortkey", -# namespaces=namespaces, +# namespaces=namespaces, # see note below step=step, - total=total) + total=total, + g_content=content) # workaround for https://bugzilla.wikimedia.org/show_bug.cgi?id=19640: if namespaces: if not isinstance(namespaces, list): @@ -1587,7 +1621,7 @@ def allpages(self, start="!", prefix="", namespace=0, filterredir=None, filterlanglinks=None, minsize=None, maxsize=None, protect_type=None, protect_level=None, reverse=False, - includeredirects=None, step=None, total=None): + includeredirects=None, step=None, total=None, content=False): """Iterate pages in a single namespace.
Note: parameters includeRedirects and throttle are deprecated and @@ -1614,6 +1648,8 @@ @param reverse: if True, iterate in reverse Unicode lexigraphic order (default: iterate in forward order) @param includeredirects: DEPRECATED, use filterredirs instead + @param content: if True, load the current content of each iterated page + (default False)
""" if not isinstance(namespace, int): @@ -1632,7 +1668,8 @@
apgen = self._generator(api.PageGenerator, type_arg="allpages", gapnamespace=str(namespace), - gapfrom=start, step=step, total=total) + gapfrom=start, step=step, total=total, + g_content=content) if prefix: apgen.request["gapprefix"] = prefix if filterredir is not None: @@ -1705,7 +1742,7 @@ yield p
def allcategories(self, start="!", prefix="", step=None, total=None, - reverse=False): + reverse=False, content=False): """Iterate categories used (which need not have a Category page).
Iterator yields Category objects. Note that, in practice, links that @@ -1716,11 +1753,14 @@ @param prefix: Only yield categories starting with this string. @param reverse: if True, iterate in reverse Unicode lexigraphic order (default: iterate in forward order) + @param content: if True, load the current content of each iterated page + (default False); note that this means the contents of the category + description page, not the pages that are members of the category
""" acgen = self._generator(api.CategoryPageGenerator, type_arg="allcategories", gacfrom=start, - step=step, total=total) + step=step, total=total, g_content=content) if prefix: acgen.request["gacprefix"] = prefix if reverse: @@ -1763,7 +1803,7 @@
def allimages(self, start="!", prefix="", minsize=None, maxsize=None, reverse=False, sha1=None, sha1base36=None, step=None, - total=None): + total=None, content=False): """Iterate all images, ordered by image title.
Yields ImagePages, but these pages need not exist on the wiki. @@ -1776,11 +1816,14 @@ @param sha1: only iterate image (it is theoretically possible there could be more than one) with this sha1 hash @param sha1base36: same as sha1 but in base 36 + @param content: if True, load the current content of each iterated page + (default False); note that this means the content of the image + description page, not the image itself
""" aigen = self._generator(api.ImagePageGenerator, type_arg="allimages", gaifrom=start, - step=step, total=total) + step=step, total=total, g_content=content) if prefix: aigen.request["gaiprefix"] = prefix if isinstance(minsize, int): @@ -1837,7 +1880,7 @@ return bkgen
def exturlusage(self, url, protocol="http", namespaces=None, - step=None, total=None): + step=None, total=None, content=False): """Iterate Pages that contain links to the given URL.
@param url: The URL to search for (without the protocol prefix); @@ -1849,11 +1892,11 @@ eugen = self._generator(api.PageGenerator, type_arg="exturlusage", geuquery=url, geuprotocol=protocol, namespaces=namespaces, step=step, - total=total) + total=total, g_content=content) return eugen
def imageusage(self, image, namespaces=None, filterredir=None, - step=None, total=None): + step=None, total=None, content=False): """Iterate Pages that contain links to the given ImagePage.
@param image: the image to search for (ImagePage need not exist on @@ -1861,6 +1904,8 @@ @type image: ImagePage @param filterredir: if True, only yield redirects; if False (and not None), only yield non-redirects (default: yield both) + @param content: if True, load the current content of each iterated page + (default False)
""" iuargs = dict(giutitle=image.title(withSection=False)) @@ -1869,7 +1914,7 @@ or "nonredirects") iugen = self._generator(api.PageGenerator, type_arg="imageusage", namespaces=namespaces, step=step, - total=total, **iuargs) + total=total, g_content=content, **iuargs) return iugen
def logevents(self, logtype=None, user=None, page=None, @@ -1984,7 +2029,7 @@
@deprecate_arg("number", "limit") def search(self, searchstring, namespaces=None, where="text", - getredirects=False, step=None, total=None): + getredirects=False, step=None, total=None, content=False): """Iterate Pages that contain the searchstring.
Note that this may include non-existing Pages if the wiki's database @@ -1997,6 +2042,8 @@ @param namespaces: search only in these namespaces (defaults to 0) @type namespaces: list of ints @param getredirects: if True, include redirects in results + @param content: if True, load the current content of each iterated page + (default False)
""" if not searchstring: @@ -2009,7 +2056,7 @@ srgen = self._generator(api.PageGenerator, type_arg="search", gsrsearch=searchstring, gsrwhat=where, namespaces=namespaces, step=step, - total=total) + total=total, g_content=content) if getredirects: srgen.request["gsrredirects"] = "" return srgen @@ -2188,7 +2235,7 @@ return usgen
def randompages(self, step=None, total=1, namespaces=None, - redirects=False): + redirects=False, content=False): """Iterate a number of random pages.
Pages are listed in a fixed sequence, only the starting point is @@ -2198,10 +2245,13 @@ @param namespaces: only iterate pages in these namespaces. @param redirects: if True, include only redirect pages in results (default: include only non-redirects) + @param content: if True, load the current content of each iterated page + (default False)
""" rngen = self._generator(api.PageGenerator, type_arg="random", - namespaces=namespaces, step=step, total=total) + namespaces=namespaces, step=step, total=total, + g_content=content) if redirects: rngen.request["grnredirect"] = "" return rngen