http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11201
Revision: 11201
Author: legoktm
Date: 2013-03-09 16:37:12 +0000 (Sat, 09 Mar 2013)
Log Message:
-----------
Move the API call from WikibasePage.get to DataSite.loadcontent,
since API calls belong in the site module (site.py), per the comments on r11182.
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2013-03-09 11:43:52 UTC (rev 11200)
+++ branches/rewrite/pywikibot/page.py 2013-03-09 16:37:12 UTC (rev 11201)
@@ -2227,17 +2227,9 @@
args can be used to specify custom props.
"""
if force or not hasattr(self, '_content'):
- params = dict(**self.__defined_by())
- params['action'] = 'wbgetentities'
- if args:
- params['props'] = '|'.join(args)
- #print params
- req = pywikibot.data.api.Request(site=self.repo, **params)
- data = req.submit()
- if not 'success' in data:
- raise pywikibot.data.api.APIError, data['errors']
- self.id = data['entities'].keys()[0]
- self._content = data['entities'][self.id]
+ data = self.repo.loadcontent(self.__defined_by(), *args)
+ self.id = data.keys()[0]
+ self._content = data[self.id]
self.lastrevid = self._content['lastrevid']
#aliases
self.aliases = {}
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2013-03-09 11:43:52 UTC (rev 11200)
+++ branches/rewrite/pywikibot/site.py 2013-03-09 16:37:12 UTC (rev 11201)
@@ -3312,6 +3312,27 @@
# not implemented yet
raise NotImplementedError
+ def loadcontent(self, identification, *props):
+ """
+ This is called loadcontent since
+ wbgetentities does not support fetching old
+ revisions. Eventually this will get replaced by
+ an actual loadrevisions.
+ @param identification Parameters used to identify the page(s)
+ @type identification dict
+ @param props the optional properties to fetch.
+ """
+ params = dict(**identification)
+ params['action'] = 'wbgetentities'
+ if props:
+ params['props'] = '|'.join(props)
+ req = api.Request(site=self, **params)
+ data = req.submit()
+ if not 'success' in data:
+ raise pywikibot.data.api.APIError, data['errors']
+ return data['entities']
+
+
# deprecated BaseSite methods
def fam(self):
raise NotImplementedError
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11195
Revision: 11195
Author: russblau
Date: 2013-03-08 20:06:40 +0000 (Fri, 08 Mar 2013)
Log Message:
-----------
Fix Bug #3606570: Namespace alias error with preloading and page.exists(); this turned out to be a pervasive problem caused by an unannounced (IIRC) breaking change in the API. Formerly, the API always returned a page title that contained the site's canonical namespace prefix, for pages outside namespace 0; now, it may return a title using a namespace alias instead, as in the case of the gender-specific namespaces identified in the bug report. Because it is conceivable that other cases of non-canonical namespaces may exist either now or in the future, this is a more general solution that checks all possible namespace aliases before rejecting a page title.
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2013-03-08 08:12:45 UTC (rev 11194)
+++ branches/rewrite/pywikibot/site.py 2013-03-08 20:06:40 UTC (rev 11195)
@@ -392,6 +392,48 @@
% locals(),
re.IGNORECASE | re.UNICODE | re.DOTALL)
+ def sametitle(self, title1, title2):
+ """Return True iff title1 and title2 identify the same wiki page."""
+ # title1 and title2 may be unequal but still identify the same page,
+ # if they use different aliases for the same namespace
+ def valid_namespace(text, number):
+ """Return True iff text is a valid alias for namespace with given number."""
+ for alias in self.namespace(number, all=True):
+ if text.lower() == alias.lower():
+ return True
+ return False
+ if title1 == title2:
+ return True
+ # determine whether titles contain namespace prefixes
+ if ":" in title1:
+ ns1, name1 = title1.split(":", 1)
+ else:
+ ns1, name1 = 0, title1
+ if ":" in title2:
+ ns2, name2 = title2.split(":", 1)
+ else:
+ ns2, name2 = 0, title2
+ for space in self.namespaces(): # iterate over all valid namespaces
+ if type(ns1) is not int and valid_namespace(ns1, space):
+ ns1 = space
+ if type(ns2) is not int and valid_namespace(ns2, space):
+ ns2 = space
+ if type(ns1) is not int:
+ # no valid namespace prefix found, so the string followed by ":"
+ # must be part of the title
+ name1 = ns1 + ":" + name1
+ ns1 = 0
+ if type(ns2) is not int:
+ name2 = ns2 + ":" + name2
+ ns2 = 0
+ if ns1 != ns2:
+ # pages in different namespaces
+ return False
+ if self.case() == "first-letter":
+ name1 = name1[:1].upper() + name1[1:]
+ name2 = name2[:1].upper() + name2[1:]
+ return name1 == name2
+
# namespace shortcuts for backwards-compatibility
def special_namespace(self):
@@ -1132,19 +1174,10 @@
titles=title.encode(self.encoding()),
inprop="protection")
for pageitem in query:
- if pageitem['title'] != title:
- if pageitem['title'] in query.normalized \
- and query.normalized[pageitem['title']] == title:
- # page title was normalized by api
- # this should never happen because the Link() constructor
- # normalizes the title
- pywikibot.log(
- u"loadpageinfo: Page title '%s' was normalized to '%s'"
- % (title, pageitem['title']))
- else:
- pywikibot.warning(
- u"loadpageinfo: Query on %s returned data on '%s'"
- % (page, pageitem['title']))
+ if not self.sametitle(pageitem['title'], title):
+ pywikibot.warning(
+ u"loadpageinfo: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
continue
api.update_page(page, pageitem)
@@ -1165,7 +1198,7 @@
"metadata", "archivename"],
**args)
for pageitem in query:
- if pageitem['title'] != title:
+ if not self.sametitle(pageitem['title'], title):
raise Error(
u"loadimageinfo: Query on %s returned data on '%s'"
% (page, pageitem['title']))
@@ -1236,7 +1269,7 @@
raise pywikibot.CircularRedirect(redirmap[title])
pagedata = result['query']['pages'].values()[0]
# there should be only one value in 'pages', and it is the target
- if pagedata['title'] == target_title:
+ if self.sametitle(pagedata['title'], target_title):
target = pywikibot.Page(self, pagedata['title'], pagedata['ns'])
api.update_page(target, pagedata)
page._redirtarget = target
@@ -1288,10 +1321,21 @@
pywikibot.debug(u"Preloading %s" % pagedata, _logger)
try:
if pagedata['title'] not in cache:
- pywikibot.warning(
- u"preloadpages: Query returned unexpected title '%s'"
- % pagedata['title'])
- continue
+# API always returns a "normalized" title which is
+# usually the same as the canonical form returned by
+# page.title(), but sometimes not (e.g.,
+# gender-specific localizations of "User" namespace).
+# This checks to see if there is a normalized title in
+# the response that corresponds to the canonical form
+# used in the query.
+ if pagedata['title'] in rvgen.normalized \
+ and rvgen.normalized[pagedata['title']] in cache:
+ cache[pagedata['title']] = cache[rvgen.normalized[pagedata['title']]]
+ else:
+ pywikibot.warning(
+ u"preloadpages: Query returned unexpected title '%s'"
+ % pagedata['title'])
+ continue
except KeyError:
pywikibot.debug(u"No 'title' in %s" % pagedata, _logger)
pywikibot.debug(u"pageids=%s" % pageids, _logger)
@@ -1314,7 +1358,7 @@
intoken=tokentype,
site=self)
for item in query:
- if item['title'] != page.title(withSection=False):
+ if not self.sametitle(item['title'], page.title(withSection=False)):
raise Error(
u"token: Query on page %s returned data on page [[%s]]"
% (page.title(withSection=False, asLink=True),
@@ -1705,21 +1749,11 @@
rvgen.continuekey = "revisions"
for pagedata in rvgen:
if page is not None:
- if pagedata['title'] != page.title(withSection=False):
- ok = False
- namespace = page.namespace()
- # gender settings ?
- if namespace in [2, 3]:
- ns, title = pagedata['title'].split(':', 1)
- if ns in page.site.namespace(namespace, all=True) and \
- title == page.title(withSection=False,
- withNamespace=False):
-
- ok = True
- if not ok:
- raise Error(
- u"loadrevisions: Query on %s returned data on '%s'"
- % (page, pagedata['title']))
+ if not self.sametitle(pagedata['title'],
+ page.title(withSection=False)):
+ raise Error(
+ u"loadrevisions: Query on %s returned data on '%s'"
+ % (page, pagedata['title']))
if "missing" in pagedata:
raise NoPage(page)
else:
@@ -1739,7 +1773,7 @@
titles=lltitle.encode(self.encoding()),
step=step, total=total)
for pageitem in llquery:
- if pageitem['title'] != lltitle:
+ if not self.sametitle(pageitem['title'], lltitle):
raise Error(
u"getlanglinks: Query on %s returned data on '%s'"
% (page, pageitem['title']))
@@ -1757,7 +1791,7 @@
titles=eltitle.encode(self.encoding()),
step=step, total=total)
for pageitem in elquery:
- if pageitem['title'] != eltitle:
+ if not self.sametitle(pageitem['title'], eltitle):
raise RuntimeError(
"getlanglinks: Query on %s returned data on '%s'"
% (page, pageitem['title']))
@@ -1773,7 +1807,7 @@
type_arg="categoryinfo",
titles=cititle.encode(self.encoding()))
for pageitem in ciquery:
- if pageitem['title'] != cititle:
+ if not self.sametitle(pageitem['title'], cititle):
raise Error(
u"categoryinfo: Query on %s returned data on '%s'"
% (category, pageitem['title']))
@@ -2591,7 +2625,7 @@
req['captchaword'] = input(captcha["question"])
continue
elif "url" in captcha:
- webbrowser.open(url)
+ webbrowser.open(captcha["url"])
req['captchaword'] = cap_answerwikipedia.input(
"Please view CAPTCHA in your browser, then type answer here:")
continue
@@ -3352,3 +3386,5 @@
f = open(fn)
self._cookies[index] = '; '.join([x.strip() for x in f.readlines()])
f.close()
+
+
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11194
Revision: 11194
Author: legoktm
Date: 2013-03-08 08:12:45 +0000 (Fri, 08 Mar 2013)
Log Message:
-----------
You can't mark __getattr__ as deprecated, since it is invoked for every attribute lookup that is not resolved normally, not only for the deprecated methods.
The warnings should still show up because of get_item.
Fixes r11187
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2013-03-07 22:01:23 UTC (rev 11193)
+++ branches/rewrite/pywikibot/site.py 2013-03-08 08:12:45 UTC (rev 11194)
@@ -3223,7 +3223,6 @@
class DataSite (APISite):
- @deprecated("pywikibot.ItemPage")
def __getattr__(self, attr):
"""Calls to methods get_info, get_sitelinks, get_aliases, get_labels,
get_descriptions, get_urls