Revision: 8149
Author: xqt
Date: 2010-05-06 07:21:16 +0000 (Thu, 06 May 2010)
Log Message:
-----------
site.search(): yield only the given number of pages
page.categories(): enable the API depending on a parameter which is True by default. Documentation added: please be aware that the API call also returns categories included by templates.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-05-06 05:46:59 UTC (rev 8148)
+++ trunk/pywikipedia/wikipedia.py 2010-05-06 07:21:16 UTC (rev 8149)
@@ -2165,19 +2165,14 @@
- def categories(self, get_redirect=False, force_api=False):
- """Return a list of categories that the article is in.
-
- This will retrieve the page text to do its work, so it can raise
- the same exceptions that are raised by the get() method.
-
- The return value is a list of Category objects, one for each of the
- category links in the page text.
+ def categories(self, get_redirect=False, api=True):
+ """Return a list of Category objects that the article is in.
+ Please be aware: the api call returns also categies which are included
+ by templates. This differs to the old non-api code. If you need only
+ these categories which are in the page text please use getCategoryLinks
+ (or set api=False but this could be deprecated in future).
"""
- # Note: This reads via api call if there is not page content
- # or page content already exist and force_api=True
- if not self.site().has_api() or \
- hasattr(self, '_contents') and not force_api:
+ if not (self.site().has_api() and api):
try:
category_links_to_return = getCategoryLinks(self.get(get_redirect=get_redirect), self.site())
except NoPage:
@@ -5701,7 +5696,7 @@
params['srnamespace'] = namespaces
offset = 0
- while True:
+ while offset < number:
params['sroffset'] = offset
data = query.GetData(params, self)['query']
if 'error' in data:
Revision: 8147
Author: xqt
Date: 2010-05-05 15:26:49 +0000 (Wed, 05 May 2010)
Log Message:
-----------
enable Page.categories() via api if there is no page content or force_api=True
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-05-05 07:12:45 UTC (rev 8146)
+++ trunk/pywikipedia/wikipedia.py 2010-05-05 15:26:49 UTC (rev 8147)
@@ -2165,7 +2165,7 @@
- def categories(self, get_redirect=False, api=False):
+ def categories(self, get_redirect=False, force_api=False):
"""Return a list of categories that the article is in.
This will retrieve the page text to do its work, so it can raise
@@ -2174,12 +2174,10 @@
The return value is a list of Category objects, one for each of the
category links in the page text.
"""
- # New add API query.
- # api.php?action=query&prop=categories&titles=Albert%20Einstein
- # Note: This needs an additional api call instead of collecting
- # these informations from page content
- # api param could be omitted in future!
- if not (api and self.site().has_api()):
+ # Note: This reads via api call if there is not page content
+ # or page content already exist and force_api=True
+ if not self.site().has_api() or \
+ hasattr(self, '_contents') and not force_api:
try:
category_links_to_return = getCategoryLinks(self.get(get_redirect=get_redirect), self.site())
except NoPage:
@@ -2207,15 +2205,10 @@
if c['ns'] is 14:
cat = catlib.Category(self.site(), c['title'])
cats.append(cat)
-# if len(data) == 2:
-# data = data[0] + data[1]
-# else:
-# data = data[0]
-
+
if 'query-continue' in datas:
if 'categories' in datas['query-continue']:
params['clcontinue'] = datas['query-continue']['categories']['clcontinue']
-
else:
allDone = True
return cats
Revision: 8145
Author: xqt
Date: 2010-05-04 16:07:48 +0000 (Tue, 04 May 2010)
Log Message:
-----------
categories() via API returns a list of Category objects instead of Page objects (like the old method). This method is still deactivated for now (api=False) because it needs an additional API query instead of collecting this information from the page content.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-05-04 15:04:43 UTC (rev 8144)
+++ trunk/pywikipedia/wikipedia.py 2010-05-04 16:07:48 UTC (rev 8145)
@@ -2165,7 +2165,7 @@
- def categories(self, get_redirect=False,api=False):
+ def categories(self, get_redirect=False, api=False):
"""Return a list of categories that the article is in.
This will retrieve the page text to do its work, so it can raise
@@ -2174,13 +2174,12 @@
The return value is a list of Category objects, one for each of the
category links in the page text.
"""
-# New add API query.
-
-# api.php?action=query&prop=categories&titles=Albert%20Einstein
-# fixed bug 2995320 & 2995997.
- apitest=False
-#
- if apitest == api:
+ # New add API query.
+ # api.php?action=query&prop=categories&titles=Albert%20Einstein
+ # Note: This needs an additional api call instead of collecting
+ # these informations from page content
+ # api param could be omitted in future!
+ if not (api and self.site().has_api()):
try:
category_links_to_return = getCategoryLinks(self.get(get_redirect=get_redirect), self.site())
except NoPage:
@@ -2188,6 +2187,7 @@
return category_links_to_return
else:
+ import catlib
params = {
'action': 'query',
'prop' : 'categories',
@@ -2200,14 +2200,13 @@
allDone = False
cats=[]
while not allDone:
-
-
datas = query.GetData(params, self.site())
data=datas['query']['pages'].values()[0]
if "categories" in data:
for c in data['categories']:
- cpage=Page(self.site(), c['title'])
- cats.append(cpage)
+ if c['ns'] is 14:
+ cat = catlib.Category(self.site(), c['title'])
+ cats.append(cat)
# if len(data) == 2:
# data = data[0] + data[1]
# else:
@@ -2219,7 +2218,6 @@
else:
allDone = True
- print cats
return cats
def __cmp__(self, other):
Revision: 8142
Author: shizhao
Date: 2010-05-04 13:32:57 +0000 (Tue, 04 May 2010)
Log Message:
-----------
categories() is compatible with the old non-API style. Bugs 2995320 & 2995997 are not fixed.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-05-04 05:13:35 UTC (rev 8141)
+++ trunk/pywikipedia/wikipedia.py 2010-05-04 13:32:57 UTC (rev 8142)
@@ -2163,7 +2163,9 @@
self._interwikis = result
return result
- def categories(self, get_redirect=False):
+
+
+ def categories(self, get_redirect=False,api=False):
"""Return a list of categories that the article is in.
This will retrieve the page text to do its work, so it can raise
@@ -2171,49 +2173,52 @@
The return value is a list of Category objects, one for each of the
category links in the page text.
+ """
+# New add API query.
- """
- # New API query.
- # api.php?action=query&prop=categories&titles=Albert%20Einstein
- # deactivated due to bug 2995320 & 2995997
- if not self.site().has_api() or True:
+# api.php?action=query&prop=categories&titles=Albert%20Einstein
+# Plese solving bug 2995320 & 2995997.
+ apitest=False
+#
+ if apitest == api:
try:
category_links_to_return = getCategoryLinks(self.get(get_redirect=get_redirect), self.site())
except NoPage:
category_links_to_return = []
return category_links_to_return
- params = {
- 'action': 'query',
- 'prop' : 'categories',
- 'titles' : self.title(),
- }
- if not self.site().isAllowed('apihighlimits') and config.special_page_limit > 500:
- params['cllimit'] = 500
+ else:
+ params = {
+ 'action': 'query',
+ 'prop' : 'categories',
+ 'titles' : self.title(),
+ }
+ if not self.site().isAllowed('apihighlimits') and config.special_page_limit > 500:
+ params['cllimit'] = 500
-
- allDone = False
- cats=[]
- while not allDone:
output(u'Getting categories in %s via API...' % self.aslink())
+ allDone = False
+ cats=[]
+ while not allDone:
+
- datas = query.GetData(params, self.site())
- data=datas['query']['pages'].values()[0]
- if "categories" in data:
- for c in data['categories']:
- cats.append(c['title'])
+ datas = query.GetData(params, self.site())
+ data=datas['query']['pages'].values()[0]
+ if "categories" in data:
+ for c in data['categories']:
+ cats.append(c['title'])
# if len(data) == 2:
# data = data[0] + data[1]
# else:
# data = data[0]
- if 'query-continue' in datas:
- if 'categories' in datas['query-continue']:
- params['clcontinue'] = datas['query-continue']['categories']['clcontinue']
+ if 'query-continue' in datas:
+ if 'categories' in datas['query-continue']:
+ params['clcontinue'] = datas['query-continue']['categories']['clcontinue']
- else:
- allDone = True
- return cats
+ else:
+ allDone = True
+ return cats
def __cmp__(self, other):
"""Test for equality and inequality of Page objects"""