Revision: 6032 Author: filnik Date: 2008-10-27 14:07:30 +0000 (Mon, 27 Oct 2008)
Log Message: ----------- Make the loading of categories even faster ^_^ and fix a small bug in smartDetection
Modified Paths: -------------- trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2008-10-26 18:10:52 UTC (rev 6031) +++ trunk/pywikipedia/checkimages.py 2008-10-27 14:07:30 UTC (rev 6032) @@ -496,37 +496,16 @@ wikipedia.output(u'No data found.') return False
-def categoryElementsNumber(CatName): - #action=query&prop=categoryinfo&titles=Category:License_tags - """ - """ - params = { - 'action' :'query', - 'prop' :'categoryinfo', - 'titles' :CatName, - } - - data = query.GetData(params, - useAPI = True, encodeTitle = False) - pageid = data['query']['pages'].keys()[0] - elements = data['query']['pages'][pageid]['categoryinfo']['size'] - return elements - def categoryAllElements(CatName): #action=query&list=categorymembers&cmlimit=500&cmtitle=Category:License_tags """ + Category to load all the elements in a category. Limit: 5000 elements. """ wikipedia.output("Loading %s..." % CatName) - elements = int(categoryElementsNumber(CatName)) - elements += 20 # better to be sure that all the elements are loaded - if (elements - 20) > 5000: - raise wikipedia.Error(u'The category selected as more than 5.000 elements, limit reached') - elif elements > 5000: # if they are less then 5000, but for few elements - elements = 5000 params = { 'action' :'query', 'list' :'categorymembers', - 'cmlimit' :str(elements), + 'cmlimit' :'5000', 'cmtitle' :CatName, }
@@ -534,6 +513,8 @@ useAPI = True, encodeTitle = False)
members = data['query']['categorymembers'] + if len(members) == 5000: + raise wikipedia.Error(u'The category selected as >= 5.000 elements, limit reached.') allmembers = members results = list() for subcat in members: @@ -549,6 +530,9 @@ results.append(member) return results def categoryAllPageObjects(CatName): + """ + From a list of dictionaries, return a list of page objects. + """ final = list() for element in categoryAllElements(CatName): final.append(wikipedia.Page(wikipedia.getSite(), element['title'])) @@ -1132,7 +1116,8 @@ for templateReal in self.licenses_found: if self.convert_to_url(template_selected).lower().replace('template:', '') == \ self.convert_to_url(templateReal.title().lower().replace('template:', '')): - allLicenses.append(templateReal) + if templateReal not in allLicenses: # don't put the same template, twice. + allLicenses.append(templateReal) if self.licenses_found != []: for template in self.licenses_found: license_selected = template.title().replace('Template:', '')
pywikipedia-l@lists.wikimedia.org