Revision: 7988 Author: xqt Date: 2010-03-11 17:44:03 +0000 (Thu, 11 Mar 2010)
Log Message: ----------- test api with new has_api() method; move throttle.log to control file
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Property Changed: ---------------- trunk/pywikipedia/pywikibot/
Property changes on: trunk/pywikipedia/pywikibot ___________________________________________________________________ Modified: svn:ignore - *.pyc
+ *.pyc *.ctrl
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2010-03-11 15:01:00 UTC (rev 7987) +++ trunk/pywikipedia/wikipedia.py 2010-03-11 17:44:03 UTC (rev 7988) @@ -4144,12 +4144,13 @@ self.releasepid = 1200 # Free the process id self.lastwait = 0.0 self.delay = 0 - if multiplydelay: + self.multiplydelay = multiplydelay + if self.multiplydelay: self.checkMultiplicity() self.setDelay(mindelay)
def logfn(self): - return config.datafilepath('logs', 'throttle.log') + return config.datafilepath('pywikibot', 'throttle.ctrl')
def checkMultiplicity(self): self.lock.acquire() @@ -4211,7 +4212,7 @@
def getDelay(self): thisdelay = self.delay - if self.pid: # If self.pid, we're checking for multiple processes + if self.multiplydelay: # If self.pid, we're checking for multiple processes if time.time() > self.checktime + self.checkdelay: self.checkMultiplicity() if thisdelay < (self.mindelay * self.next_multiplicity): @@ -4595,6 +4596,7 @@ mediawiki_message: Retrieve the text of a specified MediaWiki message has_mediawiki_message: True if this site defines specified MediaWiki message + has_api: True if this site's family provides api interface
shared_image_repository: Return tuple of image repositories used by this site. @@ -5677,21 +5679,13 @@ """Return the MediaWiki message text for key "key" """ # Allmessages is retrieved once for all per created Site object if (not self._mediawiki_messages) or forceReload: - api = False + api = self.has_api() if verbose: output(u"Retrieving mediawiki messages from Special:Allmessages") # Only MediaWiki r27393/1.12 and higher support XML output for Special:Allmessages if self.versionnumber() < 12: usePHP = True else: - try: - if config.use_api: - x = self.api_address() - del x - api = True - except NotImplementedError: - api = False - usePHP = False elementtree = True try: @@ -5797,6 +5791,17 @@ return True except KeyError: return False + + def has_api(self): + """Return True if this sites family has api interface.""" + try: + if config.use_api: + x = self.apipath() + del x + return True + except NotImplementedError: + pass + return False
def _load(self, sysop = False, force = False): """ @@ -5814,16 +5819,10 @@ if verbose: output(u'Getting information for site %s' % self)
- try: - api_url = self.api_address() - del api_url - except NotImplementedError: - config.use_api = False - # Get data # API Userinfo is available from version 1.11 # preferencetoken available from 1.14 - if config.use_api and self.versionnumber() >= 11: + if self.has_api() and self.versionnumber() >= 11: #Query userinfo params = { 'action': 'query', @@ -5860,66 +5859,47 @@ Use API when enabled use_api and version >= 1.11, or use Special:Search. """ - try: - if config.use_api and self.versionnumber() >= 11: - apiUrl = self.site().api_address() - del apiUrl - else: - raise NotImplementedError - except NotImplementedError: - _search = self._search_without_api + if self.has_api() and self.versionnumber() >= 11: + #Yield search results (using api) for query. + params = { + 'action': 'query', + 'list': 'search', + 'srsearch': q, + 'srlimit': number + } + if namespaces: + params['srnamespace'] = namespaces + + offset = 0 + while True: + params['sroffset'] = offset + data = query.GetData(params, self)['query'] + if 'error' in data: + raise RuntimeError('%s' % data['error']) + if not data['search']: + break + for s in data['search']: + offset += 1 + page = Page(self, s['title']) + yield page, s['snippet'], '', s['size'], s['wordcount'], s['timestamp'] else: - _search = self._search_with_api - return _search(query, number, namespaces) + #Yield search results (using Special:Search page) for query. + throttle = True + path = self.search_address(urllib.quote_plus(query.encode('utf-8')), + n=number, ns=namespaces) + get_throttle() + html = self.getUrl(path) + entryR = re.compile(ur'<li><a href=".+?" title="(?P<title>.+?)">.+?</a>', + re.DOTALL) + for m in entryR.finditer(html): + page = Page(self, m.group('title')) + yield page, '', '', '', '', ''
- def _search_with_api(self, q, number, namespaces): - """Yield search results (using api) for query.""" - params = { - 'action': 'query', - 'list': 'search', - 'srsearch': q, - 'srlimit': number - } - if namespaces: - params['srnamespace'] = namespaces - - offset = 0 - while True: - params['sroffset'] = offset - data = query.GetData(params, self)['query'] - if 'error' in data: - raise RuntimeError('%s' % data['error']) - if not data['search']: - break - for s in data['search']: - offset += 1 - page = Page(self, s['title']) - yield page, s['snippet'], '', s['size'], s['wordcount'], s['timestamp'] - - def _search_without_api(self, query, number, namespaces): - """Yield search results (using Special:Search page) for query.""" - throttle = True - path = self.search_address(urllib.quote_plus(query.encode('utf-8')), - n=number, ns=namespaces) - get_throttle() - html = self.getUrl(path) - - entryR = re.compile(ur'<li><a href=".+?" title="(?P<title>.+?)">.+?</a>', - re.DOTALL) - - for m in entryR.finditer(html): - page = Page(self, m.group('title')) - yield page, '', '', '', '', '' - # TODO: avoid code duplication for the following methods
def logpages(self, number=50, mode='', user=None, repeat=False, namespace=[], offset=-1): - if config.use_api: - apiURL = self.api_address() - del apiURL - else: - raise NotImplementedError - if mode not in ('block', 'protect', 'rights', 'delete', 'upload', + if not self.has_api() or \ + mode not in ('block', 'protect', 'rights', 'delete', 'upload', 'move', 'import', 'patrol', 'merge', 'suppress', 'review', 'stable', 'gblblock', 'renameuser', 'globalauth', 'gblrights', 'abusefilter', 'newusers'): @@ -5986,14 +5966,9 @@ # should use both offset and limit parameters, and have an # option to fetch older rather than newer pages seen = set() - try: - d = self.apipath() - del d - except NotImplementedError: - config.use_api = False - + api = self.has_api() while True: - if config.use_api and self.versionnumber() >= 10: + if api and self.versionnumber() >= 10: params = { 'action': 'query', 'list': 'recentchanges', @@ -6668,18 +6643,12 @@ """Yield Pages from results of Special:Linksearch for 'siteurl'.""" cache = [] R = re.compile('title ?="([^<>]*?)">[^<>]*</a></li>') - #Check API can work - if config.use_api: - try: - d = self.api_address() - del d - except NotImplementedError: - config.use_api = False - + api = self.has_api() urlsToRetrieve = [siteurl] if not siteurl.startswith('*.'): urlsToRetrieve.append('*.' + siteurl) - if config.use_api and self.versionnumber() >= 11: + + if api and self.versionnumber() >= 11: output(u'Querying API exturlusage...') for url in urlsToRetrieve: params = {