Revision: 4437 Author: russblau Date: 2007-10-09 17:41:43 +0000 (Tue, 09 Oct 2007)
Log Message: ----------- Minor cleanup; try to catch more Page-constructor errors.
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-10-09 15:33:07 UTC (rev 4436) +++ trunk/pywikipedia/wikipedia.py 2007-10-09 17:41:43 UTC (rev 4437) @@ -10,8 +10,8 @@ Site(lang, fam): A MediaWiki site
Factory functions: - Family(name): Import the named family. - getSite(lang, fam): Return a Site instance. + Family(name): Import the named family + getSite(lang, fam): Return a Site instance
Exceptions: Error: Base class for all exceptions in this module @@ -28,7 +28,7 @@ EditConflict: PageNotSaved due to edit conflict while uploading SpamfilterError: PageNotSaved due to MediaWiki spam filter ServerError: Got unexpected response from wiki server - BadTitle: Server responded with BadTitle. + BadTitle: Server responded with BadTitle UserBlocked: Client's username or IP has been blocked PageNotFound: Page not found in list
@@ -262,9 +262,8 @@ Pages) imagelinks (*) : The pictures on the page (list of ImagePages) templates (*) : All templates referenced on the page (list of - strings) + Pages) templatesWithParams(*): All templates on the page, with list of parameters - templatePages (*) : Page objects for all templates used on this page isDisambig (*) : True if the page is a disambiguation page getReferences : List of pages linking to the page canBeEdited (*) : True if page is unprotected or user has edit @@ -395,7 +394,7 @@ if not self.site().nocapitalize: t = t[0].upper() + t[1:] # else: - # output( u"DBG>>> Strange title: %s:%s" % (site.lang, title) ) + # output(u"DBG>>> Strange title: %s:%s" % (site.lang, title) )
if self._namespace != 0: t = self.site().namespace(self._namespace) + u':' + t @@ -520,6 +519,7 @@ _autoFormat = date.getAutoFormat(self.site().language(), self.titleWithoutNamespace()) return _autoFormat + def isAutoTitle(self): """Return True if title of this Page is in the autoFormat dictionary.""" return self.autoFormat()[0] is not None @@ -1022,8 +1022,13 @@ for link in reflist("li", recursive=False): title = link.a.string if title is None: - output("DBG> invalid <li> item in Whatlinkshere: %s" % link) - p = Page(self.site(), title) + output(u"DBG> invalid <li> item in Whatlinkshere: %s" % link) + try: + p = Page(self.site(), title) + except Error: + output(u"DBG> Whatlinkshere:%s contains invalid link to %s" + % (self.title(), title)) + continue isredirect, istemplate = False, False textafter = link.a.findNextSibling(text=True) if textafter is not None: @@ -1356,9 +1361,13 @@ if self.namespace() == 1: return Page(self.site(), self.titleWithoutNamespace()) else: - return Page(self.site(), self.site().namespace(ns - 1) + ':' + self.titleWithoutNamespace()) + return Page(self.site(), + self.site().namespace(ns - 1) + ':' + + self.titleWithoutNamespace()) else: - return Page(self.site(), self.site().namespace(ns + 1) + ':' + self.titleWithoutNamespace()) + return Page(self.site(), + self.site().namespace(ns + 1) + ':' + + self.titleWithoutNamespace())
def interwiki(self): """Return a list of interwiki links in the page text. @@ -1389,6 +1398,10 @@ output( u"ERROR: link from %s to [[%s:%s]] contains invalid unicode reference?!" % (self.aslink(), newSite, newTitle)) + except Error: + output( + u"ERROR: link from %s to [[%s:%s]] is improperly formatted?" + % (self.aslink(), newSite, newTitle)) return result
def categories(self, nofollow_redirects=False): @@ -1455,7 +1468,9 @@ if not self.site().isInterwikiLink(title): try: page = Page(self.site(), title) - except: + except Error: + output(u"Page %s contains invalid link to [[%s]]." + % (self.title(), title)) continue if page.sectionFreeTitle(): result.append(page) @@ -1493,7 +1508,7 @@ return list(set(results))
def templates(self): - """Return a list of strings containing template names used on this Page. + """Return a list of Page objects for templates used on this Page.
Template parameters are ignored.
@@ -1504,7 +1519,7 @@ """Return a list of templates used on this Page.
Return value is a list of tuples. There is one tuple for each use of - a template in the page, with the template name as the first entry + a template in the page, with the template Page as the first entry and a list of parameters as the second entry.
""" @@ -1526,18 +1541,15 @@ name = m.group('name') if self.site().isInterwikiLink(name): continue - name = Page(self.site(), name).title() + try: + name = Page(self.site(), name).title() + except Error: + output(u"Page %s contains invalid template name %s." + % (self.title(), name)) + continue result.append((name, params)) return result
- def templatePages(self): - """Return a list of Page objects for templates used on the page. - - Template parameters are ignored. - """ - return [Page(self.site(), template, self.site(), 10) - for template in self.templates()] - def getRedirectTarget(self): """Return a Page object for the target this Page redirects to.
@@ -2294,7 +2306,12 @@ lineR = re.compile( '<li><a href=".+?" title=".+?">(?P<title>.+?)</a></li>') for match in lineR.finditer(titleList): - yield Page(self.site(), match.group('title')) + try: + yield Page(self.site(), match.group('title')) + except Error: + output( + u"Image description page %s contains invalid reference to [[%s]]." + % (self.title(), match.group('title')))
class _GetAll(object): @@ -2899,7 +2916,13 @@ else: # we want the actual page objects rather than the titles site = insite.getSite(code = lang) - result[site] = Page(site, pagetitle, insite = insite) + try: + result[site] = Page(site, pagetitle, insite = insite) + except Error: + output( + u"[getLanguageLinks] Text contains invalid interwiki link [[%s:%s]]." + % (lang, pagetitle)) + continue return result
def removeLanguageLinks(text, site = None, marker = ''): @@ -3479,7 +3502,8 @@ if not language[0].upper() + language[1:] in self.namespaces(): self._validlanguages.append(language)
- if persistent_http is None: persistent_http = config.persistent_http + if persistent_http is None: + persistent_http = config.persistent_http self.persistent_http = persistent_http and self.protocol() in ('http', 'https') if persistent_http: if self.protocol() == 'http': @@ -3487,9 +3511,8 @@ elif self.protocol() == 'https': self.conn = httplib.HTTPSConnection(self.hostname())
+ self.sandboxpage = Page(self, self.family.sandboxpage(code))
- self.sandboxpage = Page(self,self.family.sandboxpage(code)) - def loggedInAs(self, sysop = False): """Return the current username if logged in, otherwise return None.
@@ -4813,8 +4836,8 @@ setLogfileStatus(False) elif arg == '-verbose' or arg == "-v": import version - output('Pywikipediabot %s' % (version.getversion())) - output('Python %s' % (sys.version)) + output(u'Pywikipediabot %s' % (version.getversion())) + output(u'Python %s' % (sys.version)) verbose += 1 else: # the argument is not global. Let the specific bot script care @@ -5258,9 +5281,7 @@
page_put_queue = Queue.Queue() def async_put(): - ''' - Daemon that takes pages from the queue and tries to save them on the wiki. - ''' + """Daemon; take pages from the queue and try to save them on the wiki.""" while True: (page, newtext, comment, watchArticle, minorEdit, force, callback) = page_put_queue.get() @@ -5309,13 +5330,17 @@ get_throttle.drop()
def _flush(): - '''Wait for the page-putter to flush its queue; - called automatically upon exiting from Python. - ''' + """Wait for the page-putter to flush its queue. + + Called automatically upon exiting from Python. + + """ if page_put_queue.qsize() > 0: import datetime - remaining = datetime.timedelta(seconds = page_put_queue.qsize() * config.put_throttle) - output('Waiting for %i pages to be put. Estimated time remaining: %s' % (page_put_queue.qsize(), remaining)) + remaining = datetime.timedelta( + seconds = page_put_queue.qsize() * config.put_throttle) + output(u'Waiting for %i pages to be put. Estimated time remaining: %s' + % (page_put_queue.qsize(), remaining))
page_put_queue.put((None, None, None, None, None, None, None))