Revision: 7493 Author: alexsh Date: 2009-10-18 06:23:41 +0000 (Sun, 18 Oct 2009)
Log Message: ----------- code cleanup
Modified Paths: -------------- trunk/pywikipedia/checkimages.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2009-10-17 18:34:18 UTC (rev 7492) +++ trunk/pywikipedia/checkimages.py 2009-10-18 06:23:41 UTC (rev 7493) @@ -506,8 +506,6 @@
self.logFulNumber = logFulNumber
- self.settings = wikipedia.translate(self.site, page_with_settings) - self.rep_page = wikipedia.translate(self.site, report_page)
self.rep_text = wikipedia.translate(self.site, report_text) @@ -1050,11 +1048,12 @@
def takesettings(self): """ Function to take the settings from the wiki. """ + settingsPage = wikipedia.translate(self.site, page_with_settings) try: - if not self.settings: + if not settingsPage: self.settingsData = None else: - wikiPage = wikipedia.Page(self.site, self.settings) + wikiPage = wikipedia.Page(self.site, settingsPage) self.settingsData = list() try: testo = wikiPage.get() @@ -1171,7 +1170,7 @@ result = self.miniTemplateCheck(template) if result: break - if self.license_found == None: + if not self.license_found: for template in self.licenses_found: try: template.pageAPInfo() @@ -1226,10 +1225,10 @@ #else: break
- if self.licenses_found != []: + if self.licenses_found: self.templateInList()
- if self.license_found == None and self.allLicenses != list(): + if not self.license_found and self.allLicenses: # If only iterlist = self.AllLicenses if I remove something # from iterlist it will be remove from self.AllLicenses too iterlist = list(self.allLicenses) @@ -1242,42 +1241,42 @@ except wikipedia.NoPage: self.allLicenses.remove(template)
- if self.allLicenses != list(): + if self.allLicenses: self.license_found = self.allLicenses[0].title() self.some_problem = False # If it has "some_problem" it must check # the additional settings. # if self.settingsData, use addictional settings - if self.settingsData != None: + if self.settingsData: self.findAdditionalProblems()
- if self.some_problem == False: - if not self.seems_ok and self.license_found != None: - rep_text_license_fake = u"\n*[[:File:%s]] seems to have " % self.imageName + \ - "a ''fake license'', license detected: <nowiki>%s</nowiki>" % self.license_found - regexFakeLicense = r"* ?[[:File:%s]] seems to have " % (re.escape(self.imageName)) + \ - "a ''fake license'', license detected: <nowiki>%s</nowiki>$" % (re.escape(self.license_found)) - printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, self.license_found)) - self.report_image(self.imageName, rep_text = rep_text_license_fake, - addings = False, regex = regexFakeLicense) - elif self.license_found != None: - printWithTimeZone(u"%s seems ok, license found: %s..." % (self.imageName, self.license_found)) - else: + if self.some_problem: if self.mex_used in self.imageCheckText: wikipedia.output(u'File already fixed. Skip.') else: wikipedia.output(u"The file's description for %s contains %s..." % (self.imageName, self.name_used)) if self.mex_used.lower() == 'default': self.mex_used = self.unvertext - if self.imagestatus_used == False: - reported = self.report_image(self.imageName) - else: + if self.imagestatus_used: reported = True - if reported == True: + else: + reported = self.report_image(self.imageName) + if reported: #if self.imagestatus_used == True: self.report(self.mex_used, self.imageName, self.text_used, u"\n%s\n" % self.head_used, None, self.imagestatus_used, self.summary_used) else: wikipedia.output(u"Skipping the file...") self.some_problem = False + else: + if not self.seems_ok and self.license_found: + rep_text_license_fake = u"\n*[[:File:%s]] seems to have " % self.imageName + \ + "a ''fake license'', license detected: <nowiki>%s</nowiki>" % self.license_found + regexFakeLicense = r"* ?[[:File:%s]] seems to have " % (re.escape(self.imageName)) + \ + "a ''fake license'', license detected: <nowiki>%s</nowiki>$" % (re.escape(self.license_found)) + printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, self.license_found)) + self.report_image(self.imageName, rep_text = rep_text_license_fake, + addings = False, regex = regexFakeLicense) + elif self.license_found: + printWithTimeZone(u"%s seems ok, license found: %s..." % (self.imageName, self.license_found)) return (self.license_found, self.whiteTemplatesFound)
def load(self, raw): @@ -1303,7 +1302,7 @@ return False if skip_number > limit: skip_number = limit # Print a starting message only if no images has been skipped - if self.skip_list == []: + if not self.skip_list: if skip_number == 1: wikipedia.output(u'Skipping the first file:\n') else: @@ -1386,19 +1385,19 @@
def isTagged(self): """ Understand if a file is already tagged or not. """ - TextFind = wikipedia.translate(self.site, txt_find) # Is the image already tagged? If yes, no need to double-check, skip - for i in TextFind: + for i in wikipedia.translate(self.site, txt_find): # If there are {{ use regex, otherwise no (if there's not the {{ may not be a template # and the regex will be wrong) if '{{' in i: regexP = re.compile(r'{{(?:template|)%s ?(?:||\n|}|<) ?' % i.split('{{')[1].replace(u' ', u'[ _]'), re.I) result = regexP.findall(self.imageCheckText) - if result != []: + if result: return True elif i.lower() in self.imageCheckText: return True - return False # Nothing Found? Ok: False + + return False # Nothing Found
def findAdditionalProblems(self): # In every tupla there's a setting configuration @@ -1418,8 +1417,7 @@ break summary = tupla[5] head_2 = tupla[6] - text = tupla[7] - text = text % self.imageName + text = tupla[7] % self.imageName mexCatched = tupla[8] for k in find_list: if find_tipe.lower() == 'findonly': @@ -1491,8 +1489,7 @@ self.imageCheckText = regex_nowiki.sub('', self.imageCheckText); self.imageCheckText = regex_pre.sub('', self.imageCheckText) # Deleting the useless template from the description (before adding something # in the image the original text will be reloaded, don't worry). - self.tagged = self.isTagged() - if self.tagged == True: + if self.isTagged(): # Tagged? Yes, skip. printWithTimeZone(u'%s is already tagged...' % self.imageName) return True @@ -1514,7 +1511,7 @@ # It works also without this... but i want only to be sure ^^ brackets = False return True - elif delete == True: + elif delete: wikipedia.output(u"%s is not a file!" % self.imageName) # Modify summary text wikipedia.setAction(dels)
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-10-17 18:34:18 UTC (rev 7492) +++ trunk/pywikipedia/wikipedia.py 2009-10-18 06:23:41 UTC (rev 7493) @@ -3467,25 +3467,22 @@ try: url = imagedata['query']['pages'].values()[0]['imageinfo'][0]['url'] return url -# urlR = re.compile(r'<div class="fullImageLink" id="file">.*?<a href="(?P<url>[^ ]+?)"(?! class="image")|<span class="dangerousLink"><a href="(?P<url2>.+?)"', re.DOTALL) -# m = urlR.search(self.getImagePageHtml()) + #urlR = re.compile(r'<div class="fullImageLink" id="file">.*?<a href="(?P<url>[^ ]+?)"(?! class="image")|<span class="dangerousLink"><a href="(?P<url2>.+?)"', re.DOTALL) + #m = urlR.search(self.getImagePageHtml())
-# url = m.group('url') or m.group('url2') + # url = m.group('url') or m.group('url2') except KeyError: - raise NoPage(u'Image file URL for %s not found.' - % self.aslink(forceInterwiki = True)) + raise NoPage(u'Image file URL for %s not found.' % self.aslink(forceInterwiki = True) ) return url
def fileIsOnCommons(self): """Return True if the image is stored on Wikimedia Commons""" - return self.fileUrl().startswith( - u'http://upload.wikimedia.org/wikipedia/commons/') + return self.fileUrl().startswith(u'http://upload.wikimedia.org/wikipedia/commons/')
def fileIsShared(self): """Return True if image is stored on Wikitravel shared repository.""" if 'wikitravel_shared' in self.site().shared_image_repository(): - return self.fileUrl().startswith( - u'http://wikitravel.org/upload/shared/') + return self.fileUrl().startswith(u'http://wikitravel.org/upload/shared/') return self.fileIsOnCommons()
# FIXME: MD5 might be performed on not complete file due to server disconnection @@ -5325,25 +5322,26 @@ #else: if config.proxy['host']: conn = httplib.HTTPConnection(config.proxy['host']) + proxyPutAddr = '%s://%s%s' % (self.protocol(), self.hostname(), address) + conn.putrequest('POST', proxyPutAddr) + if type(config.proxy['auth']) == tuple: + import base64 + authcode = base64.b64encode("%s:%s" % (config.proxy['auth'][0], config.proxy['auth'][1]) ) + conn.putheader('Proxy-Authorization', "Basic %s" % authcode ) + else: if self.protocol() == 'http': conn = httplib.HTTPConnection(self.hostname()) elif self.protocol() == 'https': conn = httplib.HTTPSConnection(self.hostname()) + + conn.putrequest('POST', address) + # Encode all of this into a HTTP request # otherwise, it will crash, as other protocols are not supported
if address[-1] == "?": address = address[:-1] - if config.proxy['host']: - proxyPutAddr = '%s://%s%s' % (self.protocol(), self.hostname(), address) - conn.putrequest('POST', proxyPutAddr) - if type(config.proxy['auth']) == tuple: - import base64 - authcode = base64.b64encode("%s:%s" % (config.proxy['auth'][0], config.proxy['auth'][1]) ) - conn.putheader('Proxy-Authorization', "Basic %s" % authcode ) - else: - conn.putrequest('POST', address) if self.hostname() in config.authenticate.keys(): import base64 authcode = base64.b64encode("%s:%s" % (config.authenticate[self.hostname()][0], config.authenticate[self.hostname()][1]) ) @@ -5405,32 +5403,8 @@ """
if retry is None: - retry=config.retry_on_fail + retry = config.retry_on_fail
- #if False: #self.persistent_http and not data: - # self.conn.putrequest('GET', path) - # self.conn.putheader('User-agent', useragent) - # self.conn.putheader('Cookie', self.cookies(sysop = sysop)) - # self.conn.putheader('Connection', 'Keep-Alive') - # if compress: - # self.conn.putheader('Accept-encoding', 'gzip') - # self.conn.endheaders() - - # # Prepare the return values - # # Note that this can raise network exceptions which are not - # # caught here. - # try: - # response = self.conn.getresponse() - # except httplib.BadStatusLine: - # # Blub. - # self.conn.close() - # self.conn.connect() - # return self.getUrl(path, retry, sysop, data, compress, back_response=back_response) - - # text = response.read() - # headers = dict(response.getheaders()) - - #else: if self.hostname() in config.authenticate.keys(): uo = authenticateURLopener else: @@ -5459,10 +5433,10 @@ while True: try: if self.hostname() in config.authenticate.keys(): - request = urllib2.Request(url, data) - request.add_header('User-agent', useragent) - opener = urllib2.build_opener() - f = opener.open(request) + request = urllib2.Request(url, data) + request.add_header('User-agent', useragent) + opener = urllib2.build_opener() + f = opener.open(request) else: f = uo.open(url, data)
@@ -5533,7 +5507,7 @@ self._getUserDataOld(text, sysop = sysop)
if back_response: - return response, text + return f, text else: return text