Revision: 6744 Author: nicdumz Date: 2009-04-28 05:35:32 +0000 (Tue, 28 Apr 2009)
Log Message: ----------- Replacing 'text.find(substring) != -1' and its variants with 'substring in text'
In particular, writing 'not text.find(subs) == -1' to mean 'subs in text' is quite hard to read. (This commit is sponsored by PEP 290 ^_^ )
Modified Paths: -------------- trunk/pywikipedia/censure.py trunk/pywikipedia/copyright.py trunk/pywikipedia/imagerecat.py trunk/pywikipedia/interwiki.py trunk/pywikipedia/rcsort.py trunk/pywikipedia/solve_disambiguation.py trunk/pywikipedia/spellcheck.py trunk/pywikipedia/standardize_notes.py trunk/pywikipedia/titletranslate.py trunk/pywikipedia/weblinkchecker.py trunk/pywikipedia/wikipedia.py trunk/pywikipedia/wiktionary/header.py trunk/pywikipedia/wiktionary/meaning.py trunk/pywikipedia/wiktionary/term.py trunk/pywikipedia/wiktionary/wiktionarypage.py trunk/pywikipedia/wiktionary.py
Modified: trunk/pywikipedia/censure.py =================================================================== --- trunk/pywikipedia/censure.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/censure.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -89,7 +89,7 @@ report = False wordsIn = [] for badWord in ownWordList: - if text.find(' ' + badWord + ' ') != -1: + if (' ' + badWord + ' ') in text: wordsIn.append(badWord) report = True if report:
Modified: trunk/pywikipedia/copyright.py =================================================================== --- trunk/pywikipedia/copyright.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/copyright.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -347,7 +347,7 @@
def check(self, url, verbose = False): for entry in self.URLlist: - if url.find(entry) != -1: + if entry in url: if verbose > 1: warn('URL Excluded: %s\nReason: %s' % (url, entry)) elif verbose:
Modified: trunk/pywikipedia/imagerecat.py =================================================================== --- trunk/pywikipedia/imagerecat.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/imagerecat.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -241,7 +241,7 @@ #If cat contains the name of a country add it to the list else: for country in countries: - if not(cat.find(country)==-1): + if country in cat: listCountries.append(country)
if(len(listByCountry) > 0):
Modified: trunk/pywikipedia/interwiki.py =================================================================== --- trunk/pywikipedia/interwiki.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/interwiki.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -1547,7 +1547,8 @@ wikipedia.output(u'Skipping: %s is an auto entry %s(%s)' % (page.title(),dictName,year)) continue if globalvar.bracketonly: - if page.title().find("(") == -1: + # Only yield pages that have ( ) in titles + if "(" not in page.title(): continue break
Modified: trunk/pywikipedia/rcsort.py =================================================================== --- trunk/pywikipedia/rcsort.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/rcsort.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -55,19 +55,19 @@ count = 0 for line in text: if rcoptions: - if line.find('gesch') > -1: + if 'gesch' in line: try: user = Ruser.search(line).group(1) except AttributeError: user = None count += 1 lines.append((user,count,line)) - elif line.find('rcoptions') > -1: + elif 'rcoptions' in line: print line.replace(mysite.path() + "?title=Speciaal:RecenteWijzigingen&","rcsort.py?") rcoptions = True - elif newbies and line.find('Nieuwste') > -1: + elif newbies and 'Nieuwste' in line: line = line.replace(mysite.path() + "?title=Speciaal:Bijdragen&","rcsort.py?").replace("target=newbies","newbies=true") - if line.find('</fieldset>') > -1: + if '</fieldset>' in line: line = line[line.find('</fieldset>')+11:] print line rcoptions = True
Modified: trunk/pywikipedia/solve_disambiguation.py =================================================================== --- trunk/pywikipedia/solve_disambiguation.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/solve_disambiguation.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -393,7 +393,7 @@ # If text links to a page with title link uncapitalized, uncapitalize link, otherwise capitalize it linkupper = link.title() linklower = linkupper[0].lower() + linkupper[1:] - if text.find("[[%s]]"%linklower) > -1 or text.find("[[%s|"%linklower) > -1: + if "[[%s]]"%linklower in text or "[[%s|"%linklower in text: return linklower else: return linkupper
Modified: trunk/pywikipedia/spellcheck.py =================================================================== --- trunk/pywikipedia/spellcheck.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/spellcheck.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -109,7 +109,7 @@ simwords[i] = [] for alt in knownwords.keys(): if basetext: - if alt.lower().find(basetext) == -1: + if basetext not in alt.lower() == -1: dothis = False else: dothis = True @@ -347,7 +347,7 @@ # the user if rep == self.derive(): return self.word - if self.word.find(self.derive()) == -1: + if self.derive() not in self.word: return wikipedia.input(u"Please give the result of replacing %s by %s in %s:"%(self.derive(),rep,self.word)) return self.word.replace(self.derive(),rep)
Modified: trunk/pywikipedia/standardize_notes.py =================================================================== --- trunk/pywikipedia/standardize_notes.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/standardize_notes.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -245,7 +245,7 @@ skip_page = True break else: - if entry.text.find(exception) != -1: + if exception in entry.text: skip_page = True break if not skip_page: @@ -256,7 +256,7 @@ yield wikipedia.Page(mysite, entry.full_title()) break else: - if entry.text.find(old) != -1: + if old in entry.text: yield wikipedia.Page(mysite, entry.full_title()) break
Modified: trunk/pywikipedia/titletranslate.py =================================================================== --- trunk/pywikipedia/titletranslate.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/titletranslate.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -21,7 +21,7 @@ site = page.site() if hints: for h in hints: - if h.find(':') == -1: + if ':' not in h: # argument given as -hint:xy where xy is a language code codes = h newname = ''
Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/weblinkchecker.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -275,7 +275,7 @@ # the decompression for us, so we have to do it ourselves. import gzip, StringIO data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read() - if data.find("Search Results for ") != -1: + if "Search Results for " in data: return archiveURL else: return None
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/wikipedia.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -722,7 +722,7 @@ while not textareaFound: text = self.site().getUrl(path, sysop = sysop)
- if text.find("<title>Wiki does not exist</title>") != -1: + if "<title>Wiki does not exist</title>" in text: raise NoSuchSite(u'Wiki %s does not exist yet' % self.site())
# Extract the actual text from the textarea @@ -734,13 +734,13 @@ textareaFound = True else: # search for messages with no "view source" (aren't used in new versions) - if text.find(self.site().mediawiki_message('whitelistedittitle')) != -1: + if self.site().mediawiki_message('whitelistedittitle') in text: raise NoPage(u'Page editing is forbidden for anonymous users.') - elif self.site().has_mediawiki_message('nocreatetitle') and text.find(self.site().mediawiki_message('nocreatetitle')) != -1: + elif self.site().has_mediawiki_message('nocreatetitle') and self.site().mediawiki_message('nocreatetitle') in text: raise NoPage(self.site(), self.aslink(forceInterwiki = True)) # Bad title - elif text.find('var wgPageName = "Special:Badtitle";') != -1 \ - or text.find(self.site().mediawiki_message('badtitle')) != -1: + elif 'var wgPageName = "Special:Badtitle";' in text \ + or self.site().mediawiki_message('badtitle') in text: raise BadTitle('BadTitle: %s' % self) # find out if the username or IP has been blocked elif self.site().isBlocked(): @@ -748,17 +748,17 @@ # If there is no text area and the heading is 'View Source' # but user is not blocked, the page does not exist, and is # locked - elif text.find(self.site().mediawiki_message('viewsource')) != -1: + elif self.site().mediawiki_message('viewsource') in text: raise NoPage(self.site(), self.aslink(forceInterwiki = True)) # Some of the newest versions don't have a "view source" tag for # non-existant pages # Check also the div class because if the language is not english # the bot can not seeing that the page is blocked. 
- elif text.find(self.site().mediawiki_message('badaccess')) != -1 or \ - text.find("<div class="permissions-errors">") != -1: + elif self.site().mediawiki_message('badaccess') in text or \ + "<div class="permissions-errors">" in text: raise NoPage(self.site(), self.aslink(forceInterwiki = True)) elif config.retry_on_fail: - if text.find( "<title>Wikimedia Error</title>") > -1: + if "<title>Wikimedia Error</title>" in text: output( u"Wikimedia has technical problems; will retry in %i minutes." % retry_idle_time) else: output( unicode(text) ) @@ -2966,9 +2966,9 @@ elif dt < 360: dt += 60 else: - if data.find("<title>Wiki does not exist</title>") != -1: + if "<title>Wiki does not exist</title>" in data: raise NoSuchSite(u'Wiki %s does not exist yet' % self.site) - elif data.find("<siteinfo>") == -1: # This probably means we got a 'temporary unaivalable' + elif "<siteinfo>" not in data: # This probably means we got a 'temporary unaivalable' output(u'Got incorrect export page. Sleeping for %d seconds...' % dt) time.sleep(dt) if dt <= 60: @@ -3030,7 +3030,7 @@ if m: ## output(u"%s is a redirect" % page2.aslink()) redirectto = m.group(1) - if section and redirectto.find("#") == -1: + if section and not "#" in redirectto: redirectto = redirectto+"#"+section page2._getexception = IsRedirectPage page2._redirarg = redirectto @@ -4448,7 +4448,7 @@ try: text = self.getUrl(u'%saction=query&meta=userinfo&uiprop=blockinfo' % self.api_address(), sysop=sysop) - return text.find('blockedby=') > -1 + return 'blockedby=' in text except NotImplementedError: return False
Modified: trunk/pywikipedia/wiktionary/header.py =================================================================== --- trunk/pywikipedia/wiktionary/header.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/wiktionary/header.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -43,7 +43,7 @@ if line.count('=')>1: self.level = line.count('=') // 2 # integer floor division without fractional part self.header = line.replace('=','') - elif not line.find('{{')==-1: + elif '{{' in line: self.header = line.replace('{{-','').replace('-}}','')
self.header = self.header.replace('{{','').replace('}}','').strip().lower()
Modified: trunk/pywikipedia/wiktionary/meaning.py =================================================================== --- trunk/pywikipedia/wiktionary/meaning.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/wiktionary/meaning.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -147,13 +147,20 @@ partconsumed = True cleanpart=part.replace("'",'').lower() delim='' + # XXX The following 3 tests look wrong: + # find() returns either -1 if the substring is not found, + # or the position of the substring in the string. + # since bool(-1) = True, cleanpart.find(',') will always + # be True, unless cleanpart[0] is ',' + # + # the test "',' in cleanpart" might be the one to use. if cleanpart.find(','): delim=',' if cleanpart.find(';'): delim=';' if cleanpart.find('/'): delim='/' - if 0 <= part.find("'") <= 2 or part.find('{')!=-1: + if 0 <= part.find("'") <= 2 or '{' in part: if delim=='': delim='|' cleanpart=cleanpart+'|' @@ -181,7 +188,7 @@ if not partconsumed: # This must be our term termweareworkingon=part.replace("[",'').replace("]",'').lower() - if termweareworkingon.find('#')!=-1 and termweareworkingon.find('|')!=-1: + if '#' in termweareworkingon and '|' in termweareworkingon: termweareworkingon=termweareworkingon.split('#')[0] # Now we have enough information to create a term # object for this translation and add it to our list @@ -384,4 +391,4 @@ wrappedexamples = '' for example in self.examples: wrappedexamples = wrappedexamples + "#:'''" + example + "'''\n" - return wrappedexamples \ No newline at end of file + return wrappedexamples
Modified: trunk/pywikipedia/wiktionary/term.py =================================================================== --- trunk/pywikipedia/wiktionary/term.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/wiktionary/term.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -28,17 +28,17 @@ pos=len(wikiline) maybegender=wikiline[pos:].replace("'",'').replace('{','').replace('}','').strip() self.term=wikiline[:pos].replace("[",'').replace(']','').strip() - if maybegender.find('m')!=-1: + if 'm' in maybegender: self.gender='m' - if maybegender.find('f')!=-1: + if 'f' in maybegender: self.gender='f' - if maybegender.find('n')!=-1: + if 'n' in maybegender: self.gender='n' - if maybegender.find('c')!=-1: + if 'c' in maybegender: self.gender='c' - if maybegender.find('p')!=-1: + if 'p' in maybegender: self.number=2 - if maybegender.find('dim')!=-1: + if 'dim' in maybegender: self.diminutive=True
def __getitem__(self): @@ -177,8 +177,7 @@ """ Returns a string with this term as a link in a format ready for Wiktionary """ if wikilang=='en': - pos=self.term.lower().find('to ') - if pos==0: + if self.term.lower().startswith('to '): return 'to [[' + self.term[3:] + ']]' return Term.wikiWrapForList(self, wikilang)
Modified: trunk/pywikipedia/wiktionary/wiktionarypage.py =================================================================== --- trunk/pywikipedia/wiktionary/wiktionarypage.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/wiktionary/wiktionarypage.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -107,15 +107,15 @@ line=line.replace('\n','').strip() # Let's start by looking for general stuff, that provides information which is # interesting to store at the page level - if line.lower().find('{wikipedia}')!=-1: + if '{wikipedia}' in line.lower(): self.addLink('wikipedia') continue - if line.lower().find('[[category:')!=-1: + if '[[category:' in line.lower(): category=line.split(':')[1].replace(']','') self.addCategory(category) # print 'category: ', category continue - if line.find('|')==-1: + if '|' not in line: bracketspos=line.find('[[') colonpos=line.find(':') if bracketspos!=-1 and colonpos!=-1 and bracketspos < colonpos: @@ -133,7 +133,7 @@ templist.append(line) continue # print 'line0:',line[0], 'line-2:',line[-2],'|','stripped line-2',line.rstrip()[-2] - if line.strip()[0]=='='and line.rstrip()[-2]=='=' or not line.find('{{-')==-1 and not line.find('-}}')==-1: + if line.strip()[0]=='='and line.rstrip()[-2]=='=' or '{{-' in line and '-}}' in line: # When a new header is encountered, it is necessary to store the information # encountered under the previous header. 
if templist and aheader: @@ -162,16 +162,17 @@ # Under the translations header there is quite a bit of stuff # that's only needed for formatting, we can just skip that # and go on processing the next line - if line.lower().find('{top}')!=-1: continue - if line.lower().find('{mid}')!=-1: continue - if line.lower().find('{bottom}')!=-1: continue - if line.find('|-')!=-1: continue - if line.find('{|')!=-1: continue - if line.find('|}')!=-1: continue - if line.lower().find('here-->')!=-1: continue - if line.lower().find('width=')!=-1: continue - if line.lower().find('<!--left column')!=-1: continue - if line.lower().find('<!--right column')!=-1: continue + lower = line.lower() + if '{top}' in lower: continue + if '{mid}' in lower: continue + if '{bottom}' in lower: continue + if '|-' in line: continue + if '{|' in line: continue + if '|}' in line: continue + if 'here-->' in lower: continue + if 'width=' in lower: continue + if '<!--left column' in lower: continue + if '<!--right column' in lower: continue
templist.append(line)
@@ -349,9 +350,9 @@ score=0 for word in concisedefclean.split(): definition=anothermeaning.definition.replace("(",'').replace(")",'').replace("'",'').replace(":",'').replace(".",'').replace("#",'').lower() - if len(word)>1 and definition.find(' '+word+' ')!=-1: + if len(word)>1 and ' '+word+' ' in definition: score+=1 - if len(word)>2 and definition.find(word)!=-1: + if len(word)>2 and word in definition: score+=1 if score>highest: highest=score
Modified: trunk/pywikipedia/wiktionary.py =================================================================== --- trunk/pywikipedia/wiktionary.py 2009-04-28 01:29:50 UTC (rev 6743) +++ trunk/pywikipedia/wiktionary.py 2009-04-28 05:35:32 UTC (rev 6744) @@ -328,17 +328,18 @@ # print line # Let's get rid of line breaks and extraneous white space line=line.replace('\n','').strip() + lower = line.lower() # Let's start by looking for general stuff, that provides information which is # interesting to store at the page level - if line.lower().find('{wikipedia}')!=-1: + if '{wikipedia}' in lower: self.addLink('wikipedia') continue - if line.lower().find('[[category:')!=-1: + if '[[category:' in lower: category=line.split(':')[1].replace(']','') self.addCategory(category) # print 'category: ', category continue - if line.find('|')==-1: + if '|' not in line: bracketspos=line.find('[[') colonpos=line.find(':') if bracketspos!=-1 and colonpos!=-1 and bracketspos < colonpos: @@ -356,7 +357,7 @@ templist.append(line) continue # print 'line0:',line[0], 'line-2:',line[-2],'|','stripped line-2',line.rstrip()[-2] - if line.strip()[0]=='='and line.rstrip()[-2]=='=' or not line.find('{{-')==-1 and not line.find('-}}')==-1: + if line.strip()[0]=='='and line.rstrip()[-2]=='=' or '{{-' in line and '-}}' in line: # When a new header is encountered, it is necessary to store the information # encountered under the previous header. 
if templist: @@ -385,16 +386,17 @@ # Under the translations header there is quite a bit of stuff # that's only needed for formatting, we can just skip that # and go on processing the next line - if line.lower().find('{top}')!=-1: continue - if line.lower().find('{mid}')!=-1: continue - if line.lower().find('{bottom}')!=-1: continue - if line.find('|-')!=-1: continue - if line.find('{|')!=-1: continue - if line.find('|}')!=-1: continue - if line.lower().find('here-->')!=-1: continue - if line.lower().find('width=')!=-1: continue - if line.lower().find('<!--left column')!=-1: continue - if line.lower().find('<!--right column')!=-1: continue + lower = line.lower() + if '{top}' in lower: continue + if '{mid}' in lower: continue + if '{bottom}' in lower: continue + if '|-' in line: continue + if '{|' in line: continue + if '|}' in line: continue + if 'here-->' in lower: continue + if 'width=' in lower: continue + if '<!--left column' in lower: continue + if '<!--right column' in lower: continue
templist.append(line)
@@ -1010,7 +1012,7 @@ if line.count('=')>1: self.level = line.count('=') // 2 # integer floor division without fractional part self.header = line.replace('=','') - elif not line.find('{{')==-1: + elif '{{' in line: self.header = line.replace('{{-','').replace('-}}','')
self.header = self.header.replace('{{','').replace('}}','').strip().lower()
pywikipedia-svn@lists.wikimedia.org