Revision: 6744
Author: nicdumz
Date: 2009-04-28 05:35:32 +0000 (Tue, 28 Apr 2009)
Log Message:
-----------
Replace 'text.find(substring) > -1' and its variants with 'substring in text'.
In particular, 'not text.find(subs) == -1' used to mean 'subs in text' is quite
hard to read.
(This commit is sponsored by PEP 290 ^_^ )
Modified Paths:
--------------
trunk/pywikipedia/censure.py
trunk/pywikipedia/copyright.py
trunk/pywikipedia/imagerecat.py
trunk/pywikipedia/interwiki.py
trunk/pywikipedia/rcsort.py
trunk/pywikipedia/solve_disambiguation.py
trunk/pywikipedia/spellcheck.py
trunk/pywikipedia/standardize_notes.py
trunk/pywikipedia/titletranslate.py
trunk/pywikipedia/weblinkchecker.py
trunk/pywikipedia/wikipedia.py
trunk/pywikipedia/wiktionary/header.py
trunk/pywikipedia/wiktionary/meaning.py
trunk/pywikipedia/wiktionary/term.py
trunk/pywikipedia/wiktionary/wiktionarypage.py
trunk/pywikipedia/wiktionary.py
Modified: trunk/pywikipedia/censure.py
===================================================================
--- trunk/pywikipedia/censure.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/censure.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -89,7 +89,7 @@
report = False
wordsIn = []
for badWord in ownWordList:
- if text.find(' ' + badWord + ' ') != -1:
+ if (' ' + badWord + ' ') in text:
wordsIn.append(badWord)
report = True
if report:
Modified: trunk/pywikipedia/copyright.py
===================================================================
--- trunk/pywikipedia/copyright.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/copyright.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -347,7 +347,7 @@
def check(self, url, verbose = False):
for entry in self.URLlist:
- if url.find(entry) != -1:
+ if entry in url:
if verbose > 1:
warn('URL Excluded: %s\nReason: %s' % (url, entry))
elif verbose:
Modified: trunk/pywikipedia/imagerecat.py
===================================================================
--- trunk/pywikipedia/imagerecat.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/imagerecat.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -241,7 +241,7 @@
#If cat contains the name of a country add it to the list
else:
for country in countries:
- if not(cat.find(country)==-1):
+ if country in cat:
listCountries.append(country)
if(len(listByCountry) > 0):
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/interwiki.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -1547,7 +1547,8 @@
wikipedia.output(u'Skipping: %s is an auto entry %s(%s)' % (page.title(),dictName,year))
continue
if globalvar.bracketonly:
- if page.title().find("(") == -1:
+ # Only yield pages that have ( ) in titles
+ if "(" not in page.title():
continue
break
Modified: trunk/pywikipedia/rcsort.py
===================================================================
--- trunk/pywikipedia/rcsort.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/rcsort.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -55,19 +55,19 @@
count = 0
for line in text:
if rcoptions:
- if line.find('gesch') > -1:
+ if 'gesch' in line:
try:
user = Ruser.search(line).group(1)
except AttributeError:
user = None
count += 1
lines.append((user,count,line))
- elif line.find('rcoptions') > -1:
+ elif 'rcoptions' in line:
print line.replace(mysite.path() + "?title=Speciaal:RecenteWijzigingen&","rcsort.py?")
rcoptions = True
- elif newbies and line.find('Nieuwste') > -1:
+ elif newbies and 'Nieuwste' in line:
line = line.replace(mysite.path() + "?title=Speciaal:Bijdragen&","rcsort.py?").replace("target=newbies","newbies=true")
- if line.find('</fieldset>') > -1:
+ if '</fieldset>' in line:
line = line[line.find('</fieldset>')+11:]
print line
rcoptions = True
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/solve_disambiguation.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -393,7 +393,7 @@
# If text links to a page with title link uncapitalized, uncapitalize link, otherwise capitalize it
linkupper = link.title()
linklower = linkupper[0].lower() + linkupper[1:]
- if text.find("[[%s]]"%linklower) > -1 or text.find("[[%s|"%linklower) > -1:
+ if "[[%s]]"%linklower in text or "[[%s|"%linklower in text:
return linklower
else:
return linkupper
Modified: trunk/pywikipedia/spellcheck.py
===================================================================
--- trunk/pywikipedia/spellcheck.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/spellcheck.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -109,7 +109,7 @@
simwords[i] = []
for alt in knownwords.keys():
if basetext:
- if alt.lower().find(basetext) == -1:
+ if basetext not in alt.lower() == -1:
dothis = False
else:
dothis = True
@@ -347,7 +347,7 @@
# the user
if rep == self.derive():
return self.word
- if self.word.find(self.derive()) == -1:
+ if self.derive() not in self.word:
return wikipedia.input(u"Please give the result of replacing %s by %s in %s:"%(self.derive(),rep,self.word))
return self.word.replace(self.derive(),rep)
Modified: trunk/pywikipedia/standardize_notes.py
===================================================================
--- trunk/pywikipedia/standardize_notes.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/standardize_notes.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -245,7 +245,7 @@
skip_page = True
break
else:
- if entry.text.find(exception) != -1:
+ if exception in entry.text:
skip_page = True
break
if not skip_page:
@@ -256,7 +256,7 @@
yield wikipedia.Page(mysite, entry.full_title())
break
else:
- if entry.text.find(old) != -1:
+ if old in entry.text:
yield wikipedia.Page(mysite, entry.full_title())
break
Modified: trunk/pywikipedia/titletranslate.py
===================================================================
--- trunk/pywikipedia/titletranslate.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/titletranslate.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -21,7 +21,7 @@
site = page.site()
if hints:
for h in hints:
- if h.find(':') == -1:
+ if ':' not in h:
# argument given as -hint:xy where xy is a language code
codes = h
newname = ''
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/weblinkchecker.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -275,7 +275,7 @@
# the decompression for us, so we have to do it ourselves.
import gzip, StringIO
data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
- if data.find("Search Results for ") != -1:
+ if "Search Results for " in data:
return archiveURL
else:
return None
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wikipedia.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -722,7 +722,7 @@
while not textareaFound:
text = self.site().getUrl(path, sysop = sysop)
- if text.find("<title>Wiki does not exist</title>") != -1:
+ if "<title>Wiki does not exist</title>" in text:
raise NoSuchSite(u'Wiki %s does not exist yet' % self.site())
# Extract the actual text from the textarea
@@ -734,13 +734,13 @@
textareaFound = True
else:
# search for messages with no "view source" (aren't used in new versions)
- if text.find(self.site().mediawiki_message('whitelistedittitle')) != -1:
+ if self.site().mediawiki_message('whitelistedittitle') in text:
raise NoPage(u'Page editing is forbidden for anonymous users.')
- elif self.site().has_mediawiki_message('nocreatetitle') and text.find(self.site().mediawiki_message('nocreatetitle')) != -1:
+ elif self.site().has_mediawiki_message('nocreatetitle') and self.site().mediawiki_message('nocreatetitle') in text:
raise NoPage(self.site(), self.aslink(forceInterwiki = True))
# Bad title
- elif text.find('var wgPageName = "Special:Badtitle";') != -1 \
- or text.find(self.site().mediawiki_message('badtitle')) != -1:
+ elif 'var wgPageName = "Special:Badtitle";' in text \
+ or self.site().mediawiki_message('badtitle') in text:
raise BadTitle('BadTitle: %s' % self)
# find out if the username or IP has been blocked
elif self.site().isBlocked():
@@ -748,17 +748,17 @@
# If there is no text area and the heading is 'View Source'
# but user is not blocked, the page does not exist, and is
# locked
- elif text.find(self.site().mediawiki_message('viewsource')) != -1:
+ elif self.site().mediawiki_message('viewsource') in text:
raise NoPage(self.site(), self.aslink(forceInterwiki = True))
# Some of the newest versions don't have a "view source" tag for
# non-existant pages
# Check also the div class because if the language is not english
# the bot can not seeing that the page is blocked.
- elif text.find(self.site().mediawiki_message('badaccess')) != -1 or \
- text.find("<div class=\"permissions-errors\">") != -1:
+ elif self.site().mediawiki_message('badaccess') in text or \
+ "<div class=\"permissions-errors\">" in text:
raise NoPage(self.site(), self.aslink(forceInterwiki = True))
elif config.retry_on_fail:
- if text.find( "<title>Wikimedia Error</title>") > -1:
+ if "<title>Wikimedia Error</title>" in text:
output( u"Wikimedia has technical problems; will retry in %i minutes." % retry_idle_time)
else:
output( unicode(text) )
@@ -2966,9 +2966,9 @@
elif dt < 360:
dt += 60
else:
- if data.find("<title>Wiki does not exist</title>") != -1:
+ if "<title>Wiki does not exist</title>" in data:
raise NoSuchSite(u'Wiki %s does not exist yet' % self.site)
- elif data.find("<siteinfo>") == -1: # This probably means we got a 'temporary unaivalable'
+ elif "<siteinfo>" not in data: # This probably means we got a 'temporary unaivalable'
output(u'Got incorrect export page. Sleeping for %d seconds...' % dt)
time.sleep(dt)
if dt <= 60:
@@ -3030,7 +3030,7 @@
if m:
## output(u"%s is a redirect" % page2.aslink())
redirectto = m.group(1)
- if section and redirectto.find("#") == -1:
+ if section and not "#" in redirectto:
redirectto = redirectto+"#"+section
page2._getexception = IsRedirectPage
page2._redirarg = redirectto
@@ -4448,7 +4448,7 @@
try:
text = self.getUrl(u'%saction=query&meta=userinfo&uiprop=blockinfo'
% self.api_address(), sysop=sysop)
- return text.find('blockedby=') > -1
+ return 'blockedby=' in text
except NotImplementedError:
return False
Modified: trunk/pywikipedia/wiktionary/header.py
===================================================================
--- trunk/pywikipedia/wiktionary/header.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/header.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -43,7 +43,7 @@
if line.count('=')>1:
self.level = line.count('=') // 2 # integer floor division without fractional part
self.header = line.replace('=','')
- elif not line.find('{{')==-1:
+ elif '{{' in line:
self.header = line.replace('{{-','').replace('-}}','')
self.header = self.header.replace('{{','').replace('}}','').strip().lower()
Modified: trunk/pywikipedia/wiktionary/meaning.py
===================================================================
--- trunk/pywikipedia/wiktionary/meaning.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/meaning.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -147,13 +147,20 @@
partconsumed = True
cleanpart=part.replace("'",'').lower()
delim=''
+ # XXX The following 3 tests look wrong:
+ # find() returns -1 if the substring is not found, otherwise
+ # the position of the substring in the string.
+ # Since bool(-1) is True and bool(0) is False, cleanpart.find(',')
+ # is always truthy, unless cleanpart starts with ',' (position 0).
+ #
+ # the test "',' in cleanpart" might be the one to use.
if cleanpart.find(','):
delim=','
if cleanpart.find(';'):
delim=';'
if cleanpart.find('/'):
delim='/'
- if 0 <= part.find("'") <= 2 or part.find('{')!=-1:
+ if 0 <= part.find("'") <= 2 or '{' in part:
if delim=='':
delim='|'
cleanpart=cleanpart+'|'
@@ -181,7 +188,7 @@
if not partconsumed:
# This must be our term
termweareworkingon=part.replace("[",'').replace("]",'').lower()
- if termweareworkingon.find('#')!=-1 and termweareworkingon.find('|')!=-1:
+ if '#' in termweareworkingon and '|' in termweareworkingon:
termweareworkingon=termweareworkingon.split('#')[0]
# Now we have enough information to create a term
# object for this translation and add it to our list
@@ -384,4 +391,4 @@
wrappedexamples = ''
for example in self.examples:
wrappedexamples = wrappedexamples + "#:'''" + example + "'''\n"
- return wrappedexamples
\ No newline at end of file
+ return wrappedexamples
Modified: trunk/pywikipedia/wiktionary/term.py
===================================================================
--- trunk/pywikipedia/wiktionary/term.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/term.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -28,17 +28,17 @@
pos=len(wikiline)
maybegender=wikiline[pos:].replace("'",'').replace('{','').replace('}','').strip()
self.term=wikiline[:pos].replace("[",'').replace(']','').strip()
- if maybegender.find('m')!=-1:
+ if 'm' in maybegender:
self.gender='m'
- if maybegender.find('f')!=-1:
+ if 'f' in maybegender:
self.gender='f'
- if maybegender.find('n')!=-1:
+ if 'n' in maybegender:
self.gender='n'
- if maybegender.find('c')!=-1:
+ if 'c' in maybegender:
self.gender='c'
- if maybegender.find('p')!=-1:
+ if 'p' in maybegender:
self.number=2
- if maybegender.find('dim')!=-1:
+ if 'dim' in maybegender:
self.diminutive=True
def __getitem__(self):
@@ -177,8 +177,7 @@
""" Returns a string with this term as a link in a format ready for Wiktionary
"""
if wikilang=='en':
- pos=self.term.lower().find('to ')
- if pos==0:
+ if self.term.lower().startswith('to '):
return 'to [[' + self.term[3:] + ']]'
return Term.wikiWrapForList(self, wikilang)
Modified: trunk/pywikipedia/wiktionary/wiktionarypage.py
===================================================================
--- trunk/pywikipedia/wiktionary/wiktionarypage.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/wiktionarypage.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -107,15 +107,15 @@
line=line.replace('\n','').strip()
# Let's start by looking for general stuff, that provides information which is
# interesting to store at the page level
- if line.lower().find('{wikipedia}')!=-1:
+ if '{wikipedia}' in line.lower():
self.addLink('wikipedia')
continue
- if line.lower().find('[[category:')!=-1:
+ if '[[category:' in line.lower():
category=line.split(':')[1].replace(']','')
self.addCategory(category)
# print 'category: ', category
continue
- if line.find('|')==-1:
+ if '|' not in line:
bracketspos=line.find('[[')
colonpos=line.find(':')
if bracketspos!=-1 and colonpos!=-1 and bracketspos < colonpos:
@@ -133,7 +133,7 @@
templist.append(line)
continue
# print 'line0:',line[0], 'line-2:',line[-2],'|','stripped line-2',line.rstrip()[-2]
- if line.strip()[0]=='='and line.rstrip()[-2]=='=' or not line.find('{{-')==-1 and not line.find('-}}')==-1:
+ if line.strip()[0]=='='and line.rstrip()[-2]=='=' or '{{-' in line and '-}}' in line:
# When a new header is encountered, it is necessary to store the information
# encountered under the previous header.
if templist and aheader:
@@ -162,16 +162,17 @@
# Under the translations header there is quite a bit of stuff
# that's only needed for formatting, we can just skip that
# and go on processing the next line
- if line.lower().find('{top}')!=-1: continue
- if line.lower().find('{mid}')!=-1: continue
- if line.lower().find('{bottom}')!=-1: continue
- if line.find('|-')!=-1: continue
- if line.find('{|')!=-1: continue
- if line.find('|}')!=-1: continue
- if line.lower().find('here-->')!=-1: continue
- if line.lower().find('width=')!=-1: continue
- if line.lower().find('<!--left column')!=-1: continue
- if line.lower().find('<!--right column')!=-1: continue
+ lower = line.lower()
+ if '{top}' in lower: continue
+ if '{mid}' in lower: continue
+ if '{bottom}' in lower: continue
+ if '|-' in line: continue
+ if '{|' in line: continue
+ if '|}' in line: continue
+ if 'here-->' in lower: continue
+ if 'width=' in lower: continue
+ if '<!--left column' in lower: continue
+ if '<!--right column' in lower: continue
templist.append(line)
@@ -349,9 +350,9 @@
score=0
for word in concisedefclean.split():
definition=anothermeaning.definition.replace("(",'').replace(")",'').replace("'",'').replace(":",'').replace(".",'').replace("#",'').lower()
- if len(word)>1 and definition.find(' '+word+' ')!=-1:
+ if len(word)>1 and ' '+word+' ' in definition:
score+=1
- if len(word)>2 and definition.find(word)!=-1:
+ if len(word)>2 and word in definition:
score+=1
if score>highest:
highest=score
Modified: trunk/pywikipedia/wiktionary.py
===================================================================
--- trunk/pywikipedia/wiktionary.py 2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary.py 2009-04-28 05:35:32 UTC (rev 6744)
@@ -328,17 +328,18 @@
# print line
# Let's get rid of line breaks and extraneous white space
line=line.replace('\n','').strip()
+ lower = line.lower()
# Let's start by looking for general stuff, that provides information which is
# interesting to store at the page level
- if line.lower().find('{wikipedia}')!=-1:
+ if '{wikipedia}' in lower:
self.addLink('wikipedia')
continue
- if line.lower().find('[[category:')!=-1:
+ if '[[category:' in lower:
category=line.split(':')[1].replace(']','')
self.addCategory(category)
# print 'category: ', category
continue
- if line.find('|')==-1:
+ if '|' not in line:
bracketspos=line.find('[[')
colonpos=line.find(':')
if bracketspos!=-1 and colonpos!=-1 and bracketspos < colonpos:
@@ -356,7 +357,7 @@
templist.append(line)
continue
# print 'line0:',line[0], 'line-2:',line[-2],'|','stripped line-2',line.rstrip()[-2]
- if line.strip()[0]=='='and line.rstrip()[-2]=='=' or not line.find('{{-')==-1 and not line.find('-}}')==-1:
+ if line.strip()[0]=='='and line.rstrip()[-2]=='=' or '{{-' in line and '-}}' in line:
# When a new header is encountered, it is necessary to store the information
# encountered under the previous header.
if templist:
@@ -385,16 +386,17 @@
# Under the translations header there is quite a bit of stuff
# that's only needed for formatting, we can just skip that
# and go on processing the next line
- if line.lower().find('{top}')!=-1: continue
- if line.lower().find('{mid}')!=-1: continue
- if line.lower().find('{bottom}')!=-1: continue
- if line.find('|-')!=-1: continue
- if line.find('{|')!=-1: continue
- if line.find('|}')!=-1: continue
- if line.lower().find('here-->')!=-1: continue
- if line.lower().find('width=')!=-1: continue
- if line.lower().find('<!--left column')!=-1: continue
- if line.lower().find('<!--right column')!=-1: continue
+ lower = line.lower()
+ if '{top}' in lower: continue
+ if '{mid}' in lower: continue
+ if '{bottom}' in lower: continue
+ if '|-' in line: continue
+ if '{|' in line: continue
+ if '|}' in line: continue
+ if 'here-->' in lower: continue
+ if 'width=' in lower: continue
+ if '<!--left column' in lower: continue
+ if '<!--right column' in lower: continue
templist.append(line)
@@ -1010,7 +1012,7 @@
if line.count('=')>1:
self.level = line.count('=') // 2 # integer floor division without fractional part
self.header = line.replace('=','')
- elif not line.find('{{')==-1:
+ elif '{{' in line:
self.header = line.replace('{{-','').replace('-}}','')
self.header = self.header.replace('{{','').replace('}}','').strip().lower()