SVN: [6744] trunk/pywikipedia - Pywikipedia-svn

28 Apr 2009

Revision: 6744
Author:   nicdumz
Date:     2009-04-28 05:35:32 +0000 (Tue, 28 Apr 2009)
Log Message:
-----------
Replacing 'text.find(substring) > -1' and its variants with 'substring in text'.
In particular, 'not text.find(subs) == -1' as a way to say 'subs in text' is
quite hard to read.
(This commit is sponsored by PEP290 ^_^ )
Modified Paths:
--------------
    trunk/pywikipedia/censure.py
    trunk/pywikipedia/copyright.py
    trunk/pywikipedia/imagerecat.py
    trunk/pywikipedia/interwiki.py
    trunk/pywikipedia/rcsort.py
    trunk/pywikipedia/solve_disambiguation.py
    trunk/pywikipedia/spellcheck.py
    trunk/pywikipedia/standardize_notes.py
    trunk/pywikipedia/titletranslate.py
    trunk/pywikipedia/weblinkchecker.py
    trunk/pywikipedia/wikipedia.py
    trunk/pywikipedia/wiktionary/header.py
    trunk/pywikipedia/wiktionary/meaning.py
    trunk/pywikipedia/wiktionary/term.py
    trunk/pywikipedia/wiktionary/wiktionarypage.py
    trunk/pywikipedia/wiktionary.py
Modified: trunk/pywikipedia/censure.py
===================================================================

--- trunk/pywikipedia/censure.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/censure.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -89,7 +89,7 @@
     report = False
     wordsIn = []
     for badWord in ownWordList:
-        if text.find(' ' + badWord + ' ') != -1:
+        if (' ' + badWord + ' ') in text:
             wordsIn.append(badWord)
             report = True
     if report:
Modified: trunk/pywikipedia/copyright.py
===================================================================
--- trunk/pywikipedia/copyright.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/copyright.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -347,7 +347,7 @@
def check(self, url, verbose = False):
         for entry in self.URLlist:
-           if url.find(entry) != -1:
+           if entry in url:
                if verbose > 1:
                    warn('URL Excluded: %s\nReason: %s' % (url, entry))
                elif verbose:
Modified: trunk/pywikipedia/imagerecat.py
===================================================================
--- trunk/pywikipedia/imagerecat.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/imagerecat.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -241,7 +241,7 @@
         #If cat contains the name of a country add it to the list
         else:
             for country in countries:
-                if not(cat.find(country)==-1):
+                if country in cat:
                     listCountries.append(country)
if(len(listByCountry) > 0):
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/interwiki.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -1547,7 +1547,8 @@
                             wikipedia.output(u'Skipping: %s is an auto entry %s(%s)' % (page.title(),dictName,year))
                             continue
                     if globalvar.bracketonly:
-                        if page.title().find("(") == -1:
+                        # Only yield pages that have ( ) in titles
+                        if "(" not in page.title():
                             continue
                     break
Modified: trunk/pywikipedia/rcsort.py
===================================================================
--- trunk/pywikipedia/rcsort.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/rcsort.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -55,19 +55,19 @@
 count = 0
 for line in text:
     if rcoptions:
-        if line.find('gesch') > -1:
+        if 'gesch' in line:
             try:
                 user = Ruser.search(line).group(1)
             except AttributeError:
                 user = None
             count += 1
             lines.append((user,count,line))
-    elif line.find('rcoptions') > -1:
+    elif 'rcoptions' in line:
         print line.replace(mysite.path() + "?title=Speciaal:RecenteWijzigingen&amp;","rcsort.py?")
         rcoptions = True
-    elif newbies and line.find('Nieuwste') > -1:
+    elif newbies and 'Nieuwste' in line:
         line =  line.replace(mysite.path() + "?title=Speciaal:Bijdragen&amp;","rcsort.py?").replace("target=newbies","newbies=true")
-        if line.find('</fieldset>') > -1:
+        if '</fieldset>' in line:
             line = line[line.find('</fieldset>')+11:]
         print line
         rcoptions = True
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/solve_disambiguation.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -393,7 +393,7 @@
     # If text links to a page with title link uncapitalized, uncapitalize link, otherwise capitalize it
     linkupper = link.title()
     linklower = linkupper[0].lower() + linkupper[1:]
-    if text.find("[[%s]]"%linklower) > -1 or text.find("[[%s|"%linklower) > -1:
+    if "[[%s]]"%linklower in text or "[[%s|"%linklower in text:
         return linklower
     else:
         return linkupper
Modified: trunk/pywikipedia/spellcheck.py
===================================================================
--- trunk/pywikipedia/spellcheck.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/spellcheck.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -109,7 +109,7 @@
         simwords[i] = []
     for alt in knownwords.keys():
         if basetext:
-            if alt.lower().find(basetext) == -1:
+            if basetext not in alt.lower():
                 dothis = False
             else:
                 dothis = True
@@ -347,7 +347,7 @@
         # the user
         if rep == self.derive():
             return self.word
-        if self.word.find(self.derive()) == -1:
+        if self.derive() not in self.word:
             return wikipedia.input(u"Please give the result of replacing %s by %s in %s:"%(self.derive(),rep,self.word))
         return self.word.replace(self.derive(),rep)
Modified: trunk/pywikipedia/standardize_notes.py
===================================================================
--- trunk/pywikipedia/standardize_notes.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/standardize_notes.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -245,7 +245,7 @@
                             skip_page = True
                             break
                     else:
-                        if entry.text.find(exception) != -1:
+                        if exception in entry.text:
                             skip_page = True
                             break
             if not skip_page:
@@ -256,7 +256,7 @@
                             yield wikipedia.Page(mysite, entry.full_title())
                             break
                     else:
-                        if entry.text.find(old) != -1:
+                        if old in entry.text:
                             yield wikipedia.Page(mysite, entry.full_title())
                             break
Modified: trunk/pywikipedia/titletranslate.py
===================================================================
--- trunk/pywikipedia/titletranslate.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/titletranslate.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -21,7 +21,7 @@
     site = page.site()
     if hints:
         for h in hints:
-            if h.find(':') == -1:
+            if ':' not in h:
                 # argument given as -hint:xy where xy is a language code
                 codes = h
                 newname = ''
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/weblinkchecker.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -275,7 +275,7 @@
             # the decompression for us, so we have to do it ourselves.
             import gzip, StringIO
             data = gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()
-        if data.find("Search Results for ") != -1:
+        if "Search Results for " in data:
             return archiveURL
         else:
             return None
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wikipedia.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -722,7 +722,7 @@
         while not textareaFound:
             text = self.site().getUrl(path, sysop = sysop)
-            if text.find("<title>Wiki does not exist</title>") != -1:
+            if "<title>Wiki does not exist</title>" in text:
                 raise NoSuchSite(u'Wiki %s does not exist yet' % self.site())
# Extract the actual text from the textarea
@@ -734,13 +734,13 @@
                 textareaFound = True
             else:
                 # search for messages with no "view source" (aren't used in new versions)
-                if text.find(self.site().mediawiki_message('whitelistedittitle')) != -1:
+                if self.site().mediawiki_message('whitelistedittitle') in text:
                     raise NoPage(u'Page editing is forbidden for anonymous users.')
-                elif self.site().has_mediawiki_message('nocreatetitle') and text.find(self.site().mediawiki_message('nocreatetitle')) != -1:
+                elif self.site().has_mediawiki_message('nocreatetitle') and self.site().mediawiki_message('nocreatetitle') in text:
                     raise NoPage(self.site(), self.aslink(forceInterwiki = True))
                 # Bad title
-                elif text.find('var wgPageName = "Special:Badtitle";') != -1 \
-                or text.find(self.site().mediawiki_message('badtitle')) != -1:
+                elif 'var wgPageName = "Special:Badtitle";' in text \
+                or self.site().mediawiki_message('badtitle') in text:
                     raise BadTitle('BadTitle: %s' % self)
                 # find out if the username or IP has been blocked
                 elif self.site().isBlocked():
@@ -748,17 +748,17 @@
                 # If there is no text area and the heading is 'View Source'
                 # but user is not blocked, the page does not exist, and is
                 # locked
-                elif text.find(self.site().mediawiki_message('viewsource')) != -1:
+                elif self.site().mediawiki_message('viewsource') in text:
                     raise NoPage(self.site(), self.aslink(forceInterwiki = True))
                 # Some of the newest versions don't have a "view source" tag for
                 # non-existant pages
                 # Check also the div class because if the language is not english
                 # the bot can not seeing that the page is blocked.
-                elif text.find(self.site().mediawiki_message('badaccess')) != -1 or \
-                text.find("<div class=\"permissions-errors\">") != -1:
+                elif self.site().mediawiki_message('badaccess') in text or \
+                "<div class=\"permissions-errors\">" in text:
                     raise NoPage(self.site(), self.aslink(forceInterwiki = True))
                 elif config.retry_on_fail:
-                    if text.find( "<title>Wikimedia Error</title>") > -1:
+                    if "<title>Wikimedia Error</title>" in text:
                         output( u"Wikimedia has technical problems; will retry in %i minutes." % retry_idle_time)
                     else:
                         output( unicode(text) )
@@ -2966,9 +2966,9 @@
                     elif dt < 360:
                         dt += 60
                 else:
-                    if data.find("<title>Wiki does not exist</title>") != -1:
+                    if "<title>Wiki does not exist</title>" in data:
                         raise NoSuchSite(u'Wiki %s does not exist yet' % self.site)
-                    elif data.find("<siteinfo>") == -1: # This probably means we got a 'temporary unaivalable'
+                    elif "<siteinfo>" not in data: # This probably means we got a 'temporary unaivalable'
                         output(u'Got incorrect export page. Sleeping for %d seconds...' % dt)
                         time.sleep(dt)
                         if dt <= 60:
@@ -3030,7 +3030,7 @@
                     if m:
                         ## output(u"%s is a redirect" % page2.aslink())
                         redirectto = m.group(1)
-                        if section and redirectto.find("#") == -1:
+                        if section and not "#" in redirectto:
                             redirectto = redirectto+"#"+section
                         page2._getexception = IsRedirectPage
                         page2._redirarg = redirectto
@@ -4448,7 +4448,7 @@
         try:
             text = self.getUrl(u'%saction=query&meta=userinfo&uiprop=blockinfo'
                                % self.api_address(), sysop=sysop)
-            return text.find('blockedby=') > -1
+            return 'blockedby=' in text
         except NotImplementedError:
             return False
Modified: trunk/pywikipedia/wiktionary/header.py
===================================================================
--- trunk/pywikipedia/wiktionary/header.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/header.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -43,7 +43,7 @@
         if line.count('=')>1:
             self.level = line.count('=') // 2 # integer floor division without fractional part
             self.header = line.replace('=','')
-        elif not line.find('{{')==-1:
+        elif '{{' in line:
             self.header = line.replace('{{-','').replace('-}}','')
self.header = self.header.replace('{{','').replace('}}','').strip().lower()
Modified: trunk/pywikipedia/wiktionary/meaning.py
===================================================================
--- trunk/pywikipedia/wiktionary/meaning.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/meaning.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -147,13 +147,20 @@
                         partconsumed = True
                     cleanpart=part.replace("'",'').lower()
                     delim=''
+                    # XXX The following 3 tests look wrong:
+                    # find() returns either -1 if the substring is not found,
+                    # or the position of the substring in the string.
+                    # since bool(-1) = True, cleanpart.find(',') will always
+                    # be True, unless cleanpart[0] is ',' (find() returns 0)
+                    #
+                    # the test "',' in cleanpart" might be the one to use.
                     if cleanpart.find(','):
                         delim=','
                     if cleanpart.find(';'):
                         delim=';'
                     if cleanpart.find('/'):
                         delim='/'
-                    if 0 <= part.find("'") <= 2 or part.find('{')!=-1:
+                    if 0 <= part.find("'") <= 2 or '{' in part:
                         if delim=='':
                             delim='|'
                             cleanpart=cleanpart+'|'
@@ -181,7 +188,7 @@
                     if not partconsumed:
                         # This must be our term
                         termweareworkingon=part.replace("[",'').replace("]",'').lower()
-                        if termweareworkingon.find('#')!=-1 and termweareworkingon.find('|')!=-1:
+                        if '#' in termweareworkingon and '|' in termweareworkingon:
                             termweareworkingon=termweareworkingon.split('#')[0]
                 # Now we have enough information to create a term
                 # object for this translation and add it to our list
@@ -384,4 +391,4 @@
         wrappedexamples = ''
         for example in self.examples:
             wrappedexamples = wrappedexamples + "#:'''" + example + "'''\n"
-        return wrappedexamples
\ No newline at end of file
+        return wrappedexamples
Modified: trunk/pywikipedia/wiktionary/term.py
===================================================================
--- trunk/pywikipedia/wiktionary/term.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/term.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -28,17 +28,17 @@
                 pos=len(wikiline)
             maybegender=wikiline[pos:].replace("'",'').replace('{','').replace('}','').strip()
             self.term=wikiline[:pos].replace("[",'').replace(']','').strip()
-            if maybegender.find('m')!=-1:
+            if 'm' in maybegender:
                 self.gender='m'
-            if maybegender.find('f')!=-1:
+            if 'f' in maybegender:
                 self.gender='f'
-            if maybegender.find('n')!=-1:
+            if 'n' in maybegender:
                 self.gender='n'
-            if maybegender.find('c')!=-1:
+            if 'c' in maybegender:
                 self.gender='c'
-            if maybegender.find('p')!=-1:
+            if 'p' in maybegender:
                 self.number=2
-            if maybegender.find('dim')!=-1:
+            if 'dim' in maybegender:
                 self.diminutive=True
def __getitem__(self):
@@ -177,8 +177,7 @@
         """ Returns a string with this term as a link in a format ready for Wiktionary
         """
         if wikilang=='en':
-            pos=self.term.lower().find('to ')
-            if pos==0:
+            if self.term.lower().startswith('to '):
                 return 'to [[' + self.term[3:] + ']]'
         return Term.wikiWrapForList(self, wikilang)
Modified: trunk/pywikipedia/wiktionary/wiktionarypage.py
===================================================================
--- trunk/pywikipedia/wiktionary/wiktionarypage.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary/wiktionarypage.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -107,15 +107,15 @@
             line=line.replace('\n','').strip()
             # Let's start by looking for general stuff, that provides information which is
             # interesting to store at the page level
-            if line.lower().find('{wikipedia}')!=-1:
+            if '{wikipedia}' in line.lower():
                 self.addLink('wikipedia')
                 continue
-            if line.lower().find('[[category:')!=-1:
+            if '[[category:' in line.lower():
                 category=line.split(':')[1].replace(']','')
                 self.addCategory(category)
 #                print 'category: ', category
                 continue
-            if line.find('|')==-1:
+            if '|' not in line:
                 bracketspos=line.find('[[')
                 colonpos=line.find(':')
                 if bracketspos!=-1 and colonpos!=-1 and bracketspos < colonpos:
@@ -133,7 +133,7 @@
                 templist.append(line)
                 continue
 #        print 'line0:',line[0], 'line-2:',line[-2],'|','stripped line-2',line.rstrip()[-2]
-            if line.strip()[0]=='='and line.rstrip()[-2]=='=' or not line.find('{{-')==-1 and not line.find('-}}')==-1:
+            if line.strip()[0]=='='and line.rstrip()[-2]=='=' or '{{-' in line and '-}}' in line:
                 # When a new header is encountered, it is necessary to store the information
                 # encountered under the previous header.
                 if templist and aheader:
@@ -162,16 +162,17 @@
                     # Under the translations header there is quite a bit of stuff
                     # that's only needed for formatting, we can just skip that
                     # and go on processing the next line
-                    if line.lower().find('{top}')!=-1: continue
-                    if line.lower().find('{mid}')!=-1: continue
-                    if line.lower().find('{bottom}')!=-1: continue
-                    if line.find('|-')!=-1: continue
-                    if line.find('{|')!=-1: continue
-                    if line.find('|}')!=-1: continue
-                    if line.lower().find('here-->')!=-1: continue
-                    if line.lower().find('width=')!=-1: continue
-                    if line.lower().find('<!--left column')!=-1: continue
-                    if line.lower().find('<!--right column')!=-1: continue
+                    lower = line.lower()
+                    if '{top}' in lower: continue
+                    if '{mid}' in lower: continue
+                    if '{bottom}' in lower: continue
+                    if '|-' in line: continue
+                    if '{|' in line: continue
+                    if '|}' in line: continue
+                    if 'here-->' in lower: continue
+                    if 'width=' in lower: continue
+                    if '<!--left column' in lower: continue
+                    if '<!--right column' in lower: continue
templist.append(line)
@@ -349,9 +350,9 @@
                             score=0
                             for word in concisedefclean.split():
                                 definition=anothermeaning.definition.replace("(",'').replace(")",'').replace("'",'').replace(":",'').replace(".",'').replace("#",'').lower()
-                                if len(word)>1 and definition.find(' '+word+' ')!=-1:
+                                if len(word)>1 and ' '+word+' ' in definition:
                                     score+=1
-                                if len(word)>2 and definition.find(word)!=-1:
+                                if len(word)>2 and word in definition:
                                     score+=1
                             if score>highest:
                                 highest=score
Modified: trunk/pywikipedia/wiktionary.py
===================================================================
--- trunk/pywikipedia/wiktionary.py	2009-04-28 01:29:50 UTC (rev 6743)
+++ trunk/pywikipedia/wiktionary.py	2009-04-28 05:35:32 UTC (rev 6744)
@@ -328,17 +328,18 @@
  #           print line
             # Let's get rid of line breaks and extraneous white space
             line=line.replace('\n','').strip()
+            lower = line.lower()
             # Let's start by looking for general stuff, that provides information which is
             # interesting to store at the page level
-            if line.lower().find('{wikipedia}')!=-1:
+            if '{wikipedia}' in lower:
                 self.addLink('wikipedia')
                 continue
-            if line.lower().find('[[category:')!=-1:
+            if '[[category:' in lower:
                 category=line.split(':')[1].replace(']','')
                 self.addCategory(category)
 #                print 'category: ', category
                 continue
-            if line.find('|')==-1:
+            if '|' not in line:
                 bracketspos=line.find('[[')
                 colonpos=line.find(':')
                 if bracketspos!=-1 and colonpos!=-1 and bracketspos < colonpos:
@@ -356,7 +357,7 @@
                 templist.append(line)
                 continue
 #        print 'line0:',line[0], 'line-2:',line[-2],'|','stripped line-2',line.rstrip()[-2]
-            if line.strip()[0]=='='and line.rstrip()[-2]=='=' or not line.find('{{-')==-1 and not line.find('-}}')==-1:
+            if line.strip()[0]=='='and line.rstrip()[-2]=='=' or '{{-' in line and '-}}' in line:
                 # When a new header is encountered, it is necessary to store the information
                 # encountered under the previous header.
                 if templist:
@@ -385,16 +386,17 @@
                     # Under the translations header there is quite a bit of stuff
                     # that's only needed for formatting, we can just skip that
                     # and go on processing the next line
-                    if line.lower().find('{top}')!=-1: continue
-                    if line.lower().find('{mid}')!=-1: continue
-                    if line.lower().find('{bottom}')!=-1: continue
-                    if line.find('|-')!=-1: continue
-                    if line.find('{|')!=-1: continue
-                    if line.find('|}')!=-1: continue
-                    if line.lower().find('here-->')!=-1: continue
-                    if line.lower().find('width=')!=-1: continue
-                    if line.lower().find('<!--left column')!=-1: continue
-                    if line.lower().find('<!--right column')!=-1: continue
+                    lower = line.lower()
+                    if '{top}' in lower: continue
+                    if '{mid}' in lower: continue
+                    if '{bottom}' in lower: continue
+                    if '|-' in line: continue
+                    if '{|' in line: continue
+                    if '|}' in line: continue
+                    if 'here-->' in lower: continue
+                    if 'width=' in lower: continue
+                    if '<!--left column' in lower: continue
+                    if '<!--right column' in lower: continue
templist.append(line)
@@ -1010,7 +1012,7 @@
         if line.count('=')>1:
             self.level = line.count('=') // 2 # integer floor division without fractional part
             self.header = line.replace('=','')
-        elif not line.find('{{')==-1:
+        elif '{{' in line:
             self.header = line.replace('{{-','').replace('-}}','')
self.header = self.header.replace('{{','').replace('}}','').strip().lower()