Revision: 8632
Author: xqt
Date: 2010-10-09 21:59:27 +0000 (Sat, 09 Oct 2010)
Log Message:
-----------
import wikipedia as pywikibot for merging to rewrite
Modified Paths:
--------------
    trunk/pywikipedia/warnfile.py
    trunk/pywikipedia/watchlist.py
    trunk/pywikipedia/weblinkchecker.py
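All three scripts receive the same mechanical change: the old wikipedia
module is imported under the name pywikibot, and the call sites are spelled
against that alias so the code reads like the rewrite branch. A minimal
sketch of the pattern (illustrative only, not part of the committed diff):

    # Trunk scripts keep running on the old framework, but the call sites
    # already use the rewrite-style module name.
    import wikipedia as pywikibot

    site = pywikibot.getSite()              # same object as wikipedia.getSite()
    page = pywikibot.Page(site, u'Example')
    pywikibot.output(u'Working on %s' % page.title(asLink=True))

Because pywikibot is only an alias here, merging a script to the rewrite
branch should mostly reduce to swapping this import for the real pywikibot
package while the call sites stay untouched.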
Modified: trunk/pywikipedia/warnfile.py =================================================================== --- trunk/pywikipedia/warnfile.py 2010-10-09 21:01:00 UTC (rev 8631) +++ trunk/pywikipedia/warnfile.py 2010-10-09 21:59:27 UTC (rev 8632) @@ -13,38 +13,44 @@ """ # # (C) Rob W.W. Hooft, 2003 +# (C) Pywikipedia bot team, 2003-2010 # # Distributed under the terms of the MIT license. # __version__ = '$Id$' # import sys, os, re -import wikipedia, interwiki +import wikipedia as pywikibot +import interwiki
+ class WarnfileReader: def __init__(self, filename): self.filename = filename
def getHints(self): print "Parsing warnfile..." - R=re.compile(r'WARNING: (?P<family>.+?): [[(?P<locallang>.+?):(?P<localtitle>.+?)]](?P<warningtype>.+?)[[(?P<targetlang>.+?):(?P<targettitle>.+?)]]') + R=re.compile( + r'WARNING: (?P<family>.+?): [[(?P<locallang>.+?):(?P<localtitle>.+?)]](?P<warningtype>.+?)[[(?P<targetlang>.+?):(?P<targettitle>.+?)]]') import codecs f = codecs.open(self.filename, 'r', 'utf-8') hints={} removeHints={} - mysite=wikipedia.getSite() + mysite=pywikibot.getSite() for line in f.readlines(): m=R.search(line) if m: #print "DBG>",line - if m.group('locallang') == mysite.lang and m.group('family') == mysite.family.name: - #wikipedia.output(u' '.join([m.group('locallang'), m.group('localtitle'), m.group('warningtype'), m.group('targetsite'), m.group('targettitle')])) + if m.group('locallang') == mysite.lang and \ + m.group('family') == mysite.family.name: + #pywikibot.output(u' '.join([m.group('locallang'), m.group('localtitle'), m.group('warningtype'), m.group('targetsite'), m.group('targettitle')])) #print m.group(3) - page = wikipedia.Page(mysite, m.group('localtitle')) + page = pywikibot.Page(mysite, m.group('localtitle')) removing = (m.group('warningtype') == ' links to incorrect ') try: - targetSite = mysite.getSite(code = m.group('targetlang')) - targetPage = wikipedia.Page(targetSite, m.group('targettitle')) + targetSite = mysite.getSite(code=m.group('targetlang')) + targetPage = pywikibot.Page(targetSite, + m.group('targettitle')) if removing: if page not in removeHints: removeHints[page]=[] @@ -53,7 +59,7 @@ if page not in hints: hints[page]=[] hints[page].append(targetPage) - except wikipedia.Error: + except pywikibot.Error: print "DBG> Failed to add", line f.close() return hints, removeHints @@ -72,11 +78,13 @@ try: for page2 in page.interwiki(): old[page2.site()] = page2 - except wikipedia.IsRedirectPage: - wikipedia.output(u"%s is a redirect page; not changing" % page.aslink()) + except pywikibot.IsRedirectPage: + pywikibot.output(u"%s is a redirect page; not changing" + % page.title(asLink=True)) continue - except wikipedia.NoPage: - wikipedia.output(u"Page %s not found; skipping" % page.aslink()) + except pywikibot.NoPage: + pywikibot.output(u"Page %s not found; skipping" + % page.title(asLink=True)) continue new={} new.update(old) @@ -91,38 +99,43 @@ del new[site] except KeyError: pass - mods, adding, removing, modifying = interwiki.compareLanguages(old, new, insite = page.site()) + mods, adding, removing, modifying = interwiki.compareLanguages(old, + new, + insite=page.site()) if mods: - wikipedia.output(page.aslink() + mods) + pywikibot.output(page.title(asLink=True) + mods) oldtext = page.get() - newtext = wikipedia.replaceLanguageLinks(oldtext, new) + newtext = pywikibot.replaceLanguageLinks(oldtext, new) if 1: - wikipedia.showDiff(oldtext, newtext) + pywikibot.showDiff(oldtext, newtext) try: - status, reason, data = page.put(newtext, comment='warnfile '+mods) - except wikipedia.LockedPage: - wikipedia.output(u"Page is locked. Skipping.") + status, reason, data = page.put(newtext, + comment='warnfile '+mods) + except pywikibot.LockedPage: + pywikibot.output(u"Page is locked. 
Skipping.") continue - except wikipedia.SpamfilterError, e: - wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) + except pywikibot.SpamfilterError, e: + pywikibot.output( + u'Cannot change %s because of blacklist entry %s' + % (page.title(), e.url)) continue - except wikipedia.Error: - wikipedia.output(u"Error while saving page.") + except pywikibot.Error: + pywikibot.output(u"Error while saving page.") continue if str(status) != '302': print status, reason
def main(): filename = None - for arg in wikipedia.handleArgs(): + for arg in pywikibot.handleArgs(): if os.path.isabs(arg): filename = arg else: - filename = wikipedia.config.datafilepath("logs", arg) + filename = pywikibot.config.datafilepath("logs", arg)
if not filename: - mysite = wikipedia.getSite() - filename = wikipedia.config.datafilepath('logs', + mysite = pywikibot.getSite() + filename = pywikibot.config.datafilepath('logs', 'warning-%s-%s.log' % (mysite.family.name, mysite.lang)) reader = WarnfileReader(filename) bot = WarnfileRobot(reader) @@ -132,5 +145,5 @@ try: main() finally: - wikipedia.stopme() + pywikibot.stopme()
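For orientation, WarnfileReader turns the interwiki warnings collected in a
warnfile into per-page hint lists, which WarnfileRobot then applies. A rough
usage sketch under the same assumptions as main() (the concrete log file
name below is only an example):

    import wikipedia as pywikibot
    from warnfile import WarnfileReader

    # main() builds the path as 'warning-<family>-<lang>.log' under logs/;
    # 'warning-wikipedia-en.log' is a made-up example name.
    filename = pywikibot.config.datafilepath('logs', 'warning-wikipedia-en.log')
    hints, removeHints = WarnfileReader(filename).getHints()
    for page, targets in hints.items():
        pywikibot.output(u'%s: %d suggested interwiki links'
                         % (page.title(asLink=True), len(targets)))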
Modified: trunk/pywikipedia/watchlist.py =================================================================== --- trunk/pywikipedia/watchlist.py 2010-10-09 21:01:00 UTC (rev 8631) +++ trunk/pywikipedia/watchlist.py 2010-10-09 21:59:27 UTC (rev 8632) @@ -12,16 +12,18 @@ Command line options: -all - Reloads watchlists for all wikis where a watchlist is already present - -new - Load watchlists for all wikis where accounts is setting in user-config.py + -new - Load watchlists for all wikis where accounts is setting in + user-config.py """
# (C) Daniel Herding, 2005 # # Distributed under the terms of the MIT license. - +# __version__='$Id$' +#
-import wikipedia +import wikipedia as pywikibot import re, sys, pickle import os.path import time @@ -30,19 +32,20 @@
def get(site = None): if site is None: - site = wikipedia.getSite() + site = pywikibot.getSite() if site in cache: # Use cached copy if it exists. watchlist = cache[site] else: - fn = wikipedia.config.datafilepath('watchlists', + fn = pywikibot.config.datafilepath('watchlists', 'watchlist-%s-%s.dat' % (site.family.name, site.lang)) try: # find out how old our saved dump is (in seconds) file_age = time.time() - os.path.getmtime(fn) # if it's older than 1 month, reload it if file_age > 30 * 24 * 60 * 60: - wikipedia.output(u'Copy of watchlist is one month old, reloading') + pywikibot.output( + u'Copy of watchlist is one month old, reloading') refresh(site) except OSError: # no saved watchlist exists yet, retrieve one @@ -69,15 +72,15 @@ params = { 'action': 'query', 'list': 'watchlist', - 'wllimit': wikipedia.config.special_page_limit, + 'wllimit': pywikibot.config.special_page_limit, 'wlprop': 'title', }
- wikipedia.output(u'Retrieving watchlist for %s via API.' % repr(site)) - #wikipedia.put_throttle() # It actually is a get, but a heavy one. + pywikibot.output(u'Retrieving watchlist for %s via API.' % repr(site)) + #pywikibot.put_throttle() # It actually is a get, but a heavy one. watchlist = [] while True: - data = wikipedia.query.GetData(params, site, sysop=sysop) + data = pywikibot.query.GetData(params, site, sysop=sysop) if 'error' in data: raise RuntimeError('ERROR: %s' % data) watchlist.extend([w['title'] for w in data['query']['watchlist']]) @@ -90,24 +93,29 @@ # Save the watchlist to disk # The file is stored in the watchlists subdir. Create if necessary. if sysop: - f = open(wikipedia.config.datafilepath('watchlists', - 'watchlist-%s-%s-sysop.dat' % (site.family.name, site.lang)), 'w') + f = open(pywikibot.config.datafilepath('watchlists', + 'watchlist-%s-%s-sysop.dat' + % (site.family.name, site.lang)), + 'w') else: - f = open(wikipedia.config.datafilepath('watchlists', - 'watchlist-%s-%s.dat' % (site.family.name, site.lang)), 'w') + f = open(pywikibot.config.datafilepath('watchlists', + 'watchlist-%s-%s.dat' + % (site.family.name, site.lang)), + 'w') pickle.dump(watchlist, f) f.close()
def _refreshOld(site, sysop=False): # get watchlist special page's URL path = site.watchlist_address() - wikipedia.output(u'Retrieving watchlist for %s' % repr(site)) - #wikipedia.put_throttle() # It actually is a get, but a heavy one. + pywikibot.output(u'Retrieving watchlist for %s' % repr(site)) + #pywikibot.put_throttle() # It actually is a get, but a heavy one. watchlistHTML = site.getUrl(path, sysop=sysop)
- wikipedia.output(u'Parsing watchlist') + pywikibot.output(u'Parsing watchlist') watchlist = [] - for itemR in [re.compile(r'<li><input type="checkbox" name="id\[\]" value="(.+?)" />'), re.compile(r'<li><input name="titles\[\]" type="checkbox" value="(.+?)" />')]: + for itemR in [re.compile(r'<li><input type="checkbox" name="id\[\]" value="(.+?)" />'), + re.compile(r'<li><input name="titles\[\]" type="checkbox" value="(.+?)" />')]: for m in itemR.finditer(watchlistHTML): pageName = m.group(1) watchlist.append(pageName) @@ -115,28 +123,34 @@ # Save the watchlist to disk # The file is stored in the watchlists subdir. Create if necessary. if sysop: - f = open(wikipedia.config.datafilepath('watchlists', - 'watchlist-%s-%s-sysop.dat' % (site.family.name, site.lang)), 'w') + f = open(pywikibot.config.datafilepath('watchlists', + 'watchlist-%s-%s-sysop.dat' + % (site.family.name, site.lang)), + 'w') else: - f = open(wikipedia.config.datafilepath('watchlists', - 'watchlist-%s-%s.dat' % (site.family.name, site.lang)), 'w') + f = open(pywikibot.config.datafilepath('watchlists', + 'watchlist-%s-%s.dat' + % (site.family.name, site.lang)), + 'w') pickle.dump(watchlist, f) f.close()
def refresh_all(new = False, sysop=False): if new: import config - wikipedia.output('Downloading All watchlists for your accounts in user-config.py'); + pywikibot.output( + 'Downloading All watchlists for your accounts in user-config.py') for family in config.usernames: for lang in config.usernames[ family ]: - refresh(wikipedia.getSite( code = lang, fam = family ), sysop=sysop ) + refresh(pywikibot.getSite(code=lang, fam=family), sysop=sysop) for family in config.sysopnames: - for lang in config.sysopnames[ family ]: - refresh(wikipedia.getSite( code = lang, fam = family ), sysop=sysop ) + for lang in config.sysopnames[family]: + refresh(pywikibot.getSite(code=lang, fam=family), sysop=sysop)
else: import dircache, time - filenames = dircache.listdir(wikipedia.config.datafilepath('watchlists')) + filenames = dircache.listdir( + pywikibot.config.datafilepath('watchlists')) watchlist_filenameR = re.compile('watchlist-([a-z-:]+).dat') for filename in filenames: match = watchlist_filenameR.match(filename) @@ -144,13 +158,13 @@ arr = match.group(1).split('-') family = arr[0] lang = '-'.join(arr[1:]) - refresh(wikipedia.getSite(code = lang, fam = family)) + refresh(pywikibot.getSite(code = lang, fam = family))
def main(): all = False new = False sysop = False - for arg in wikipedia.handleArgs(): + for arg in pywikibot.handleArgs(): if arg == '-all' or arg == '-update': all = True elif arg == '-new': @@ -162,16 +176,16 @@ elif new: refresh_all(new, sysop=sysop) else: - refresh(wikipedia.getSite(), sysop=sysop) + refresh(pywikibot.getSite(), sysop=sysop)
- watchlist = get(wikipedia.getSite()) - wikipedia.output(u'%i pages in the watchlist.' % len(watchlist)) + watchlist = get(pywikibot.getSite()) + pywikibot.output(u'%i pages in the watchlist.' % len(watchlist)) for pageName in watchlist: - wikipedia.output( pageName, toStdout = True ) + pywikibot.output( pageName, toStdout = True )
if __name__ == "__main__": try: main() finally: - wikipedia.stopme() + pywikibot.stopme()
Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2010-10-09 21:01:00 UTC (rev 8631) +++ trunk/pywikipedia/weblinkchecker.py 2010-10-09 21:59:27 UTC (rev 8632) @@ -88,16 +88,18 @@
# # (C) Daniel Herding, 2005 +# (C) Pywikipedia bot team, 2005-2010 # # Distributed under the terms of the MIT license. # __version__='$Id$'
-import wikipedia, config, pagegenerators import sys, re import codecs, pickle import httplib, socket, urlparse, urllib, urllib2 import threading, time +import wikipedia as pywikibot +import config, pagegenerators try: set # introduced in Python 2.4: faster and future except NameError: @@ -208,7 +210,7 @@ ]
def weblinksIn(text, withoutBracketed = False, onlyBracketed = False): - text = wikipedia.removeDisabledParts(text) + text = pywikibot.removeDisabledParts(text)
# MediaWiki parses templates before parsing external links. Thus, there # might be a | or a } directly after a URL which does not belong to @@ -220,15 +222,16 @@ text = nestedTemplateR.sub(r'{{\1 \2 \3}}', text)
# Then blow up the templates with spaces so that the | and }} will not be regarded as part of the link:. - templateWithParamsR = re.compile(r'{{([^}]*?[^ ])|([^ ][^}]*?)}}', re.DOTALL) + templateWithParamsR = re.compile(r'{{([^}]*?[^ ])|([^ ][^}]*?)}}', + re.DOTALL) while templateWithParamsR.search(text): text = templateWithParamsR.sub(r'{{ \1 | \2 }}', text)
- linkR = wikipedia.compileLinkR(withoutBracketed, onlyBracketed) + linkR = pywikibot.compileLinkR(withoutBracketed, onlyBracketed)
# Remove HTML comments in URLs as well as URLs in HTML comments. # Also remove text inside nowiki links etc. - text = wikipedia.removeDisabledParts(text) + text = pywikibot.removeDisabledParts(text) for m in linkR.finditer(text): yield m.group('url')
@@ -237,7 +240,7 @@ self.url = url
def getArchiveURL(self): - wikipedia.output(u'Consulting the Internet Archive for %s' % self.url) + pywikibot.output(u'Consulting the Internet Archive for %s' % self.url) archiveURL = 'http://web.archive.org/web/*/%s' % self.url try: f = urllib2.urlopen(archiveURL) @@ -270,7 +273,7 @@ Warning: Also returns false if your Internet connection isn't working correctly! (This will give a Socket Error) ''' - def __init__(self, url, redirectChain = [], serverEncoding = None, HTTPignore = []): + def __init__(self, url, redirectChain = [], serverEncoding=None, HTTPignore=[]): """ redirectChain is a list of redirects which were resolved by resolveRedirect(). This is needed to detect redirect loops. @@ -278,7 +281,7 @@ self.url = url self.serverEncoding = serverEncoding self.header = { - # 'User-agent': wikipedia.useragent, + # 'User-agent': pywikibot.useragent, # we fake being Firefox because some webservers block unknown # clients, e.g. http://images.google.de/images?q=Albit gives a 403 # when using the PyWikipediaBot user agent. @@ -302,7 +305,9 @@ def getEncodingUsedByServer(self): if not self.serverEncoding: try: - wikipedia.output(u'Contacting server %s to find out its default encoding...' % self.host) + pywikibot.output( + u'Contacting server %s to find out its default encoding...' + % self.host) conn = self.getConnection() conn.request('HEAD', '/', None, self.header) response = conn.getresponse() @@ -313,7 +318,8 @@ if not self.serverEncoding: # TODO: We might also load a page, then check for an encoding # definition in a HTML meta tag. - wikipedia.output(u'Error retrieving server's default charset. Using ISO 8859-1.') + pywikibot.output( + u'Error retrieving server's default charset. Using ISO 8859-1.') # most browsers use ISO 8859-1 (Latin-1) as the default. 
self.serverEncoding = 'iso8859-1' return self.serverEncoding @@ -358,9 +364,11 @@ conn = self.getConnection() try: if useHEAD: - conn.request('HEAD', '%s%s' % (self.path, self.query), None, self.header) + conn.request('HEAD', '%s%s' % (self.path, self.query), None, + self.header) else: - conn.request('GET', '%s%s' % (self.path, self.query), None, self.header) + conn.request('GET', '%s%s' % (self.path, self.query), None, + self.header) response = conn.getresponse() # read the server's encoding, in case we need it later self.readEncodingFromResponse(response) @@ -380,12 +388,15 @@ try: redirTarget.encode('ascii') except UnicodeError: - redirTarget = redirTarget.decode(self.getEncodingUsedByServer()) - if redirTarget.startswith('http://') or redirTarget.startswith('https://'): + redirTarget = redirTarget.decode( + self.getEncodingUsedByServer()) + if redirTarget.startswith('http://') or \ + redirTarget.startswith('https://'): self.changeUrl(redirTarget) return True elif redirTarget.startswith('/'): - self.changeUrl(u'%s://%s%s' % (self.protocol, self.host, redirTarget)) + self.changeUrl(u'%s://%s%s' + % (self.protocol, self.host, redirTarget)) return True else: # redirect to relative position # cut off filename @@ -399,7 +410,9 @@ # change /foo/bar/ to /foo/ directory = directory[:-1] directory = directory[:directory.rindex('/') + 1] - self.changeUrl('%s://%s%s%s' % (self.protocol, self.host, directory, redirTarget)) + self.changeUrl('%s://%s%s%s' + % (self.protocol, self.host, directory, + redirTarget)) return True else: return False # not a redirect @@ -412,7 +425,8 @@ try: wasRedirected = self.resolveRedirect(useHEAD = useHEAD) except UnicodeError, error: - return False, u'Encoding Error: %s (%s)' % (error.__class__.__name__, unicode(error)) + return False, u'Encoding Error: %s (%s)' \ + % (error.__class__.__name__, unicode(error)) except httplib.error, error: return False, u'HTTP Error: %s' % error.__class__.__name__ except socket.error, error: @@ -441,7 +455,9 @@ # which leads to a cyclic list of redirects. # We simply start from the beginning, but this time, # we don't use HEAD, but GET requests. - redirChecker = LinkChecker(self.redirectChain[0], serverEncoding = self.serverEncoding, HTTPignore = self.HTTPignore) + redirChecker = LinkChecker(self.redirectChain[0], + serverEncoding=self.serverEncoding, + HTTPignore=self.HTTPignore) return redirChecker.check(useHEAD = False) else: urlList = ['[%s]' % url for url in self.redirectChain + [self.url]] @@ -452,13 +468,17 @@ # which leads to a long (or infinite) list of redirects. # We simply start from the beginning, but this time, # we don't use HEAD, but GET requests. 
- redirChecker = LinkChecker(self.redirectChain[0], serverEncoding = self.serverEncoding, HTTPignore = self.HTTPignore) + redirChecker = LinkChecker(self.redirectChain[0], + serverEncoding=self.serverEncoding, + HTTPignore = self.HTTPignore) return redirChecker.check(useHEAD = False) else: urlList = ['[%s]' % url for url in self.redirectChain + [self.url]] return False, u'Long Chain of Redirects: %s' % ' -> '.join(urlList) else: - redirChecker = LinkChecker(self.url, self.redirectChain, self.serverEncoding, HTTPignore = self.HTTPignore) + redirChecker = LinkChecker(self.url, self.redirectChain, + self.serverEncoding, + HTTPignore=self.HTTPignore) return redirChecker.check(useHEAD = useHEAD) else: try: @@ -466,7 +486,8 @@ except httplib.error, error: return False, u'HTTP Error: %s' % error.__class__.__name__ try: - conn.request('GET', '%s%s' % (self.path, self.query), None, self.header) + conn.request('GET', '%s%s' + % (self.path, self.query), None, self.header) except socket.error, error: return False, u'Socket Error: %s' % repr(error[1]) try: @@ -500,23 +521,25 @@ try: ok, message = linkChecker.check() except: - wikipedia.output('Exception while processing URL %s in page %s' % (self.url, self.page.title())) + pywikibot.output('Exception while processing URL %s in page %s' + % (self.url, self.page.title())) raise if ok: if self.history.setLinkAlive(self.url): - wikipedia.output('*Link to %s in [[%s]] is back alive.' % (self.url, self.page.title())) + pywikibot.output('*Link to %s in [[%s]] is back alive.' + % (self.url, self.page.title())) else: - wikipedia.output('*[[%s]] links to %s - %s.' % (self.page.title(), self.url, message)) + pywikibot.output('*[[%s]] links to %s - %s.' + % (self.page.title(), self.url, message)) self.history.setLinkDead(self.url, message, self.page, day)
+ class History: - ''' - Stores previously found dead links. - The URLs are dictionary keys, and values are lists of tuples where each tuple - represents one time the URL was found dead. Tuples have the form - (title, date, error) where title is the wiki page where the URL was found, - date is an instance of time, and error is a string with error code and - message. + ''' Stores previously found dead links. The URLs are dictionary keys, and + values are lists of tuples where each tuple represents one time the URL was + found dead. Tuples have the form (title, date, error) where title is the + wiki page where the URL was found, date is an instance of time, and error is + a string with error code and message.
We assume that the first element in the list represents the first time we found this dead link, and the last element represents the last time. @@ -528,13 +551,14 @@ ('WikiPageTitle', DATE, '404: File not found'), ('WikiPageName2', DATE, '404: File not found'), ] + '''
def __init__(self, reportThread): self.reportThread = reportThread - site = wikipedia.getSite() + site = pywikibot.getSite() self.semaphore = threading.Semaphore() - self.datfilename = wikipedia.config.datafilepath('deadlinks', + self.datfilename = pywikibot.config.datafilepath('deadlinks', 'deadlinks-%s-%s.dat' % (site.family.name, site.lang)) # Count the number of logged links, so that we can insert captions @@ -552,7 +576,7 @@ """ Logs an error report to a text file in the deadlinks subdirectory. """ - site = wikipedia.getSite() + site = pywikibot.getSite() if archiveURL: errorReport = u'* %s ([%s archive])\n' % (url, archiveURL) else: @@ -560,10 +584,13 @@ for (pageTitle, date, error) in self.historyDict[url]: # ISO 8601 formulation isoDate = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(date)) - errorReport += "** In [[%s]] on %s, %s\n" % (pageTitle, isoDate, error) - wikipedia.output(u"** Logging link for deletion.") - txtfilename = wikipedia.config.datafilepath('deadlinks', - 'results-%s-%s.txt' % (site.family.name, site.lang)) + errorReport += "** In [[%s]] on %s, %s\n" % (pageTitle, isoDate, + error) + pywikibot.output(u"** Logging link for deletion.") + txtfilename = pywikibot.config.datafilepath('deadlinks', + 'results-%s-%s.txt' + % (site.family.name, + site.lang)) txtfile = codecs.open(txtfilename, 'a', 'utf-8') self.logCount += 1 if self.logCount % 30 == 0: @@ -573,7 +600,8 @@ txtfile.close()
if self.reportThread and not containingPage.isTalkPage(): - self.reportThread.report(url, errorReport, containingPage, archiveURL) + self.reportThread.report(url, errorReport, containingPage, + archiveURL)
def setLinkDead(self, url, error, page, day): @@ -589,9 +617,9 @@ # ago, we won't save it in the history this time. if timeSinceLastFound > 60 * 60: self.historyDict[url].append((page.title(), now, error)) - # if the first time we found this link longer than x day ago (default is a week), - # it should probably be fixed or removed. We'll list it in a file - # so that it can be removed manually. + # if the first time we found this link longer than x day ago + # (default is a week), it should probably be fixed or removed. + # We'll list it in a file so that it can be removed manually. if timeSinceFirstFound > 60 * 60 * 24 * day: # search for archived page iac = InternetArchiveConsulter(url) @@ -640,9 +668,10 @@ self.killed = False
def report(self, url, errorReport, containingPage, archiveURL): + """ Tries to add an error report to the talk page belonging to the page + containing the dead link. + """ - Tries to add an error report to the talk page belonging to the page containing the dead link. - """ self.semaphore.acquire() self.queue.append((url, errorReport, containingPage, archiveURL)) self.semaphore.release() @@ -666,37 +695,53 @@ (url, errorReport, containingPage, archiveURL) = self.queue[0] self.queue = self.queue[1:] talkPage = containingPage.toggleTalkPage() - wikipedia.output(u'\03{lightaqua}** Reporting dead link on %s...\03{default}' % talkPage.aslink()) + pywikibot.output( + u'\03{lightaqua}** Reporting dead link on %s...\03{default}' + % talkPage.title(asLink=True)) try: content = talkPage.get() + "\n\n" if url in content: - wikipedia.output(u'\03{lightaqua}** Dead link seems to have already been reported on %s\03{default}' % talkPage.aslink()) + pywikibot.output( + u'\03{lightaqua}** Dead link seems to have already been reported on %s\03{default}' + % talkPage.title(asLink=True)) self.semaphore.release() continue - except (wikipedia.NoPage, wikipedia.IsRedirectPage): + except (pywikibot.NoPage, pywikibot.IsRedirectPage): content = u''
if archiveURL: - archiveMsg = wikipedia.translate(wikipedia.getSite(), talk_report_archive) % archiveURL + archiveMsg = pywikibot.translate(pywikibot.getSite(), + talk_report_archive) % archiveURL else: archiveMsg = u'' - # The caption will default to "Dead link". But if there is already such a caption, we'll - # use "Dead link 2", "Dead link 3", etc. - caption = wikipedia.translate(wikipedia.getSite(), talk_report_caption) + # The caption will default to "Dead link". But if there is + # already such a caption, we'll use "Dead link 2", + # "Dead link 3", etc. + caption = pywikibot.translate(pywikibot.getSite(), + talk_report_caption) i = 1 # Check if there is already such a caption on the talk page. while re.search('= *' + caption + ' *=', content) is not None: i += 1 - caption = wikipedia.translate(wikipedia.getSite(), talk_report_caption) + " " + str(i) - content += wikipedia.translate(wikipedia.getSite(), talk_report) % (caption, errorReport, archiveMsg) - comment = u'[[%s#%s|→]]%s' % (talkPage.title(), caption, wikipedia.translate(wikipedia.getSite(), talk_report_msg)) + caption = pywikibot.translate(pywikibot.getSite(), + talk_report_caption) + " " + str(i) + content += pywikibot.translate(pywikibot.getSite(), + talk_report) % (caption, + errorReport, + archiveMsg) + comment = u'[[%s#%s|→]]%s' % (talkPage.title(), caption, + pywikibot.translate(pywikibot.getSite(), + talk_report_msg)) try: talkPage.put(content, comment) - except wikipedia.SpamfilterError, error: - wikipedia.output(u'\03{lightaqua}** SpamfilterError while trying to change %s: %s\03{default}' % (talkPage.aslink(), error.url)) + except pywikibot.SpamfilterError, error: + pywikibot.output( + u'\03{lightaqua}** SpamfilterError while trying to change %s: %s\03{default}' + % (talkPage.title(asLink=True), error.url))
self.semaphore.release()
+ class WeblinkCheckerRobot: ''' Robot which will use several LinkCheckThreads at once to search for dead @@ -705,7 +750,7 @@ def __init__(self, generator, HTTPignore = []): self.generator = generator if config.report_dead_links_on_talk: - #wikipedia.output("Starting talk page thread") + #pywikibot.output("Starting talk page thread") reportThread = DeadLinkReportThread() # thread dies when program terminates # reportThread.setDaemon(True) @@ -722,8 +767,8 @@ def checkLinksIn(self, page): try: text = page.get() - except wikipedia.NoPage: - wikipedia.output(u'%s does not exist.' % page.title()) + except pywikibot.NoPage: + pywikibot.output(u'%s does not exist.' % page.title()) return for url in weblinksIn(text): ignoreUrl = False @@ -736,11 +781,13 @@ while threading.activeCount() >= config.max_external_links: # wait 100 ms time.sleep(0.1) - thread = LinkCheckThread(page, url, self.history, self.HTTPignore) + thread = LinkCheckThread(page, url, self.history, + self.HTTPignore) # thread dies when program terminates thread.setDaemon(True) thread.start()
+ def RepeatPageGenerator(): history = History(None) pageTitles = set() @@ -751,7 +798,7 @@ pageTitles = list(pageTitles) pageTitles.sort() for pageTitle in pageTitles: - page = wikipedia.Page(wikipedia.getSite(), pageTitle) + page = pywikibot.Page(pywikibot.getSite(), pageTitle) yield page
def countLinkCheckThreads(): @@ -774,7 +821,7 @@ genFactory = pagegenerators.GeneratorFactory() global day day = 7 - for arg in wikipedia.handleArgs(): + for arg in pywikibot.handleArgs(): if arg == '-talk': config.report_dead_links_on_talk = True elif arg == '-notalk': @@ -796,7 +843,7 @@
if singlePageTitle: singlePageTitle = ' '.join(singlePageTitle) - page = wikipedia.Page(wikipedia.getSite(), singlePageTitle) + page = pywikibot.Page(pywikibot.getSite(), singlePageTitle) gen = iter([page])
if not gen: @@ -817,33 +864,35 @@ # Don't wait longer than 30 seconds for threads to finish. while countLinkCheckThreads() > 0 and waitTime < 30: try: - wikipedia.output(u"Waiting for remaining %i threads to finish, please wait..." % countLinkCheckThreads()) + pywikibot.output( + u"Waiting for remaining %i threads to finish, please wait..." % countLinkCheckThreads()) # wait 1 second time.sleep(1) waitTime += 1 except KeyboardInterrupt: - wikipedia.output(u'Interrupted.') + pywikibot.output(u'Interrupted.') break if countLinkCheckThreads() > 0: - wikipedia.output(u'Remaining %i threads will be killed.' % countLinkCheckThreads()) + pywikibot.output(u'Remaining %i threads will be killed.' + % countLinkCheckThreads()) # Threads will die automatically because they are daemonic. if bot.history.reportThread: bot.history.reportThread.shutdown() - # wait until the report thread is shut down; the user can interrupt - # it by pressing CTRL-C. + # wait until the report thread is shut down; the user can + # interrupt it by pressing CTRL-C. try: while bot.history.reportThread.isAlive(): time.sleep(0.1) except KeyboardInterrupt: - wikipedia.output(u'Report thread interrupted.') + pywikibot.output(u'Report thread interrupted.') bot.history.reportThread.kill() - wikipedia.output(u'Saving history...') + pywikibot.output(u'Saving history...') bot.history.save() else: - wikipedia.showHelp() + pywikibot.showHelp()
if __name__ == "__main__": try: main() finally: - wikipedia.stopme() + pywikibot.stopme()