Revision: 5643 Author: russblau Date: 2008-06-26 21:39:01 +0000 (Thu, 26 Jun 2008)
Log Message: ----------- Using keyword substitution in string formatting; will make life easier later when messages are translated, since word order may not be the same in all languages.
Modified Paths: -------------- branches/rewrite/pywikibot/__init__.py branches/rewrite/pywikibot/comms/http.py branches/rewrite/pywikibot/comms/threadedhttp.py branches/rewrite/pywikibot/config.py branches/rewrite/pywikibot/family.py branches/rewrite/pywikibot/login.py branches/rewrite/pywikibot/throttle.py
Modified: branches/rewrite/pywikibot/__init__.py =================================================================== --- branches/rewrite/pywikibot/__init__.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/__init__.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -48,12 +48,12 @@ try: exec "from site import %s as __Site" % interface except ImportError: - raise ValueError("Invalid interface name '%s'" % interface) + raise ValueError("Invalid interface name '%(interface)s'" % locals()) key = '%s:%s:%s' % (fam, code, user) if not _sites.has_key(key): _sites[key] = __Site(code=code, fam=fam, user=user) - logger.debug("Instantiating Site object '%s'" - % _sites[key]) + logger.debug("Instantiating Site object '%(site)s'" + % {'site': _sites[key]}) return _sites[key]
getSite = Site # alias for backwards-compatibility @@ -68,7 +68,7 @@
def input(prompt, password=False): if isinstance(prompt, unicode): - prompt = prompt.encode(sys.stdout.encoding, "replace") + prompt = prompt.encode(sys.stdout.encoding, "xmlcharrefreplace") if password: import getpass return getpass.getpass(prompt) @@ -104,6 +104,7 @@ # Throttle and thread handling
threadpool = [] # add page-putting threads to this list as they are created +stopped = False
def stopme(): """Drop this process from the throttle log, after pending threads finish. @@ -112,13 +113,16 @@ at Python exit.
""" + global stopped + if stopped: + return logger = logging.getLogger("wiki")
logger.debug("stopme() called") - threadcount = sum(1 for thd in threadpool if thd.isAlive()) - if threadcount: - logger.info("Waiting for approximately %s threads to finish." - % threadcount) + count = sum(1 for thd in threadpool if thd.isAlive()) + if count: + logger.info("Waiting for about %(count)s pages to be saved." + % locals()) for thd in threadpool: if thd.isAlive(): thd.join() @@ -126,6 +130,7 @@ try: _sites[_sites.keys()[0]].throttle.drop() logger.info("Dropped throttle(s).") + stopped = True except IndexError: pass
Modified: branches/rewrite/pywikibot/comms/http.py =================================================================== --- branches/rewrite/pywikibot/comms/http.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/comms/http.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -57,7 +57,7 @@
# Build up HttpProcessors -logger.info('Starting %i threads...' % numthreads) +logger.info('Starting %(numthreads)i threads...' % locals()) for i in range(numthreads): proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) proc.setDaemon(True) @@ -94,13 +94,14 @@ http_queue.put(request) request.lock.acquire()
- #do some error correcting stuff + #TODO: do some error correcting stuff
#if all else fails if isinstance(request.data, Exception): raise request.data
if request.data[0].status != 200: - logger.warning("Http response status %s" % request.data[0].status) + logger.warning("Http response status %(status)s" + % {'status': request.data[0].status})
return request.data[1]
Modified: branches/rewrite/pywikibot/comms/threadedhttp.py =================================================================== --- branches/rewrite/pywikibot/comms/threadedhttp.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/comms/threadedhttp.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -38,7 +38,7 @@ import pkg_resources except ImportError: logger.critical( - "Error : You need the python module setuptools to use this module") + "Error: You need the python module setuptools to use this module") sys.exit(1) pkg_resources.require("httplib2") import httplib2 @@ -67,6 +67,8 @@ for key in self.connections: for connection in self.connections[key]: connection.close() + except AttributeError: + pass # this shows up when logger has been destroyed first finally: self.lock.release()
Modified: branches/rewrite/pywikibot/config.py =================================================================== --- branches/rewrite/pywikibot/config.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/config.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -8,6 +8,7 @@
import os, re import sys as __sys + # IMPORTANT: # Do not change any of the variables in this file. Instead, make # a file user-config.py, and overwrite values in there. @@ -108,10 +109,11 @@ base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir)) # make sure this path is valid and that it contains user-config file if not os.path.isdir(base_dir): - raise RuntimeError("Directory '%s' does not exist." % base_dir) + raise RuntimeError("Directory '%(base_dir)s' does not exist." + % locals()) if not os.path.exists(os.path.join(base_dir, "user-config.py")): - raise RuntimeError("No user-config.py found in directory '%s'." - % base_dir) + raise RuntimeError("No user-config.py found in directory '%(base_dir)s'." + % locals()) return base_dir
_base_dir = _get_base_dir() @@ -183,8 +185,7 @@ _key2 = _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '%s\shell\open\command' % _progID) _cmd = _winreg.QueryValueEx(_key2, None)[0] editor = _cmd.replace('%1', '') - # Notepad is even worse than our Tkinter editor. Nobody has - # deserved to use it. + # Notepad is even worse than our Tkinter editor. if editor.lower().endswith('notepad.exe'): editor = None except: @@ -319,7 +320,7 @@ # That can do very ugly results. deIndentTables = True # table2wiki.py works quite stable, so you might switch to True -table2wikiAskOnlyWarnings = True +table2wikiAskOnlyWarnngs = True table2wikiSkipWarnings = False
############## WEBLINK CHECKER SETTINGS ############## @@ -462,9 +463,11 @@ if __sys.platform=='win32' or _filemode&002==0: execfile(_filename) else: - print "WARNING: Skipped '%s': writeable by others."%_filename + print "WARNING: Skipped '%(fn)s': writeable by others."\ + % {'fn' :_filename} else: - print "WARNING: Skipped '%s': owned by someone else."%_filename + print "WARNING: Skipped '%(fn)s': owned by someone else."\ + % {'fn' :_filename}
# Test for obsoleted and/or unknown variables. for _key in globals().keys(): @@ -484,12 +487,14 @@ elif ot==type(1) and nt==type(True): pass else: - print "WARNING: Type of '%s' changed"%_key - print " Was: ",ot - print " Now: ",nt + print "WARNING: Type of '%(_key)s' changed" % locals() + print " %(was)s: %(old)s" % {'was': "Was", 'old': ot} + print " %(was)s: %(new)s" % {'now': "Now", 'new': nt} del nt,ot else: - print "WARNING: Configuration variable %r is defined but unknown. Misspelled?" %_key + logger.warn( + "Configuration variable %(_key)r is defined but unknown. Misspelled?" + % locals())
# Fix up default console_encoding if console_encoding == None: @@ -549,7 +554,7 @@ if _arg=="modified": _all=0 else: - print "Unknown arg %s ignored"%_arg + print "Unknown arg %(_arg)s ignored" % locals() _k=globals().keys() _k.sort() for _name in _k:
Modified: branches/rewrite/pywikibot/family.py =================================================================== --- branches/rewrite/pywikibot/family.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/family.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -2505,11 +2505,17 @@ elif fallback: return self.linktrails[fallback] else: - raise KeyError('ERROR: linktrail in language %s unknown' % code) + raise KeyError( + "ERROR: linktrail in language %(language_code)s unknown" + % {'language_code': code})
- def namespace(self, code, ns_number, fallback = '_default', all = False): + def namespace(self, code, ns_number, fallback='_default', all=False): if not self.isDefinedNS(ns_number): - raise KeyError('ERROR: Unknown namespace %d for %s:%s' % (ns_number, code, self.name)) + raise KeyError( +'ERROR: Unknown namespace %(ns_number)d for %(language_code)s:%(ns_name)s' + % {'ns_number': ns_number, + 'language_code': code, + 'ns_name': self.name}) elif self.isNsI18N(ns_number, code): v = self.namespaces[ns_number][code] if type(v) is not list: @@ -2525,35 +2531,35 @@ if type(v) is not list: v = [v,] else: - raise KeyError('ERROR: title for namespace %d in language %s unknown' % (ns_number, code)) - + raise KeyError( +'ERROR: title for namespace %(ns_number)d in language %(language_code)s unknown' + % {'ns_number': ns_number, + 'language_code': code}) if all: - namespaces = [] - - # Unique list - for ns in v: - if ns not in namespaces: - namespaces.append(ns) - + namespaces = list(set(v)) # Lowercase versions of namespaces if code not in self.nocapitalize: - namespaces.extend([ns[0].lower() + ns[1:] for ns in namespaces if ns and ns[0].lower() != ns[0].upper()]) - + namespaces.extend([ns[0].lower() + ns[1:] + for ns in namespaces + if ns and ns[0].lower() != ns[0].upper()]) # Underscore versions of namespaces - namespaces.extend([ns.replace(' ', '_') for ns in namespaces if ns and ' ' in ns]) - + namespaces.extend([ns.replace(' ', '_') + for ns in namespaces if ns and ' ' in ns]) return tuple(namespaces) else: return v[0]
def isDefinedNS(self, ns_number): - """Return True if the namespace has been defined in this family. - """ + """Return True if the namespace has been defined in this family.""" + return self.namespaces.has_key(ns_number)
def isNsI18N(self, ns_number, code): """Return True if the namespace has been internationalized. - (it has a custom entry for a given language)""" + + (it has a custom entry for a given language) + + """ return self.namespaces[ns_number].has_key(code)
def isDefinedNSLanguage(self, ns_number, code, fallback='_default'): @@ -2623,8 +2629,8 @@ return self.disambiguationTemplates[fallback] else: raise KeyError( - 'ERROR: title for disambig template in language %s unknown' - % code) +"ERROR: title for disambig template in language %(language_code)s unknown" + % {'language_code': code})
# Returns the title of the special namespace in language 'code', taken from # dictionary above.
Modified: branches/rewrite/pywikibot/login.py =================================================================== --- branches/rewrite/pywikibot/login.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/login.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -81,32 +81,36 @@ [self.site.family.name][self.site.code] except: raise NoUsername( -u'ERROR: Sysop username for %s:%s is undefined.\nIf you have a sysop account for that site, please add such a line to user-config.py:\n\nsysopnames['%s']['%s'] = 'myUsername'' - % (self.site.family.name, self.site.code, - self.site.family.name, self.site.code)) +u"""ERROR: Sysop username for %(fam_name)s:%(wiki_code)s is undefined. +If you have a sysop account for that site, please add a line to user-config.py: + +sysopnames['%(fam_name)s']['%(wiki_code)s'] = 'myUsername'""" + % {'fam_name': self.site.family.name, + 'wiki_code': self.site.code}) else: try: self.username = config.usernames\ [self.site.family.name][self.site.code] except: raise NoUsername( -u"""ERROR: Username for %s:%s is undefined. +u"""ERROR: Username for %(fam_name)s:%(wiki_code)s is undefined. If you have an account for that site, please add a line to user-config.py:
-usernames['%s']['%s'] = 'myUsername'""" - % (self.site.family.name, self.site.code, - self.site.family.name, self.site.code)) +usernames['%(fam_name)s']['%(wiki_code)s'] = 'myUsername'""" + % {'fam_name': self.site.family.name, + 'wiki_code': self.site.code}) self.password = password if getattr(config, 'password_file', ''): self.readPassword()
def botAllowed(self): - """ - Checks whether the bot is listed on a specific page to comply with + """Check whether the bot is listed on a specific page to comply with the policy on the respective wiki. + """ return True # DEBUG - if botList.has_key(self.site.family.name) and botList[self.site.family.name].has_key(self.site.code): + if botList.has_key(self.site.family.name)\ + and botList[self.site.family.name].has_key(self.site.code): botListPageTitle = botList[self.site.family.name][self.site.code] botListPage = pywikibot.Page(self.site, botListPageTitle) for linkedPage in botListPage.linkedPages(): @@ -117,15 +121,15 @@ # No bot policies on other return True
- def getCookie(self, remember=True, captchaId = None, captchaAnswer = None): - """ - Login to the site. + def getCookie(self, remember=True, captchaId=None, captchaAnswer=None): + """Login to the site.
remember Remember login (default: True) captchaId The id number of the captcha, if any captcha The word displayed in the captcha, if any
Returns cookie data if succesful, None otherwise. + """ predata = { "wpName": self.username.encode(self.site.encoding()), @@ -144,12 +148,16 @@ "User-agent": wikipedia.useragent } data = self.site.urlEncode(predata) - response = urllib2.urlopen(urllib2.Request(self.site.protocol() + '://' + self.site.hostname() + address, data, headers)) + response = urllib2.urlopen( + urllib2.Request(self.site.protocol() + + '://' + self.site.hostname() + + address, data, headers)) data = response.read() wikipedia.cj.save(wikipedia.COOKIEFILE) return "Ok" else: - response, data = self.site.postForm(address, predata, useCookie=False) + response, data = self.site.postForm(address, predata, + useCookie=False) n = 0 Reat=re.compile(': (.*?);') L = [] @@ -182,13 +190,14 @@ raise CaptchaError(id) url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id) answer = wikipedia.ui.askForCaptcha(url) - return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer) + return self.getCookie(remember=remember, captchaId=id, + captchaAnswer=answer) else: return None
def storecookiedata(self, data): """ - Stores cookie data. + Store cookie data.
The argument data is the raw data, as returned by getCookie().
@@ -202,30 +211,32 @@ f.close()
def readPassword(self): - """ - Reads passwords from a file. DO NOT FORGET TO REMOVE READ - ACCESS FOR OTHER USERS!!! Use chmod 600 password-file. - All lines below should be valid Python tuples in the form - (code, family, username, password) or (username, password) - to set a default password for an username. Default usernames - should occur above specific usernames. + """Read passwords from a file.
- Example: + DO NOT FORGET TO REMOVE READ ACCESS FOR OTHER USERS!!! Use chmod 600 + password-file. All lines below should be valid Python tuples in the + form (code, family, username, password) or (username, password) to + set a default password for a username. Default usernames should + occur above specific usernames.
- ("my_username", "my_default_password") - ("my_sysop_user", "my_sysop_password") - ("en", "wikipedia", "my_en_user", "my_en_pass") + Example: + + ("my_username", "my_default_password") + ("my_sysop_user", "my_sysop_password") + ("en", "wikipedia", "my_en_user", "my_en_pass") + """ file = open(config.password_file) for line in file: if not line.strip(): continue entry = eval(line) if len(entry) == 2: - if entry[0] == self.username: self.password = entry[1] + if entry[0] == self.username: + self.password = entry[1] elif len(entry) == 4: if entry[0] == self.site.code and \ - entry[1] == self.site.family.name and \ - entry[2] == self.username: + entry[1] == self.site.family.name and \ + entry[2] == self.username: self.password = entry[3] file.close()
@@ -233,18 +244,28 @@ if not self.password: # As we don't want the password to appear on the screen, we set # password = True - self.password = pywikibot.input(u'Password for user %s on %s:' % (self.username, self.site), password = True) + self.password = pywikibot.input( + u'Password for user %(name)s on %(site)s:' + % {'name': self.username, 'site': self.site}, + password = True)
# self.password = self.password.encode(self.site.encoding())
- logger.info(u"Logging in to %s as %s" % (self.site, self.username)) + logger.info(u"Logging in to %(site)s as %(name)s" + % {'name': self.username, 'site': self.site}) cookiedata = self.getCookie() if cookiedata: self.storecookiedata(cookiedata) logger.info(u"Should be logged in now") # Show a warning according to the local bot policy if not self.botAllowed(): - logger.error(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.code]) + logger.error( + u"Username '%(name)s' is not listed on [[%(page)s]]." + % {'name': self.username, + 'page': botList[self.site.family.name][self.site.code]}) + logger.error( +"Please make sure you are allowed to use the robot before actually using it!") + return False return True else: logger.error(u"Login failed. Wrong password or CAPTCHA answer?") @@ -265,7 +286,8 @@ for arg in wikipedia.handleArgs(): if arg.startswith("-pass"): if len(arg) == 5: - password = pywikibot.input(u'Password for all accounts:', password = True) + password = pywikibot.input(u'Password for all accounts:', + password = True) else: password = arg[6:] elif arg == "-sysop": @@ -286,12 +308,12 @@ for lang in namedict[familyName].iterkeys(): site = pywikibot.getSite(code=lang, fam=familyName) if not forceLogin and site.loggedInAs(sysop = sysop) != None: - logger.info(u'Already logged in on %s' % site) + logger.info(u'Already logged in on %(site)s' % locals()) else: - loginMan = LoginManager(password, sysop = sysop, site = site) + loginMan = LoginManager(password, sysop=sysop, site=site) loginMan.login() else: - loginMan = LoginManager(password, sysop = sysop) + loginMan = LoginManager(password, sysop=sysop) loginMan.login()
if __name__ == "__main__":
Modified: branches/rewrite/pywikibot/throttle.py =================================================================== --- branches/rewrite/pywikibot/throttle.py 2008-06-26 21:28:02 UTC (rev 5642) +++ branches/rewrite/pywikibot/throttle.py 2008-06-26 21:39:01 UTC (rev 5643) @@ -46,8 +46,8 @@ self.last_read = 0 self.last_write = 0 self.next_multiplicity = 1.0 - self.checkdelay = 120 # Check logfile again after this many seconds - self.dropdelay = 360 # Ignore processes that have not made + self.checkdelay = 300 # Check logfile again after this many seconds + self.dropdelay = 750 # Ignore processes that have not made # a check in this many seconds self.releasepid = 1800 # Free the process id after this many seconds self.lastwait = 0.0 @@ -60,7 +60,7 @@ def checkMultiplicity(self): global pid self.lock.acquire() - logger.debug("Checking multiplicity: pid = %s" % pid) + logger.debug("Checking multiplicity: pid = %(pid)s" % globals()) try: processes = [] my_pid = 1 @@ -110,8 +110,8 @@ self.process_multiplicity = count if self.verbosedelay: logger.info( - u"Found %s processes running, including the current process." - % count) +u"Found %(count)s processes running, including the current process." + % locals()) finally: self.lock.release()
@@ -206,10 +206,14 @@
Parameter requestsize is the number of Pages to be read/written; multiply delay time by an appropriate factor. + + Because this seizes the throttle lock, it will prevent any other + thread from writing to the same site until the wait expires. + """ self.lock.acquire() try: - waittime = self.waittime(write=write) + wait = self.waittime(write=write) # Calculate the multiplicity of the next delay based on how # big the request is that is being posted now. # We want to add "one delay" for each factor of two in the @@ -217,13 +221,13 @@ # the delay time for the server. self.next_multiplicity = math.log(1+requestsize)/math.log(2.0) # Announce the delay if it exceeds a preset limit - if waittime > config.noisysleep: - logger.info(u"Sleeping for %.1f seconds, %s" - % (waittime, - time.strftime("%Y-%m-%d %H:%M:%S", - time.localtime())) - ) - time.sleep(waittime) + if wait > config.noisysleep: + logger.info(u"Sleeping for %(wait).1f seconds, %(now)s" + % {'wait': wait, + 'now': time.strftime("%Y-%m-%d %H:%M:%S", + time.localtime()) + } ) + time.sleep(wait) if write: self.last_write = time.time() else: @@ -248,12 +252,12 @@ wait = delay - (time.time() - started) if wait > 0: if wait > config.noisysleep: - logger.warn(u"Sleeping for %.1f seconds, %s" - % (wait, - time.strftime("%Y-%m-%d %H:%M:%S", - time.localtime())) - ) + logger.info(u"Sleeping for %(wait).1f seconds, %(now)s" + % {'wait': wait, + 'now': time.strftime("%Y-%m-%d %H:%M:%S", + time.localtime()) + } ) time.sleep(wait) finally: self.lock.release() - +
pywikipedia-l@lists.wikimedia.org