jenkins-bot has submitted this change and it was merged.
Change subject: Log & warn about Site instantiation oddities
Log & warn about Site instantiation oddities
The BaseSite constructor has a few branches that make changes the end user may not expect; logging and warning about them provides useful information when debugging a script.
Add a few asserts in Family.load to detect problems early.
Change-Id: I374461c8b7481e9edae7d5c5db6ef8d39416243b --- M pywikibot/__init__.py M pywikibot/families/wikisource_family.py M pywikibot/family.py M pywikibot/site.py 4 files changed, 59 insertions(+), 9 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py index b7161d8..53aaa13 100644 --- a/pywikibot/__init__.py +++ b/pywikibot/__init__.py @@ -21,6 +21,8 @@ else: from Queue import Queue
+from warnings import warn + # Use pywikibot. prefix for all in-package imports; this is to prevent # confusion with similarly-named modules in version 1 framework, for users # who want to continue using both @@ -601,6 +603,11 @@ _sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop) debug(u"Instantiated %s object '%s'" % (interface.__name__, _sites[key]), _logger) + + if _sites[key].code != code: + warn('Site %s instantiated using different code "%s"' + % (_sites[key], code), UserWarning, 2) + return _sites[key]
diff --git a/pywikibot/families/wikisource_family.py b/pywikibot/families/wikisource_family.py index 2996a08..2abbaaa 100644 --- a/pywikibot/families/wikisource_family.py +++ b/pywikibot/families/wikisource_family.py @@ -26,6 +26,8 @@
self.langs = dict([(lang, '%s.wikisource.org' % lang) for lang in self.languages_by_size]) + # FIXME: '-' is invalid at the beginning of a hostname, and + # '-' is not a valid subdomain. self.langs['-'] = 'wikisource.org'
# Global bot allowed languages on https://meta.wikimedia.org/wiki/Bot_policy/Implementation#Current_implementa... diff --git a/pywikibot/family.py b/pywikibot/family.py index 0969041..4848c6b 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -13,6 +13,7 @@ import re import collections import imp +import string import warnings
if sys.version_info[0] > 2: @@ -20,13 +21,19 @@ else: import urlparse
+from warnings import warn + import pywikibot
from pywikibot import config2 as config from pywikibot.tools import deprecated, deprecate_arg -from pywikibot.exceptions import UnknownFamily, Error +from pywikibot.exceptions import Error, UnknownFamily, FamilyMaintenanceWarning
logger = logging.getLogger("pywiki.wiki.family") + +# Legal characters for Family.name and Family.langs keys +NAME_CHARACTERS = string.ascii_letters + string.digits +CODE_CHARACTERS = string.ascii_lowercase + string.digits + '-'
class Family(object): @@ -858,6 +865,9 @@ """ if fam is None: fam = config.family + + assert(all(x in NAME_CHARACTERS for x in fam)) + if fam in Family._families: return Family._families[fam]
@@ -881,11 +891,26 @@ # RuntimeWarning's while loading. with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - myfamily = imp.load_source(fam, config.family_files[fam]) + mod = imp.load_source(fam, config.family_files[fam]) except (ImportError, KeyError): - raise UnknownFamily("Family %s does not exist" % fam) - Family._families[fam] = myfamily.Family() - return Family._families[fam] + raise UnknownFamily(u'Family %s does not exist' % fam) + cls = mod.Family() + if cls.name != fam: + warn(u'Family name %s does not match family module name %s' + % (cls.name, fam), FamilyMaintenanceWarning) + # Family 'name' and the 'langs' codes must be ascii, and the + # codes must be lower-case due to the Site loading algorithm. + if not all(x in NAME_CHARACTERS for x in cls.name): + warn(u'Family name %s contains non-ascii characters' % cls.name, + FamilyMaintenanceWarning) + # FIXME: wikisource uses code '-' for www.wikisource.org + if not all(all(x in CODE_CHARACTERS for x in code) and + (cls.name == 'wikisource' or code[0] != '-') + for code in cls.langs.keys()): + warn(u'Family %s codes contains non-ascii characters', + FamilyMaintenanceWarning) + Family._families[fam] = cls + return cls
@property def iwkeys(self): diff --git a/pywikibot/site.py b/pywikibot/site.py index 525e98e..a6a5330 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -493,7 +493,15 @@ @type sysop: str
""" - self.__code = code.lower() + if code.lower() != code: + # Note the Site function in __init__ also emits a UserWarning + # for this condition, showing the callers file and line no. + pywikibot.log(u'BaseSite: code "%s" converted to lowercase' % code) + code = code.lower() + if not all(x in pywikibot.family.CODE_CHARACTERS for x in str(code)): + pywikibot.log(u'BaseSite: code "%s" contains invalid characters' + % code) + self.__code = code if isinstance(fam, basestring) or fam is None: self.__family = pywikibot.family.Family.load(fam) else: @@ -504,19 +512,27 @@ if self.__code in self.__family.obsolete: if self.__family.obsolete[self.__code] is not None: self.__code = self.__family.obsolete[self.__code] + # Note the Site function in __init__ emits a UserWarning + # for this condition, showing the callers file and line no. + pywikibot.log(u'Site %s instantiated using code %s' + % (self, code)) else: # no such language anymore self.obsolete = True + pywikibot.log(u'Site %s instantiated and marked "obsolete" ' + u'to prevent access' % self) elif self.__code not in self.languages(): if self.__family.name in list(self.__family.langs.keys()) and \ len(self.__family.langs) == 1: - oldcode = self.__code self.__code = self.__family.name if self.__family == pywikibot.config.family \ - and oldcode == pywikibot.config.mylang: + and code == pywikibot.config.mylang: pywikibot.config.mylang = self.__code + warn(u'Global configuration variable "mylang" changed to ' + u'"%s" while instantiating site %s' + % (self.__code, self), UserWarning) else: - raise UnknownSite("Language '%s' does not exist in family %s" + raise UnknownSite(u"Language '%s' does not exist in family %s" % (self.__code, self.__family.name))
self.nocapitalize = self.code in self.family.nocapitalize
pywikibot-commits@lists.wikimedia.org