jenkins-bot has submitted this change and it was merged.
Change subject: Log & warn about Site instantiation oddities
......................................................................
Log & warn about Site instantiation oddities
The BaseSite constructor has a few branches that make changes
which the end user may not expect; logging them provides useful
information when debugging a script.
Add a few asserts in Family.load to detect problems early.
Change-Id: I374461c8b7481e9edae7d5c5db6ef8d39416243b
---
M pywikibot/__init__.py
M pywikibot/families/wikisource_family.py
M pywikibot/family.py
M pywikibot/site.py
4 files changed, 59 insertions(+), 9 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py
index b7161d8..53aaa13 100644
--- a/pywikibot/__init__.py
+++ b/pywikibot/__init__.py
@@ -21,6 +21,8 @@
else:
from Queue import Queue
+from warnings import warn
+
# Use pywikibot. prefix for all in-package imports; this is to prevent
# confusion with similarly-named modules in version 1 framework, for users
# who want to continue using both
@@ -601,6 +603,11 @@
_sites[key] = interface(code=code, fam=fam, user=user, sysop=sysop)
debug(u"Instantiated %s object '%s'"
% (interface.__name__, _sites[key]), _logger)
+
+ if _sites[key].code != code:
+ warn('Site %s instantiated using different code "%s"'
+ % (_sites[key], code), UserWarning, 2)
+
return _sites[key]
diff --git a/pywikibot/families/wikisource_family.py b/pywikibot/families/wikisource_family.py
index 2996a08..2abbaaa 100644
--- a/pywikibot/families/wikisource_family.py
+++ b/pywikibot/families/wikisource_family.py
@@ -26,6 +26,8 @@
self.langs = dict([(lang, '%s.wikisource.org' % lang)
for lang in self.languages_by_size])
+ # FIXME: '-' is invalid at the beginning of a hostname, and
+ # '-' is not a valid subdomain.
self.langs['-'] = 'wikisource.org'
# Global bot allowed languages on
https://meta.wikimedia.org/wiki/Bot_policy/Implementation#Current_implement…
diff --git a/pywikibot/family.py b/pywikibot/family.py
index 0969041..4848c6b 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -13,6 +13,7 @@
import re
import collections
import imp
+import string
import warnings
if sys.version_info[0] > 2:
@@ -20,13 +21,19 @@
else:
import urlparse
+from warnings import warn
+
import pywikibot
from pywikibot import config2 as config
from pywikibot.tools import deprecated, deprecate_arg
-from pywikibot.exceptions import UnknownFamily, Error
+from pywikibot.exceptions import Error, UnknownFamily, FamilyMaintenanceWarning
logger = logging.getLogger("pywiki.wiki.family")
+
+# Legal characters for Family.name and Family.langs keys
+NAME_CHARACTERS = string.ascii_letters + string.digits
+CODE_CHARACTERS = string.ascii_lowercase + string.digits + '-'
class Family(object):
@@ -858,6 +865,9 @@
"""
if fam is None:
fam = config.family
+
+ assert(all(x in NAME_CHARACTERS for x in fam))
+
if fam in Family._families:
return Family._families[fam]
@@ -881,11 +891,26 @@
# RuntimeWarning's while loading.
with warnings.catch_warnings():
warnings.simplefilter("ignore", RuntimeWarning)
- myfamily = imp.load_source(fam, config.family_files[fam])
+ mod = imp.load_source(fam, config.family_files[fam])
except (ImportError, KeyError):
- raise UnknownFamily("Family %s does not exist" % fam)
- Family._families[fam] = myfamily.Family()
- return Family._families[fam]
+ raise UnknownFamily(u'Family %s does not exist' % fam)
+ cls = mod.Family()
+ if cls.name != fam:
+ warn(u'Family name %s does not match family module name %s'
+ % (cls.name, fam), FamilyMaintenanceWarning)
+ # Family 'name' and the 'langs' codes must be ascii, and the
+ # codes must be lower-case due to the Site loading algorithm.
+ if not all(x in NAME_CHARACTERS for x in cls.name):
+ warn(u'Family name %s contains non-ascii characters' % cls.name,
+ FamilyMaintenanceWarning)
+ # FIXME: wikisource uses code '-' for www.wikisource.org
+ if not all(all(x in CODE_CHARACTERS for x in code) and
+ (cls.name == 'wikisource' or code[0] != '-')
+ for code in cls.langs.keys()):
+ warn(u'Family %s codes contains non-ascii characters',
+ FamilyMaintenanceWarning)
+ Family._families[fam] = cls
+ return cls
@property
def iwkeys(self):
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 525e98e..a6a5330 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -493,7 +493,15 @@
@type sysop: str
"""
- self.__code = code.lower()
+ if code.lower() != code:
+ # Note the Site function in __init__ also emits a UserWarning
+ # for this condition, showing the callers file and line no.
+ pywikibot.log(u'BaseSite: code "%s" converted to lowercase' % code)
+ code = code.lower()
+ if not all(x in pywikibot.family.CODE_CHARACTERS for x in str(code)):
+ pywikibot.log(u'BaseSite: code "%s" contains invalid characters'
+ % code)
+ self.__code = code
if isinstance(fam, basestring) or fam is None:
self.__family = pywikibot.family.Family.load(fam)
else:
@@ -504,19 +512,27 @@
if self.__code in self.__family.obsolete:
if self.__family.obsolete[self.__code] is not None:
self.__code = self.__family.obsolete[self.__code]
+ # Note the Site function in __init__ emits a UserWarning
+ # for this condition, showing the callers file and line no.
+ pywikibot.log(u'Site %s instantiated using code %s'
+ % (self, code))
else:
# no such language anymore
self.obsolete = True
+ pywikibot.log(u'Site %s instantiated and marked "obsolete" '
+ u'to prevent access' % self)
elif self.__code not in self.languages():
if self.__family.name in list(self.__family.langs.keys()) and \
len(self.__family.langs) == 1:
- oldcode = self.__code
self.__code = self.__family.name
if self.__family == pywikibot.config.family \
- and oldcode == pywikibot.config.mylang:
+ and code == pywikibot.config.mylang:
pywikibot.config.mylang = self.__code
+ warn(u'Global configuration variable "mylang" changed to '
+ u'"%s" while instantiating site %s'
+ % (self.__code, self), UserWarning)
else:
- raise UnknownSite("Language '%s' does not exist in family %s"
+ raise UnknownSite(u"Language '%s' does not exist in family %s"
% (self.__code, self.__family.name))
self.nocapitalize = self.code in self.family.nocapitalize
--
To view, visit
https://gerrit.wikimedia.org/r/190512
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I374461c8b7481e9edae7d5c5db6ef8d39416243b
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>