jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Use the "default"-field in Siteinfo to store cache time
......................................................................
[FEAT] Use the "default"-field in Siteinfo to store cache time
This allows Siteinfo to automatically reload after a certain time
has elapsed. It is also possible to determine the time of the last
request for each field. As a use case, live_version now caches
its value for up to a day.
The cache time is the time of the request, but the request might be
cached separately (which is the case in the tests), so it's not the
time the server actually sent it.
Default values in the caches (siprops which the server didn't support)
are always expired and are always rerequested, except when the expiry
is set to False (that value basically never expires).
Change-Id: Iff4b6a68e4cdf7f3fb5be17d6704d2b23cae2175
---
M pywikibot/site.py
1 file changed, 80 insertions(+), 34 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 5132dfc..9b3a2ee 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -937,7 +937,7 @@
else:
return tools.EMPTY_DEFAULT
- def _get_siteinfo(self, prop, force=False):
+ def _get_siteinfo(self, prop, expiry):
"""
Retrieve a siteinfo property. All properties which the site doesn't
support contain the default value. Because pre-1.12 no data was
@@ -946,8 +946,8 @@
@param prop: The property names of the siteinfo.
@type prop: str or iterable
- @param force: Don't access the cached request.
- @type force: bool
+ @param expiry: The expiry date of the cached request.
+ @type expiry: int (days), L{datetime.timedelta}, False (config)
@return: A dictionary with the properties of the site. Each entry in
the dictionary is a tuple of the value and a boolean to save if it
is the default value.
@@ -962,7 +962,7 @@
raise ValueError('At least one property name must be provided.')
try:
data = pywikibot.data.api.CachedRequest(
- expiry=0 if force else pywikibot.config.API_config_expiry,
+ expiry=pywikibot.config.API_config_expiry if expiry is False else expiry,
site=self._site,
action='query',
meta='siteinfo',
@@ -971,14 +971,14 @@
if e.code == 'siunknown_siprop':
if len(props) == 1:
pywikibot.log(u"Unable to get siprop '{0}'".format(props[0]))
- return {props[0]: (Siteinfo._get_default(props[0]), True)}
+ return {props[0]: (Siteinfo._get_default(props[0]), False)}
else:
pywikibot.log(u"Unable to get siteinfo, because at least "
u"one property is unknown: '{0}'".format(
u"', '".join(props)))
results = {}
for prop in props:
- results.update(self._get_siteinfo(prop, force))
+ results.update(self._get_siteinfo(prop, expiry))
return results
else:
raise
@@ -991,17 +991,26 @@
data['warnings']['siteinfo']['*']).group(1).split(','):
prop = prop.strip()
invalid_properties += [prop]
- result[prop] = (Siteinfo._get_default(prop), True)
+ result[prop] = (Siteinfo._get_default(prop), False)
pywikibot.log(u"Unable to get siprop(s) '{0}'".format(
u"', '".join(invalid_properties)))
if 'query' in data:
- # todo iterate through the properties!
+ cache_time = datetime.datetime.utcnow()
for prop in props:
if prop in data['query']:
- result[prop] = (data['query'][prop], False)
+ result[prop] = (data['query'][prop], cache_time)
return result
- def _get_general(self, key, force):
+ @staticmethod
+ def _is_expired(cache_date, expire):
+ if expire is False: # can never expire
+ return False
+ elif not cache_date: # default values are always expired
+ return True
+ else:
+ return datetime.datetime.utcnow() - expire < cache_date
+
+ def _get_general(self, key, expiry):
"""
Return a siteinfo property which is loaded by default.
@@ -1012,10 +1021,11 @@
@param key: The key to search for.
@type key: str
- @param force: If 'general' should be queried in any case.
- @type force: bool
- @return: If that property was retrived via this method. Returns an
- empty tuple if it wasn't retrieved.
+ @param expiry: If the cache is older than the expiry it ignores the
+ cache and queries the server to get the newest value.
+ @type expiry: int (days), L{datetime.timedelta}, False (never)
+ @return: If that property was retrived via this method. Returns None if
+ the key was not in the retreived values.
@rtype: various (the value), bool (if the default value is used)
"""
if 'general' not in self._cache:
@@ -1023,6 +1033,7 @@
force = True
props = ['namespaces', 'namespacealiases']
else:
+ force = Siteinfo._is_expired(self._cache['general'][1], expiry)
props = []
if force:
props = [prop for prop in props if prop not in self._cache]
@@ -1031,23 +1042,26 @@
u"Load siteinfo properties '{0}' along with 'general'".format(
u"', '".join(props)), _logger)
props += ['general']
- default_info = self._get_siteinfo(props, force)
+ default_info = self._get_siteinfo(props, expiry)
for prop in props:
self._cache[prop] = default_info[prop]
if key in default_info:
return default_info[key]
if key in self._cache['general'][0]:
- return self._cache['general'][0][key], False
+ return self._cache['general'][0][key], self._cache['general']
else:
- return tuple()
+ return None
def __getitem__(self, key):
"""Return a siteinfo property, caching and not forcing it."""
return self.get(key, False) # caches and doesn't force it
- def get(self, key, get_default=True, cache=True, force=False):
+ def get(self, key, get_default=True, cache=True, expiry=False):
"""
Return a siteinfo property.
+
+ It will never throw an APIError if it only stated, that the siteinfo
+ property doesn't exist. Instead it will use the default value.
@param key: The name of the siteinfo property.
@type key: str
@@ -1056,30 +1070,38 @@
@param cache: Caches the result interally so that future accesses via
this method won't query the server.
@type cache: bool
- @param force: Ignores the cache and always queries the server to get
- the newest value.
- @type force: bool
+ @param expiry: If the cache is older than the expiry it ignores the
+ cache and queries the server to get the newest value.
+ @type expiry: int/float (days), L{datetime.timedelta}, False (never)
@return: The gathered property
@rtype: various
+ @raise KeyError: If the key is not a valid siteinfo property and the
+ get_default option is set to False.
@see: L{_get_siteinfo}
"""
- if not force:
+ # expire = 0 (or timedelta(0)) are always expired and their bool is
+ # False, so skip them EXCEPT if it's literally False, then they expire
+ # never: "expiry is False" is different than "not expiry"!
+ if expiry or expiry is False:
+ # if it's a int convert to timedelta
+ if expiry is not False and isinstance(expiry, (int, float)):
+ expiry = datetime.timedelta(expiry)
try:
cached = self._get_cached(key)
except KeyError:
cached = None
- # a not recognised result was cached, but they aren't requested
- if cached:
- if cached[1] and not get_default:
+ else: # cached value available
+ # is a default value, but isn't accepted
+ if not cached[1] and not get_default:
raise KeyError(key)
- else:
+ elif not Siteinfo._is_expired(cached[1], expiry):
return copy.deepcopy(cached[0])
- preloaded = self._get_general(key, force)
+ preloaded = self._get_general(key, expiry)
if not preloaded:
- preloaded = self._get_siteinfo(key, force)[key]
+ preloaded = self._get_siteinfo(key, expiry)[key]
else:
cache = False
- if preloaded[1] and not get_default:
+ if not preloaded[1] and not get_default:
raise KeyError(key)
else:
if cache:
@@ -1090,7 +1112,7 @@
"""Return the cached value or a KeyError exception if not cached."""
if 'general' in self._cache:
if key in self._cache['general'][0]:
- return (self._cache['general'][0][key], False)
+ return self._cache['general'][0][key], self._cache['general'][1]
else:
return self._cache[key]
raise KeyError(key)
@@ -1106,8 +1128,26 @@
def is_recognised(self, key):
"""Return if 'key' is a valid property name. 'None' if not cached."""
+ time = self.get_requested_time(key)
+ if time is None:
+ return None
+ else:
+ return bool(time)
+
+ def get_requested_time(self, key):
+ """
+ Return when 'key' was successfully requested from the server.
+
+ If the property is actually in the siprop 'general' it returns the
+ last request from the 'general' siprop.
+
+ @param key: The siprop value or a property of 'general'.
+ @type key: basestring
+ @return: The last time the siprop of 'key' was requested.
+ @rtype: None (never), False (default), L{datetime.datetime} (cached)
+ """
try:
- return not self._get_cached(key)[1]
+ return self._get_cached(key)[1]
except KeyError:
return None
@@ -1806,12 +1846,18 @@
def live_version(self, force=False):
"""Return the 'real' version number found on [[Special:Version]].
- Return value is a tuple (int, int, str) of the major and minor
- version numbers and any other text contained in the version.
+ By default the version number is cached for one day.
+ @param force: If the version should be read always from the server and
+ never from the cache.
+ @type force: bool
+ @return: A tuple containing the major, minor version number and any
+ text after that. If an error occured (0, 0, 0) is returned.
+ @rtype: int, int, str
"""
try:
- versionstring = self.siteinfo.get('generator', force=force)
+ versionstring = self.siteinfo.get('generator',
+ expiry=0 if force else 1)
m = re.match(r"^MediaWiki ([0-9]+)\.([0-9]+)(.*)$", versionstring)
if m:
return (int(m.group(1)), int(m.group(2)), m.group(3))
--
To view, visit https://gerrit.wikimedia.org/r/155097
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iff4b6a68e4cdf7f3fb5be17d6704d2b23cae2175
Gerrit-PatchSet: 11
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] One hasExtension() call was overlooked
......................................................................
[FIX] One hasExtension() call was overlooked
Change-Id: I0dfb3304f7a848df2da4ea3746b6adc2a1576bd9
---
M tests/page_tests.py
1 file changed, 1 insertion(+), 1 deletion(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/page_tests.py b/tests/page_tests.py
index dca5d2f..0c6c259 100644
--- a/tests/page_tests.py
+++ b/tests/page_tests.py
@@ -311,7 +311,7 @@
Test the integration with
Extension:Disambiguator
"""
- if not site.hasExtension('Disambiguator', False):
+ if not site.has_extension('Disambiguator'):
raise unittest.SkipTest('Disambiguator extension not loaded on test site')
pg = pywikibot.Page(site, 'Random')
pg._pageprops = set(['disambiguation', ''])
--
To view, visit https://gerrit.wikimedia.org/r/155250
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0dfb3304f7a848df2da4ea3746b6adc2a1576bd9
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Disable script makecat in tests
......................................................................
Disable script makecat in tests
The makecat script needs to be disabled until bug 69781 is
resolved.
Change-Id: I39d0c868f8f81f9ddb358b57b63e2a1a23d2838e
---
M tests/script_tests.py
1 file changed, 1 insertion(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/script_tests.py b/tests/script_tests.py
index 7e0100d..b229222 100644
--- a/tests/script_tests.py
+++ b/tests/script_tests.py
@@ -58,6 +58,7 @@
and name != 'welcome.py' # result depends on speed
and name != 'script_wui.py' # depends on lua compiling
and name != 'editarticle.py' # requires a X DISPLAY
+ and name != 'makecat.py' # bug 69781
]
return ['login'] + script_list
--
To view, visit https://gerrit.wikimedia.org/r/155240
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I39d0c868f8f81f9ddb358b57b63e2a1a23d2838e
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Decoding text: catch exception
......................................................................
Decoding text: catch exception
Wrapped the decode instruction derived from
change Ia2051a2a80851b15b1a04a135763291bd633d4e3
in a "try: except:" block, as suggested in comment 9 of bug 67410.
Also: added a comma to the excluded characters of the "self.CHARSET"
regex, in case "contentType" contains a list of values from the HTML meta tag.
Change-Id: I3af86d3386ea919001287fe1c057932c16537eb4
---
M scripts/reflinks.py
1 file changed, 6 insertions(+), 2 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index 65dcb5b..a7f50f0 100644
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -434,7 +434,7 @@
# Regex to grasp content-type meta HTML tag in HTML source
self.META_CONTENT = re.compile(r'(?i)<meta[^>]*content\-type[^>]*>')
# Extract the encoding from a charset property (from content-type !)
- self.CHARSET = re.compile(r'(?i)charset\s*=\s*(?P<enc>[^\'";>/]*)')
+ self.CHARSET = re.compile(r'(?i)charset\s*=\s*(?P<enc>[^\'",;>/]*)')
# Extract html title from page
self.TITLE = re.compile(r'(?is)(?<=<title>).*?(?=</title>)')
# Matches content inside <script>/<style>/HTML comments
@@ -683,7 +683,11 @@
if 'utf-8' not in enc:
enc.append('utf-8')
- u = linkedpagetext.decode(enc[0]) # Bug 67410
+ try:
+ u = linkedpagetext.decode(enc[0]) # Bug 67410
+ except (UnicodeDecodeError, LookupError) as e:
+ pywikibot.output(u'%s : Decoding error - %s' % (ref.link, e))
+ continue
# Retrieves the first non empty string inside <title> tags
for m in self.TITLE.finditer(u):
--
To view, visit https://gerrit.wikimedia.org/r/155226
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3af86d3386ea919001287fe1c057932c16537eb4
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Beta16 <l.rabinelli(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Wikibase API now reports the Qid for missing items
......................................................................
Wikibase API now reports the Qid for missing items
Whereas Wikibase previously reported -1 for deleted items, it now reports
the id of the item. E.g. Q404 is a deleted item, for which wbgetentities
now returns the successful result
{"id": "Q404", "missing": ""}
A test was expecting -1, and now needs to expect Q404.
Change-Id: I713476bfbac61e1e7561cac94e1fec21a44d4db1
---
M tests/wikibase_tests.py
1 file changed, 2 insertions(+), 3 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
XZise: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py
index a9142dd..0f1cba9 100644
--- a/tests/wikibase_tests.py
+++ b/tests/wikibase_tests.py
@@ -254,9 +254,8 @@
self.assertEquals(hasattr(item, '_content'), False)
self.assertRaises(pywikibot.NoPage, item.get)
self.assertEquals(hasattr(item, '_content'), True)
- # the title has now changed
- self.assertEquals(item._link._title, '-1')
- self.assertEquals(item.title(), '-1')
+ self.assertEquals(item._link._title, 'Q404')
+ self.assertEquals(item.title(), 'Q404')
self.assertEquals(item.exists(), False)
def test_fromPage_noprops(self):
--
To view, visit https://gerrit.wikimedia.org/r/155123
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I713476bfbac61e1e7561cac94e1fec21a44d4db1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>