jenkins-bot has submitted this change and it was merged.
Change subject: timestripper: prevent recognizing components too far from each other
......................................................................
timestripper: prevent recognizing components too far from each other
timestripper should not be too flexible about the locations
of the components of a timestamp. The added test demonstrates
a false positive, incorrectly recognized as a timestamp.
This patch places a limit on the distance between neighboring
components of a timestamp. Tentatively, the limit is set to 10 characters.
Change-Id: I8ef86e21f08248d6abb7d1b78252029d2ce0c017
---
M pywikibot/textlib.py
M tests/timestripper_tests.py
2 files changed, 44 insertions(+), 14 deletions(-)
Approvals:
Dalba: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index fc7c4b1..5a0e80e 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -128,6 +128,11 @@
'or': u'୦୧୨୩୪୫୬୭୮୯',
}
+# Used in TimeStripper. When a timestamp-like line has gaps longer
+# than this between year, month, etc. in it, then the line will not be
+# considered to contain a timestamp.
+TIMESTAMP_GAP_LIMIT = 10
+
def to_local_digits(phrase, lang):
"""
@@ -1972,17 +1977,19 @@
return (txt, None)
@staticmethod
- def _valid_date_dict_order(dateDict):
+ def _valid_date_dict_positions(dateDict):
"""Check consistency of reasonable positions for groups."""
- day_pos = dateDict['day']['pos']
- month_pos = dateDict['month']['pos']
- year_pos = dateDict['year']['pos']
- time_pos = dateDict['time']['pos']
- tzinfo_pos = dateDict['tzinfo']['pos']
+ time_pos = dateDict['time']['start']
+ tzinfo_pos = dateDict['tzinfo']['start']
+ date_pos = sorted(
+ (dateDict['day'], dateDict['month'], dateDict['year']),
+ key=lambda x: x['start'])
+ min_pos, max_pos = date_pos[0]['start'], date_pos[-1]['start']
+ max_gap = max(x[1]['start'] - x[0]['end']
+ for x in zip(date_pos, date_pos[1:]))
- date_pos = sorted((day_pos, month_pos, year_pos))
- min_pos, max_pos = date_pos[0], date_pos[-1]
-
+ if max_gap > TIMESTAMP_GAP_LIMIT:
+ return False
if tzinfo_pos < min_pos or tzinfo_pos < time_pos:
return False
if min_pos < tzinfo_pos < max_pos:
@@ -2023,15 +2030,16 @@
line, match_obj = self._last_match_and_replace(line, pat)
if match_obj:
for group, value in match_obj.groupdict().items():
- pos = match_obj.start(group)
- # Store also match pos in line, for later order check.
- matchDict = {group: {'value': value, 'pos': pos}}
- dateDict.update(matchDict)
+ start, end = (match_obj.start(group), match_obj.end(group))
+ # The positions are stored for later validation
+ dateDict[group] = {
+ 'value': value, 'start': start, 'end': end
+ }
# all fields matched -> date valid
# groups are in a reasonable order.
if (all(g in dateDict for g in self.groups) and
- self._valid_date_dict_order(dateDict)):
+ self._valid_date_dict_positions(dateDict)):
# remove 'time' key, now split in hour/minute and not needed
# by datetime.
del dateDict['time']
diff --git a/tests/timestripper_tests.py b/tests/timestripper_tests.py
index 90eca23..1fdd938 100644
--- a/tests/timestripper_tests.py
+++ b/tests/timestripper_tests.py
@@ -142,6 +142,28 @@
)
+class TestTimeStripperNumberAndDate(TestTimeStripperCase):
+
+ """Test cases for lines with (non-year) numbers and timestamps."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ def test_four_digit_is_not_year_with_no_timestamp(self):
+ """A 4-digit number should not be mistaken as year (w/o timestamp)."""
+ self.assertIsNone(
+ self.ts.timestripper(
+ '2000 people will meet on 16 December at 22:00 (UTC).'))
+
+ def test_four_digit_is_not_year_with_timestamp(self):
+ """A 4-digit number should not be mistaken as year (w/ timestamp)."""
+ self.assertEqual(
+ self.ts.timestripper(
+ '2000 people will attend. --12:12, 14 December 2015 (UTC)'),
+ datetime.datetime(
+ 2015, 12, 14, 12, 12, tzinfo=tzoneFixedOffset(0, 'UTC')))
+
+
class TestTimeStripperLanguage(TestCase):
"""Test cases for English language."""
--
To view, visit https://gerrit.wikimedia.org/r/321862
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8ef86e21f08248d6abb7d1b78252029d2ce0c017
Gerrit-PatchSet: 10
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Whym <whym(a)whym.org>
Gerrit-Reviewer: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #3737
Status: Errored
Duration: 25 seconds
Commit: b8a2684 (master)
Author: multichill
Message: T150521: Don't crash on queries with optional values
The current implementation assumes variables are always set. When using SPARQL OPTIONAL, values are sometimes not set. In that case, set them to None.
Change-Id: I4af22d4faf42ca1e20581c9dd094237cf3467f71
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/6d1e785996f6...b8a26843…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/180486657
--
You can configure recipients for build notifications in your .travis.yml file. See https://docs.travis-ci.com/user/notifications
jenkins-bot has submitted this change and it was merged.
Change subject: T150521: Don't crash on queries with optional values
......................................................................
T150521: Don't crash on queries with optional values
The current implementation assumes variables are always set. When using SPARQL OPTIONAL, values are sometimes not set. In that case, set them to None.
Change-Id: I4af22d4faf42ca1e20581c9dd094237cf3467f71
---
M pywikibot/data/sparql.py
1 file changed, 10 insertions(+), 6 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py
index 70c6563..e95c916 100644
--- a/pywikibot/data/sparql.py
+++ b/pywikibot/data/sparql.py
@@ -65,13 +65,17 @@
for row in data['results']['bindings']:
values = {}
for var in qvars:
- if full_data:
- if row[var]['type'] not in VALUE_TYPES:
- raise ValueError('Unknown type: %s' % row[var]['type'])
- valtype = VALUE_TYPES[row[var]['type']]
- values[var] = valtype(row[var], entity_url=self.entity_url)
+ if var in row:
+ if full_data:
+ if row[var]['type'] not in VALUE_TYPES:
+ raise ValueError('Unknown type: %s' % row[var]['type'])
+ valtype = VALUE_TYPES[row[var]['type']]
+ values[var] = valtype(row[var], entity_url=self.entity_url)
+ else:
+ values[var] = row[var]['value']
else:
- values[var] = row[var]['value']
+ # var is not available (OPTIONAL is probably used)
+ values[var] = None
result.append(values)
return result
else:
--
To view, visit https://gerrit.wikimedia.org/r/322648
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4af22d4faf42ca1e20581c9dd094237cf3467f71
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <Ladsgroup(a)gmail.com>
Gerrit-Reviewer: Magul <tomasz.magulski(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Smalyshev <smalyshev(a)wikimedia.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Add fake user agent test case
......................................................................
Add fake user agent test case
This test case will check if fake UA generation can be toggled on/off with the config file, and if the method generates a random UA every time. Tests repeat for the two UA-generating modules, browseragents and fake_useragent.
Bug: T124005
Change-Id: Id9b6ff2d62917f84cfabb4af115cc7a887e42762
---
M tests/http_tests.py
1 file changed, 52 insertions(+), 1 deletion(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/http_tests.py b/tests/http_tests.py
index 35d705f..05df31b 100644
--- a/tests/http_tests.py
+++ b/tests/http_tests.py
@@ -24,7 +24,7 @@
)
from tests import join_images_path
-from tests.aspects import unittest, TestCase, DeprecationTestCase
+from tests.aspects import unittest, TestCase, DeprecationTestCase, require_modules
class HttpTestCase(TestCase):
@@ -285,6 +285,57 @@
self.assertIn('Python/' + str(PYTHON_VERSION[0]), http.user_agent())
+class FakeUserAgentTestCase(TestCase):
+
+ """Test the generation of fake user agents.
+
+ If the method cannot import either browseragents or fake_useragent, the
+ default user agent will be returned, causing tests to fail. Therefore tests
+ will skip if neither is present.
+ """
+
+ net = False
+
+ def setUp(self):
+ """Set up unit test."""
+ self.orig_fake_user_agent = config.fake_user_agent
+
+ def tearDown(self):
+ """Tear down unit test."""
+ config.fake_user_agent = self.orig_fake_user_agent
+
+ def _test_fake_user_agent_config(self):
+ """Test if method honours configuration toggle."""
+ # ON: True and None in config are considered turned on.
+ config.fake_user_agent = True
+ self.assertNotEqual(http.get_fake_user_agent(), http.user_agent())
+ config.fake_user_agent = None
+ self.assertNotEqual(http.get_fake_user_agent(), http.user_agent())
+
+ # OFF: All other values won't make it return random UA.
+ config.fake_user_agent = False
+ self.assertEqual(http.get_fake_user_agent(), http.user_agent())
+ config.fake_user_agent = 'ArbitraryValue'
+ self.assertEqual(http.get_fake_user_agent(), 'ArbitraryValue')
+
+ def _test_fake_user_agent_randomness(self):
+ """Test if user agent returns are randomized."""
+ config.fake_user_agent = True
+ self.assertNotEqual(http.get_fake_user_agent(), http.get_fake_user_agent())
+
+ @require_modules('browseragents')
+ def test_with_browseragents(self):
+ """Test fake user agent generation with browseragents module."""
+ self._test_fake_user_agent_config()
+ self._test_fake_user_agent_randomness()
+
+ @require_modules('fake_useragent')
+ def test_with_fake_useragent(self):
+ """Test fake user agent generation with fake_useragent module."""
+ self._test_fake_user_agent_config()
+ self._test_fake_user_agent_randomness()
+
+
class CharsetTestCase(TestCase):
"""Test that HttpRequest correctly handles the charsets given."""
--
To view, visit https://gerrit.wikimedia.org/r/324633
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Id9b6ff2d62917f84cfabb4af115cc7a887e42762
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Dargasia <thx(a)riseup.net>
Gerrit-Reviewer: Dargasia <thx(a)riseup.net>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>