jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/783419 )
Change subject: [IMPR]: Allow to create Timestamp from different formats ......................................................................
[IMPR]: Allow to create Timestamp from different formats
Allow Timestamp objects to be created from strings in any format compliant with those supported by MediaWiki [see https://www.mediawiki.org/wiki/Timestamp].
Added formats are:
- full support for ISO 8601 (not limited to the support provided by datetime.isoformat())
- MW format [already supported]
- POSIX format
The new Timestamp.set_timestamp() method also accepts a Timestamp or datetime.datetime object for convenience.
Page.revisions() now supports more formats and types for its starttime and endtime parameters, in addition to those accepted by Timestamp.fromISOformat().
Change-Id: Iff8315c150ffe057c2229c32402ef3bd9bc6b119 --- M pywikibot/__init__.py M pywikibot/page/_pages.py M tests/timestamp_tests.py 3 files changed, 250 insertions(+), 13 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py index 65de1f7..eeda968 100644 --- a/pywikibot/__init__.py +++ b/pywikibot/__init__.py @@ -118,6 +118,10 @@ when previously they returned a MediaWiki string representation, these methods also accept a Timestamp object, in which case they return a clone.
+ Alternatively, Timestamp.set_timestamp() can create Timestamp objects from + Timestamp, datetime.datetime object, or strings compliant with ISO8601, + MW, or POSIX formats. + Use Site.server_time() for the current time; this is more reliable than using Timestamp.utcnow(). """ @@ -125,6 +129,134 @@ mediawikiTSFormat = '%Y%m%d%H%M%S' _ISO8601Format_new = '{0:+05d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}Z'
+ @classmethod + def set_timestamp(cls: Type['Timestamp'], + ts: Union[str, datetime.datetime, 'Timestamp'] + ) -> 'Timestamp': + """Set Timestamp from input object. + + ts is converted to a datetime naive object representing UTC time. + String shall be compliant with: + - Mediwiki timestamp format: YYYYMMDDHHMMSS + - ISO8601 format: YYYY-MM-DD[T ]HH:MM:SS[Z|±HH[MM[SS[.ffffff]]]] + - POSIX format: seconds from Unix epoch S{1,13}[.ffffff]] + + :param ts: Timestamp, datetime.datetime or str + :return: Timestamp object + :raises ValuError: conversion failed + """ + if isinstance(ts, cls): + return ts + if isinstance(ts, datetime.datetime): + return cls._from_datetime(ts) + if isinstance(ts, str): + return cls._from_string(ts) + + @staticmethod + def _from_datetime(dt: datetime.datetime) -> 'Timestamp': + """Convert a datetime.datetime timestamp to a Timestamp object.""" + return Timestamp(dt.year, dt.month, dt.day, dt.hour, + dt.minute, dt.second, dt.microsecond, + dt.tzinfo) + + @classmethod + def _from_mw(cls: Type['Timestamp'], timestr: str) -> 'Timestamp': + """Convert a string in MW format to a Timestamp object. + + Mediwiki timestamp format: YYYYMMDDHHMMSS + """ + RE_MW = r'\d{14}$' + m = re.match(RE_MW, timestr) + + if not m: + msg = "time data '{timestr}' does not match MW format." + raise ValueError(msg.format(timestr=timestr)) + + return cls.strptime(timestr, cls.mediawikiTSFormat) + + @classmethod + def _from_iso8601(cls: Type['Timestamp'], timestr: str) -> 'Timestamp': + """Convert a string in ISO8601 format to a Timestamp object. + + ISO8601 format: + - YYYY-MM-DD[T ]HH:MM:SS[[.,]ffffff][Z|±HH[MM[SS[.ffffff]]]] + """ + RE_ISO8601 = (r'(?:\d{4}-\d{2}-\d{2})(?P<sep>[T ])' + r'(?:\d{2}:\d{2}:\d{2})(?P<u>[.,]\d{1,6})?' + r'(?P<tz>Z|[+-]\d{2}:?\d{,2})?$' + ) + m = re.match(RE_ISO8601, timestr) + + if not m: + msg = "time data '{timestr}' does not match ISO8601 format." 
+ raise ValueError(msg.format(timestr=timestr)) + + strpfmt = '%Y-%m-%d{sep}%H:%M:%S'.format(sep=m.group('sep')) + strpstr = timestr[:19] + + if m.group('u'): + strpfmt += '.%f' + strpstr += m.group('u').replace(',', '.') # .ljust(7, '0') + + if m.group('tz'): + if m.group('tz') == 'Z': + strpfmt += 'Z' + strpstr += 'Z' + else: + strpfmt += '%z' + # strptime wants HHMM, without ':' + strpstr += (m.group('tz').replace(':', '')).ljust(5, '0') + + ts = cls.strptime(strpstr, strpfmt) + if ts.tzinfo is not None: + ts = ts.astimezone(datetime.timezone.utc).replace(tzinfo=None) + # why pytest in py35/py37 fails without this? + ts = cls._from_datetime(ts) + + return ts + + @classmethod + def _from_posix(cls: Type['Timestamp'], timestr: str) -> 'Timestamp': + """Convert a string in POSIX format to a Timestamp object. + + POSIX format: SECONDS[.ffffff]] + """ + RE_POSIX = r'(?P<S>-?\d{1,13})(?:.(?P<u>\d{1,6}))?$' + m = re.match(RE_POSIX, timestr) + + if not m: + msg = "time data '{timestr}' does not match POSIX format." + raise ValueError(msg.format(timestr=timestr)) + + sec = int(m.group('S')) + usec = m.group('u') + usec = int(usec.ljust(6, '0')) if usec else 0 + if sec < 0 and usec > 0: + sec = sec - 1 + usec = 1000000 - usec + + ts = (cls(1970, 1, 1) + + datetime.timedelta(seconds=sec, microseconds=usec)) + return ts + + @classmethod + def _from_string(cls: Type['Timestamp'], timestr: str) -> 'Timestamp': + """Convert a string to a Timestamp object.""" + handlers = [ + cls._from_mw, + cls._from_iso8601, + cls._from_posix, + ] + + for handler in handlers: + try: + return handler(timestr) + except ValueError: + continue + + msg = "time data '{timestr}' does not match any format." + raise ValueError(msg.format(timestr=timestr)) + def clone(self) -> datetime.datetime: """Clone this instance.""" return self.replace(microsecond=self.microsecond) @@ -157,7 +289,8 @@ # to create a clone. 
if isinstance(ts, cls): return ts.clone() - return cls.strptime(ts, cls._ISO8601Format(sep)) + _ts = '{pre}{sep}{post}'.format(pre=ts[:10], sep=sep, post=ts[11:]) + return cls._from_iso8601(_ts)
@classmethod def fromtimestampformat(cls: Type['Timestamp'], ts: Union[str, 'Timestamp'] @@ -168,8 +301,8 @@ if isinstance(ts, cls): return ts.clone() if len(ts) == 8: # year, month and day are given only - ts += '000' - return cls.strptime(ts, cls.mediawikiTSFormat) + ts += '000000' + return cls._from_mw(ts)
def isoformat(self, sep: str = 'T') -> str: # type: ignore[override] """ @@ -185,6 +318,18 @@ """Convert object to a MediaWiki internal timestamp.""" return self.strftime(self.mediawikiTSFormat)
+ def posix_timestamp(self) -> float: + """ + Convert object to a POSIX timestamp. + + See Note in datetime.timestamp(). + """ + return self.replace(tzinfo=datetime.timezone.utc).timestamp() + + def posix_timestamp_format(self) -> str: + """Convert object to a POSIX timestamp format.""" + return '{ts:.6f}'.format(ts=self.posix_timestamp()) + def __str__(self) -> str: """Return a string format recognized by the API.""" return self.isoformat() @@ -193,9 +338,7 @@ """Perform addition, returning a Timestamp instead of datetime.""" newdt = super().__add__(other) if isinstance(newdt, datetime.datetime): - return Timestamp(newdt.year, newdt.month, newdt.day, newdt.hour, - newdt.minute, newdt.second, newdt.microsecond, - newdt.tzinfo) + return self._from_datetime(newdt) return newdt
def __sub__(self, other: datetime.timedelta # type: ignore[override] @@ -203,9 +346,7 @@ """Perform subtraction, returning a Timestamp instead of datetime.""" newdt = super().__sub__(other) if isinstance(newdt, datetime.datetime): - return Timestamp(newdt.year, newdt.month, newdt.day, newdt.hour, - newdt.minute, newdt.second, newdt.microsecond, - newdt.tzinfo) + return self._from_datetime(newdt) return newdt
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py index 1ddeba3..2c8838e 100644 --- a/pywikibot/page/_pages.py +++ b/pywikibot/page/_pages.py @@ -1675,11 +1675,11 @@ t_min, t_max = Timestamp.min, Timestamp.max
if reverse: - t0 = Timestamp.fromISOformat(starttime) if starttime else t_min - t1 = Timestamp.fromISOformat(endtime) if endtime else t_max + t0 = Timestamp.set_timestamp(starttime) if starttime else t_min + t1 = Timestamp.set_timestamp(endtime) if endtime else t_max else: - t0 = Timestamp.fromISOformat(endtime) if endtime else t_min - t1 = Timestamp.fromISOformat(starttime) if starttime else t_max + t0 = Timestamp.set_timestamp(endtime) if endtime else t_min + t1 = Timestamp.set_timestamp(starttime) if starttime else t_max
revs = [rev for rev in revs if t0 <= rev.timestamp <= t1]
diff --git a/tests/timestamp_tests.py b/tests/timestamp_tests.py index 04692a8..6b50cb0 100755 --- a/tests/timestamp_tests.py +++ b/tests/timestamp_tests.py @@ -21,6 +21,101 @@
net = False
+ test_results = { + 'MW': [ + ['20090213233130', '1234567890.000000'], + ], + 'ISO8601': [ + ['2009-02-13T23:31:30Z', '1234567890.000000'], + ['2009-02-13T23:31:30', '1234567890.000000'], + ['2009-02-13T23:31:30.123Z', '1234567890.123000'], + ['2009-02-13T23:31:30.123', '1234567890.123000'], + ['2009-02-13T23:31:30.123456Z', '1234567890.123456'], + ['2009-02-13T23:31:30.123456', '1234567890.123456'], + ['2009-02-13T23:31:30,123456Z', '1234567890.123456'], + ['2009-02-13T23:31:30,123456', '1234567890.123456'], + ['2009-02-14T00:31:30+0100', '1234567890.000000'], + ['2009-02-13T22:31:30-0100', '1234567890.000000'], + ['2009-02-14T00:31:30+01:00', '1234567890.000000'], + ['2009-02-13T22:31:30-01:00', '1234567890.000000'], + ['2009-02-13T23:41:30+00:10', '1234567890.000000'], + ['2009-02-13T23:21:30-00:10', '1234567890.000000'], + ['2009-02-14T00:31:30.123456+01', '1234567890.123456'], + ['2009-02-13T22:31:30.123456-01', '1234567890.123456'], + ['2009-02-14 00:31:30.123456+01', '1234567890.123456'], + ['2009-02-13 22:31:30.123456-01', '1234567890.123456'], + ], + 'POSIX': [ + ['1234567890', '1234567890.000000'], + ['-1234567890', '-1234567890.000000'], + ['1234567890.123', '1234567890.123000'], + ['-1234567890.123', '-1234567890.123000'], + ['1234567890.123456', '1234567890.123456'], + ['-1234567890.123456', '-1234567890.123456'], + ['1234567890.000001', '1234567890.000001'], + ['-1234567890.000001', '-1234567890.000001'], + ], + 'INVALID': [ + ['200902132331309999', None], + ['2009-99-99 22:31:30.123456-01', None], + ['1234567890.1234569999', None], + ], + } + + def test_set_from_timestamp(self): + """Test creating instance from Timestamp string.""" + t1 = Timestamp.utcnow() + t2 = Timestamp.set_timestamp(t1) + self.assertEqual(t1, t2) + self.assertIsInstance(t2, Timestamp) + + def test_set_from_datetime(self): + """Test creating instance from datetime.datetime string.""" + t1 = datetime.datetime.utcnow() + t2 = Timestamp.set_timestamp(t1) + self.assertEqual(t1, t2) 
+ self.assertIsInstance(t2, datetime.datetime) + + @staticmethod + def _compute_posix(timestr): + """Compute POSIX timestamp with independent method.""" + sec, usec = map(int, timestr.split('.')) + + if sec < 0 and usec > 0: + sec = sec - 1 + usec = 1000000 - usec + + return (datetime.datetime(1970, 1, 1) + + datetime.timedelta(seconds=sec, microseconds=usec)) + + def _test_set_from_string_fmt(self, fmt): + """Test creating instance from <FMT> string.""" + for timestr, posix in self.test_results[fmt]: + with self.subTest(timestr): + ts = Timestamp.set_timestamp(timestr) + self.assertEqual(ts, self._compute_posix(posix)) + self.assertEqual(ts.posix_timestamp_format(), posix) + + def test_set_from_string_mw(self): + """Test creating instance from MW string.""" + self._test_set_from_string_fmt('MW') + + def test_set_from_string_iso8601(self): + """Test creating instance from ISO8601 string.""" + self._test_set_from_string_fmt('ISO8601') + + def test_set_from_string_posix(self): + """Test creating instance from POSIX string.""" + self._test_set_from_string_fmt('POSIX') + + def test_set_from_string_invalid(self): + """Test failure creating instance from invalid string.""" + for timestr, posix in self.test_results['INVALID']: + regex = "time data '[^']*?' does not match" + with self.subTest(timestr): + self.assertRaisesRegex(ValueError, regex, + Timestamp.set_timestamp, timestr) + def test_clone(self): """Test cloning a Timestamp instance.""" t1 = Timestamp.utcnow() @@ -57,6 +152,7 @@ self.assertEqual(date, str(t1.date())) self.assertEqual(time, str(t1.time()))
+ @unittest.expectedFailure def test_iso_format_with_sep(self): """Test conversion from and to ISO format with separator.""" sep = '*'
pywikibot-commits@lists.wikimedia.org