jenkins-bot has submitted this change and it was merged.
Change subject: page.py: introduced FileInfo ......................................................................
page.py: introduced FileInfo
Added automatic upcasting of Page to FilePage or Category in api.py. This makes CategoryPageGenerator and ImagePageGenerator obsolete and then deprecated.
Added caching of 'imageinfo' property in api.update_page() for: - FilePage._file_revisions: dictionary with imageinfo of all versions of file - FilePage.latest_file_info(): imageinfo of latest version of file - FilePage.oldest_file_info(): imageinfo of oldest version of file
Added FileInfo, a structure to hold imageinfo for the latest revision of FilePage.
In page.py, centralise in one place the check for: if not hasattr(self, '_imageinfo') and replace _imageinfo with latest_file_info.
Introduced compat-compatible equivalent getLatestUploader() and deprecated its use, superseded by accessing the latest_file_info.user attribute.
Deprecated getFileSHA1Sum(), superseded by accessing the latest_file_info.sha1 attribute.
Introduced get_file_history() as a dictionary of FileInfo() objects.
Bug: 73023 Change-Id: I334539d56278c6e2e47fd058212df81f1dae20f8 --- M pywikibot/data/api.py M pywikibot/page.py M pywikibot/site.py M scripts/checkimages.py M scripts/maintenance/compat2core.py M scripts/nowcommons.py 6 files changed, 133 insertions(+), 20 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index b93582e..a84b08c 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -1996,10 +1996,17 @@
""" p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns']) + ns = pagedata['ns'] + # Upcast to proper Page subclass. + if ns == 6: + p = pywikibot.FilePage(p) + elif ns == 14: + p = pywikibot.Category(p) update_page(p, pagedata, self.props) return p
+@deprecated("PageGenerator") class CategoryPageGenerator(PageGenerator):
"""Like PageGenerator, but yields Category objects instead of Pages.""" @@ -2010,6 +2017,7 @@ return pywikibot.Category(p)
+@deprecated("PageGenerator") class ImagePageGenerator(PageGenerator):
"""Like PageGenerator, but yields FilePage objects instead of Pages.""" @@ -2210,6 +2218,12 @@ if page._revid in page._revisions: page._text = page._revisions[page._revid].text
+ if 'imageinfo' in pagedict: + assert(isinstance(page, pywikibot.FilePage)) + for file_rev in pagedict['imageinfo']: + file_revision = pywikibot.page.FileInfo(file_rev) + page._file_revisions[file_revision.timestamp] = file_revision + if "categoryinfo" in pagedict: page._catinfo = pagedict["categoryinfo"]
diff --git a/pywikibot/page.py b/pywikibot/page.py index 6fa838d..1c318df 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -1987,9 +1987,52 @@ @deprecate_arg("insite", None) def __init__(self, source, title=u""): """Constructor.""" + self._file_revisions = {} # dictionary to cache File history. super(FilePage, self).__init__(source, title, 6) if self.namespace() != 6: raise ValueError(u"'%s' is not in the file namespace!" % title) + + @property + def latest_file_info(self): + """Retrieve and store information of latest Image rev. of FilePage. + + At the same time, the whole history of Image is fetched and cached in + self._file_revisions + + @return: instance of FileInfo() + + """ + if not len(self._file_revisions): + self.site.loadimageinfo(self, history=True) + latest_ts = max(self._file_revisions) + return self._file_revisions[latest_ts] + + @property + def oldest_file_info(self): + """Retrieve and store information of oldest Image rev. of FilePage. + + At the same time, the whole history of Image is fetched and cached in + self._file_revisions + + @return: instance of FileInfo() + + """ + if not len(self._file_revisions): + self.site.loadimageinfo(self, history=True) + oldest_ts = min(self._file_revisions) + return self._file_revisions[oldest_ts] + + def get_file_history(self): + """Return the file's version history. + + @return: dictionary with: + key: timestamp of the entry + value: instance of FileInfo() + + """ + if not hasattr(self, '_file_revisions'): + self.site.loadimageinfo(self, history=True) + return self._file_revisions
def getImagePageHtml(self): """ @@ -2008,9 +2051,7 @@ def fileUrl(self): """Return the URL for the file described on this page.""" # TODO add scaling option? - if not hasattr(self, '_imageinfo'): - self._imageinfo = self.site.loadimageinfo(self) - return self._imageinfo['url'] + return self.latest_file_info.url
@deprecated("fileIsShared") def fileIsOnCommons(self): @@ -2036,11 +2077,11 @@ return self.fileUrl().startswith( 'https://upload.wikimedia.org/wikipedia/commons/')
- @deprecated("FilePage.getFileSHA1Sum()") + @deprecated("FilePage.latest_file_info.sha1") def getFileMd5Sum(self): """Return image file's MD5 checksum.""" -# FIXME: MD5 might be performed on incomplete file due to server disconnection -# (see bug #1795683). + # FIXME: MD5 might be performed on incomplete file due to server disconnection + # (see bug #1795683). f = urlopen(self.fileUrl()) # TODO: check whether this needs a User-Agent header added h = hashlib.md5() @@ -2049,12 +2090,21 @@ f.close() return md5Checksum
+ @deprecated("FilePage.latest_file_info.sha1") def getFileSHA1Sum(self): """Return the file's SHA1 checksum.""" - if not hasattr(self, '_imageinfo'): - self._imageinfo = self.site.loadimageinfo(self) - return self._imageinfo['sha1'] + return self.latest_file_info.sha1
+ @deprecated("FilePage.latest_file_info.user") + def getLatestUploader(self): + """Return a list with latest uploader of the FilePage and timestamp. + + For compatibility with compat only. + + """ + return [self.latest_file_info.user, self.latest_file_info.timestamp] + + @deprecated('FilePage.get_file_history()') def getFileVersionHistory(self): """Return the file's version history.
@@ -4158,6 +4208,51 @@ self.text, self.rollbacktoken)
+class FileInfo(pywikibot.UnicodeMixin): + + """A structure holding imageinfo of latest rev. of FilePage. + + All keys of API imageinfo dictionary are mapped to FileInfo attributes. + Attributes can be retrieved both as self['key'] or self.key. + + Following attributes will be returned: + - timestamp, user, comment, url, size, sha1, mime, metadata + - archivename (not for latest revision) + + See Site.loadimageinfo() for details. + + Note: timestamp will be casted to pywikibot.Timestamp. + """ + + def __init__(self, file_revision): + """ + Create class with the dictionary returned by site.loadimageinfo(). + + @param page: FilePage containing the image. + @type page: FilePage object + """ + self.__dict__.update(file_revision) + self.timestamp = pywikibot.Timestamp.fromISOformat(self.timestamp) + + def __getitem__(self, key): + """Access attributes also with dict.like keys.""" + return getattr(self, key) + + def __eq__(self, other): + """Test if two File_info objects are equal.""" + return self.__dict__ == other.__dict__ + + def __unicode__(self): + """Return string representation.""" + _content = u', '.join( + u'{0}: {1}'.format(k, v) for k, v in self.__dict__.items()) + return u'{{{0}}}'.format(_content) + + def __repr__(self): + """Return a more complete string representation.""" + return self.__dict__.__repr__() + + class Link(ComparableMixin):
"""A MediaWiki link (local or interwiki). diff --git a/pywikibot/site.py b/pywikibot/site.py index f86e430..6a9d50c 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -2376,21 +2376,25 @@ "url", "size", "sha1", "mime", "metadata", "archivename"], **args) + # kept for backward compatibility + # TODO: when backward compatibility can be broken, adopt + # self._update_page() pattern and remove return for pageitem in query: if not self.sametitle(pageitem['title'], title): raise Error( u"loadimageinfo: Query on %s returned data on '%s'" % (page, pageitem['title'])) api.update_page(page, pageitem, query.props) + if "imageinfo" not in pageitem: if "missing" in pageitem: raise NoPage(page) - raise PageRelatedError( page, u"loadimageinfo: Query on %s returned no imageinfo") - return (pageitem['imageinfo'] - if history else pageitem['imageinfo'][0]) + + return (pageitem['imageinfo'] + if history else pageitem['imageinfo'][0])
@deprecated('Check the content model instead') def loadflowinfo(self, page): diff --git a/scripts/checkimages.py b/scripts/checkimages.py index 19f07e6..2cdde5e 100644 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -721,7 +721,7 @@ if reportPageObject == self.image and self.uploader: nick = self.uploader else: - nick = reportPageObject.getLatestUploader()[0] + nick = reportPageObject.latest_file_info.user except pywikibot.NoPage: pywikibot.output( u"Seems that %s has only the description and not the file..." @@ -897,7 +897,7 @@ commons_site = pywikibot.Site('commons', 'commons') regexOnCommons = r"[[:File:%s]] is also on '''Commons''': [[commons:File:.*?]](?: (same name)|)$" \ % re.escape(self.imageName) - hash_found = self.image.getFileSHA1Sum() + hash_found = self.image.latest_file_info.sha1 if not hash_found: return # Image deleted, no hash found. Skip the image.
@@ -959,7 +959,7 @@ duplicateRegex = r'[[:File:%s]] has the following duplicates' \ % re.escape(self.convert_to_url(self.imageName)) imagePage = pywikibot.FilePage(self.site, self.imageName) - hash_found = imagePage.getFileSHA1Sum() + hash_found = imagePage.latest_file_info.sha1 duplicates = self.site.getFilesFromAnHash(hash_found)
if not duplicates: @@ -982,7 +982,7 @@
if DupePage.title(asUrl=True) != self.image.title(asUrl=True) or \ self.timestamp is None: - self.timestamp = DupePage.getLatestUploader()[1] + self.timestamp = DupePage.latest_file_info.timestamp data = time.strptime(self.timestamp, u"%Y-%m-%dT%H:%M:%SZ") data_seconds = time.mktime(data) time_image_list.append([data_seconds, duplicate]) @@ -1475,7 +1475,7 @@ loadOtherImages = True for image in generator: try: - timestamp = image.getLatestUploader()[1] + timestamp = image.latest_file_info.timestamp except pywikibot.NoPage: continue # not relative to localtime diff --git a/scripts/maintenance/compat2core.py b/scripts/maintenance/compat2core.py index 5311aab..a8b2c58 100644 --- a/scripts/maintenance/compat2core.py +++ b/scripts/maintenance/compat2core.py @@ -98,7 +98,7 @@ 'MediaWiki one'), ('.getFileMd5Sum(', 'FilePage.getFileMd5Sum() is deprecated should be replaced by ' - 'getFileSHA1Sum()'), + 'FilePage.latest_file_info.sha1'), (' wikipedia.', '"wikipedia" library has been changed to "pywikibot".'), ('from wikipedia import', diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py index a6270c5..a6df733 100644 --- a/scripts/nowcommons.py +++ b/scripts/nowcommons.py @@ -324,7 +324,7 @@ if localImagePage.fileIsShared(): pywikibot.output(u'File is already on Commons.') continue - sha1 = localImagePage.getFileSHA1Sum() + sha1 = localImagePage.latest_file_info.sha1 if self.getOption('use_hash'): filenameOnCommons = images_list[1] else: @@ -394,7 +394,7 @@ % localImagePage.title(withNamespace=False)) commonsText = commonsImagePage.get() if self.getOption('replaceonly') is False: - if sha1 == commonsImagePage.getFileSHA1Sum(): + if sha1 == commonsImagePage.latest_file_info.sha1: pywikibot.output( u'The image is identical to the one on Commons.') if len(localImagePage.getFileVersionHistory()) > 1 and not self.getOption('use_hash'):
pywikibot-commits@lists.wikimedia.org