jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/773930 )
Change subject: [IMPR] Use itertools.zip_longest to find the most important image ......................................................................
[IMPR] Use itertools.zip_longest to find the most important image
- important_image uses usingPages() to find the most important image, but retrieving all pages takes a long time. Now we collect the usingPages of all images in list_given in parallel and stop as soon as the generator yields only one image while the other tuple entries are False. - make important_image a static method - test it
Change-Id: Idd8354469d444b72357a5a55b111cc9810fd6715 --- M scripts/checkimages.py M tests/checkimages_tests.py 2 files changed, 37 insertions(+), 14 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/checkimages.py b/scripts/checkimages.py index 4ec829b..af18f95 100755 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -84,6 +84,8 @@ import collections import re import time + +from itertools import zip_longest from typing import Generator
import pywikibot @@ -803,26 +805,33 @@ for element in self.load(page_hidden_text): self.hiddentemplates.add(pywikibot.Page(self.site, element))
- def important_image(self, list_given) -> pywikibot.FilePage: + @staticmethod + def important_image( + list_given: List[Tuple[float, pywikibot.FilePage]] + ) -> pywikibot.FilePage: """ Get tuples of image and time, return the most used or oldest image.
+ .. versionchanged: 7.2 + itertools.zip_longest is used to stop `usingPages` as soon as + possible. + :param list_given: a list of tuples which hold seconds and FilePage - :type list_given: list :return: the most used or oldest image """ # find the most used image - inx_found = None # index of found image - max_usage = 0 # hold max amount of using pages - for num, element in enumerate(list_given): - image = element[1] - image_used = len(list(image.usingPages())) - if image_used > max_usage: - max_usage = image_used - inx_found = num + images = [image for _, image in list_given] + iterables = [image.usingPages() for image in images] + curr_images = [] + for values in zip_longest(*iterables, fillvalue=False): + curr_images = values + # bool(FilePage) is True because it is an object subclass + if sum(bool(image) for image in values) <= 1: + break
- if inx_found is not None: - return list_given[inx_found][1] + for inx, image in enumerate(curr_images): + if image is not False: + return images[inx]
# find the oldest image _, image = max(list_given, key=lambda element: element[0]) diff --git a/tests/checkimages_tests.py b/tests/checkimages_tests.py index b0da988..2666052 100755 --- a/tests/checkimages_tests.py +++ b/tests/checkimages_tests.py @@ -1,14 +1,16 @@ #!/usr/bin/python3 """Unit tests for checkimages script.""" # -# (C) Pywikibot team, 2015-2021 +# (C) Pywikibot team, 2015-2022 # # Distributed under the terms of the MIT license. # import unittest
+from pywikibot import FilePage + from scripts import checkimages -from tests.aspects import TestCase +from tests.aspects import DefaultSiteTestCase, TestCase
class TestSettings(TestCase): @@ -29,5 +31,17 @@ self.assertEqual(item1[1], 'a deprecated template')
+class TestMethods(DefaultSiteTestCase): + + """Test methods of CheckImagesBot.""" + + def test_important_image(self): + """Test important_image method.""" + filenames = ('Example.jpg', 'Demo.jpg') + images = [(0.0, FilePage(self.site, name)) for name in filenames] + self.assertEqual(checkimages.CheckImagesBot.important_image(images), + FilePage(self.site, 'Example.jpg')) + + if __name__ == '__main__': # pragma: no cover unittest.main()