jenkins-bot has submitted this change and it was merged.
Change subject: [bugfix] remove -hash option from support
......................................................................
[bugfix] remove -hash option from support
- The webservice for the -hash option has not been available since July 2014.
Remove the option from the script and raise NotImplementedError when -hash is used
- remove useHashGenerator
- Additional check for a file repository. Use image_repository() instead of
creating a hard-coded Wikimedia Commons site object
- rename getPageGenerator to a generic generator property (so that it can later
be used with higher-level bot classes) and remove the hash parts from it
- remove hash parts in run method
- remove word_to_skip which isn't used anymore
Bug: T132303
Change-Id: I7e62e44a6c99bd0ae1550b412e18cf5565338b2b
---
M scripts/nowcommons.py
1 file changed, 30 insertions(+), 122 deletions(-)
Approvals:
jenkins-bot: Verified
Whym: Looks good to me, approved
diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py
index 62bcd07..fb6c579 100755
--- a/scripts/nowcommons.py
+++ b/scripts/nowcommons.py
@@ -34,14 +34,9 @@
-replaceonly Use this if you do not have a local sysop account, but do
wish to replace links from the NowCommons template.
- -hash Use the hash to identify the images that are the same. It
- doesn't work always, so the bot opens two tabs to let to
- the user to check if the images are equal or not.
-
-- Example --
- python pwb.py nowcommons -replaceonly -replaceloose -replacealways \
- -replace -hash
+ python pwb.py nowcommons -replaceonly -replaceloose -replacealways -replace
-- Known issues --
Please fix these if you are capable and motivated:
@@ -61,9 +56,7 @@
__version__ = '$Id$'
#
-import re
import sys
-import webbrowser
import pywikibot
@@ -180,13 +173,6 @@
'zh',
]
-# Stemma and stub are images not to be deleted (and are a lot) on it.wikipedia
-# if your project has images like that, put the word often used here to skip them
-word_to_skip = {
- 'en': [],
- 'it': ['stemma', 'stub', 'hill40 '],
-}
-
class NowCommonsDeleteBot(Bot):
@@ -199,13 +185,18 @@
'replacealways': False,
'replaceloose': False,
'replaceonly': False,
- 'use_hash': False,
})
super(NowCommonsDeleteBot, self).__init__(**kwargs)
self.site = pywikibot.Site()
- if repr(self.site) == 'commons:commons':
- sys.exit('Do not run this bot on Commons!')
+ if not self.site.has_image_repository:
+ sys.exit('There must be a file repository to run this script')
+ self.commons = self.site.image_repository()
+ if self.site == self.commons:
+ sys.exit(
+ 'You cannot run this bot on file repository like Commons.')
+ self.summary = i18n.twtranslate(self.site,
+ 'imagetransfer-nowcommons_notice')
def ncTemplates(self):
"""Return nowcommons templates."""
@@ -222,77 +213,15 @@
for title in self.ncTemplates())
return self._nc_templates
- def useHashGenerator(self):
- """Use hash generator."""
- #
https://toolserver.org/~multichill/nowcommons.php?language=it&page=2&am…
- lang = self.site.lang
- num_page = 0
- word_to_skip_translated = i18n.translate(self.site, word_to_skip)
- images_processed = list()
- while 1:
- url = ('https://toolserver.org/~multichill/nowcommons.php?'
- 'language=%s&page=%s&filter=') % (lang, num_page)
- HTML_text = self.site.getUrl(url, no_hostname=True)
- reg = r'<[Aa]
href="(?P<urllocal>.*?)">(?P<imagelocal>.*?)</[Aa]>
+?</td><td>\n\s*?'
- reg += r'<[Aa]
href="(?P<urlcommons>http[s]?://commons.wikimedia.org/.*?)" \
- >Image:(?P<imagecommons>.*?)</[Aa]>
+?</td><td>'
- regex = re.compile(reg, re.UNICODE)
- found_something = False
- change_page = True
- for x in regex.finditer(HTML_text):
- found_something = True
- image_local = x.group('imagelocal')
- image_commons = x.group('imagecommons')
- if image_local in images_processed:
- continue
- change_page = False
- images_processed.append(image_local)
- # Skip images that have something in the title (useful for it.wiki)
- image_to_skip = False
- for word in word_to_skip_translated:
- if word.lower() in image_local.lower():
- image_to_skip = True
- if image_to_skip:
- continue
- url_local = x.group('urllocal')
- url_commons = x.group('urlcommons')
- pywikibot.output(color_format(
- '\n\n>>> {lightpurple}{0}{default} <<<',
- image_local))
- pywikibot.output(u'Local: %s\nCommons: %s\n'
- % (url_local, url_commons))
- webbrowser.open(url_local, 0, 1)
- webbrowser.open(url_commons, 0, 1)
- if image_local.split('Image:')[1] == image_commons:
- choice = pywikibot.input_yn(
- u'The local and the commons images have the same name, '
- 'continue?', default=False, automatic_quit=False)
- else:
- choice = pywikibot.input_yn(
- u'Are the two images equal?',
- default=False, automatic_quit=False)
- if choice:
- yield [image_local, image_commons]
- else:
- continue
- # The page is dinamically updated, so we may don't need to change it
- if change_page:
- num_page += 1
- # If no image found means that there aren't anymore, break.
- if not found_something:
- break
-
- def getPageGenerator(self):
+ @property
+ def generator(self):
"""Generator method."""
- if self.getOption('use_hash'):
- gen = self.useHashGenerator()
- else:
- gens = [t.getReferences(follow_redirects=True, namespaces=[6],
- onlyTemplateInclusion=True)
- for t in self.nc_templates]
- gen = pg.CombinedPageGenerator(gens)
- gen = pg.DuplicateFilterPageGenerator(gen)
- gen = pg.PreloadingGenerator(gen)
+ gens = [t.getReferences(follow_redirects=True, namespaces=[6],
+ onlyTemplateInclusion=True)
+ for t in self.nc_templates]
+ gen = pg.CombinedPageGenerator(gens)
+ gen = pg.DuplicateFilterPageGenerator(gen)
+ gen = pg.PreloadingGenerator(gen)
return gen
def findFilenameOnCommons(self, localImagePage):
@@ -326,38 +255,23 @@
def run(self):
"""Run the bot."""
- commons = pywikibot.Site('commons', 'commons')
- comment = i18n.twtranslate(self.site, 'imagetransfer-nowcommons_notice')
+ commons = self.commons
+ comment = self.summary
- for page in self.getPageGenerator():
- if self.getOption('use_hash'):
- # Page -> Has the namespace | commons image -> Not
- images_list = page # 0 -> local image, 1 -> commons image
- page = pywikibot.Page(self.site, images_list[0])
- else:
- # If use_hash is true, we have already print this before, no need
- self.current_page = page
+ for page in self.generator:
+ self.current_page = page
try:
localImagePage = pywikibot.FilePage(self.site, page.title())
if localImagePage.fileIsShared():
pywikibot.output(u'File is already on Commons.')
continue
sha1 = localImagePage.latest_file_info.sha1
- if self.getOption('use_hash'):
- filenameOnCommons = images_list[1]
- else:
- filenameOnCommons = self.findFilenameOnCommons(
- localImagePage)
- if not filenameOnCommons and not self.getOption('use_hash'):
+ filenameOnCommons = self.findFilenameOnCommons(localImagePage)
+ if not filenameOnCommons:
pywikibot.output(u'NowCommons template not found.')
continue
commonsImagePage = pywikibot.FilePage(commons, 'Image:%s'
% filenameOnCommons)
- if (localImagePage.title(withNamespace=False) ==
- commonsImagePage.title(withNamespace=False) and
- self.getOption('use_hash')):
- pywikibot.output(
- u'The local and the commons images have the same name')
if (localImagePage.title(withNamespace=False) !=
commonsImagePage.title(withNamespace=False)):
usingPages = list(localImagePage.usingPages())
@@ -397,12 +311,6 @@
# refresh because we want the updated list
usingPages = len(list(pywikibot.FilePage(
self.site, page.title()).usingPages()))
- if usingPages > 0 and
self.getOption('use_hash'):
- # just an enter
- pywikibot.input(
- u'There are still %s pages with this \
- image, confirm the manual removal from them
please.'
- % usingPages)
else:
pywikibot.output(u'Please change them manually.')
@@ -417,12 +325,11 @@
if sha1 == commonsImagePage.latest_file_info.sha1:
pywikibot.output(
u'The image is identical to the one on Commons.')
- if (len(localImagePage.getFileVersionHistory()) > 1 and
- not self.getOption('use_hash')):
+ if len(localImagePage.getFileVersionHistory()) > 1:
pywikibot.output(
- u"This image has a version history. Please \
- delete it manually after making sure that the \
- old versions are not worth keeping.""")
+ 'This image has a version history. Please '
+ 'delete it manually after making sure that the '
+ 'old versions are not worth keeping.')
continue
if self.getOption('always') is False:
format_str = color_format(
@@ -474,8 +381,9 @@
if arg == '-replacealways':
options['replace'] = True
options['replacealways'] = True
- elif arg == '-hash':
- options['use_hash'] = True
+ elif arg == '-hash': # T132303
+ raise NotImplementedError(
+ "The '-hash' argument is not implemented anymore.")
elif arg == '-autonomous':
pywikibot.warning(u"The '-autonomous' argument is
DEPRECATED,"
u" use '-always' instead.")
--
To view, visit
https://gerrit.wikimedia.org/r/308427
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7e62e44a6c99bd0ae1550b412e18cf5565338b2b
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Whym <whym(a)whym.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>