jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

[IMPR] reduce code complexity of imageharvest.py

Change-Id: I3200e4655310e5cd2a9caaa70d1f3de2c7e7e554
---
M scripts/imageharvest.py
1 file changed, 41 insertions(+), 43 deletions(-)

diff --git a/scripts/imageharvest.py b/scripts/imageharvest.py
index a1cfdce..daa9a73 100644
--- a/scripts/imageharvest.py
+++ b/scripts/imageharvest.py
@@ -3,7 +3,7 @@
Bot for getting multiple images from an external site.

It takes a URL as an argument and finds all images (and other files specified
-by the extensions in 'fileformats') that URL is referring to, asking whether to
+by the extensions in 'file_formats' that URL is referring to, asking whether to
upload them. If further arguments are given, they are considered to be the text
that is common to the descriptions. BeautifulSoup is needed only in this case.

@@ -37,20 +37,18 @@
except ImportError as e:
BeautifulSoup = e

-fileformats = ('jpg', 'jpeg', 'png', 'gif', 'svg', 'ogg')
+file_formats = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.ogg')


-def get_imagelinks(url):
+def get_imagelinks(url, shown):
"""Given a URL, get all images linked to by the page at that URL."""
- # Check if BeautifulSoup is imported.
- if isinstance(BeautifulSoup, ImportError):
- raise BeautifulSoup
+ links = []

response = fetch(url)
if response.status_code != 200:
pywikibot.output('Skipping url: {}'
.format(url))
- return []
+ return links

soup = BeautifulSoup(response.text, 'html.parser')

@@ -61,31 +59,35 @@
else:
tagname = ['a', 'img']

- links = []
for tag in soup.findAll(tagname):
link = tag.get('src', tag.get('href', None))
- if link:
- ext = os.path.splitext(link)[1].lower().strip('.')
- if ext in fileformats:
- links.append(urljoin(url, link))
+ if not link:
+ continue
+ _, ext = os.path.splitext(link)
+ if ext.lower() in file_formats:
+ links.append(urljoin(url, link))
return links


-def run_bot(give_url, image_url, desc):
+def get_categories(site):
+ """Get list of categories, if any."""
+ categories = []
+ while True:
+ cat = pywikibot.input('Specify a category (or press enter to '
+ 'end adding categories)')
+ if not cat.strip():
+ break
+ fmt = '[[{cat}]]' if ':' in cat else '[[{ns}:{cat}]]'
+ categories.append(fmt.format(ns=site.namespace(14), cat=cat))
+
+ return categories
+
+
+def run_bot(give_url, image_url, desc, shown):
"""Run the bot."""
- url = give_url
- if not url:
- if image_url:
- url = pywikibot.input('What URL range should I check '
- '(use $ for the part that is changeable)')
- else:
- url = pywikibot.input('From what URL should I get the images?')
-
- basicdesc = desc or pywikibot.input(
- 'What text should be added at the end of '
- 'the description of each image from this url?')
-
- if image_url:
+ if not give_url and image_url:
+ url = pywikibot.input('What URL range should I check '
+ '(use $ for the part that is changeable)')
minimum = int(pywikibot.input(
'What is the first number to check (default: 1)') or 1)
maximum = int(pywikibot.input(
@@ -93,8 +95,15 @@
ilinks = (url.replace('$', str(i))
for i in range(minimum, maximum + 1))
else:
- ilinks = get_imagelinks(url)
+ url = (give_url
+ or pywikibot.input('From what URL should I get the images?'))
+ ilinks = get_imagelinks(url, shown)

+ basicdesc = desc or pywikibot.input(
+ 'What text should be added at the end of '
+ 'the description of each image from this url?')
+
+ mysite = pywikibot.Site()
for image in ilinks:
try:
include = pywikibot.input_yn('Include image {}?'.format(image),
@@ -104,32 +113,21 @@
if not include:
continue

+ categories = get_categories(mysite)
desc = pywikibot.input('Give the description of this image:')
- categories = []
- mysite = pywikibot.Site()
- while True:
- cat = pywikibot.input('Specify a category (or press enter to '
- 'end adding categories)')
- if not cat.strip():
- break
- if ':' in cat:
- categories.append('[[{}]]'.format(cat))
- else:
- categories.append('[[{}:{}]]'
- .format(mysite.namespace(14), cat))
+
desc += '\n\n' + basicdesc + '\n\n' + '\n'.join(categories)
UploadRobot(image, description=desc).run()


def main(*args):
"""Process command line arguments and invoke bot."""
- global shown
url = ''
image_url = False
shown = False
desc = []

- for arg in pywikibot.handle_args():
+ for arg in pywikibot.handle_args(args):
if arg == '-pattern':
image_url = True
elif arg == '-shown':
@@ -143,9 +141,9 @@
desc = ' '.join(desc)

if isinstance(BeautifulSoup, ImportError):
- pywikibot.bot.suggest_help(missing_dependencies=('beautifulsoup4',))
+ pywikibot.bot.suggest_help(missing_dependencies=['beautifulsoup4'])
else:
- run_bot(url, image_url, desc)
+ run_bot(url, image_url, desc, shown)


if __name__ == '__main__':

To view, visit change 638659. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I3200e4655310e5cd2a9caaa70d1f3de2c7e7e554
Gerrit-Change-Number: 638659
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <mpaa.wiki@gmail.com>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged