jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: Icf144501fa5939ccb12ae7278929175551573248
---
M extract_wikilinks.py
M fixing_redirects.py
M followlive.py
M get.py
4 files changed, 23 insertions(+), 19 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/extract_wikilinks.py b/extract_wikilinks.py
index 6f5e7d1..601cea2 100644
--- a/extract_wikilinks.py
+++ b/extract_wikilinks.py
@@ -19,9 +19,10 @@
#
# Distributed under the terms of the MIT license.
#
-__version__='$Id$'
+__version__ = '$Id$'
#
-import sys,re
+import sys
+import re
import codecs
import wikipedia as pywikibot
# This bot does not contact the Wiki, so no need to get it on the list
@@ -38,7 +39,7 @@
elif arg.startswith("-bare"):
complete = False
elif fn:
- print "Ignoring argument %s"%arg
+ print "Ignoring argument %s" % arg
else:
fn = arg
@@ -47,14 +48,14 @@
sys.exit(1)
mysite = pywikibot.getSite()
-f=open(fn,'r')
-text=f.read()
+f = open(fn, 'r')
+text = f.read()
f.close()
for hit in R.findall(text):
if complete:
list.append(mysite.linkto(hit))
else:
- list.append("[[%s]]"%hit)
+ list.append("[[%s]]" % hit)
if sorted:
list.sort()
for page in list:
diff --git a/fixing_redirects.py b/fixing_redirects.py
index 49627dd..d35badf 100644
--- a/fixing_redirects.py
+++ b/fixing_redirects.py
@@ -20,9 +20,10 @@
#
# Distributed under the terms of the MIT license.
#
-__version__='$Id$'
+__version__ = '$Id$'
#
-import re, sys
+import re
+import sys
import wikipedia as pywikibot
import pagegenerators
from pywikibot import i18n
@@ -72,7 +73,7 @@
curpos = 0
# This loop will run until we have finished the current page
while True:
- m = linkR.search(text, pos = curpos)
+ m = linkR.search(text, pos=curpos)
if not m:
break
# Make sure that next time around we will not find this same hit.
@@ -102,7 +103,7 @@
if not link_text:
# or like this: [[page_title]]trailing_chars
link_text = page_title
- if m.group('section') == None:
+ if m.group('section') is None:
section = ''
else:
section = m.group('section')
@@ -117,16 +118,16 @@
replaceit = choice in "rR"
# remove preleading ":"
- if link_text[0]==':':
+ if link_text[0] == ':':
link_text = link_text[1:]
if link_text[0].isupper():
new_page_title = targetPage.title()
else:
new_page_title = targetPage.title()[0].lower() + \
- targetPage.title()[1:]
+ targetPage.title()[1:]
# remove preleading ":"
- if new_page_title[0]==':':
+ if new_page_title[0] == ':':
new_page_title = new_page_title[1:]
if replaceit and trailing_chars:
@@ -149,6 +150,7 @@
pageCache = []
+
def workon(page):
mysite = pywikibot.getSite()
try:
@@ -163,7 +165,7 @@
% page.title())
links = page.linkedPages()
if len(links):
- pywikibot.getall(mysite,links)
+ pywikibot.getall(mysite, links)
else:
pywikibot.output('Nothing left to do.')
return
diff --git a/followlive.py b/followlive.py
index a8987a2..ba7a822 100644
--- a/followlive.py
+++ b/followlive.py
@@ -13,10 +13,11 @@
#
# Distributed under the terms of the MIT license.
#
-__version__='$Id$'
+__version__ = '$Id$'
import sys
-import datetime, time
+import datetime
+import time
import traceback
import wikipedia as pywikibot
import editarticle
diff --git a/get.py b/get.py
index 82343c6..4b97cdf 100644
--- a/get.py
+++ b/get.py
@@ -13,9 +13,10 @@
#
# Distributed under the terms of the MIT license.
-__version__='$Id$'
+__version__ = '$Id$'
import wikipedia as pywikibot
+
def main():
singlePageTitleParts = []
@@ -26,11 +27,10 @@
page = pywikibot.Page(pywikibot.getSite(), pageTitle)
# TODO: catch exceptions
- pywikibot.output(page.get(), toStdout = True)
+ pywikibot.output(page.get(), toStdout=True)
if __name__ == "__main__":
try:
main()
finally:
pywikibot.stopme()
-
--
To view, visit https://gerrit.wikimedia.org/r/103333
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icf144501fa5939ccb12ae7278929175551573248
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: Port disambredir.py to core
......................................................................
Port disambredir.py to core
Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c
---
A scripts/disambredir.py
1 file changed, 184 insertions(+), 0 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/disambredir.py b/scripts/disambredir.py
new file mode 100644
index 0000000..3af5c9f
--- /dev/null
+++ b/scripts/disambredir.py
@@ -0,0 +1,184 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Goes through the disambiguation pages, checks their links, and asks for
+each link that goes to a redirect page whether it should be replaced.
+"""
+#
+# (C) André Engels and others, 2006-2009
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+import pywikibot
+from pywikibot import pagegenerators
+import re
+import sys
+from pywikibot import catlib
+
+msg = {
+ 'ar': u'تغيير التحويلات في صفحة توضيح',
+ 'be-x-old': u'Замена перанакіраваньняў на старонку неадназначнасьцяў',
+ 'en': u'Changing redirects on a disambiguation page',
+ 'he': u'משנה קישורים להפניות בדף פירושונים',
+ 'fa': u'اصلاح تغییرمسیرها در یک صفحه ابهامزدایی',
+ 'ja': u'ロボットによる: 曖昧さ回避ページのリダイレクト修正',
+ 'nl': u'Verandering van redirects op een doorverwijspagina',
+ 'pl': u'Zmiana przekierowań na stronie ujednoznaczającej',
+ 'pt': u'Arrumando redirects na página de desambiguação',
+ 'ru': u'Изменение перенаправлений на странице неоднозначности',
+ 'uk': u'Зміна перенаправлень на сторінці багатозначності',
+ 'zh': u'機器人: 修改消歧義頁中的重定向連結',
+}
+
+
+def firstcap(string):
+ return string[0].upper() + string[1:]
+
+
+def treat(text, linkedPage, targetPage):
+ """
+ Based on the method of the same name in solve_disambiguation.py.
+ """
+ # make a backup of the original text so we can show the changes later
+ linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+ curpos = 0
+ # This loop will run until we have finished the current page
+ while True:
+ m = linkR.search(text, pos=curpos)
+ if not m:
+ break
+ # Make sure that next time around we will not find this same hit.
+ curpos = m.start() + 1
+ # ignore interwiki links and links to sections of the same page
+ if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
+ continue
+ else:
+ actualLinkPage = pywikibot.Page(page.site, m.group('title'))
+ # Check whether the link found is to page.
+ if actualLinkPage != linkedPage:
+ continue
+
+ # how many bytes should be displayed around the current link
+ context = 30
+ # at the beginning of the link, start red color.
+ # at the end of the link, reset the color to default
+ pywikibot.output(text[max(0, m.start() - context): m.start()] +
+ '\03{lightred}' + text[m.start(): m.end()] +
+ '\03{default}' + text[m.end(): m.end() + context])
+ while True:
+ choice = pywikibot.input(
+ u"Option (N=do not change, y=change link to \03{lightpurple}%s\03{default}, r=change and replace text, u=unlink)" % targetPage.title())
+ try:
+ choice = choice[0]
+ except:
+ choice = 'N'
+ if choice in 'nNyYrRuU':
+ break
+ if choice in "nN":
+ continue
+
+ # The link looks like this:
+ # [[page_title|link_text]]trailing_chars
+ page_title = m.group('title')
+ link_text = m.group('label')
+ if not link_text:
+ # or like this: [[page_title]]trailing_chars
+ link_text = page_title
+ if m.group('section') is None:
+ section = ''
+ else:
+ section = m.group('section')
+ trailing_chars = m.group('linktrail')
+ if trailing_chars:
+ link_text += trailing_chars
+
+ if choice in "uU":
+ # unlink - we remove the section if there's any
+ text = text[:m.start()] + link_text + text[m.end():]
+ continue
+ replaceit = choice in "rR"
+
+ if link_text[0].isupper():
+ new_page_title = targetPage.title()
+ else:
+ new_page_title = targetPage.title()[0].lower() + \
+ targetPage.title()[1:]
+ if replaceit and trailing_chars:
+ newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
+ elif replaceit or (new_page_title == link_text and not section):
+ newlink = "[[%s]]" % new_page_title
+ # check if we can create a link with trailing characters instead of a
+ # pipelink
+ elif len(new_page_title) <= len(link_text) and \
+ firstcap(link_text[:len(new_page_title)]) == \
+ firstcap(new_page_title) and \
+ re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
+ newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
+ link_text[len(new_page_title):])
+ else:
+ newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
+ text = text[:m.start()] + newlink + text[m.end():]
+ continue
+ return text
+
+
+def workon(page, links):
+ text = page.get()
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
+ for page2 in links:
+ try:
+ target = page2.getRedirectTarget()
+ except (pywikibot.Error, pywikibot.SectionError):
+ continue
+ text = treat(text, page2, target)
+ if text != page.get():
+ comment = pywikibot.translate(mysite, msg)
+ page.put(text, comment)
+
+
+def main():
+ global mysite, linktrail, page
+ start = []
+ for arg in pywikibot.handleArgs():
+ start.append(arg)
+ if start:
+ start = " ".join(start)
+ else:
+ start = "!"
+ mysite = pywikibot.getSite()
+ linktrail = mysite.linktrail()
+ try:
+ generator = pagegenerators.CategorizedPageGenerator(
+ mysite.disambcategory(), start=start)
+ except pywikibot.NoPage:
+ pywikibot.output(
+ "The bot does not know the disambiguation category for your wiki.")
+ raise
+ # only work on articles
+ generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
+ generator = pagegenerators.PreloadingGenerator(generator)
+ pagestodo = []
+ pagestoload = []
+ for page in generator:
+ if page.isRedirectPage():
+ continue
+ linked = page.linkedPages()
+ pagestodo.append((page, linked))
+ pagestoload += linked
+ if len(pagestoload) > 49:
+ pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
+ for page, links in pagestodo:
+ workon(page, links)
+ pagestoload = []
+ pagestodo = []
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit https://gerrit.wikimedia.org/r/102912
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: If6cb976cd47675ff780f49029f5fa5277b9fd95c
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Vldandrew <vldandrew(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Strainu <wiki(a)strainu.ro>
Gerrit-Reviewer: Vldandrew <vldandrew(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [L10N] remove wrong templates which do not exist on target wikipedia
......................................................................
[L10N] remove wrong templates which do not exist on target wikipedia
Change-Id: Ifab9ee9dac3f2558fc9e813f9d33e1febbe5adb7
---
M redirect.py
1 file changed, 1 insertion(+), 14 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/redirect.py b/redirect.py
index ca9c5bc..cfe3dd2 100644
--- a/redirect.py
+++ b/redirect.py
@@ -20,7 +20,7 @@
'redirect-fix-broken-moved': u'Edit summary when the bot fixes a broken redirect to a moved page whose origin has been deleted.\nParameters:\n* <code>%(to)s</code>: the new redirect target, as a wiki link.',
'redirect-fix-loop': u'Edit summary when the bot fixes redirect loops. <code>%(to)s</code> displays the new redirect target as a wiki link.',
'redirect-remove-loop': u'Edit summary when the bot tags a redirect loop for speedy deletion. The internal links are to pages on the English Wikipedia, [http://en.wikipedia.org/wiki/Wikipedia:CSD#G8 here] and [http://en.wikipedia.org/wiki/Wikipedia:Redirect here]. They won\'t work anywhere except on the English Wikipedia, as they stand.',
- 'redirect-broken-redirect-template': u'Template for speedy deletion of broken redirect or redirect loops which the bot tags onto the redirect page. This message may contain additional informations like template parameters or reasons for the deletion request.\n\nNOTE: If this system message is not given for a language code, speedy deletion request by a bot is not supported on your site except there is a bot with sysop flag.\n\n{{doc-important|Only use your deletion template like <code><nowiki>{{delete}}</nowiki></code> which exist on your local project.}}',
+ 'redirect-broken-redirect-template': u'NOTE TO TRANSLATOR: This should only be translated by someone on the Wikipedia of your language code. Thank you.\n\nTemplate for speedy deletion of broken redirect or redirect loops which the bot tags onto the redirect page. This message may contain additional informations like template parameters or reasons for the deletion request.\n\nNOTE: If this system message is not given for a language code, speedy deletion request by a bot is not supported on your site except there is a bot with sysop flag.\n\n{{doc-important|Only use your deletion template like <code><nowiki>{{delete}}</nowiki></code> which exist on your local project.}}',
},
# Author: Csisc
'aeb': {
@@ -28,7 +28,6 @@
'redirect-remove-broken': u'تحويلة إلى صفحة محذوفة أو غير موجودة',
'redirect-fix-loop': u'روبوت: تعديل حلقة إعادة التوجيه إلى %(to)s',
'redirect-remove-loop': u'هدف التحويلة يصنع عقدة تحويل: Robot',
- 'redirect-broken-redirect-template': u'{{شطب|تحويلة مكسورة}}',
},
# Author: Naudefj
# Author: Xqt
@@ -37,7 +36,6 @@
'redirect-remove-broken': u'Robot: Aanstuur na \'n geskrapte of nie-bestaande bladsy',
'redirect-fix-loop': u'Robot: sirkulêre aanstuur na %(to)s reggemaak',
'redirect-remove-loop': u'Robot: Aanstuur vorm \'n sirkulêre lus',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: Als-Holder
# Author: Xqt
@@ -84,7 +82,6 @@
'redirect-fix-broken-moved': u'Bot: İşləməyən yönləndirilmənin yeri dəyişdirilmiş hədəf səhifəyə %(to)s düzəldilməsi',
'redirect-fix-loop': u'Bot: Sonsuz yönləndirilmənin %(to)s düzəldilməsi',
'redirect-remove-loop': u'Bot: Yönləndirilmə sonsuz yönləndirilmə formalaşdırır',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: Amir a57
# Author: E THP
@@ -93,7 +90,6 @@
'redirect-remove-broken': u'[[ویکیپئدییا:سیل#یستیقامتلندیرمه|وپ:سیل]]: سیلینئن یا دا وار اولمایان صحیفهیه اولان ایستیقامیلندیرمه',
'redirect-fix-loop': u'روبوت: فیخینگ اوزوک اولان%(to)s یؤنلندیرن',
'redirect-remove-loop': u'بوت: ایستیقامتلندیرمه هدفی بیر ایستیقامتلندیرمه دؤورو تشکیل ائدیر',
- 'redirect-broken-redirect-template': u'{{سیل|y1}}',
},
# Author: Haqmar
# Author: Sagan
@@ -102,7 +98,6 @@
'redirect-remove-broken': u'Робот: булмаған йәки юйылған биткә йүнәлтеү',
'redirect-fix-loop': u'Робот: %(to)s битенә йүнәлтеүҙе төҙәтеү',
'redirect-remove-loop': u'Робот: бер ҡайҙа ла йүнәлтелмәгән',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: Mucalexx
# Author: Xqt
@@ -114,10 +109,6 @@
},
'bat-smg': {
'redirect-fix-double': u'Robots: Taisuoms dvėgobs paradresavėms → %(to)s',
- },
- # Author: Stephensuleeman
- 'bbc-latn': {
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: EugeneZelenko
# Author: Jim-by
@@ -140,7 +131,6 @@
'redirect-fix-double': u'Robot: Pamasangan paugahan ganda ka %(to)s',
'redirect-remove-broken': u'[[WP:CSD#G8|G8]]: [[Wikipedia:Redirect|Paalihan]] ka tungkaran nang dihapus atawa kada ada',
'redirect-remove-loop': u'[[WP:CSD#G8|G8]]: Bidikan [[Wikipedia:Redirect|paalihan]] mahasilakan paalihan siklik',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: Wikitanvir
'bn': {
@@ -156,7 +146,6 @@
'redirect-fix-broken-moved': u'Robot : O reizhañ an adkasoù torret war-zu ar bajenn bal %(to)s',
'redirect-fix-loop': u'Robot : O kempenn al lagadenn adkas war-zu %(to)s',
'redirect-remove-loop': u'Robot: Stumm ur c\'helc\'h-tro born zo gant an [[Wikipedia:Redirect|adkas]]',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: CERminator
# Author: Edinwiki
@@ -208,7 +197,6 @@
'redirect-fix-double': u'Bot: Yn trwsio ailgyfeiriad dwbl i %(to)s',
'redirect-remove-broken': u'Bot: Yn ailgyfeirio i dudalen a ddilëwyd neu nad yw ar gael',
'redirect-remove-loop': u'Bot: Mae nod yr ailgyfeiriad yn ffurfio dolen ailgyfeirio',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: Christian List
# Author: Kaare
@@ -389,7 +377,6 @@
'redirect-fix-broken-moved': u'機械人:修復損壞个重定向頁到移動目標頁面 %(to)s',
'redirect-fix-loop': u'機械人:修復重定向迴圈至%(to)s',
'redirect-remove-loop': u'機械人:重定向目標構成循環',
- 'redirect-broken-redirect-template': u'{{db-r1}}',
},
# Author: Amire80
# Author: YaronSh
--
To view, visit https://gerrit.wikimedia.org/r/102071
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ifab9ee9dac3f2558fc9e813f9d33e1febbe5adb7
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/i18n
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Siebrand <siebrand(a)wikimedia.org>
Gerrit-Reviewer: jenkins-bot
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes, code improvements, insert __version__ string
......................................................................
[PEP8] changes, code improvements, insert __version__ string
Change-Id: Icff281c4d659d40a527eeecac12de17afaef8201
---
M data_ingestion.py
1 file changed, 80 insertions(+), 51 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/data_ingestion.py b/data_ingestion.py
index f5c8f3d..4098399 100644
--- a/data_ingestion.py
+++ b/data_ingestion.py
@@ -1,69 +1,85 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-'''
+"""
A generic bot to do data ingestion (batch uploading) to Commons
-'''
-import pywikibot, upload
-import posixpath, urlparse
+"""
+#
+# (C) Pywikibot team, 2011-2013
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import posixpath
+import urlparse
import urllib
-import hashlib, base64
+import hashlib
+import base64
import StringIO
try:
import json
except ImportError:
import simplejson as json
+import pywikibot
+import upload
+
class Photo(object):
- '''
+ """
Represents a Photo (or other file), with metadata, to upload to Commons.
The constructor takes two parameters: URL (string) and metadata (dict with str:str key:value pairs)
that can be referred to from the title & template generation.
-
- '''
+ """
def __init__(self, URL, metadata):
self.URL = URL
self.metadata = metadata
self.metadata["_url"] = URL
- self.metadata["_filename"] = filename = posixpath.split(urlparse.urlparse(URL)[2])[1]
+ self.metadata["_filename"] = filename = posixpath.split(
+ urlparse.urlparse(URL)[2])[1]
self.metadata["_ext"] = ext = filename.split(".")[-1]
if ext == filename:
self.metadata["_ext"] = ext = None
self.contents = None
def downloadPhoto(self):
- '''
+ """
Download the photo and store it in a StringIO.StringIO object.
TODO: Add exception handling
- '''
+
+ """
if not self.contents:
- imageFile=urllib.urlopen(self.URL).read()
+ imageFile = urllib.urlopen(self.URL).read()
self.contents = StringIO.StringIO(imageFile)
return self.contents
- def findDuplicateImages(self, site = pywikibot.getSite(u'commons', u'commons')):
- '''
- Takes the photo, calculates the SHA1 hash and asks the mediawiki api for a list of duplicates.
+ def findDuplicateImages(self,
+ site=pywikibot.getSite(u'commons', u'commons')):
+ """
+ Takes the photo, calculates the SHA1 hash and asks the mediawiki api
+ for a list of duplicates.
TODO: Add exception handling, fix site thing
- '''
+
+ """
hashObject = hashlib.sha1()
hashObject.update(self.downloadPhoto().getvalue())
return site.getFilesFromAnHash(base64.b16encode(hashObject.digest()))
def getTitle(self, fmt):
"""
- Given a format string with %(name)s entries, returns the string formatted with metadata
+ Given a format string with %(name)s entries, returns the string
+ formatted with metadata
+
"""
return fmt % self.metadata
def getDescription(self, template, extraparams={}):
- '''
- Generate a description for a file
- '''
+ """ Generate a description for a file """
params = {}
params.update(self.metadata)
@@ -72,13 +88,15 @@
for key in sorted(params.keys()):
value = params[key]
if not key.startswith("_"):
- description = description + (u'|%s=%s' % (key, self._safeTemplateValue(value))) + "\n"
- description = description + u'}}'
+ description += (u'|%s=%s'
+ % (key, self._safeTemplateValue(value))) + "\n"
+ description += u'}}'
return description
def _safeTemplateValue(self, value):
return value.replace("|", "{{!}}")
+
def CSVReader(fileobj, urlcolumn, *args, **kwargs):
import csv
@@ -88,30 +106,35 @@
yield Photo(line[urlcolumn], line)
-def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
- '''
+def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None,
+ fileurl=u'fileurl'):
+ """
Loops over a bunch of json page and process them with processJSONPage().
Will yield Photo objects with metadata
- '''
+
+ """
if baseurl:
- for i in range(start , end):
+ for i in range(start, end):
url = baseurl % (i,)
- photo = processJSONPage(url, JSONBase=JSONBase, metadataFunction=metadataFunction, fileurl=u'fileurl')
+ photo = processJSONPage(url, JSONBase=JSONBase,
+ metadataFunction=metadataFunction,
+ fileurl=u'fileurl')
if photo:
yield photo
-
-def processJSONPage(url, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
- '''
+def processJSONPage(url, JSONBase=None, metadataFunction=None,
+ fileurl=u'fileurl'):
+ """
Process a single JSON page.
For the JSON page you can rebase it to not get all the crap
You can apply a custom metadata function to do some modification on the metadata and checking
By default the field 'fileurl' is expected in the metadata to contain the file. You can change this.
Will a return Photo object with metadata or None if something is wrong
- '''
+
+ """
JSONPage = urllib.urlopen(url)
JSONData = json.load(JSONPage)
JSONPage.close()
@@ -130,17 +153,20 @@
if metadataFunction:
metadata = metadataFunction(metadata)
- # If the metadataFunction didn't return none (something was wrong). Return the photo
+ # If the metadataFunction didn't return none (something was wrong).
+ # Return the photo
if metadata:
return Photo(metadata.get(fileurl), metadata)
-
return False
+
def JSONRebase(JSONData, JSONBase):
- '''
+ """
Moves the base of the JSON object to the part you're intrested in.
- JSONBase is a list to crawl the tree. If one of the steps is not found, return None
- '''
+ JSONBase is a list to crawl the tree. If one of the steps is not found,
+ return None
+
+ """
for step in JSONBase:
if JSONData:
if type(JSONData) == dict:
@@ -148,21 +174,20 @@
elif type(JSONData) == list:
# FIXME: Needs error, length etc checking
JSONData = JSONData[step]
-
return JSONData
def JSONTree(metadata, fieldlist, record):
- '''
+ """
metadata: Dict with end result
key: The key we encountered
record: Record to work on
- '''
+ """
if type(record) == list:
for r in record:
metadata = JSONTree(metadata, fieldlist, r)
elif type(record) == dict:
- for k,v in record.items():
+ for k, v in record.items():
metadata = JSONTree(metadata, fieldlist + [k], v)
elif type(record) == unicode:
key = u'_'.join(fieldlist)
@@ -172,11 +197,13 @@
newkey = key + u'_2'
if not newkey in metadata:
metadata[newkey] = record
-
return metadata
+
class DataIngestionBot:
- def __init__(self, reader, titlefmt, pagefmt, site=pywikibot.getSite(u'commons', u'commons')):
+
+ def __init__(self, reader, titlefmt, pagefmt,
+ site=pywikibot.getSite(u'commons', u'commons')):
self.reader = reader
self.titlefmt = titlefmt
self.pagefmt = pagefmt
@@ -190,17 +217,16 @@
title = photo.getTitle(self.titlefmt)
description = photo.getDescription(self.pagefmt)
- bot = upload.UploadRobot(url = photo.URL,
- description = description,
- useFilename = title,
- keepFilename = True,
- verifyDescription = False,
+ bot = upload.UploadRobot(url=photo.URL,
+ description=description,
+ useFilename=title,
+ keepFilename=True,
+ verifyDescription=False,
ignoreWarning=True,
- targetSite = self.site)
+ targetSite=self.site)
bot._contents = photo.downloadPhoto().getvalue()
bot._retrieved = True
bot.run()
-
return title
def doSingle(self):
@@ -210,9 +236,12 @@
for photo in self.reader:
self._doUpload(photo)
-if __name__=="__main__":
+
+if __name__ == "__main__":
reader = CSVReader(open('tests/data/csv_ingestion.csv'), 'url')
- bot = DataIngestionBot(reader, "%(name)s - %(set)s.%(_ext)s", ":user:valhallasw/test_template", pywikibot.getSite('test', 'test'))
+ bot = DataIngestionBot(reader, "%(name)s - %(set)s.%(_ext)s",
+ ":user:valhallasw/test_template",
+ pywikibot.getSite('test', 'test'))
bot.run()
"""
--
To view, visit https://gerrit.wikimedia.org/r/103252
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icff281c4d659d40a527eeecac12de17afaef8201
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: jenkins-bot
Gerrit-Reviewer: saper <saper(a)saper.info>
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I0562f6b814e4d83f5094d4cd851354cb86aee493
---
M daemonize.py
1 file changed, 4 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/daemonize.py b/daemonize.py
index 121d3a3..0eb9ffb 100644
--- a/daemonize.py
+++ b/daemonize.py
@@ -1,16 +1,18 @@
# -*- coding: utf-8 -*-
#
-# (C) Pywikipedia bot team, 2007-2008, 2010
+# (C) Pywikibot team, 2007-2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
-import sys, os
+import sys
+import os
is_daemon = False
+
def daemonize(close_fd=True, chdir=True, write_pid=False, redirect_std=None):
""" Daemonize the current process. Only works on POSIX compatible operating
systems. The process will fork to the background and return control to
--
To view, visit https://gerrit.wikimedia.org/r/103250
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0562f6b814e4d83f5094d4cd851354cb86aee493
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot