http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10843
Revision: 10843
Author: xqt
Date: 2012-12-30 10:25:06 +0000 (Sun, 30 Dec 2012)
Log Message:
-----------
use_hard_category_redirects property for future update from rw r10836
Modified Paths:
--------------
branches/rewrite/pywikibot/family.py
trunk/pywikipedia/family.py
Modified: branches/rewrite/pywikibot/family.py
===================================================================
--- branches/rewrite/pywikibot/family.py 2012-12-29 20:20:42 UTC (rev 10842)
+++ branches/rewrite/pywikibot/family.py 2012-12-30 10:25:06 UTC (rev 10843)
@@ -546,6 +546,7 @@
self.category_redirect_templates = {
'_default': []
}
+
# A list of languages that use hard (instead of soft) category redirects
self.use_hard_category_redirects = []
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2012-12-29 20:20:42 UTC (rev 10842)
+++ trunk/pywikipedia/family.py 2012-12-30 10:25:06 UTC (rev 10843)
@@ -3677,6 +3677,9 @@
'_default': []
}
+ # A list of languages that use hard (instead of soft) category redirects
+ self.use_hard_category_redirects = []
+
# A list of disambiguation template names in different languages
self.disambiguationTemplates = {
'_default': []
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10841
Revision: 10841
Author: multichill
Date: 2012-12-29 15:54:22 +0000 (Sat, 29 Dec 2012)
Log Message:
-----------
Split JSONReader
Modified Paths:
--------------
trunk/pywikipedia/data_ingestion.py
Modified: trunk/pywikipedia/data_ingestion.py
===================================================================
--- trunk/pywikipedia/data_ingestion.py 2012-12-29 14:21:28 UTC (rev 10840)
+++ trunk/pywikipedia/data_ingestion.py 2012-12-29 15:54:22 UTC (rev 10841)
@@ -86,38 +86,52 @@
def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
'''
- Loops over a bunch of json objects.
- For each json page you can rebase it to not get all the crap
+ Loops over a bunch of json page and process them with processJSONPage().
+
+ Will yield Photo objects with metadata
+ '''
+ if baseurl:
+ for i in range(start , end):
+ url = baseurl % (i,)
+ photo = processJSONPage(url, JSONBase=JSONBase, metadataFunction=metadataFunction, fileurl=u'fileurl')
+ if photo:
+ yield photo
+
+
+
+def processJSONPage(url, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
+ '''
+ Process a single JSON page.
+ For the JSON page you can rebase it to not get all the crap
You can apply a custom metadata function to do some modification on the metadata and checking
By default the field 'fileurl' is expected in the metadata to contain the file. You can change this.
- Will a Photo object with metadata
+ Will a return Photo object with metadata or None if something is wrong
'''
- if baseurl:
- for i in range(start , end):
- # How to do recursion?
- JSONPage = urllib.urlopen(baseurl % (i,))
- JSONData = json.load(JSONPage)
- JSONPage.close()
+ JSONPage = urllib.urlopen(url)
+ JSONData = json.load(JSONPage)
+ JSONPage.close()
- # Rebase based on jsonBase
- if JSONBase:
- JSONData = JSONRebase(JSONData, JSONBase)
+ # Rebase based on jsonBase
+ if JSONBase:
+ JSONData = JSONRebase(JSONData, JSONBase)
- if JSONData:
- # If rebasing worked, get the metadata
- metadata = dict()
- fieldlist = [u'']
- metadata = JSONTree(metadata, [], JSONData)
+ if JSONData:
+ # If rebasing worked, get the metadata
+ metadata = dict()
+ fieldlist = [u'']
+ metadata = JSONTree(metadata, [], JSONData)
- # If a metadataFunction is set, apply it
- if metadataFunction:
- metadata = metadataFunction(metadata)
+ # If a metadataFunction is set, apply it
+ if metadataFunction:
+ metadata = metadataFunction(metadata)
- # If the metadataFunction didn't return none (something was wrong). Yield the photo
- if metadata:
- yield Photo(metadata.get(fileurl), metadata)
+ # If the metadataFunction didn't return none (something was wrong). Return the photo
+ if metadata:
+ return Photo(metadata.get(fileurl), metadata)
+ return False
+
def JSONRebase(JSONData, JSONBase):
'''
Moves the base of the JSON object to the part you're intrested in.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10840
Revision: 10840
Author: multichill
Date: 2012-12-29 14:21:28 +0000 (Sat, 29 Dec 2012)
Log Message:
-----------
Add JSON support. This is used for the RCE batch upload
Modified Paths:
--------------
trunk/pywikipedia/data_ingestion.py
Modified: trunk/pywikipedia/data_ingestion.py
===================================================================
--- trunk/pywikipedia/data_ingestion.py 2012-12-29 13:16:16 UTC (rev 10839)
+++ trunk/pywikipedia/data_ingestion.py 2012-12-29 14:21:28 UTC (rev 10840)
@@ -4,11 +4,11 @@
A generic bot to do data ingestion (batch uploading) to Commons
'''
-import pywikibot
+import pywikibot, upload
import posixpath, urlparse
import urllib
import hashlib, base64
-import StringIO
+import StringIO, json
class Photo(object):
'''
@@ -64,7 +64,7 @@
params = {}
params.update(self.metadata)
params.update(extraparams)
- description = u'{{%s\n' % template
+ description = u'{{subst:%s|subst=subst:\n' % template
for key in sorted(params.keys()):
value = params[key]
if not key.startswith("_"):
@@ -83,6 +83,80 @@
for line in reader:
yield Photo(line[urlcolumn], line)
+
+def JSONReader(baseurl, start=0, end=100, JSONBase=None, metadataFunction=None, fileurl=u'fileurl'):
+ '''
+ Loops over a bunch of json objects.
+ For each json page you can rebase it to not get all the crap
+ You can apply a custom metadata function to do some modification on the metadata and checking
+ By default the field 'fileurl' is expected in the metadata to contain the file. You can change this.
+
+ Will a Photo object with metadata
+ '''
+ if baseurl:
+ for i in range(start , end):
+ # How to do recursion?
+ JSONPage = urllib.urlopen(baseurl % (i,))
+ JSONData = json.load(JSONPage)
+ JSONPage.close()
+
+ # Rebase based on jsonBase
+ if JSONBase:
+ JSONData = JSONRebase(JSONData, JSONBase)
+
+ if JSONData:
+ # If rebasing worked, get the metadata
+ metadata = dict()
+ fieldlist = [u'']
+ metadata = JSONTree(metadata, [], JSONData)
+
+ # If a metadataFunction is set, apply it
+ if metadataFunction:
+ metadata = metadataFunction(metadata)
+
+ # If the metadataFunction didn't return none (something was wrong). Yield the photo
+ if metadata:
+ yield Photo(metadata.get(fileurl), metadata)
+
+def JSONRebase(JSONData, JSONBase):
+ '''
+ Moves the base of the JSON object to the part you're intrested in.
+ JSONBase is a list to crawl the tree. If one of the steps is not found, return None
+ '''
+ for step in JSONBase:
+ if JSONData:
+ if type(JSONBase) == dict:
+ JSONData = JSONData.get(step)
+ elif type(JSONBase) == list:
+ # FIXME: Needs error, length etc checking
+ JSONData = JSONData[step]
+
+ return JSONData
+
+
+def JSONTree(metadata, fieldlist, record):
+ '''
+ metadata: Dict with end result
+ key: The key we encountered
+ record: Record to work on
+ '''
+ if type(record) == list:
+ for r in record:
+ metadata = JSONTree(metadata, fieldlist, r)
+ elif type(record) == dict:
+ for k,v in record.items():
+ metadata = JSONTree(metadata, fieldlist + [k], v)
+ elif type(record) == unicode:
+ key = u'_'.join(fieldlist)
+ if not key in metadata:
+ metadata[key] = record
+ else:
+ newkey = key + u'_2'
+ if not newkey in metadata:
+ metadata[newkey] = record
+
+ return metadata
+
class DataIngestionBot:
def __init__(self, reader, titlefmt, pagefmt, site=pywikibot.getSite(u'commons', u'commons')):
self.reader = reader
@@ -93,7 +167,6 @@
def _doUpload(self, photo):
duplicates = photo.findDuplicateImages(self.site)
if duplicates:
- pywikibot.output(u"Skipping duplicate of %r" % (duplicates, ))
return duplicates[0]
title = photo.getTitle(self.titlefmt)
@@ -104,6 +177,7 @@
useFilename = title,
keepFilename = True,
verifyDescription = False,
+ ignoreWarning=True,
targetSite = self.site)
bot._contents = photo.downloadPhoto().getvalue()
bot._retrieved = True
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10838
Revision: 10838
Author: drtrigon
Date: 2012-12-29 12:25:33 +0000 (Sat, 29 Dec 2012)
Log Message:
-----------
more docu and usage info
Modified Paths:
--------------
trunk/pywikipedia/subster.py
Modified: trunk/pywikipedia/subster.py
===================================================================
--- trunk/pywikipedia/subster.py 2012-12-29 02:34:17 UTC (rev 10837)
+++ trunk/pywikipedia/subster.py 2012-12-29 12:25:33 UTC (rev 10838)
@@ -18,6 +18,28 @@
- subster_irc.py IRC Robot
- substersim.py Subster simulation panel
- subster_mail_queue.py Subster mail queue
+
+The following parameters are supported:
+
+&params;
+
+All other parameters will be ignored.
+
+Syntax example:
+ python subster.py
+ Default operating mode.
+
+ python subster.py -lang:en
+ Run bot on another site language than configured as default. E.g. 'en'.
+
+ python subster.py -family:meta -lang:
+ python subster.py -family:wikidata -lang:repo
+ Run bot on another site family and language than configured as default.
+ E.g. 'meta' or 'wikidata'.
+
+ python subster_irc.py
+ Default operating mode for IRC Robot. The IRC bot uses this script as
+ subclass.
"""
## @package subster
# @brief Dynamic Text Substitutions Robot
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10836
Revision: 10836
Author: russblau
Date: 2012-12-28 15:13:29 +0000 (Fri, 28 Dec 2012)
Log Message:
-----------
Create use_hard_category_redirects property for future use; see [[commons:Template talk:Category redirect]] for a proposal to replace category redirect templates with hard redirects.
Modified Paths:
--------------
branches/rewrite/pywikibot/family.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/family.py
===================================================================
--- branches/rewrite/pywikibot/family.py 2012-12-28 02:13:01 UTC (rev 10835)
+++ branches/rewrite/pywikibot/family.py 2012-12-28 15:13:29 UTC (rev 10836)
@@ -546,6 +546,8 @@
self.category_redirect_templates = {
'_default': []
}
+ # A list of languages that use hard (instead of soft) category redirects
+ self.use_hard_category_redirects = []
# A list of disambiguation template names in different languages
self.disambiguationTemplates = {
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2012-12-28 02:13:01 UTC (rev 10835)
+++ branches/rewrite/pywikibot/site.py 2012-12-28 15:13:29 UTC (rev 10836)
@@ -148,6 +148,8 @@
user = None if user is None else user[0].upper() + user[1:]
sysop = None if sysop is None else sysop[0].upper() + sysop[1:]
self._username = [user, sysop]
+ self.use_hard_category_redirects = \
+ self.code in self.family.use_hard_category_redirects
# following are for use with lock_page and unlock_page methods
self._pagemutex = threading.Lock()