Revision: 8372
Author: valhallasw
Date: 2010-07-29 14:04:50 +0000 (Thu, 29 Jul 2010)
Log Message:
-----------
Moving generate_family_file.py to correct location.
Added Paths:
-----------
trunk/pywikipedia/generate_family_file.py
Removed Paths:
-------------
pywikipedia/
Added: trunk/pywikipedia/generate_family_file.py
===================================================================
--- trunk/pywikipedia/generate_family_file.py (rev 0)
+++ trunk/pywikipedia/generate_family_file.py 2010-07-29 14:04:50 UTC (rev 8372)
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+"""
+This script generates a family file from a given URL.
+Hackish, etc. Regexps, yes. Sorry, jwz.
+
+"""
+__version__ = "$Id"
+
+#
+# (C) Merlijn van Deen, 2010
+# (C) Pywikipedia bot team, 2010
+#
+# Distributed under the terms of the MIT license
+#
+
+from urllib2 import HTTPError
+import urllib2
+
+def urlopen(url):
+ req = urllib2.Request(url, headers = {'User-agent': 'Pywikipedia family
generator 0.1 - pywikipediabot.sf.net'})
+ return urllib2.urlopen(req)
+
+from urlparse import urlparse, ParseResult
+import codecs
+import sys
+import re
+try:
+ import json
+except ImportError:
+ import simplejson as json
+
+# Monkey-patching wikipediatools to prevent questions about user_config.py
+# get_base_dir is replaced by a function that always returns the current
+# directory ('.').
+# NOTE(review): presumably this has to run before `import family`, which
+# would explain why these imports are not grouped with the others - confirm.
+import wikipediatools
+wikipediatools.get_base_dir = lambda: '.'
+import family
+# Namespace table of a freshly constructed base Family; main() compares the
+# namespace names reported by each wiki against it.
+STANDARDNAMESPACES = family.Family().namespaces
+
+def main(url=None, name=None):
+ if url == None:
+ url = raw_input("Please insert URL to wiki: ")
+ if name == None:
+ name = raw_input("Please insert a short name (eg: freeciv): ")
+
+ wikis = {}
+ print "Generating family file from %s" % url
+
+ w = Wiki(url)
+ wikis[w.iwpath] = w
+ print
+ print "=================================="
+ print "api url: %s" % w.api
+ print "MediaWiki version: %s" % w.version
+ print "=================================="
+ print
+
+ print "Determining other languages...",
+ try:
+ iw = json.load(urlopen(w.api +
"?action=query&meta=siteinfo&siprop=interwikimap&sifilteriw=local&format=json"))
+ langs = [wiki for wiki in iw['query']['interwikimap'] if
u'language' in wiki]
+ print u' '.join(sorted([wiki[u'prefix'] for wiki in langs]))
+
+ if raw_input("\nThere are %i languages available.\nDo you want to generate
interwiki links? This might take a long time. (y/N)" % len(langs)).lower() !=
"y":
+ langs = [wiki for wiki in langs if wiki[u'url'] == w.iwpath]
+ except HTTPError, e:
+ langs = []
+ print e, "; continuing..."
+
+ if langs == []:
+ print "Assuming English"
+ langs = [{u'language': u'English',
+ u'local': u'',
+ u'prefix': u'en',
+ u'url': w.iwpath}]
+
+ print "Loading wikis... "
+ for lang in langs:
+ print " * %s... " % (lang[u'prefix']),
+ if lang[u'url'] not in wikis:
+ wikis[lang[u'url']] = Wiki(lang[u'url'])
+ print "downloaded"
+ else:
+ print "in cache"
+
+ print "Retrieving namespaces... ",
+ namespaces = {}
+ for w in wikis.itervalues():
+ print "%s " % w.lang,
+ ns = json.load(urlopen(w.api +
"?action=query&meta=siteinfo&siprop=namespaces&format=json"))['query']['namespaces']
+ for namespace in ns:
+ if namespace == '0':
+ continue
+ if int(namespace) not in namespaces:
+ namespaces[int(namespace)] = {}
+
+ # Better method? You're very welcome.
+ try:
+ if STANDARDNAMESPACES[int(namespace)]['_default'] !=
ns[namespace][u'*'] and \
+ STANDARDNAMESPACES[int(namespace)][w.lang] !=
ns[namespace][u'*']:
+ raise KeyError # if the namespace name is different, act if it is
undefined
+ except KeyError:
+ namespaces[int(namespace)][w.lang] = ns[namespace][u'*']
+ print
+
+ fn = "families/%s_family.py" % name
+ print "Writing %s... " % fn
+ try:
+ open(fn)
+ if raw_input("%s already exists. Overwrite? (y/n)").lower() ==
'n':
+ print "Terminating."
+ sys.exit(1)
+ except IOError: # file not found
+ pass
+ f = codecs.open('families/%s_family.py' % name, 'w',
'utf-8')
+
+ f.write("""
+# -*- coding: utf-8 -*-
+\"\"\"
+This family file was auto-generated by $Id: generate_family_file.py 8371 2010-07-29
13:29:26Z valhallasw $
+Configuration parameters:
+ url = %(url)s
+ name = %(name)s
+
+Please do not commit this to the SVN repository!
+\"\"\"
+
+import family
+
+class Family(family.Family):
+ def __init__(self):
+ family.Family.__init__(self)
+ self.name = '%(name)s'
+ self.langs = {
+""".lstrip() % {'url': url, 'name': name})
+
+ for w in wikis.itervalues():
+ f.write(" '%(lang)s': u'%(hostname)s',\n" %
{'lang': w.lang, 'hostname': urlparse(w.server).netloc})
+
+ f.write(" }\n\n")
+
+ for nsid, nslangs in namespaces.iteritems():
+ for lang, nsname in nslangs.iteritems():
+ f.write(" self.namespaces[%(nsid)i]['%(lang)s'] =
u'%(nsname)s'\n" % {'nsid': nsid, 'lang': lang,
'nsname': nsname})
+ f.write("\n\n")
+
+ f.write(" def scriptpath(self, code):\n")
+ f.write(" return {\n")
+
+ for w in wikis.itervalues():
+ f.write(" '%(lang)s': u'%(path)s',\n" %
{'lang': w.lang, 'path': w.scriptpath})
+ f.write(" }[code]\n")
+ f.write("\n")
+
+ f.write(" def version(self, code):\n")
+ f.write(" return {\n")
+ for w in wikis.itervalues():
+ if w.version == None:
+ f.write(" '%(lang)s': None,\n" %
{'lang': w.lang})
+ else:
+ f.write(" '%(lang)s': u'%(ver)s',\n" %
{'lang': w.lang, 'ver': w.version})
+ f.write(" }[code]\n")
+
+
+class Wiki(object):
+ REwgEnableApi = re.compile(ur'wgEnableAPI ?= ?true')
+ REwgServer = re.compile(ur'wgServer ?= ?"([^"]*)"')
+ REwgScriptPath = re.compile(ur'wgScriptPath ?= ?"([^"]*)"')
+ REwgArticlePath = re.compile(ur'wgArticlePath ?= ?"([^"]*)"')
+ REwgContentLanguage = re.compile(ur'wgContentLanguage ?=
?"([^"]*)"')
+ REwgVersion = re.compile(ur'wgVersion ?= ?"([^"]*)"')
+
+ def __init__(self, fromurl):
+ if fromurl.endswith("$1"):
+ fromurl = fromurl[:-2]
+ try:
+ data = urlopen(fromurl).read()
+ except HTTPError, e:
+ if e.code != 404:
+ raise
+ data = e.read()
+ pass
+
+ if not self.REwgEnableApi.search(data):
+ print "*** WARNING: Api does not seem to be enabled on %s" %
fromurl
+ try:
+ self.version = self.REwgVersion.search(data).groups()[0]
+ except AttributeError:
+ self.version = None
+ self.server = self.REwgServer.search(data).groups()[0]
+ self.scriptpath = self.REwgScriptPath.search(data).groups()[0]
+ self.articlepath = self.REwgArticlePath.search(data).groups()[0]
+ self.lang = self.REwgContentLanguage.search(data).groups()[0]
+
+ def __cmp__(self, other):
+ return (self.server + self.scriptpath == other.server + other.scriptpath)
+
+ def __hash__(self):
+ return hash(self.server + self.scriptpath)
+
+ @property
+ def api(self):
+ return self.server + self.scriptpath + "/api.php"
+
+ @property
+ def iwpath(self):
+ return self.server + self.articlepath
+
+
+if __name__ == "__main__":
+ if len(sys.argv) != 3:
+ print "Usage: %s <url> <short name>"
+ print "Example: %s
http://www.mywiki.bogus/wiki/Main_Page mywiki"
+ print "This will create the file families/mywiki_family.py"
+ main(sys.argv[1], sys.argv[2])