http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11295
Revision: 11295
Author: xqt
Date: 2013-03-29 17:23:54 +0000 (Fri, 29 Mar 2013)
Log Message:
-----------
use new wikistats from wmflabs
enable wikivoyage for this check,
re-insert missing language codes
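
For context, the change swaps the old s23.org page scrape for the XML dump served by wikistats on wmflabs. A minimal standalone sketch of the same query, assuming the endpoint still answers action=dump as used in this revision (the 'wikivoyage' table name is taken from the familiesDict mapping below; any other family table works the same way):

import urllib
from xml.etree import cElementTree

URL = 'http://wikistats.wmflabs.org/api.php?action=dump&table=%s&format=xml'

# Fetch the dump for one family and collect the language prefixes,
# the same way the rewritten script does below.
feed = urllib.urlopen(URL % 'wikivoyage')
tree = cElementTree.parse(feed)
codes = [field.text for field in tree.findall('row/field')
         if field.get('name') == 'prefix']
print codes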
Modified Paths:
--------------
trunk/pywikipedia/maintenance/wikimedia_sites.py
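
The "re-insert missing language codes" part keeps codes that wikistats no longer reports at their original position in languages_by_size instead of appending them at the end, as the old code did. The index-preserving insert added in the diff below can be tried in isolation; the lists here are toy data, not real wiki codes:

# Toy example: 'nds' is known to the family file but absent from the
# wikistats answer, so it has to go back to its old slot.
original = ['en', 'de', 'nds', 'fr', 'it']
new = ['en', 'de', 'fr', 'it']

missing = set(original) - set(new)
index = {}
for code in missing:
    index[original.index(code)] = code      # remember each old position
i = len(index) - 1
for key in sorted(index.keys(), reverse=True):
    new.insert(key - i, index[key])         # put the code back at that offset
    i -= 1
print new                                   # ['en', 'de', 'nds', 'fr', 'it']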
Modified: trunk/pywikipedia/maintenance/wikimedia_sites.py
===================================================================
--- trunk/pywikipedia/maintenance/wikimedia_sites.py 2013-03-29 15:23:00 UTC (rev 11294)
+++ trunk/pywikipedia/maintenance/wikimedia_sites.py 2013-03-29 17:23:54 UTC (rev 11295)
@@ -4,76 +4,87 @@
against the language lists
"""
#
-# (C) Pywikipedia bot team, 2008-2010
+# (C) xqt, 2009-2013
+# (C) Pywikipedia bot team, 2008-2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
-import sys, re
+import sys
+import re
+import codecs
+import urllib
+from xml.etree import cElementTree
sys.path.insert(1, '..')
import pywikibot
-import codecs
+URL = 'http://wikistats.wmflabs.org/api.php?action=dump&table=%s&format=xml'
+
familiesDict = {
- 'wikipedia': 'wikipedias_wiki.php',
- 'wiktionary': 'wiktionaries_wiki.php',
- 'wikiquote': 'wikiquotes_wiki.php',
- 'wikisource': 'wikisources_wiki.php',
- 'wikibooks': 'wikibooks_wiki.php',
- 'wikinews': 'wikinews_wiki.php',
- 'wikiversity':'wikiversity_wiki.php',
+ 'wikibooks': 'wikibooks',
+ 'wikinews': 'wikinews',
+ 'wikipedia': 'wikipedias',
+ 'wikiquote': 'wikiquotes',
+ 'wikisource': 'wikisources',
+ 'wikiversity': 'wikiversity',
+ 'wikivoyage': 'wikivoyage',
+ 'wiktionary': 'wiktionaries',
}
+
exceptions = ['www']
+
def update_family(families):
    if not families:
        families = familiesDict.keys()
    for family in families:
- pywikibot.output('Checking family %s:' % family)
+ pywikibot.output('\nChecking family %s:' % family)
        original = pywikibot.Family(family).languages_by_size
        obsolete = pywikibot.Family(family).obsolete
- url = 'http://s23.org/wikistats/%s' % familiesDict[family]
- uo = pywikibot.MyURLopener
- f = uo.open(url)
- text = f.read()
+        feed = urllib.urlopen(URL % familiesDict[family])
+        tree = cElementTree.parse(feed)
- if family == 'wikipedia':
- p = re.compile(r"\[\[:([a-z\-]{2,}):\|\1\]\].*?'''([0-9,]{1,})'''</span>\]", re.DOTALL)
- else:
- p = re.compile(r"\[http://([a-z\-]{2,}).%s.org/wiki/ \1].*?'''([0-9,]{1,})'''\]" % family, re.DOTALL)
-
        new = []
- for lang, cnt in p.findall(text):
- if lang in obsolete or lang in exceptions:
- # Ignore this language
+        for field in tree.findall('row/field'):
+            if field.get('name') == 'prefix':
+                code = field.text
+                if not (code in obsolete or code in exceptions):
+                    new.append(code)
                continue
- new.append(lang)
+
+ # put the missing languages to the right place
+ missing = original != new and set(original) - set(new)
+ if missing:
+ pywikibot.output(u"WARNING: ['%s'] not listed at wikistats."
+ % "', '".join(missing))
+ index = {}
+ for code in missing:
+ index[original.index(code)] = code
+ i = len(index) - 1
+ for key in sorted(index.keys(), reverse=True):
+ new.insert(key - i, index[key])
+ i -= 1
+
        if original == new:
            pywikibot.output(u'The lists match!')
        else:
            pywikibot.output(u"The lists don't match, the new list is:")
- missing = set(original) - set(new)
- new += missing
            text = u'        self.languages_by_size = [\r\n'
            line = ' ' * 11
- for lang in new:
- if len(line)+len(lang) <= 76:
- line += u" '%s'," % lang
+ for code in new:
+ if len(line) + len(code) <= 76:
+ line += u" '%s'," % code
                else:
                    text += u'%s\r\n' % line
                    line = ' ' * 11
- line += u" '%s'," % lang
+ line += u" '%s'," % code
            text += u'%s\r\n' % line
            text += u'        ]'
            pywikibot.output(text)
-            if missing:
-                pywikibot.output(u"WARNING: ['%s'] not listed at wikistats.\n"
-                                 u"Now listed as last item\n"
-                                 % "', '".join(missing))
        family_file_name = '../families/%s_family.py' % family
        family_file = codecs.open(family_file_name, 'r', 'utf8')
        old_text = family_text = family_file.read()
@@ -84,6 +95,7 @@
            family_file.write(family_text)
            family_file.close()
+
if __name__ == '__main__':
    try:
        fam = []