Revision: 7911
Author: xqt
Date: 2010-02-05 17:36:47 +0000 (Fri, 05 Feb 2010)
Log Message:
-----------
wikimedia_sites.py writes self.languages_by_size directly into the family file
Modified Paths:
--------------
trunk/pywikipedia/families/wikipedia_family.py
trunk/pywikipedia/families/wikiquote_family.py
trunk/pywikipedia/maintenance/wikimedia_sites.py
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2010-02-05 09:03:56 UTC (rev 7910)
+++ trunk/pywikipedia/families/wikipedia_family.py 2010-02-05 17:36:47 UTC (rev 7911)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
import family
__version__ = '$Id$'
@@ -17,7 +17,7 @@
'lt', 'bg', 'fa', 'sl', 'hr',
'et', 'new', 'ms', 'simple', 'th',
'gl', 'nn', 'hi', 'ht', 'eu',
'el', 'te', 'ceb', 'mk', 'ka',
'la', 'br', 'az', 'bs', 'lb',
'sh', 'is', 'mr', 'cy', 'sq',
-            'lv', 'bpy', 'jv', 'tl', 'pms', 'be-x-old', 'bn', 'ta', 'oc', 'an',
+            'lv', 'bpy', 'jv', 'tl', 'pms', 'be-x-old', 'ta', 'bn', 'oc', 'an',
'io', 'be', 'sw', 'nds', 'scn',
'fy', 'su', 'qu', 'af', 'zh-yue',
'nap', 'ast', 'ku', 'gu', 'ur',
'bat-smg', 'ml', 'war', 'wa', 'cv',
'ksh', 'ga', 'tg', 'roa-tara', 'vec',
'lmo', 'kn', 'gd', 'uz', 'pam',
@@ -26,13 +26,13 @@
'fiu-vro', 'mn', 'nds-nl', 'vls',
'tk', 'sa', 'fo', 'am', 'nrm', 'dv',
'pag', 'rm', 'map-bms', 'wuu', 'ne',
'gv', 'bar', 'pnb', 'my', 'sco',
'diq', 'se', 'fur', 'lij', 'si',
'nov', 'mt', 'bh', 'mzn', 'csb',
-            'ilo', 'pi', 'zh-classical', 'km', 'lad', 'ug', 'sc', 'frp', 'mg', 'ang',
+            'ilo', 'pi', 'zh-classical', 'ug', 'km', 'lad', 'sc', 'frp', 'mg', 'ang',
'kw', 'haw', 'pdc', 'szl', 'ps',
'hif', 'ckb', 'bo', 'pa', 'kv',
-            'ie', 'to', 'hak', 'crh', 'myv', 'gn', 'stq', 'ln', 'nv', 'mhr',
-            'jbo', 'arc', 'ace', 'ky', 'ext', 'wo', 'tpi', 'ty', 'cbk-zam', 'so',
-            'eml', 'zea', 'srn', 'ay', 'pap', 'kab', 'ig', 'kg', 'ba', 'or',
+            'ie', 'to', 'hak', 'crh', 'myv', 'gn', 'stq', 'ln', 'mhr', 'nv',
+            'ace', 'jbo', 'arc', 'ky', 'ext', 'wo', 'tpi', 'ty', 'cbk-zam', 'so',
+            'eml', 'zea', 'srn', 'ay', 'pap', 'ig', 'kab', 'kg', 'ba', 'or',
'lo', 'udm', 'dsb', 'rmy', 'cu',
'kaa', 'sm', 'ab', 'ce', 'xal',
-            'av', 'ks', 'tet', 'got', 'sd', 'mdf', 'na', 'kl', 'pnt', 'iu',
+            'av', 'ks', 'tet', 'got', 'sd', 'mdf', 'kl', 'na', 'pnt', 'iu',
'bm', 'pih', 'as', 'mwl', 'pcd',
'cdo', 'om', 'chr', 'ee', 'zu',
'ti', 'za', 'ts', 'ss', 've',
'bi', 'ha', 'dz', 'bxr', 'ch',
'cr', 'bug', 'xh', 'tn', 'ki',
'ik', 'rw', 'sg', 'st', 'ny',
Modified: trunk/pywikipedia/families/wikiquote_family.py
===================================================================
--- trunk/pywikipedia/families/wikiquote_family.py 2010-02-05 09:03:56 UTC (rev 7910)
+++ trunk/pywikipedia/families/wikiquote_family.py 2010-02-05 17:36:47 UTC (rev 7911)
@@ -15,9 +15,9 @@
'sl', 'tr', 'fr', 'he', 'lt',
'cs', 'zh', 'el', 'hu', 'fa',
'uk', 'id', 'sv', 'nl', 'no',
'ja', 'eo', 'fi', 'simple', 'hy',
'nn', 'et', 'ca', 'ka', 'ar',
'ko', 'ku', 'hr', 'gl', 'ro',
-            'sr', 'ml', 'li', 'is', 'th', 'te', 'cy', 'af', 'da', 'az',
+            'sr', 'ml', 'cy', 'li', 'is', 'th', 'te', 'af', 'da', 'az',
'sq', 'eu', 'vi', 'la', 'br',
'hi', 'be', 'ast', 'uz', 'ta',
-            'ang', 'zh-min-nan', 'mr', 'gu', 'lb', 'su', 'ur', 'wo', 'ky', 'kn',
+            'ang', 'zh-min-nan', 'mr', 'lb', 'gu', 'su', 'ur', 'wo', 'kn', 'ky',
'kk', 'am', 'co',
]
Modified: trunk/pywikipedia/maintenance/wikimedia_sites.py
===================================================================
--- trunk/pywikipedia/maintenance/wikimedia_sites.py 2010-02-05 09:03:56 UTC (rev 7910)
+++ trunk/pywikipedia/maintenance/wikimedia_sites.py 2010-02-05 17:36:47 UTC (rev 7911)
@@ -1,10 +1,18 @@
-# This script checks the language list of each Wikimedia multiple-language site
-# against the language lists
+"""
+This script checks the language list of each Wikimedia multiple-language site
+against the language lists
+"""
+#
+# (C) Pywikipedia bot team, 2008-2010
+#
+# Distributed under the terms of the MIT license.
+#
import sys, re
sys.path.append('..')
import wikipedia
+import codecs
families = ['wikipedia', 'wiktionary', 'wikiquote',
'wikisource', 'wikibooks', 'wikinews', 'wikiversity']
familiesDict = {
@@ -14,48 +22,64 @@
'wikisource': 'wikisources_wiki.php',
'wikibooks': 'wikibooks_wiki.php',
'wikinews': 'wikinews_wiki.php',
- 'wikiversity': 'wikiversity_wiki.php',
+ 'wikiversity':'wikiversity_wiki.php',
}
exceptions = ['www']
-for family in families:
- wikipedia.output('Checking family %s:' % family)
+def update_family():
+ for family in families:
+ wikipedia.output('Checking family %s:' % family)
- original = wikipedia.Family(family).languages_by_size
- obsolete = wikipedia.Family(family).obsolete
+ original = wikipedia.Family(family).languages_by_size
+ obsolete = wikipedia.Family(family).obsolete
- url = 'http://s23.org/wikistats/%s' % familiesDict[family]
- uo = wikipedia.MyURLopener
- f = uo.open(url)
- text = f.read()
+ url = 'http://s23.org/wikistats/%s' % familiesDict[family]
+ uo = wikipedia.MyURLopener
+ f = uo.open(url)
+ text = f.read()
-    if family == 'wikipedia':
-        p = re.compile(r"\[\[:([a-z\-]{2,}):\|\1\]\].*?'''([0-9,]{1,})'''</span>\]", re.DOTALL)
-    else:
-        p = re.compile(r"\[http://([a-z\-]{2,}).%s.org/wiki/ \1].*?'''([0-9,]{1,})'''\]" % family, re.DOTALL)
+        if family == 'wikipedia':
+            p = re.compile(r"\[\[:([a-z\-]{2,}):\|\1\]\].*?'''([0-9,]{1,})'''</span>\]", re.DOTALL)
+        else:
+            p = re.compile(r"\[http://([a-z\-]{2,}).%s.org/wiki/ \1].*?'''([0-9,]{1,})'''\]" % family, re.DOTALL)
- new = []
- for lang, cnt in p.findall(text):
- if lang in obsolete or lang in exceptions:
- # Ignore this language
- continue
- new.append(lang)
- if original == new:
- wikipedia.output(u'The lists match!')
- else:
- wikipedia.output(u"The lists don't match, the new list is:")
- wikipedia.output(u' self.languages_by_size = [')
- line = ' '
- index = 0
- for lang in new:
- index += 1
- if index > 1:
- line += u' '
- line += u"'%s'," % lang
- if index == 10:
- wikipedia.output(u'%s' % line)
- line = ' '
- index = 0
- if index > 0:
- wikipedia.output(u'%s' % line)
- wikipedia.output(u' ]')
+ new = []
+ for lang, cnt in p.findall(text):
+ if lang in obsolete or lang in exceptions:
+ # Ignore this language
+ continue
+ new.append(lang)
+ if original == new:
+ wikipedia.output(u'The lists match!')
+ else:
+ wikipedia.output(u"The lists don't match, the new list is:")
+ text = u' self.languages_by_size = [\r\n'
+ line = ' '
+ index = 0
+ for lang in new:
+ index += 1
+ if index > 1:
+ line += u' '
+ line += u"'%s'," % lang
+ if index == 10:
+ text += u'%s\r\n' % line
+ line = ' '
+ index = 0
+ if index > 0:
+ text += u'%s\r\n' % line
+ text += u' ]'
+ wikipedia.output(text)
+ family_file_name = '../families/%s_family.py' % family
+ family_file = codecs.open(family_file_name, 'r', 'utf8')
+ old_text = family_text = family_file.read()
+        old = re.findall(ur'(?msu)^ {8}self.languages_by_size.+?\]', family_text)[0]
+ family_text = family_text.replace(old, text)
+ family_file = codecs.open(family_file_name, 'w', 'utf8')
+ family_file.write(family_text)
+ family_file.close()
+
+if __name__ == '__main__':
+ try:
+ update_family()
+ finally:
+ wikipedia.stopme()