Revision: 7911
Author:   xqt
Date:     2010-02-05 17:36:47 +0000 (Fri, 05 Feb 2010)
Log Message:
-----------
wikimedia_sites.py writes self.languages_by_size directly into the family file
Modified Paths:
--------------
    trunk/pywikipedia/families/wikipedia_family.py
    trunk/pywikipedia/families/wikiquote_family.py
    trunk/pywikipedia/maintenance/wikimedia_sites.py
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py	2010-02-05 09:03:56 UTC (rev 7910)
+++ trunk/pywikipedia/families/wikipedia_family.py	2010-02-05 17:36:47 UTC (rev 7911)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import family
 
 __version__ = '$Id$'
@@ -17,7 +17,7 @@
             'lt', 'bg', 'fa', 'sl', 'hr', 'et', 'new', 'ms', 'simple', 'th',
             'gl', 'nn', 'hi', 'ht', 'eu', 'el', 'te', 'ceb', 'mk', 'ka',
             'la', 'br', 'az', 'bs', 'lb', 'sh', 'is', 'mr', 'cy', 'sq',
-            'lv', 'bpy', 'jv', 'tl', 'pms', 'be-x-old', 'bn', 'ta', 'oc', 'an',
+            'lv', 'bpy', 'jv', 'tl', 'pms', 'be-x-old', 'ta', 'bn', 'oc', 'an',
             'io', 'be', 'sw', 'nds', 'scn', 'fy', 'su', 'qu', 'af', 'zh-yue',
             'nap', 'ast', 'ku', 'gu', 'ur', 'bat-smg', 'ml', 'war', 'wa', 'cv',
             'ksh', 'ga', 'tg', 'roa-tara', 'vec', 'lmo', 'kn', 'gd', 'uz', 'pam',
@@ -26,13 +26,13 @@
             'fiu-vro', 'mn', 'nds-nl', 'vls', 'tk', 'sa', 'fo', 'am', 'nrm', 'dv',
             'pag', 'rm', 'map-bms', 'wuu', 'ne', 'gv', 'bar', 'pnb', 'my', 'sco',
             'diq', 'se', 'fur', 'lij', 'si', 'nov', 'mt', 'bh', 'mzn', 'csb',
-            'ilo', 'pi', 'zh-classical', 'km', 'lad', 'ug', 'sc', 'frp', 'mg', 'ang',
+            'ilo', 'pi', 'zh-classical', 'ug', 'km', 'lad', 'sc', 'frp', 'mg', 'ang',
             'kw', 'haw', 'pdc', 'szl', 'ps', 'hif', 'ckb', 'bo', 'pa', 'kv',
-            'ie', 'to', 'hak', 'crh', 'myv', 'gn', 'stq', 'ln', 'nv', 'mhr',
-            'jbo', 'arc', 'ace', 'ky', 'ext', 'wo', 'tpi', 'ty', 'cbk-zam', 'so',
-            'eml', 'zea', 'srn', 'ay', 'pap', 'kab', 'ig', 'kg', 'ba', 'or',
+            'ie', 'to', 'hak', 'crh', 'myv', 'gn', 'stq', 'ln', 'mhr', 'nv',
+            'ace', 'jbo', 'arc', 'ky', 'ext', 'wo', 'tpi', 'ty', 'cbk-zam', 'so',
+            'eml', 'zea', 'srn', 'ay', 'pap', 'ig', 'kab', 'kg', 'ba', 'or',
             'lo', 'udm', 'dsb', 'rmy', 'cu', 'kaa', 'sm', 'ab', 'ce', 'xal',
-            'av', 'ks', 'tet', 'got', 'sd', 'mdf', 'na', 'kl', 'pnt', 'iu',
+            'av', 'ks', 'tet', 'got', 'sd', 'mdf', 'kl', 'na', 'pnt', 'iu',
             'bm', 'pih', 'as', 'mwl', 'pcd', 'cdo', 'om', 'chr', 'ee', 'zu',
             'ti', 'za', 'ts', 'ss', 've', 'bi', 'ha', 'dz', 'bxr', 'ch',
             'cr', 'bug', 'xh', 'tn', 'ki', 'ik', 'rw', 'sg', 'st', 'ny',
Modified: trunk/pywikipedia/families/wikiquote_family.py
===================================================================
--- trunk/pywikipedia/families/wikiquote_family.py	2010-02-05 09:03:56 UTC (rev 7910)
+++ trunk/pywikipedia/families/wikiquote_family.py	2010-02-05 17:36:47 UTC (rev 7911)
@@ -15,9 +15,9 @@
             'sl', 'tr', 'fr', 'he', 'lt', 'cs', 'zh', 'el', 'hu', 'fa',
             'uk', 'id', 'sv', 'nl', 'no', 'ja', 'eo', 'fi', 'simple', 'hy',
             'nn', 'et', 'ca', 'ka', 'ar', 'ko', 'ku', 'hr', 'gl', 'ro',
-            'sr', 'ml', 'li', 'is', 'th', 'te', 'cy', 'af', 'da', 'az',
+            'sr', 'ml', 'cy', 'li', 'is', 'th', 'te', 'af', 'da', 'az',
             'sq', 'eu', 'vi', 'la', 'br', 'hi', 'be', 'ast', 'uz', 'ta',
-            'ang', 'zh-min-nan', 'mr', 'gu', 'lb', 'su', 'ur', 'wo', 'ky', 'kn',
+            'ang', 'zh-min-nan', 'mr', 'lb', 'gu', 'su', 'ur', 'wo', 'kn', 'ky',
             'kk', 'am', 'co',
         ]
Modified: trunk/pywikipedia/maintenance/wikimedia_sites.py
===================================================================
--- trunk/pywikipedia/maintenance/wikimedia_sites.py	2010-02-05 09:03:56 UTC (rev 7910)
+++ trunk/pywikipedia/maintenance/wikimedia_sites.py	2010-02-05 17:36:47 UTC (rev 7911)
@@ -1,10 +1,18 @@
-# This script checks the language list of each Wikimedia multiple-language site
-# against the language lists
+"""
+This script checks the language list of each Wikimedia multiple-language site
+against the language lists
+"""
+#
+# (C) Pywikipedia bot team, 2008-2010
+#
+# Distributed under the terms of the MIT license.
+#
 
 import sys, re
 sys.path.append('..')
 import wikipedia
+import codecs
 
 families = ['wikipedia', 'wiktionary', 'wikiquote', 'wikisource', 'wikibooks',
             'wikinews', 'wikiversity']
 familiesDict = {
@@ -14,48 +22,64 @@
     'wikisource': 'wikisources_wiki.php',
     'wikibooks': 'wikibooks_wiki.php',
     'wikinews': 'wikinews_wiki.php',
-    'wikiversity': 'wikiversity_wiki.php',
+    'wikiversity':'wikiversity_wiki.php',
 }
 exceptions = ['www']
 
-for family in families:
-    wikipedia.output('Checking family %s:' % family)
+def update_family():
+    for family in families:
+        wikipedia.output('Checking family %s:' % family)
 
-    original = wikipedia.Family(family).languages_by_size
-    obsolete = wikipedia.Family(family).obsolete
+        original = wikipedia.Family(family).languages_by_size
+        obsolete = wikipedia.Family(family).obsolete
 
-    url = 'http://s23.org/wikistats/%s' % familiesDict[family]
-    uo = wikipedia.MyURLopener
-    f = uo.open(url)
-    text = f.read()
+        url = 'http://s23.org/wikistats/%s' % familiesDict[family]
+        uo = wikipedia.MyURLopener
+        f = uo.open(url)
+        text = f.read()
 
-    if family == 'wikipedia':
-        p = re.compile(r"\[\[:([a-z-]{2,}):\|\1\]\].*?'''([0-9,]{1,})'''</span>\]", re.DOTALL)
-    else:
-        p = re.compile(r"\[http://([a-z\-]{2,}).%s.org/wiki/ \1\].*?'''([0-9,]{1,})'''\]" % family, re.DOTALL)
+        if family == 'wikipedia':
+            p = re.compile(r"\[\[:([a-z-]{2,}):\|\1\]\].*?'''([0-9,]{1,})'''</span>\]", re.DOTALL)
+        else:
+            p = re.compile(r"\[http://([a-z\-]{2,}).%s.org/wiki/ \1\].*?'''([0-9,]{1,})'''\]" % family, re.DOTALL)
-    new = []
-    for lang, cnt in p.findall(text):
-        if lang in obsolete or lang in exceptions:
-            # Ignore this language
-            continue
-        new.append(lang)
-    if original == new:
-        wikipedia.output(u'The lists match!')
-    else:
-        wikipedia.output(u"The lists don't match, the new list is:")
-        wikipedia.output(u'        self.languages_by_size = [')
-        line = '            '
-        index = 0
-        for lang in new:
-            index += 1
-            if index > 1:
-                line += u' '
-            line += u"'%s'," % lang
-            if index == 10:
-                wikipedia.output(u'%s' % line)
-                line = '            '
-                index = 0
-        if index > 0:
-            wikipedia.output(u'%s' % line)
-        wikipedia.output(u'        ]')
+        new = []
+        for lang, cnt in p.findall(text):
+            if lang in obsolete or lang in exceptions:
+                # Ignore this language
+                continue
+            new.append(lang)
+        if original == new:
+            wikipedia.output(u'The lists match!')
+        else:
+            wikipedia.output(u"The lists don't match, the new list is:")
+            text = u'        self.languages_by_size = [\r\n'
+            line = '            '
+            index = 0
+            for lang in new:
+                index += 1
+                if index > 1:
+                    line += u' '
+                line += u"'%s'," % lang
+                if index == 10:
+                    text += u'%s\r\n' % line
+                    line = '            '
+                    index = 0
+            if index > 0:
+                text += u'%s\r\n' % line
+            text += u'        ]'
+            wikipedia.output(text)
+            family_file_name = '../families/%s_family.py' % family
+            family_file = codecs.open(family_file_name, 'r', 'utf8')
+            old_text = family_text = family_file.read()
+            old = re.findall(ur'(?msu)^ {8}self.languages_by_size.+?]', family_text)[0]
+            family_text = family_text.replace(old, text)
+            family_file = codecs.open(family_file_name, 'w', 'utf8')
+            family_file.write(family_text)
+            family_file.close()
+
+if __name__ == '__main__':
+    try:
+        update_family()
+    finally:
+        wikipedia.stopme()
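
The heart of this change is the in-place rewrite of the family file: update_family() regenerates the languages_by_size block from the wikistats counts and splices it over the existing block with a regular expression. Below is a minimal, self-contained sketch of that technique, assuming the 8-space assignment / 12-space entry layout used by the family files; the file path and language list in the example are hypothetical, and this is not the committed code.

# Sketch (not the committed script) of the block-replacement technique:
# regenerate the languages_by_size block and splice it into a family file.
import codecs
import re

def format_languages_by_size(langs, per_line=10):
    # Render the list in the family-file layout: ten quoted codes per line,
    # entries indented by 12 spaces, assignment and closing bracket by 8.
    lines = []
    for i in range(0, len(langs), per_line):
        chunk = ', '.join("'%s'" % code for code in langs[i:i + per_line])
        lines.append('            %s,' % chunk)
    return ('        self.languages_by_size = [\r\n'
            + '\r\n'.join(lines)
            + '\r\n        ]')

def rewrite_family_file(path, new_langs):
    # Read the family file, find the first 8-space-indented
    # "self.languages_by_size = [...]" block, and replace it in place.
    f = codecs.open(path, 'r', 'utf8')
    family_text = f.read()
    f.close()
    old_block = re.findall(r'(?ms)^ {8}self\.languages_by_size.+?\]', family_text)[0]
    f = codecs.open(path, 'w', 'utf8')
    f.write(family_text.replace(old_block, format_languages_by_size(new_langs)))
    f.close()

if __name__ == '__main__':
    # Hypothetical usage against a local copy of a family file.
    rewrite_family_file('wikipedia_family.py',
                        ['en', 'de', 'fr', 'pl', 'it', 'ja', 'nl', 'es', 'pt',
                         'ru', 'sv', 'zh'])

In the committed script this logic runs once per family, the new ordering comes from the s23.org wikistats pages, and the generated block is also echoed with wikipedia.output() so the change can be reviewed on the console before the family file is overwritten.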