Revision: 5540 Author: nicdumz Date: 2008-06-08 21:48:23 +0000 (Sun, 08 Jun 2008)
Log Message: ----------- Committing #1985183 [Patch update for locale update and obsolete info...etc] by lin4h, with some minor fixes: * BeautifulSoup 3.0.6 * speedy_delete zh update * adding obsolete wikis
Modified Paths: -------------- trunk/pywikipedia/BeautifulSoup.py trunk/pywikipedia/families/wikibooks_family.py trunk/pywikipedia/families/wiktionary_family.py trunk/pywikipedia/maintenance/readtalk.py trunk/pywikipedia/speedy_delete.py
Modified: trunk/pywikipedia/BeautifulSoup.py =================================================================== --- trunk/pywikipedia/BeautifulSoup.py 2008-06-08 21:40:15 UTC (rev 5539) +++ trunk/pywikipedia/BeautifulSoup.py 2008-06-08 21:48:23 UTC (rev 5540) @@ -79,8 +79,8 @@ from __future__ import generators
__author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "3.0.5" -__copyright__ = "Copyright (c) 2004-2007 Leonard Richardson" +__version__ = "3.0.6" +__copyright__ = "Copyright (c) 2004-2008 Leonard Richardson" __license__ = "New-style BSD"
from sgmllib import SGMLParser, SGMLParseError @@ -157,6 +157,7 @@ if self.nextSibling: self.nextSibling.previousSibling = self.previousSibling self.previousSibling = self.nextSibling = None + return self
def _lastRecursiveChild(self): "Finds the last element beneath this object to be parsed." @@ -237,8 +238,9 @@ def findAllNext(self, name=None, attrs={}, text=None, limit=None, **kwargs): """Returns all items that match the given criteria and appear - before after Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.nextGenerator) + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs)
def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): """Returns the closest sibling to this Tag that matches the @@ -402,7 +404,7 @@ raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
def __unicode__(self): - return unicode(str(self)) + return str(self).decode(DEFAULT_OUTPUT_ENCODING)
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): if encoding: @@ -688,6 +690,16 @@ s = ''.join(s) return s
+ def decompose(self): + """Recursively destroys the contents of this tree.""" + contents = [i for i in self.contents] + for i in contents: + if isinstance(i, Tag): + i.decompose() + else: + i.extract() + self.extract() + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): return self.__str__(encoding, True)
@@ -1512,7 +1524,7 @@ # worked. Rewrite the meta tag. newAttr = self.CHARSET_RE.sub\ (lambda(match):match.group(1) + - "%SOUP-ENCODING%", value) + "%SOUP-ENCODING%", contentType) attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], newAttr) tagNeedsEncodingSubstitution = True @@ -1646,7 +1658,7 @@ import chardet # import chardet.constants # chardet.constants._debug = 1 -except: +except ImportError: chardet = None
# cjkcodecs and iconv_codec make Python know about more character encodings. @@ -1654,11 +1666,11 @@ # They're built in if you use Python 2.4. try: import cjkcodecs.aliases -except: +except ImportError: pass try: import iconv_codec -except: +except ImportError: pass
class UnicodeDammit:
Modified: trunk/pywikipedia/families/wikibooks_family.py =================================================================== --- trunk/pywikipedia/families/wikibooks_family.py 2008-06-08 21:40:15 UTC (rev 5539) +++ trunk/pywikipedia/families/wikibooks_family.py 2008-06-08 21:48:23 UTC (rev 5540) @@ -20,10 +20,10 @@ 'la', 'ml', 'cv', 'ie', 'hi', 'fy', 'lv', 'tl', 'hy', 'eu', 'ky', 'bn', 'pa', 'bs', 'ms', 'be', 'tg', 'te', 'af', 'cy', 'ast', 'sa', 'tt', 'az', 'ku', 'mg', 'si', 'co', 'sw', 'tk', - 'ne', 'qu', 'bm', 'ak', 'vo', 'uz', 'ks', 'bo', 'gu', 'su', + 'ne', 'qu', 'bm', 'ak', 'vo', 'uz', 'bo', 'su', 'na', 'se', 'ps', 'kn', 'kk', 'zh-min-nan', 'ay', 'lb', 'got', 'nah', - 'as', 'aa', 'mn', 'ch', 'gn', 'ln', 'ug', 'km', 'yo', 'nds', - 'xh', 'rm', 'ba', 'za', 'bi', 'my', 'ga', 'wa', 'zu', 'mi', + 'aa', 'mn', 'ch', 'gn', 'ln', 'km', 'nds', + 'xh', 'rm', 'ba', 'za', 'bi', 'my', 'wa', 'zu', 'mi', ]
for lang in self.languages_by_size: @@ -273,18 +273,19 @@
alphabetic = ['af','ar','roa-rup','om','bg','be','bn','bs', 'ca','chr','co','cs','cy','da','de','als','et', - 'el','en','es','eo','eu','fa','fr','fy','ga','gv', + 'el','en','es','eo','eu','fa','fr','fy','gv', 'gd','gl','ko','hi','hr','io','id','ia','is','it', - 'he','jv','ka','csb','ks','sw','la','lv','lt','hu', + 'he','jv','ka','csb','sw','la','lv','lt','hu', 'mk','mg','ml','mi','mr','ms','zh-cfr','mn','nah','na', 'nl','ja','no','nb','oc','nds','pl','pt','ro','ru', 'sa','st','sq','si','simple','sk','sl','sr','su', 'fi','sv','ta','tt','th','ur','vi', - 'tpi','tr','uk','vo','yi','yo','za','zh','zh-cn', + 'tpi','tr','uk','vo','yi','za','zh','zh-cn', 'zh-tw']
self.obsolete = { 'aa': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Afa... + 'as': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Ass... 'ay': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Aym... 'ba': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Bas... 'bi': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Bis... @@ -292,9 +293,12 @@ 'bo': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Tib... 'ch': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Cha... 'dk': 'da', + 'ga':None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Gae... 'got': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Got... 'gn': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Gua... + 'gu': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Guj... 'jp': 'ja', + 'ks': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Kas... 'ln': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Lin... 'minnan':'zh-min-nan', 'nah': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Nah... @@ -303,6 +307,8 @@ 'rm': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Rum... 'se': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Sam... 'tokipona': None, + 'ug': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Uyg... + 'yo': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Yor... 'zh-tw': 'zh', 'zh-cn': 'zh' }
Modified: trunk/pywikipedia/families/wiktionary_family.py =================================================================== --- trunk/pywikipedia/families/wiktionary_family.py 2008-06-08 21:40:15 UTC (rev 5539) +++ trunk/pywikipedia/families/wiktionary_family.py 2008-06-08 21:48:23 UTC (rev 5540) @@ -26,7 +26,7 @@ 'tg', 'dv', 'bn', 'ka', 'mg', 'ha', 'kw', 'ss', 'na', 'sa', 'gv', 'ay', 'tpi', 'ne', 'jbo', 'jv', 'gn', 'uz', 'tn', 'mt', 'sh', 'lb', 'ks', 'tk', 'sg', 'fj', 'als', 'ik', 'si', 'ln', - 'sm', 'za', 'roa-rup', 'sn', 'yo', 'dz', 'my', 'wa', 'sc', + 'sm', 'za', 'roa-rup', 'yo', 'dz', 'my', 'wa', 'sc', ]
for lang in self.languages_by_size: @@ -273,6 +273,7 @@ self.namespaces[104] = { 'bs': u'Dodatak', 'cy': u'WiciSawrws', + 'de': u'WikiSaurus', 'en': u'Index', 'fr': u'Portail', 'pl': u'Portal', @@ -284,6 +285,7 @@ self.namespaces[105] = { 'bs': u'Razgovor o Dodatku', 'cy': u'Sgwrs WiciSawrws', + 'de': u'WikiSaurus Diskussion', 'en': u'Index talk', 'fr': u'Discussion Portail', 'pl': u'Dyskusja portalu', @@ -369,6 +371,7 @@ 'pi': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Pal... 'rm': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Rha... 'rn': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Kir... + 'sn': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Sho... 'to': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Ton... 'tlh': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Kli... 'tw': None, # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Twi...
Modified: trunk/pywikipedia/maintenance/readtalk.py =================================================================== --- trunk/pywikipedia/maintenance/readtalk.py 2008-06-08 21:40:15 UTC (rev 5539) +++ trunk/pywikipedia/maintenance/readtalk.py 2008-06-08 21:48:23 UTC (rev 5540) @@ -9,22 +9,41 @@ *Error checking """ import sys, re -sys.path.append(re.sub('/[^/]*$', '', sys.path[0])) #sys.path.append('..') +sys.path.append(re.sub('/[^/]*$', '', sys.path[0])) +sys.path.append('..') import wikipedia, config
+ +def readtalk(lang, familyName): + site = wikipedia.getSite(code=lang, fam=familyName) + page = wikipedia.Page(site, u'User_Talk:' + config.usernames[familyName][lang]) + wikipedia.output(u'Reading talk page from %s:%s'% (lang,familyName)) + try: + wikipedia.output(page.get (get_redirect=True)+"\n") + except wikipedia.NoPage: + wikipedia.output("WARNING: Account talk page is not exist.\n") + except wikipedia.UserBlocked: + wikipedia.output("WARNING: Account in %s:%s is blocked.\n"% (familyName,lang)) + def main(): # Get a dictionary of all the usernames - namedict = config.usernames - for familyName in namedict.iterkeys(): - for lang in namedict[familyName].iterkeys(): - site = wikipedia.getSite(code=lang, fam=familyName) - username = config.usernames[familyName][lang] - page = wikipedia.Page(site, u'User_Talk:' + username) - wikipedia.output(u'Reading talk page from ' + lang + u' ' + familyName) - wikipedia.output(page.get (get_redirect=True)) + all = False + + for arg in wikipedia.handleArgs(): + if arg.startswith('-all'): + all = True + + if all == True: + namedict = config.usernames + for familyName in namedict.iterkeys(): + for lang in namedict[familyName].iterkeys(): + readtalk(lang,familyName) + else: + readtalk(wikipedia.default_code,wikipedia.default_family)
if __name__ == "__main__": try: main() finally: wikipedia.stopme() +
Modified: trunk/pywikipedia/speedy_delete.py =================================================================== --- trunk/pywikipedia/speedy_delete.py 2008-06-08 21:40:15 UTC (rev 5539) +++ trunk/pywikipedia/speedy_delete.py 2008-06-08 21:48:23 UTC (rev 5540) @@ -125,10 +125,10 @@ u'No license':u'[[WP:CSD#I3|CSD I3]]: 沒有版權模板,無法確認版權資訊', u'Unknown':u'[[WP:CSD#I3|CSD I3]]: 沒有版權模板,無法確認版權資訊', u'TempPage':u'[[WP:CSD]]: 臨時頁面', - u'NowCommons':u'[[WP:CSD#I7|CSD I7]]: 被[[:commons:|維基共享資源]]取代的圖片', - u'Nowcommons':u'[[WP:CSD#I7|CSD I7]]: 被[[:commons:|維基共享資源]]取代的圖片', + u'NowCommons':u'[[WP:CSD#I7|CSD I7]]: 此圖片已存在於[[:commons:|維基共享資源]]', + u'Nowcommons':u'[[WP:CSD#I7|CSD I7]]: 此圖片已存在於[[:commons:|維基共享資源]]', u'RoughTranslation':u'[[WP:CSD#G7|CSD G7]]: 機器翻譯', - u'Advert':u'[[WP:CSD#G11|CSD G11]]: 廣告、宣傳頁面', + u'Advert':u'[[WP:CSD#G11|CSD G11]]: [[WP:NOT#維基百科不是宣傳工具|廣告、宣傳頁面]]', }, }
pywikipedia-l@lists.wikimedia.org