http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11312
Revision: 11312
Author: xqt
Date: 2013-04-01 10:37:24 +0000 (Mon, 01 Apr 2013)
Log Message:
-----------
PLURAL support for pywikibot.translate() method.
For additional help please refer to pywikibot.twntranslate()
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/i18n.py
Modified: trunk/pywikipedia/pywikibot/i18n.py
===================================================================
--- trunk/pywikipedia/pywikibot/i18n.py 2013-03-31 17:05:13 UTC (rev 11311)
+++ trunk/pywikipedia/pywikibot/i18n.py 2013-04-01 10:37:24 UTC (rev 11312)
@@ -3,34 +3,38 @@
and for TranslateWiki-based translations
"""
#
-# (C) Pywikipedia bot team, 2004-2012
+# (C) Pywikipedia bot team, 2004-2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
-import re, sys
+import re
+import sys
import locale
from pywikibot.exceptions import Error
import wikipedia as pywikibot
import config
-# Languages to use for comment text after the actual language but before
-# en:. For example, if for language 'xx', you want the preference of
-# languages to be:
-# xx:, then fr:, then ru:, then en:
-# you let altlang return ['fr','ru'].
-# This code is used by translate() and twtranslate() below.
+PLURAL_PATTERN = '{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
+
def _altlang(code):
"""Define fallback languages for particular languages.
+ @param code The language code
+ @type code string
+ @return a list of strings as language codes
+
If no translation is available to a specified language, translate() will
try each of the specified fallback languages, in order, until it finds
one with a translation, with 'en' and '_default' as a last resort.
For example, if for language 'xx', you want the preference of languages
to be: xx > fr > ru > en, you let altlang return ['fr', 'ru'].
+
+ This code is used by other translating methods below.
+
"""
#Akan
if code in ['ak', 'tw']:
@@ -64,7 +68,7 @@
return ['de', 'pl']
if code == 'rm':
return ['de', 'it']
- if code =='stq':
+ if code == 'stq':
return ['nds', 'de']
#Greek
if code in ['grc', 'pnt']:
@@ -73,7 +77,8 @@
if code in ['io', 'nov']:
return ['eo']
#Spanish
- if code in ['an', 'arn', 'ast', 'ay', 'ca', 'ext', 'lad', 'nah', 'nv', 'qu', 'yua']:
+ if code in ['an', 'arn', 'ast', 'ay', 'ca', 'ext', 'lad', 'nah', 'nv', 'qu',
+ 'yua']:
return ['es']
if code in ['gl', 'gn']:
return ['es', 'pt']
@@ -144,9 +149,9 @@
if code in ['mo', 'roa-rup']:
return ['ro']
#Russian and Belarusian
- if code in ['ab', 'av', 'ba', 'bxr', 'ce', 'cv', 'inh', 'kk', 'koi', 'krc', 'kv',
- 'ky', 'lbe', 'lez', 'mdf', 'mhr', 'mn', 'mrj', 'myv', 'os', 'sah',
- 'tg', 'udm', 'uk', 'xal']:
+ if code in ['ab', 'av', 'ba', 'bxr', 'ce', 'cv', 'inh', 'kk', 'koi', 'krc',
+ 'kv', 'ky', 'lbe', 'lez', 'mdf', 'mhr', 'mn', 'mrj', 'myv',
+ 'os', 'sah', 'tg', 'udm', 'uk', 'xal']:
return ['ru']
if code in ['kbd', 'ady']:
return ['kbd', 'ady', 'ru']
@@ -157,7 +162,7 @@
if code == 'kaa':
return ['uz', 'ru']
#Serbocroatian
- if code in ['bs', 'hr', 'sh',]:
+ if code in ['bs', 'hr', 'sh']:
return ['sh', 'hr', 'bs', 'sr', 'sr-el']
if code == 'sr':
return ['sr-el', 'sh', 'hr', 'bs']
@@ -212,9 +217,27 @@
#Default value
return []
-def translate(code, xdict, fallback=True):
+
+class TranslationError(Error):
+ """ Raised when no correct translation could be found """
+ pass
+
+
+def translate(code, xdict, parameters=None, fallback=True):
"""Return the most appropriate translation from a translation dict.
+ @param code The language code
+ @type code string or Site object
+ @param xdict dictionary with language codes as keys or extended dictionary
+ with family names as keys containing language dictionaries or
+ a single (unicode) string. May contain PLURAL tags as described
+ in twntranslate
+ @type xdict dict, string, unicode
+ @param parameters For passing (plural) parameters
+ @type parameters dict, string, unicode, int
+ @param fallback Try an alternate language code
+ @type fallback boolean
+
Given a language code and a dictionary, returns the dictionary's value for
key 'code' if this key exists; otherwise tries to return a value for an
alternative language that is most applicable to use on the Wikipedia in
@@ -225,7 +248,13 @@
the options gives result, we just take the first language in the
list.
+ For PLURAL support have a look at the twntranslate method
+
"""
+ param = None
+ if type(parameters) == dict:
+ param = parameters
+
family = pywikibot.default_family
# If a site is given instead of a code, use its language
if hasattr(code, 'lang'):
@@ -237,27 +266,62 @@
xdict = xdict[family]
elif 'wikipedia' in xdict:
xdict = xdict['wikipedia']
+
+ # Get the translated string
+ trans = None
if type(xdict) != dict:
- return xdict
+ trans = xdict
+ elif code in xdict:
+ trans = xdict[code]
+ elif fallback:
+ for alt in _altlang(code) + ['_default', 'en']:
+ if alt in xdict:
+ trans = xdict[alt]
+ break
+ else:
+ trans = xdict.values()[0]
+ if not trans:
+ return # return None if we have no translation found
+ if parameters is None:
+ return trans
- if code in xdict:
- return xdict[code]
- if not fallback:
- return None
- for alt in _altlang(code):
- if alt in xdict:
- return xdict[alt]
- if '_default' in xdict:
- return xdict['_default']
- if 'en' in xdict:
- return xdict['en']
- return xdict.values()[0]
+ # else we check for PLURAL variants
+ try:
+ selector, variants = re.search(PLURAL_PATTERN, trans).groups()
+ except AttributeError:
+ pass
+ else: # we found PLURAL patterns, process it
+ # no python 2.5 support anymore but we won't break old code
+ # therefore we import plural_rules here
+ from plural import plural_rules
+ if type(parameters) == dict:
+ num = param[selector]
+ elif isinstance(parameters, basestring):
+ num = int(parameters)
+ else:
+ num = parameters
+ # we only need the lang or _default, not a _altlang code
+ # TODO: check against plural_rules[lang]['nplurals']
+ try:
+ index = plural_rules[code]['plural'](num)
+ print 1, num, index
+ except KeyError:
+ index = plural_rules['_default']['plural'](num)
+ print 2, num, index
+ except TypeError:
+ # we got an int, not a function
+ index = plural_rules[code]['plural']
+ print 3, index
+ trans = re.sub(PLURAL_PATTERN, variants.split('|')[index], trans)
+ if param:
+ try:
+ return trans % param
+ except KeyError:
+ # parameter is for PLURAL variants only, don't change the string
+ pass
+ return trans
-class TranslationError(Error):
- """ Raised when no correct translation could be found """
- pass
-
def twtranslate(code, twtitle, parameters=None):
""" Uses TranslateWiki files to provide translations based on the TW title
twtitle, which corresponds to a page on TW.
@@ -301,7 +365,8 @@
except KeyError:
continue
if not trans:
- raise TranslationError("No English translation has been defined for TranslateWiki key %r" % twtitle)
+ raise TranslationError("No English translation has been defined "
+ "for TranslateWiki key %r" % twtitle)
# send the language code back via the given list
if code_needed:
code.append(lang)
@@ -310,6 +375,7 @@
else:
return trans
+
# Maybe this function should be merged with twtranslate
def twntranslate(code, twtitle, parameters=None):
""" First implementation of plural support for translations based on the
@@ -362,8 +428,8 @@
The translations are retrieved from i18n.<package>, based on the callers
import table.
+
"""
- PATTERN = '{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
param = None
if type(parameters) == dict:
param = parameters
@@ -374,14 +440,14 @@
code = [code]
trans = twtranslate(code, twtitle, None)
try:
- selector, variants = re.search(PATTERN, trans).groups()
+ selector, variants = re.search(PLURAL_PATTERN, trans).groups()
# No PLURAL tag found: nothing to replace
except AttributeError:
pass
else:
if type(parameters) == dict:
num = param[selector]
- elif type(parameters) == basestring:
+ elif isinstance(parameters, basestring):
num = int(parameters)
else:
num = parameters
@@ -391,7 +457,7 @@
# to use plural.py - use _default rules for all
if sys.version_info < (2, 5):
plural_func = lambda n: (n != 1)
- else:
+ else:
from plural import plural_rules
# we only need the lang or _default, not a _altlang code
# maybe we should implement this to i18n.translate()
@@ -404,7 +470,7 @@
# we got an int
index = plural_rules[lang]['plural']
repl = variants.split('|')[index]
- trans = re.sub(PATTERN, repl, trans)
+ trans = re.sub(PLURAL_PATTERN, repl, trans)
if param:
try:
return trans % param
@@ -412,6 +478,7 @@
pass
return trans
+
def twhas_key(code, twtitle):
""" Uses TranslateWiki files to to check whether specified translation
based on the TW title is provided. No code fallback is made.
@@ -429,6 +496,7 @@
code = code.lang
return code in transdict and twtitle in transdict[code]
+
def input(twtitle, parameters=None, password=False):
""" Ask the user a question, return the user's answer.
@param twtitle The TranslateWiki string title, in <package>-<key> format
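A minimal usage sketch for the PLURAL-aware translate() added in r11312. The message dictionary, the parameter name 'num' and the expected results are illustrative assumptions; only the call signature translate(code, xdict, parameters=None, fallback=True) comes from the diff above, and the results assume the default plural rule (n != 1).
# -*- coding: utf-8 -*-
# Sketch only; assumes a configured trunk pywikipedia installation.
from pywikibot import i18n

msg = {
    'en': u'Bot: changed %(num)d {{PLURAL:%(num)d|page|pages}}',
    'de': u'Bot: %(num)d {{PLURAL:%(num)d|Seite|Seiten}} geändert',
}

# With a dict parameter the PLURAL selector ('num') is looked up by name and
# the same dict is afterwards applied to the message with the % operator.
print i18n.translate('de', msg, parameters={'num': 1})  # -> u'Bot: 1 Seite geändert'
print i18n.translate('de', msg, parameters={'num': 5})  # -> u'Bot: 5 Seiten geändert'

# A code without its own entry falls back via _altlang(), then '_default'
# and 'en' (fallback=True is the default).
print i18n.translate('nds', msg, parameters={'num': 2})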
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11309
Revision: 11309
Author: legoktm
Date: 2013-03-31 16:44:16 +0000 (Sun, 31 Mar 2013)
Log Message:
-----------
Split getting a property's datatype into its own request so it can be cached.
Since a property's datatype will never change, we can safely cache it "forever".
I also fixed it so that we don't guess the datatype based on what was returned in
a "datavalue" object, since that is unreliable.
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2013-03-30 16:56:02 UTC (rev 11308)
+++ branches/rewrite/pywikibot/page.py 2013-03-31 16:44:16 UTC (rev 11309)
@@ -2515,9 +2515,7 @@
Examples: item, commons media file, StringValue, NumericalValue
"""
if not hasattr(self, 'type'):
- self.get()
- if self.type == 'wikibase-entityid':
- self.type = 'wikibase-item'
+ self.type = self.repo.getPropertyType(self)
return self.type
@@ -2561,8 +2559,7 @@
claim.isReference = True
claim.snaktype = data['mainsnak']['snaktype']
if claim.getSnakType() == 'value':
- claim.type = data['mainsnak']['datavalue']['type']
- if claim.type == 'wikibase-entityid':
+ if claim.getType() == 'wikibase-item':
claim.target = ItemPage(site, 'Q' +
str(data['mainsnak']['datavalue']['value']['numeric-id']))
else:
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2013-03-30 16:56:02 UTC (rev 11308)
+++ branches/rewrite/pywikibot/site.py 2013-03-31 16:44:16 UTC (rev 11309)
@@ -14,6 +14,7 @@
from hashlib import md5
except ImportError:
from md5 import md5
+import datetime
import itertools
import os
import re
@@ -3331,6 +3332,21 @@
raise pywikibot.data.api.APIError, data['errors']
return data['entities']
+ def getPropertyType(self, prop):
+ """
+ This is used specifically because we can cache
+ the value for a much longer time (near infinite).
+ """
+ params = dict(action='wbgetentities',
+ ids=prop.getID(),
+ props='datatype',
+ )
+ expiry = datetime.timedelta(days=365*100)
+ #Store it for 100 years
+ req = api.CachedRequest(expiry, site=self, **params)
+ data = req.submit()
+ return data['entities'][prop.getID()]['datatype']
+
def editEntity(self, identification, data, **kwargs):
params = dict(**identification)
params['action'] = 'wbeditentity'
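A short sketch of how the cached datatype lookup from r11309 is meant to be used on the rewrite branch. The way the repository site is obtained and the property id 'P18' are assumptions for illustration; getPropertyType() and getType() are the methods added or changed in the diff above.
# Sketch only; assumes a configured rewrite installation with the Wikidata
# repository reachable as pywikibot.Site('wikidata', 'wikidata').
import pywikibot

repo = pywikibot.Site('wikidata', 'wikidata')
prop = pywikibot.PropertyPage(repo, 'P18')   # 'P18' is just an example id

# The first call sends action=wbgetentities&props=datatype through a
# CachedRequest with a ~100 year expiry; later calls for the same property
# are answered from the cache, since a datatype never changes.
print repo.getPropertyType(prop)

# getType() now delegates to that cached lookup instead of calling get()
# and guessing the type from a returned 'datavalue' object.
print prop.getType()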
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11307
Revision: 11307
Author: xqt
Date: 2013-03-30 16:42:04 +0000 (Sat, 30 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2013-03-30 16:21:10 UTC (rev 11306)
+++ trunk/pywikipedia/wikipedia.py 2013-03-30 16:42:04 UTC (rev 11307)
@@ -4717,6 +4717,7 @@
Caches the HTML code, so that if you run this method twice on the
same ImagePage object, the page will only be downloaded once.
+
"""
if not self._imagePageHtml:
path = self.site().get_address(self.urlname())
@@ -4758,7 +4759,8 @@
for info in pageInfo['imageinfo']:
count += 1
if count == 1 and 'iistart' not in params:
- # count 1 and no iicontinue mean first image revision is latest.
+ # count 1 and no iicontinue mean first image revision
+ # is latest.
self._latestInfo = info
infos.append(info)
if limit == 1:
@@ -4769,7 +4771,7 @@
else:
break
except KeyError:
- output("Not image in imagepage")
+ output("No image in imagepage")
self._infoLoaded = True
if limit > 1:
return infos
@@ -4814,8 +4816,8 @@
return self.fileUrl().startswith(u'http://wikitravel.org/upload/shared/')
return self.fileIsOnCommons()
- # FIXME: MD5 might be performed on incomplete file due to server disconnection
- # (see bug #1795683).
+ # FIXME: MD5 might be performed on incomplete file due to server
+ # disconnection (see bug #1795683).
def getFileMd5Sum(self):
"""Return image file's MD5 checksum."""
f = MyURLopener.open(self.fileUrl())
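The FIXME kept above concerns hashing a download that may have been cut off by a server disconnection. Below is a generic illustration of the problem using only hashlib and urllib2, not pywikipedia code; the URL is a placeholder and the length check is one possible guard, not the fix applied in this commit.
import hashlib
import urllib2

def md5_of_url(url, chunk_size=65536):
    """Return the MD5 hex digest of url, refusing a truncated download."""
    response = urllib2.urlopen(url)
    expected = response.info().get('Content-Length')
    md5 = hashlib.md5()
    received = 0
    while True:
        chunk = response.read(chunk_size)
        if not chunk:
            break
        received += len(chunk)
        md5.update(chunk)
    if expected is not None and received != int(expected):
        raise IOError('truncated download: got %d of %s bytes'
                      % (received, expected))
    return md5.hexdigest()

print md5_of_url('http://upload.wikimedia.org/wikipedia/commons/a/a9/Example.jpg')  # placeholder URL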
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11306
Revision: 11306
Author: xqt
Date: 2013-03-30 16:21:10 +0000 (Sat, 30 Mar 2013)
Log Message:
-----------
PEP8 changes, update from trunk r11300, r11301, r11305, r10028
Modified Paths:
--------------
branches/rewrite/pywikibot/textlib.py
Modified: branches/rewrite/pywikibot/textlib.py
===================================================================
--- branches/rewrite/pywikibot/textlib.py 2013-03-30 16:17:48 UTC (rev 11305)
+++ branches/rewrite/pywikibot/textlib.py 2013-03-30 16:21:10 UTC (rev 11306)
@@ -7,7 +7,7 @@
"""
#
-# (C) Pywikipedia bot team, 2008-2011
+# (C) Pywikipedia bot team, 2008-2013
#
# Distributed under the terms of the MIT license.
#
@@ -16,11 +16,10 @@
import pywikibot
import re
-
-from pywikibot.i18n import translate
from HTMLParser import HTMLParser
import config2 as config
+
def unescape(s):
"""Replace escaped HTML-special characters by their originals"""
if '&' not in s:
@@ -29,12 +28,12 @@
s = s.replace(">", ">")
s = s.replace("'", "'")
s = s.replace(""", '"')
- s = s.replace("&", "&") # Must be last
+ s = s.replace("&", "&") # Must be last
return s
def replaceExcept(text, old, new, exceptions, caseInsensitive=False,
- allowoverlap=False, marker = '', site = None):
+ allowoverlap=False, marker='', site=None):
"""
Return text with 'old' replaced by 'new', ignoring specified types of text.
@@ -61,21 +60,21 @@
site = pywikibot.getSite()
exceptionRegexes = {
- 'comment': re.compile(r'(?s)<!--.*?-->'),
+ 'comment': re.compile(r'(?s)<!--.*?-->'),
# section headers
- 'header': re.compile(r'\r?\n=+.+=+ *\r?\n'),
+ 'header': re.compile(r'\r?\n=+.+=+ *\r?\n'),
# preformatted text
- 'pre': re.compile(r'(?ism)<pre>.*?</pre>'),
- 'source': re.compile(r'(?is)<source .*?</source>'),
+ 'pre': re.compile(r'(?ism)<pre>.*?</pre>'),
+ 'source': re.compile(r'(?is)<source .*?</source>'),
# inline references
- 'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'),
+ 'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'),
# lines that start with a space are shown in a monospace font and
# have whitespace preserved.
- 'startspace': re.compile(r'(?m)^ (.*?)$'),
+ 'startspace': re.compile(r'(?m)^ (.*?)$'),
# tables often have whitespace that is used to improve wiki
# source code readability.
# TODO: handle nested tables.
- 'table': re.compile(r'(?ims)^{\|.*?^\|}|<table>.*?</table>'),
+ 'table': re.compile(r'(?ims)^{\|.*?^\|}|<table>.*?</table>'),
# templates with parameters often have whitespace that is used to
# improve wiki source code readability.
# 'template': re.compile(r'(?s){{.*?}}'),
@@ -83,22 +82,25 @@
# templates cascaded up to level 2, but no deeper. For arbitrary
# depth, we'd need recursion which can't be done in Python's re.
# After all, the language of correct parenthesis words is not regular.
- 'template': re.compile(r'(?s){{(({{.*?}})?.*?)*}}'),
- 'hyperlink': compileLinkR(),
- 'gallery': re.compile(r'(?is)<gallery.*?>.*?</gallery>'),
+ 'template': re.compile(r'(?s){{(({{.*?}})?.*?)*}}'),
+ 'hyperlink': compileLinkR(),
+ 'gallery': re.compile(r'(?is)<gallery.*?>.*?</gallery>'),
# this matches internal wikilinks, but also interwiki, categories, and
# images.
- 'link': re.compile(r'\[\[[^\]\|]*(\|[^\]]*)?\]\]'),
+ 'link': re.compile(r'\[\[[^\]\|]*(\|[^\]]*)?\]\]'),
# also finds links to foreign sites with preleading ":"
- 'interwiki': re.compile(r'(?i)\[\[:?(%s)\s?:[^\]]*\]\][\s]*'
- % '|'.join(site.validLanguageLinks()
- + site.family.obsolete.keys())
- ),
+ 'interwiki': re.compile(r'(?i)\[\[:?(%s)\s?:[^\]]*\]\][\s]*'
+ % '|'.join(site.validLanguageLinks() +
+ site.family.obsolete.keys())),
+ # Wikidata property inclusions
+ 'property': re.compile(r'(?i)\{\{\s*#property:\s*p\d+\s*\}\}'),
+ # Module invocations (currently only Lua)
+ 'invoke': re.compile(r'(?i)\{\{\s*#invoke:.*?}\}'),
}
# if we got a string, compile it as a regular expression
- if type(old) in [str, unicode]:
+ if isinstance(old, basestring):
if caseInsensitive:
old = re.compile(old, re.IGNORECASE | re.UNICODE)
else:
@@ -175,9 +177,14 @@
break
groupID = groupMatch.group('name') or \
int(groupMatch.group('number'))
- replacement = replacement[:groupMatch.start()] + \
- match.group(groupID) + \
- replacement[groupMatch.end():]
+ try:
+ replacement = replacement[:groupMatch.start()] + \
+ match.group(groupID) + \
+ replacement[groupMatch.end():]
+ except IndexError:
+ print '\nInvalid group reference:', groupID
+ print 'Groups found:\n', match.groups()
+ raise IndexError
text = text[:match.start()] + replacement + text[match.end():]
# continue the search on the remaining text
@@ -190,7 +197,7 @@
return text
-def removeDisabledParts(text, tags = ['*']):
+def removeDisabledParts(text, tags=['*']):
"""
Return text without portions where wiki markup is disabled
@@ -205,12 +212,12 @@
"""
regexes = {
- 'comments' : r'<!--.*?-->',
- 'includeonly': r'<includeonly>.*?</includeonly>',
- 'nowiki': r'<nowiki>.*?</nowiki>',
- 'pre': r'<pre>.*?</pre>',
- 'source': r'<source .*?</source>',
- 'syntaxhighlight': r'<syntaxhighlight .*?</syntaxhighlight>',
+ 'comments': r'<!--.*?-->',
+ 'includeonly': r'<includeonly>.*?</includeonly>',
+ 'nowiki': r'<nowiki>.*?</nowiki>',
+ 'pre': r'<pre>.*?</pre>',
+ 'source': r'<source .*?</source>',
+ 'syntaxhighlight': r'<syntaxhighlight .*?</syntaxhighlight>',
}
if '*' in tags:
tags = regexes.keys()
@@ -223,7 +230,7 @@
return toRemoveR.sub('', text)
-def removeHTMLParts(text, keeptags = ['tt', 'nowiki', 'small', 'sup']):
+def removeHTMLParts(text, keeptags=['tt', 'nowiki', 'small', 'sup']):
"""
Return text without portions where HTML markup is disabled
@@ -232,9 +239,9 @@
The exact set of parts which should NOT be removed can be passed as the
'keeptags' parameter, which defaults to ['tt', 'nowiki', 'small', 'sup'].
+
"""
# try to merge with 'removeDisabledParts()' above into one generic function
-
# thanks to http://www.hellboundhackers.org/articles/841-using-python-39;s-htmlparser-c…
parser = _GetDataHTML()
parser.keeptags = keeptags
@@ -242,6 +249,7 @@
parser.close()
return parser.textdata
+
# thanks to http://docs.python.org/library/htmlparser.html
class _GetDataHTML(HTMLParser):
textdata = u''
@@ -251,17 +259,19 @@
self.textdata += data
def handle_starttag(self, tag, attrs):
- if tag in self.keeptags: self.textdata += u"<%s>" % tag
+ if tag in self.keeptags:
+ self.textdata += u"<%s>" % tag
def handle_endtag(self, tag):
- if tag in self.keeptags: self.textdata += u"</%s>" % tag
+ if tag in self.keeptags:
+ self.textdata += u"</%s>" % tag
-def isDisabled(text, index, tags = ['*']):
+def isDisabled(text, index, tags=['*']):
"""
Return True if text[index] is disabled, e.g. by a comment or by nowiki tags.
+ For the tags parameter, see removeDisabledParts() above.
- For the tags parameter, see removeDisabledParts() above.
"""
# Find a marker that is not already in the text.
marker = findmarker(text, '@@', '@')
@@ -270,9 +280,9 @@
return (marker not in text)
-def findmarker(text, startwith = u'@', append = u'@'):
+def findmarker(text, startwith=u'@', append=None):
# find a string which is not part of text
- if len(append) <= 0:
+ if not append:
append = u'@'
mymarker = startwith
while mymarker in text:
@@ -280,7 +290,7 @@
return mymarker
-def expandmarker(text, marker = '', separator = ''):
+def expandmarker(text, marker='', separator=''):
# set to remove any number of separator occurrences plus arbitrary
# whitespace before, after, and between them,
# by allowing to include them into marker.
@@ -292,8 +302,8 @@
while firstinseparator > 0 and striploopcontinue:
striploopcontinue = False
if (firstinseparator >= lenseparator) and \
- (separator == text[firstinseparator - \
- lenseparator : firstinseparator]):
+ (separator == text[firstinseparator -
+ lenseparator:firstinseparator]):
firstinseparator -= lenseparator
striploopcontinue = True
elif text[firstinseparator-1] < ' ':
@@ -302,6 +312,7 @@
marker = text[firstinseparator:firstinmarker] + marker
return marker
+
#-------------------------------------------------
# Functions dealing with interwiki language links
#-------------------------------------------------
@@ -324,7 +335,8 @@
# do not find or change links of other kinds, nor any that are formatted
# as in-line interwiki links (e.g., "[[:es:Articulo]]".
-def getLanguageLinks(text, insite=None, pageLink="[[]]", template_subpage=False):
+def getLanguageLinks(text, insite=None, pageLink="[[]]",
+ template_subpage=False):
"""
Return a dict of interlanguage links found in text.
@@ -336,7 +348,8 @@
if insite is None:
insite = pywikibot.getSite()
fam = insite.family
- # when interwiki links forward to another family, retrieve pages & other infos there
+ # when interwiki links forward to another family, retrieve pages & other
+ # infos there
if fam.interwiki_forward:
fam = pywikibot.Family(fam.interwiki_forward)
result = {}
@@ -351,8 +364,10 @@
# interwiki link.
# NOTE: language codes are case-insensitive and only consist of basic latin
# letters and hyphens.
- #TODO: currently, we do not have any, but BCP 47 allows digits, and underscores.
- #TODO: There is no semantic difference between hyphens and underscores -> fold them.
+ # TODO: currently, we do not have any, but BCP 47 allows digits, and
+ # underscores.
+ # TODO: There is no semantic difference between hyphens and
+ # underscores -> fold them.
interwikiR = re.compile(r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]')
for lang, pagetitle in interwikiR.findall(text):
lang = lang.lower()
@@ -369,14 +384,14 @@
try:
result[site] = pywikibot.Page(site, pagetitle, insite=insite)
except pywikibot.InvalidTitle:
- pywikibot.output(
- u"[getLanguageLinks] Text contains invalid interwiki link [[%s:%s]]."
- % (lang, pagetitle))
+ pywikibot.output(u'[getLanguageLinks] Text contains invalid '
+ u'interwiki link [[%s:%s]].'
+ % (lang, pagetitle))
continue
return result
-def removeLanguageLinks(text, site = None, marker = ''):
+def removeLanguageLinks(text, site=None, marker=''):
"""Return text with all interlanguage links removed.
If a link to an unknown language is encountered, a warning is printed.
@@ -391,7 +406,8 @@
return text
# This regular expression will find every interwiki link, plus trailing
# whitespace.
- languages = '|'.join(site.validLanguageLinks() + site.family.obsolete.keys())
+ languages = '|'.join(site.validLanguageLinks() +
+ site.family.obsolete.keys())
interwikiR = re.compile(r'\[\[(%s)\s?:[^\[\]\n]*\]\][\s]*'
% languages, re.IGNORECASE)
text = replaceExcept(text, interwikiR, '',
@@ -421,7 +437,7 @@
def replaceLanguageLinks(oldtext, new, site=None, addOnly=False,
- template=False, template_subpage=False):
+ template=False, template_subpage=False):
"""Replace interlanguage links in the text with a new set of links.
'new' should be a dict with the Site objects as keys, and Page or Link
@@ -442,7 +458,7 @@
else:
s2 = removeLanguageLinksAndSeparator(oldtext, site=site, marker=marker,
separator=separatorstripped)
- s = interwikiFormat(new, insite = site)
+ s = interwikiFormat(new, insite=site)
if s:
if site.language() in site.family.interwiki_attop or \
u'<!-- interwiki at top -->' in oldtext:
@@ -462,28 +478,28 @@
if "</noinclude>" in s2[firstafter:]:
if separatorstripped:
s = separator + s
- newtext = s2[:firstafter].replace(marker,'') + s \
- + s2[firstafter:]
+ newtext = s2[:firstafter].replace(marker, '') + s + \
+ s2[firstafter:]
elif site.language() in site.family.categories_last:
- cats = getCategoryLinks(s2, site = site)
+ cats = getCategoryLinks(s2, site=site)
s2 = removeCategoryLinksAndSeparator(
- s2.replace(marker, cseparatorstripped).strip(),
- site) + separator + s
+ s2.replace(marker, cseparatorstripped).strip(), site) + \
+ separator + s
newtext = replaceCategoryLinks(s2, cats, site=site,
addOnly=True)
# for Wikitravel's language links position.
# (not supported by rewrite - no API)
elif site.family.name == 'wikitravel':
s = separator + s + separator
- newtext = s2[:firstafter].replace(marker,'') + s + \
+ newtext = s2[:firstafter].replace(marker, '') + s + \
s2[firstafter:]
else:
if template or template_subpage:
if template_subpage:
- includeOn = '<includeonly>'
+ includeOn = '<includeonly>'
includeOff = '</includeonly>'
else:
- includeOn = '<noinclude>'
+ includeOn = '<noinclude>'
includeOff = '</noinclude>'
separator = ''
# Do we have a noinclude at the end of the template?
@@ -495,16 +511,16 @@
newtext = regexp.sub(s + includeOff, s2)
else:
# Put the langlinks at the end, inside noinclude's
- newtext = s2.replace(marker,'').strip() + separator + \
+ newtext = s2.replace(marker, '').strip() + separator + \
u'%s\n%s%s\n' % (includeOn, s, includeOff)
else:
- newtext = s2.replace(marker,'').strip() + separator + s
+ newtext = s2.replace(marker, '').strip() + separator + s
else:
- newtext = s2.replace(marker,'')
+ newtext = s2.replace(marker, '')
return newtext
-def interwikiFormat(links, insite = None):
+def interwikiFormat(links, insite=None):
"""Convert interwiki link dict into a wikitext string.
'links' should be a dict with the Site objects as keys, and Page
@@ -512,6 +528,7 @@
Return a unicode string that is formatted for inclusion in insite
(defaulting to the current site).
+
"""
if insite is None:
insite = pywikibot.getSite()
@@ -530,16 +547,16 @@
sep = u' '
else:
sep = config.line_separator
- s=sep.join(s) + config.line_separator
+ s = sep.join(s) + config.line_separator
return s
# Sort sites according to local interwiki sort logic
-def interwikiSort(sites, insite = None):
+def interwikiSort(sites, insite=None):
+ if not sites:
+ return []
if insite is None:
- insite = pywikibot.getSite()
- if not sites:
- return []
+ insite = pywikibot.getSite()
sites.sort()
putfirst = insite.interwiki_putfirst()
@@ -547,11 +564,8 @@
#In this case I might have to change the order
firstsites = []
for code in putfirst:
- # The code may not exist in this family?
-## if code in insite.family.obsolete:
-## code = insite.family.obsolete[code]
if code in insite.validLanguageLinks():
- site = insite.getSite(code = code)
+ site = insite.getSite(code=code)
if site in sites:
del sites[sites.index(site)]
firstsites = firstsites + [site]
@@ -561,6 +575,7 @@
sites = insite.interwiki_putfirst_doubled(sites) + sites
return sites
+
#---------------------------------------
# Functions dealing with category links
#---------------------------------------
@@ -588,7 +603,7 @@
'%s:%s' % (match.group('namespace'),
match.group('catName')),
site),
- sortKey = match.group('sortKey'))
+ sortKey=match.group('sortKey'))
result.append(cat)
return result
@@ -658,9 +673,10 @@
# spaces and underscores in page titles are interchangeable and collapsible
title = title.replace(r"\ ", "[ _]+").replace(r"\_", "[ _]+")
categoryR = re.compile(r'\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])'
- % (catNamespace, title), re.I)
- categoryRN = re.compile(r'^[^\S\n]*\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])[^\S\n]*\n'
- % (catNamespace, title), re.I | re.M)
+ % (catNamespace, title), re.I)
+ categoryRN = re.compile(
+ r'^[^\S\n]*\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])[^\S\n]*\n'
+ % (catNamespace, title), re.I | re.M)
if newcat is None:
""" First go through and try the more restrictive regex that removes
an entire line, if the category is the only thing on that line (this
@@ -679,7 +695,7 @@
return text
-def replaceCategoryLinks(oldtext, new, site = None, addOnly = False):
+def replaceCategoryLinks(oldtext, new, site=None, addOnly=False):
"""
Replace the category links given in the wikitext given
in oldtext by the new links given in new.
@@ -692,7 +708,7 @@
"""
# Find a marker that is not already in the text.
- marker = findmarker( oldtext, u'@@')
+ marker = findmarker(oldtext, u'@@')
if site is None:
site = pywikibot.getSite()
if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext:
@@ -711,7 +727,7 @@
else:
s2 = removeCategoryLinksAndSeparator(oldtext, site=site, marker=marker,
separator=separatorstripped)
- s = categoryFormat(new, insite = site)
+ s = categoryFormat(new, insite=site)
if s:
if site.language() in site.family.category_attop:
newtext = s + separator + s2
@@ -730,7 +746,7 @@
newtext = s2[:firstafter].replace(marker, '') + s + \
s2[firstafter:]
elif site.language() in site.family.categories_last:
- newtext = s2.replace(marker,'').strip() + separator + s
+ newtext = s2.replace(marker, '').strip() + separator + s
else:
interwiki = getLanguageLinks(s2)
s2 = removeLanguageLinksAndSeparator(s2.replace(marker, ''),
@@ -740,11 +756,11 @@
newtext = replaceLanguageLinks(s2, interwiki, site=site,
addOnly=True)
else:
- newtext = s2.replace(marker,'')
+ newtext = s2.replace(marker, '')
return newtext.strip()
-def categoryFormat(categories, insite = None):
+def categoryFormat(categories, insite=None):
"""Return a string containing links to all categories in a list.
'categories' should be a list of Category objects or strings
@@ -758,13 +774,14 @@
if insite is None:
insite = pywikibot.getSite()
- if isinstance(categories[0],basestring):
+ if isinstance(categories[0], basestring):
if categories[0][0] == '[':
catLinks = categories
else:
catLinks = ['[[Category:'+category+']]' for category in categories]
else:
- catLinks = [category.aslink(noInterwiki=True) for category in categories]
+ catLinks = [category.aslink(noInterwiki=True)
+ for category in categories]
if insite.category_on_one_line():
sep = ' '
@@ -774,6 +791,7 @@
#catLinks.sort()
return sep.join(catLinks) + config.line_separator
+
#---------------------------------------
# Functions dealing with external links
#---------------------------------------
@@ -796,9 +814,9 @@
# not allowed inside links. For example, in this wiki text:
# ''Please see http://www.example.org.''
# .'' shouldn't be considered as part of the link.
- regex = r'(?P<url>http[s]?://[^' + notInside + ']*?[^' + notAtEnd \
- + '](?=[' + notAtEnd+ ']*\'\')|http[s]?://[^' + notInside \
- + ']*[^' + notAtEnd + '])'
+ regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
+ r'(?=[%(notAtEnd)s]*\'\')|http[s]?://[^%(notInside)s]*' \
+ r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd}
if withoutBracketed:
regex = r'(?<!\[)' + regex
@@ -807,6 +825,7 @@
linkR = re.compile(regex)
return linkR
+
#----------------------------------
# Functions dealing with templates
#----------------------------------
@@ -845,7 +864,7 @@
inside = {}
count = 0
Rtemplate = re.compile(
- ur'{{(msg:)?(?P<name>[^{\|]+?)(\|(?P<params>[^{]+?))?}}')
+ ur'{{(msg:)?(?P<name>[^{\|]+?)(\|(?P<params>[^{]+?))?}}')
Rmath = re.compile(ur'<math>[^<]+</math>')
Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker))
Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2))
@@ -892,8 +911,8 @@
for m2 in pywikibot.link_regex.finditer(paramString):
count2 += 1
text = m2.group(0)
- paramString = paramString.replace(text,
- '%s%d%s' % (marker2, count2, marker2))
+ paramString = paramString.replace(
+ text, '%s%d%s' % (marker2, count2, marker2))
links[count2] = text
# Parse string
markedParams = paramString.split('|')
@@ -927,22 +946,21 @@
You can use items from extract_templates_and_params here to get
an equivalent template wiki text (it may happen that the order
of the params changes).
+
"""
(template, params) = template_and_params
-
text = u''
for item in params:
- text += u'|%s=%s\n' % (item, params[item])
+ text += u'|%s=%s\n' % (item, params[item])
return u'{{%s\n%s}}' % (template, text)
+
#----------------------------------
# Page parsing functionality
#----------------------------------
def does_text_contain_section(pagetext, section):
- """ Determines whether the page text contains the given
- section title.
- """
+ """Determines whether the page text contains the given section title."""
m = re.search("=+[ ']*%s[ ']*=+" % re.escape(section), pagetext)
return bool(m)
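A small usage sketch for replaceExcept() with the exception keys shown in the diff, including the new 'property' and 'invoke' entries. The sample wikitext is invented, and a configured rewrite installation (user-config.py) is assumed because building the exception table needs a Site object for the 'interwiki' pattern.
# Sketch only; the exception names are keys of the exceptionRegexes dict above.
from pywikibot import textlib

text = (u"foo bar <!-- foo inside a comment -->\n"
        u"<nowiki>foo kept verbatim</nowiki>\n"
        u"{{#property:p18}} {{#invoke:Foo|bar}}\n")

# Replace 'foo'/'Foo' everywhere except inside comments, nowiki tags,
# Wikidata property inclusions and Lua module invocations.
new = textlib.replaceExcept(text, r'[Ff]oo', u'baz',
                            ['comment', 'nowiki', 'property', 'invoke'])
print new   # only the first 'foo' becomes 'baz'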
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11305
Revision: 11305
Author: xqt
Date: 2013-03-30 16:17:48 +0000 (Sat, 30 Mar 2013)
Log Message:
-----------
PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2013-03-30 12:42:36 UTC (rev 11304)
+++ trunk/pywikipedia/pywikibot/textlib.py 2013-03-30 16:17:48 UTC (rev 11305)
@@ -19,6 +19,7 @@
from HTMLParser import HTMLParser
import config
+
def unescape(s):
"""Replace escaped HTML-special characters by their originals"""
if '&' not in s:
@@ -27,11 +28,12 @@
s = s.replace(">", ">")
s = s.replace("'", "'")
s = s.replace(""", '"')
- s = s.replace("&", "&") # Must be last
+ s = s.replace("&", "&") # Must be last
return s
+
def replaceExcept(text, old, new, exceptions, caseInsensitive=False,
- allowoverlap=False, marker = '', site = None):
+ allowoverlap=False, marker='', site=None):
"""
Return text with 'old' replaced by 'new', ignoring specified types of text.
@@ -88,9 +90,8 @@
'link': re.compile(r'\[\[[^\]\|]*(\|[^\]]*)?\]\]'),
# also finds links to foreign sites with preleading ":"
'interwiki': re.compile(r'(?i)\[\[:?(%s)\s?:[^\]]*\]\][\s]*'
- % '|'.join(site.validLanguageLinks()
- + site.family.obsolete.keys())
- ),
+ % '|'.join(site.validLanguageLinks() +
+ site.family.obsolete.keys())),
# Wikidata property inclusions
'property': re.compile(r'(?i)\{\{\s*#property:\s*p\d+\s*\}\}'),
# Module invocations (currently only Lua)
@@ -99,7 +100,7 @@
}
# if we got a string, compile it as a regular expression
- if type(old) in [str, unicode]:
+ if isinstance(old, basestring):
if caseInsensitive:
old = re.compile(old, re.IGNORECASE | re.UNICODE)
else:
@@ -196,7 +197,7 @@
return text
-def removeDisabledParts(text, tags = ['*']):
+def removeDisabledParts(text, tags=['*']):
"""
Return text without portions where wiki markup is disabled
@@ -211,12 +212,12 @@
"""
regexes = {
- 'comments' : r'<!--.*?-->',
- 'includeonly': r'<includeonly>.*?</includeonly>',
- 'nowiki': r'<nowiki>.*?</nowiki>',
- 'pre': r'<pre>.*?</pre>',
- 'source': r'<source .*?</source>',
- 'syntaxhighlight': r'<syntaxhighlight .*?</syntaxhighlight>',
+ 'comments': r'<!--.*?-->',
+ 'includeonly': r'<includeonly>.*?</includeonly>',
+ 'nowiki': r'<nowiki>.*?</nowiki>',
+ 'pre': r'<pre>.*?</pre>',
+ 'source': r'<source .*?</source>',
+ 'syntaxhighlight': r'<syntaxhighlight .*?</syntaxhighlight>',
}
if '*' in tags:
tags = regexes.keys()
@@ -229,7 +230,7 @@
return toRemoveR.sub('', text)
-def removeHTMLParts(text, keeptags = ['tt', 'nowiki', 'small', 'sup']):
+def removeHTMLParts(text, keeptags=['tt', 'nowiki', 'small', 'sup']):
"""
Return text without portions where HTML markup is disabled
@@ -238,9 +239,9 @@
The exact set of parts which should NOT be removed can be passed as the
'keeptags' parameter, which defaults to ['tt', 'nowiki', 'small', 'sup'].
+
"""
# try to merge with 'removeDisabledParts()' above into one generic function
-
# thanks to http://www.hellboundhackers.org/articles/841-using-python-39;s-htmlparser-c…
parser = _GetDataHTML()
parser.keeptags = keeptags
@@ -248,6 +249,7 @@
parser.close()
return parser.textdata
+
# thanks to http://docs.python.org/library/htmlparser.html
class _GetDataHTML(HTMLParser):
textdata = u''
@@ -257,17 +259,19 @@
self.textdata += data
def handle_starttag(self, tag, attrs):
- if tag in self.keeptags: self.textdata += u"<%s>" % tag
+ if tag in self.keeptags:
+ self.textdata += u"<%s>" % tag
def handle_endtag(self, tag):
- if tag in self.keeptags: self.textdata += u"</%s>" % tag
+ if tag in self.keeptags:
+ self.textdata += u"</%s>" % tag
-def isDisabled(text, index, tags = ['*']):
+def isDisabled(text, index, tags=['*']):
"""
Return True if text[index] is disabled, e.g. by a comment or by nowiki tags.
+ For the tags parameter, see removeDisabledParts() above.
- For the tags parameter, see removeDisabledParts() above.
"""
# Find a marker that is not already in the text.
marker = findmarker(text, '@@', '@')
@@ -276,9 +280,9 @@
return (marker not in text)
-def findmarker(text, startwith = u'@', append = u'@'):
+def findmarker(text, startwith=u'@', append=None):
# find a string which is not part of text
- if len(append) <= 0:
+ if not append:
append = u'@'
mymarker = startwith
while mymarker in text:
@@ -286,7 +290,7 @@
return mymarker
-def expandmarker(text, marker = '', separator = ''):
+def expandmarker(text, marker='', separator=''):
# set to remove any number of separator occurrences plus arbitrary
# whitespace before, after, and between them,
# by allowing to include them into marker.
@@ -298,8 +302,8 @@
while firstinseparator > 0 and striploopcontinue:
striploopcontinue = False
if (firstinseparator >= lenseparator) and \
- (separator == text[firstinseparator - \
- lenseparator : firstinseparator]):
+ (separator == text[firstinseparator -
+ lenseparator:firstinseparator]):
firstinseparator -= lenseparator
striploopcontinue = True
elif text[firstinseparator-1] < ' ':
@@ -308,6 +312,7 @@
marker = text[firstinseparator:firstinmarker] + marker
return marker
+
#-------------------------------------------------
# Functions dealing with interwiki language links
#-------------------------------------------------
@@ -330,7 +335,8 @@
# do not find or change links of other kinds, nor any that are formatted
# as in-line interwiki links (e.g., "[[:es:Articulo]]".
-def getLanguageLinks(text, insite=None, pageLink="[[]]", template_subpage=False):
+def getLanguageLinks(text, insite=None, pageLink="[[]]",
+ template_subpage=False):
"""
Return a dict of interlanguage links found in text.
@@ -342,7 +348,8 @@
if insite is None:
insite = pywikibot.getSite()
fam = insite.family
- # when interwiki links forward to another family, retrieve pages & other infos there
+ # when interwiki links forward to another family, retrieve pages & other
+ # infos there
if fam.interwiki_forward:
fam = pywikibot.Family(fam.interwiki_forward)
result = {}
@@ -357,8 +364,10 @@
# interwiki link.
# NOTE: language codes are case-insensitive and only consist of basic latin
# letters and hyphens.
- #TODO: currently, we do not have any, but BCP 47 allows digits, and underscores.
- #TODO: There is no semantic difference between hyphens and underscores -> fold them.
+ # TODO: currently, we do not have any, but BCP 47 allows digits, and
+ # underscores.
+ # TODO: There is no semantic difference between hyphens and
+ # underscores -> fold them.
interwikiR = re.compile(r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]')
for lang, pagetitle in interwikiR.findall(text):
lang = lang.lower()
@@ -375,14 +384,14 @@
try:
result[site] = pywikibot.Page(site, pagetitle, insite=insite)
except pywikibot.InvalidTitle:
- pywikibot.output(
- u"[getLanguageLinks] Text contains invalid interwiki link [[%s:%s]]."
- % (lang, pagetitle))
+ pywikibot.output(u'[getLanguageLinks] Text contains invalid '
+ u'interwiki link [[%s:%s]].'
+ % (lang, pagetitle))
continue
return result
-def removeLanguageLinks(text, site = None, marker = ''):
+def removeLanguageLinks(text, site=None, marker=''):
"""Return text with all interlanguage links removed.
If a link to an unknown language is encountered, a warning is printed.
@@ -397,7 +406,8 @@
return text
# This regular expression will find every interwiki link, plus trailing
# whitespace.
- languages = '|'.join(site.validLanguageLinks() + site.family.obsolete.keys())
+ languages = '|'.join(site.validLanguageLinks() +
+ site.family.obsolete.keys())
interwikiR = re.compile(r'\[\[(%s)\s?:[^\[\]\n]*\]\][\s]*'
% languages, re.IGNORECASE)
text = replaceExcept(text, interwikiR, '',
@@ -427,7 +437,7 @@
def replaceLanguageLinks(oldtext, new, site=None, addOnly=False,
- template=False, template_subpage=False):
+ template=False, template_subpage=False):
"""Replace interlanguage links in the text with a new set of links.
'new' should be a dict with the Site objects as keys, and Page or Link
@@ -448,7 +458,7 @@
else:
s2 = removeLanguageLinksAndSeparator(oldtext, site=site, marker=marker,
separator=separatorstripped)
- s = interwikiFormat(new, insite = site)
+ s = interwikiFormat(new, insite=site)
if s:
if site.language() in site.family.interwiki_attop or \
u'<!-- interwiki at top -->' in oldtext:
@@ -468,28 +478,28 @@
if "</noinclude>" in s2[firstafter:]:
if separatorstripped:
s = separator + s
- newtext = s2[:firstafter].replace(marker,'') + s \
- + s2[firstafter:]
+ newtext = s2[:firstafter].replace(marker, '') + s + \
+ s2[firstafter:]
elif site.language() in site.family.categories_last:
- cats = getCategoryLinks(s2, site = site)
+ cats = getCategoryLinks(s2, site=site)
s2 = removeCategoryLinksAndSeparator(
- s2.replace(marker, cseparatorstripped).strip(),
- site) + separator + s
+ s2.replace(marker, cseparatorstripped).strip(), site) + \
+ separator + s
newtext = replaceCategoryLinks(s2, cats, site=site,
addOnly=True)
# for Wikitravel's language links position.
# (not supported by rewrite - no API)
elif site.family.name == 'wikitravel':
s = separator + s + separator
- newtext = s2[:firstafter].replace(marker,'') + s + \
+ newtext = s2[:firstafter].replace(marker, '') + s + \
s2[firstafter:]
else:
if template or template_subpage:
if template_subpage:
- includeOn = '<includeonly>'
+ includeOn = '<includeonly>'
includeOff = '</includeonly>'
else:
- includeOn = '<noinclude>'
+ includeOn = '<noinclude>'
includeOff = '</noinclude>'
separator = ''
# Do we have a noinclude at the end of the template?
@@ -501,16 +511,16 @@
newtext = regexp.sub(s + includeOff, s2)
else:
# Put the langlinks at the end, inside noinclude's
- newtext = s2.replace(marker,'').strip() + separator + \
+ newtext = s2.replace(marker, '').strip() + separator + \
u'%s\n%s%s\n' % (includeOn, s, includeOff)
else:
- newtext = s2.replace(marker,'').strip() + separator + s
+ newtext = s2.replace(marker, '').strip() + separator + s
else:
- newtext = s2.replace(marker,'')
+ newtext = s2.replace(marker, '')
return newtext
-def interwikiFormat(links, insite = None):
+def interwikiFormat(links, insite=None):
"""Convert interwiki link dict into a wikitext string.
'links' should be a dict with the Site objects as keys, and Page
@@ -518,6 +528,7 @@
Return a unicode string that is formatted for inclusion in insite
(defaulting to the current site).
+
"""
if insite is None:
insite = pywikibot.getSite()
@@ -536,16 +547,16 @@
sep = u' '
else:
sep = config.line_separator
- s=sep.join(s) + config.line_separator
+ s = sep.join(s) + config.line_separator
return s
# Sort sites according to local interwiki sort logic
-def interwikiSort(sites, insite = None):
+def interwikiSort(sites, insite=None):
+ if not sites:
+ return []
if insite is None:
- insite = pywikibot.getSite()
- if not sites:
- return []
+ insite = pywikibot.getSite()
sites.sort()
putfirst = insite.interwiki_putfirst()
@@ -553,11 +564,8 @@
#In this case I might have to change the order
firstsites = []
for code in putfirst:
- # The code may not exist in this family?
-## if code in insite.family.obsolete:
-## code = insite.family.obsolete[code]
if code in insite.validLanguageLinks():
- site = insite.getSite(code = code)
+ site = insite.getSite(code=code)
if site in sites:
del sites[sites.index(site)]
firstsites = firstsites + [site]
@@ -567,6 +575,7 @@
sites = insite.interwiki_putfirst_doubled(sites) + sites
return sites
+
#---------------------------------------
# Functions dealing with category links
#---------------------------------------
@@ -590,10 +599,9 @@
r'(?:\|(?P<sortKey>.+?))?\s*\]\]'
% catNamespace, re.I)
for match in R.finditer(text):
- cat = catlib.Category(site,
- '%s:%s' % (match.group('namespace'),
- match.group('catName')),
- sortKey = match.group('sortKey'))
+ cat = catlib.Category(site, '%s:%s' % (match.group('namespace'),
+ match.group('catName')),
+ sortKey=match.group('sortKey'))
result.append(cat)
return result
@@ -663,9 +671,10 @@
# spaces and underscores in page titles are interchangeable and collapsible
title = title.replace(r"\ ", "[ _]+").replace(r"\_", "[ _]+")
categoryR = re.compile(r'\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])'
- % (catNamespace, title), re.I)
- categoryRN = re.compile(r'^[^\S\n]*\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])[^\S\n]*\n'
- % (catNamespace, title), re.I | re.M)
+ % (catNamespace, title), re.I)
+ categoryRN = re.compile(
+ r'^[^\S\n]*\[\[\s*(%s)\s*:\s*%s\s*((?:\|[^]]+)?\]\])[^\S\n]*\n'
+ % (catNamespace, title), re.I | re.M)
if newcat is None:
""" First go through and try the more restrictive regex that removes
an entire line, if the category is the only thing on that line (this
@@ -684,7 +693,7 @@
return text
-def replaceCategoryLinks(oldtext, new, site = None, addOnly = False):
+def replaceCategoryLinks(oldtext, new, site=None, addOnly=False):
"""
Replace the category links given in the wikitext given
in oldtext by the new links given in new.
@@ -697,7 +706,7 @@
"""
# Find a marker that is not already in the text.
- marker = findmarker( oldtext, u'@@')
+ marker = findmarker(oldtext, u'@@')
if site is None:
site = pywikibot.getSite()
if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext:
@@ -716,7 +725,7 @@
else:
s2 = removeCategoryLinksAndSeparator(oldtext, site=site, marker=marker,
separator=separatorstripped)
- s = categoryFormat(new, insite = site)
+ s = categoryFormat(new, insite=site)
if s:
if site.language() in site.family.category_attop:
newtext = s + separator + s2
@@ -735,7 +744,7 @@
newtext = s2[:firstafter].replace(marker, '') + s + \
s2[firstafter:]
elif site.language() in site.family.categories_last:
- newtext = s2.replace(marker,'').strip() + separator + s
+ newtext = s2.replace(marker, '').strip() + separator + s
else:
interwiki = getLanguageLinks(s2)
s2 = removeLanguageLinksAndSeparator(s2.replace(marker, ''),
@@ -745,11 +754,11 @@
newtext = replaceLanguageLinks(s2, interwiki, site=site,
addOnly=True)
else:
- newtext = s2.replace(marker,'')
+ newtext = s2.replace(marker, '')
return newtext.strip()
-def categoryFormat(categories, insite = None):
+def categoryFormat(categories, insite=None):
"""Return a string containing links to all categories in a list.
'categories' should be a list of Category objects or strings
@@ -763,13 +772,14 @@
if insite is None:
insite = pywikibot.getSite()
- if isinstance(categories[0],basestring):
+ if isinstance(categories[0], basestring):
if categories[0][0] == '[':
catLinks = categories
else:
catLinks = ['[[Category:'+category+']]' for category in categories]
else:
- catLinks = [category.aslink(noInterwiki=True) for category in categories]
+ catLinks = [category.aslink(noInterwiki=True)
+ for category in categories]
if insite.category_on_one_line():
sep = ' '
@@ -779,6 +789,7 @@
#catLinks.sort()
return sep.join(catLinks) + config.line_separator
+
#---------------------------------------
# Functions dealing with external links
#---------------------------------------
@@ -801,9 +812,9 @@
# not allowed inside links. For example, in this wiki text:
# ''Please see http://www.example.org.''
# .'' shouldn't be considered as part of the link.
- regex = r'(?P<url>http[s]?://[^' + notInside + ']*?[^' + notAtEnd \
- + '](?=[' + notAtEnd+ ']*\'\')|http[s]?://[^' + notInside \
- + ']*[^' + notAtEnd + '])'
+ regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
+ r'(?=[%(notAtEnd)s]*\'\')|http[s]?://[^%(notInside)s]*' \
+ r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd}
if withoutBracketed:
regex = r'(?<!\[)' + regex
@@ -812,6 +823,7 @@
linkR = re.compile(regex)
return linkR
+
#----------------------------------
# Functions dealing with templates
#----------------------------------
@@ -850,7 +862,7 @@
inside = {}
count = 0
Rtemplate = re.compile(
- ur'{{(msg:)?(?P<name>[^{\|]+?)(\|(?P<params>[^{]+?))?}}')
+ ur'{{(msg:)?(?P<name>[^{\|]+?)(\|(?P<params>[^{]+?))?}}')
Rmath = re.compile(ur'<math>[^<]+</math>')
Rmarker = re.compile(ur'%s(\d+)%s' % (marker, marker))
Rmarker2 = re.compile(ur'%s(\d+)%s' % (marker2, marker2))
@@ -897,8 +909,8 @@
for m2 in pywikibot.link_regex.finditer(paramString):
count2 += 1
text = m2.group(0)
- paramString = paramString.replace(text,
- '%s%d%s' % (marker2, count2, marker2))
+ paramString = paramString.replace(
+ text, '%s%d%s' % (marker2, count2, marker2))
links[count2] = text
# Parse string
markedParams = paramString.split('|')
@@ -932,22 +944,21 @@
You can use items from extract_templates_and_params here to get
an equivalent template wiki text (it may happen that the order
of the params changes).
+
"""
(template, params) = template_and_params
-
text = u''
for item in params:
- text += u'|%s=%s\n' % (item, params[item])
+ text += u'|%s=%s\n' % (item, params[item])
return u'{{%s\n%s}}' % (template, text)
+
#----------------------------------
# Page parsing functionality
#----------------------------------
def does_text_contain_section(pagetext, section):
- """ Determines whether the page text contains the given
- section title.
- """
+ """Determines whether the page text contains the given section title."""
m = re.search("=+[ ']*%s[ ']*=+" % re.escape(section), pagetext)
return bool(m)
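Finally, a tiny illustration of removeDisabledParts() from the trunk module reformatted above. The sample string is invented; the tag names are the keys of the regexes dict shown in the diff, and a configured trunk installation is assumed.
# Sketch only.
from pywikibot import textlib

sample = (u"kept text <!-- dropped comment --> "
          u"<nowiki>dropped nowiki</nowiki> <pre>dropped pre</pre>")

print textlib.removeDisabledParts(sample)                # all default tags
print textlib.removeDisabledParts(sample, ['comments'])  # only HTML comments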