Pywikipedia-svn April 2011

pywikipedia-svn@lists.wikimedia.org

7 participants
62 discussions

SVN: [9172] trunk/pywikipedia/interwiki.py
by a_engels＠svn.wikimedia.org 14 Apr '11

14 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9172

Revision: 9172
Author:   a_engels
Date:     2011-04-15 06:07:00 +0000 (Fri, 15 Apr 2011)
Log Message:
-----------
deal with the self.originPage == None case (I think it occurs if the original page has bug #3081100, but I'm not sure about that)

Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py

Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2011-04-13 19:57:44 UTC (rev 9171)
+++ trunk/pywikipedia/interwiki.py 2011-04-15 06:07:00 UTC (rev 9172)
@@ -1725,24 +1725,25 @@
             raise "Bugcheck: finish called before done"
         if not self.workonme:
             return
-        if self.forcedStop: # autonomous with problem
-            pywikibot.output(u"======Aborted processing %s======" % self.originPage.aslink(True))
-            return
         if self.originPage:
             if self.originPage.isRedirectPage():
                 return
             if self.originPage.isCategoryRedirect():
                 return
+        else:
+            return
         if not self.untranslated and globalvar.untranslatedonly:
             return
+        if self.forcedStop: # autonomous with problem
+            pywikibot.output(u"======Aborted processing %s======" % self.originPage.aslink(True))
+            return
         # The following check is not always correct and thus disabled.
         # self.done might contain no interwiki links because of the -neverlink
         # argument or because of disambiguation conflicts.
 #        if len(self.done) == 1:
 #            # No interwiki at all
 #            return
-        if self.originPage:
-            pywikibot.output(u"======Post-processing %s======" % self.originPage.aslink(True))
+        pywikibot.output(u"======Post-processing %s======" % self.originPage.aslink(True))
         # Assemble list of accepted interwiki links
         new = self.assemble()
         if new is None: # User said give up

1 0

SVN: [9171] trunk/pywikipedia/BeautifulSoup.py
by shizhao＠svn.wikimedia.org 13 Apr '11

13 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9171 Revision: 9171 Author: shizhao Date: 2011-04-13 19:57:44 +0000 (Wed, 13 Apr 2011) Log Message: ----------- update to 3.2.0 Modified Paths: -------------- trunk/pywikipedia/BeautifulSoup.py Modified: trunk/pywikipedia/BeautifulSoup.py =================================================================== --- trunk/pywikipedia/BeautifulSoup.py 2011-04-13 04:41:33 UTC (rev 9170) +++ trunk/pywikipedia/BeautifulSoup.py 2011-04-13 19:57:44 UTC (rev 9171) @@ -42,7 +42,7 @@ Here, have some legalese: -Copyright (c) 2004-2009, Leonard Richardson +Copyright (c) 2004-2010, Leonard Richardson All rights reserved. @@ -79,39 +79,38 @@ from __future__ import generators __author__ = "Leonard Richardson (leonardr(a)segfault.org)" -__version__ = "3.1.0.1" -__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson" +__version__ = "3.2.0" +__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson" __license__ = "New-style BSD" +from sgmllib import SGMLParser, SGMLParseError import codecs import markupbase import types import re -from HTMLParser import HTMLParser, HTMLParseError +import sgmllib try: - from htmlentitydefs import name2codepoint + from htmlentitydefs import name2codepoint except ImportError: - name2codepoint = {} + name2codepoint = {} try: set except NameError: from sets import Set as set #These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match DEFAULT_OUTPUT_ENCODING = "utf-8" +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + # First, the classes that represent markup elements. 
-def sob(unicode, encoding): - """Returns either the given Unicode string or its encoding.""" - if encoding is None: - return unicode - else: - return unicode.encode(encoding) - -class PageElement: +class PageElement(object): """Contains the navigational information for some part of the page (either a tag or a piece of text)""" @@ -129,10 +128,11 @@ def replaceWith(self, replaceWith): oldParent = self.parent - myIndex = self.parent.contents.index(self) - if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: # We're replacing this element with one of its siblings. - index = self.parent.contents.index(replaceWith) + index = replaceWith.parent.index(replaceWith) if index and index < myIndex: # Furthermore, it comes before this element. That # means that when we extract it, the index of this @@ -141,11 +141,20 @@ self.extract() oldParent.insert(myIndex, replaceWith) + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + def extract(self): """Destructively rips this element out of the tree.""" if self.parent: try: - self.parent.contents.remove(self) + del self.parent.contents[self.parent.index(self)] except ValueError: pass @@ -178,18 +187,17 @@ return lastChild def insert(self, position, newChild): - if (isinstance(newChild, basestring) - or isinstance(newChild, unicode)) \ + if isinstance(newChild, basestring) \ and not isinstance(newChild, NavigableString): newChild = NavigableString(newChild) position = min(position, len(self.contents)) - if hasattr(newChild, 'parent') and newChild.parent != None: + if hasattr(newChild, 'parent') and newChild.parent is not None: # We're 'inserting' an element that's already one # of this object's children. 
- if newChild.parent == self: - index = self.find(newChild) - if index and index < position: + if newChild.parent is self: + index = self.index(newChild) + if index > position: # Furthermore we're moving it further down the # list of this object's children. That means that # when we extract this element, our target index @@ -327,8 +335,21 @@ if isinstance(name, SoupStrainer): strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer else: - # Build a SoupStrainer strainer = SoupStrainer(name, attrs, text, **kwargs) results = ResultSet(strainer) g = generator() @@ -349,31 +370,31 @@ #NavigableStrings and Tags. def nextGenerator(self): i = self - while i: + while i is not None: i = i.next yield i def nextSiblingGenerator(self): i = self - while i: + while i is not None: i = i.nextSibling yield i def previousGenerator(self): i = self - while i: + while i is not None: i = i.previous yield i def previousSiblingGenerator(self): i = self - while i: + while i is not None: i = i.previousSibling yield i def parentGenerator(self): i = self - while i: + while i is not None: i = i.parent yield i @@ -415,7 +436,7 @@ return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) def __getnewargs__(self): - return (unicode(self),) + return (NavigableString.__str__(self),) def __getattr__(self, attr): """text.string gives you text. 
This is for backwards @@ -426,32 +447,34 @@ else: raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) - def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.decode().encode(encoding) + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) - def decodeGivenEventualEncoding(self, eventualEncoding): - return self + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + if encoding: + return self.encode(encoding) + else: + return self class CData(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): - return u'<![CDATA[' + self + u']]>' + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) class ProcessingInstruction(NavigableString): - - def decodeGivenEventualEncoding(self, eventualEncoding): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): output = self - if u'%SOUP-ENCODING%' in output: - output = self.substituteEncoding(output, eventualEncoding) - return u'<?' + output + u'?>' + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "<?%s?>" % self.toEncoding(output, encoding) class Comment(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): - return u'<!--' + self + u'-->' + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!--%s-->" % NavigableString.__str__(self, encoding) class Declaration(NavigableString): - def decodeGivenEventualEncoding(self, eventualEncoding): - return u'<!' 
+ self + u'>' + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "<!%s>" % NavigableString.__str__(self, encoding) class Tag(PageElement): @@ -506,8 +529,10 @@ self.parserClass = parser.__class__ self.isSelfClosing = parser.isSelfClosingTag(name) self.name = name - if attrs == None: + if attrs is None: attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() self.attrs = attrs self.contents = [] self.setup(parent, previous) @@ -517,21 +542,56 @@ self.convertXMLEntities = parser.convertXMLEntities self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities - def convert(kval): - "Converts HTML, XML and numeric entities in the attribute value." - k, val = kval - if val is None: - return kval - return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", - self._convertEntities, val)) + # Convert any HTML, XML, or numeric entities in the attribute values. + convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) self.attrs = map(convert, self.attrs) + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + def get(self, key, default=None): """Returns the value of the 'key' attribute for the tag, or the value given for 'default' if it doesn't have that attribute.""" return self._getAttrMap().get(key, default) + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + 
child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + def has_key(self, key): return self._getAttrMap().has_key(key) @@ -600,6 +660,8 @@ NOTE: right now this will return false if two tags have the same attributes in a different order. Should this be fixed?""" + if other is self: + return True if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): return False for i in range(0, len(self.contents)): @@ -614,8 +676,11 @@ def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): """Renders this tag as a string.""" - return self.decode(eventualEncoding=encoding) + return self.__str__(encoding) + def __unicode__(self): + return self.__str__(None) + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + ")") @@ -625,30 +690,24 @@ appropriate XML entity for an XML special character.""" return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" - def __unicode__(self): - return self.decode() + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. - def __str__(self): - return self.encode() + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" - def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - return self.decode(prettyPrint, indentLevel, encoding).encode(encoding) + encodedName = self.toEncoding(self.name, encoding) - def decode(self, prettyPrint=False, indentLevel=0, - eventualEncoding=DEFAULT_OUTPUT_ENCODING): - """Returns a string or Unicode representation of this tag and - its contents. 
To get Unicode, pass None for encoding.""" - attrs = [] if self.attrs: for key, val in self.attrs: fmt = '%s="%s"' - if isString(val): - if (self.containsSubstitutions - and eventualEncoding is not None - and '%SOUP-ENCODING%' in val): - val = self.substituteEncoding(val, eventualEncoding) + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) # The attribute value either: # @@ -677,26 +736,22 @@ # ampersands that aren't part of entities. We need # to escape those to XML entities too. val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) - if val is None: - # Handle boolean attributes. - decoded = key - else: - decoded = fmt % (key, val) - attrs.append(decoded) + + attrs.append(fmt % (self.toEncoding(key, encoding), + self.toEncoding(val, encoding))) close = '' closeTag = '' if self.isSelfClosing: close = ' /' else: - closeTag = '</%s>' % self.name + closeTag = '</%s>' % encodedName indentTag, indentContents = 0, 0 if prettyPrint: indentTag = indentLevel space = (' ' * (indentTag-1)) indentContents = indentTag + 1 - contents = self.decodeContents(prettyPrint, indentContents, - eventualEncoding) + contents = self.renderContents(encoding, prettyPrint, indentContents) if self.hidden: s = contents else: @@ -706,7 +761,7 @@ attributeString = ' ' + ' '.join(attrs) if prettyPrint: s.append(space) - s.append('<%s%s%s>' % (self.name, attributeString, close)) + s.append('<%s%s%s>' % (encodedName, attributeString, close)) if prettyPrint: s.append("\n") s.append(contents) @@ -722,32 +777,35 @@ def decompose(self): """Recursively destroys the contents of this tree.""" - contents = [i for i in self.contents] - for i in contents: - if isinstance(i, Tag): - i.decompose() - else: - i.extract() self.extract() + if len(self.contents) == 0: + return + current = self.contents[0] + while current is not None: + next = current.next + if isinstance(current, Tag): + del current.contents[:] + 
current.parent = None + current.previous = None + current.previousSibling = None + current.next = None + current.nextSibling = None + current = next def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): - return self.encode(encoding, True) + return self.__str__(encoding, True) - def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, prettyPrint=False, indentLevel=0): - return self.decodeContents(prettyPrint, indentLevel).encode(encoding) - - def decodeContents(self, prettyPrint=False, indentLevel=0, - eventualEncoding=DEFAULT_OUTPUT_ENCODING): """Renders the contents of this tag as a string in the given encoding. If encoding is None, returns a Unicode string..""" s=[] for c in self: text = None if isinstance(c, NavigableString): - text = c.decodeGivenEventualEncoding(eventualEncoding) + text = c.__str__(encoding) elif isinstance(c, Tag): - s.append(c.decode(prettyPrint, indentLevel, eventualEncoding)) + s.append(c.__str__(encoding, prettyPrint, indentLevel)) if text and prettyPrint: text = text.strip() if text: @@ -788,7 +846,7 @@ return self._findAll(name, attrs, text, limit, generator, **kwargs) findChildren = findAll - # Pre-3.x compatibility methods. Will go away in 4.0. + # Pre-3.x compatibility methods first = find fetch = findAll @@ -798,15 +856,6 @@ def firstText(self, text=None, recursive=True): return self.find(text=text, recursive=recursive) - # 3.x compatibility methods. Will go away in 4.0. 
- def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, - prettyPrint=False, indentLevel=0): - if encoding is None: - return self.decodeContents(prettyPrint, indentLevel, encoding) - else: - return self.encodeContents(encoding, prettyPrint, indentLevel) - - #Private methods def _getAttrMap(self): @@ -819,6 +868,10 @@ return self.attrMap #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + def recursiveChildGenerator(self): if not len(self.contents): raise StopIteration @@ -828,14 +881,6 @@ yield current current = current.next - def childGenerator(self): - if not len(self.contents): - raise StopIteration - current = self.contents[0] - while current: - yield current - current = current.nextSibling - raise StopIteration # Next, a couple classes to represent queries and their results. class SoupStrainer: @@ -844,8 +889,8 @@ def __init__(self, name=None, attrs={}, text=None, **kwargs): self.name = name - if isString(attrs): - kwargs['class'] = attrs + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) attrs = None if kwargs: if attrs: @@ -904,7 +949,8 @@ found = None # If given a list of items, scan it for a text element that # matches. - if isList(markup) and not isinstance(markup, Tag): + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): for element in markup: if isinstance(element, NavigableString) \ and self.search(element): @@ -917,7 +963,7 @@ found = self.searchTag(markup) # If it's text, make sure the text matches. 
elif isinstance(markup, NavigableString) or \ - isString(markup): + isinstance(markup, basestring): if self._matches(markup, self.text): found = markup else: @@ -928,8 +974,8 @@ def _matches(self, markup, matchAgainst): #print "Matching %s against %s" % (markup, matchAgainst) result = False - if matchAgainst == True and type(matchAgainst) == types.BooleanType: - result = markup != None + if matchAgainst is True: + result = markup is not None elif callable(matchAgainst): result = matchAgainst(markup) else: @@ -937,18 +983,17 @@ #other ways of matching match the tag name as a string. if isinstance(markup, Tag): markup = markup.name - if markup is not None and not isString(markup): + if markup and not isinstance(markup, basestring): markup = unicode(markup) #Now we know that chunk is either a string, or None. if hasattr(matchAgainst, 'match'): # It's a regexp object. result = markup and matchAgainst.search(markup) - elif (isList(matchAgainst) - and (markup is not None or not isString(matchAgainst))): + elif hasattr(matchAgainst, '__iter__'): # list-like result = markup in matchAgainst elif hasattr(matchAgainst, 'items'): result = markup.has_key(matchAgainst) - elif matchAgainst and isString(markup): + elif matchAgainst and isinstance(markup, basestring): if isinstance(markup, unicode): matchAgainst = unicode(matchAgainst) else: @@ -967,20 +1012,6 @@ # Now, some helper functions. 
-def isList(l): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is listlike.""" - return ((hasattr(l, '__iter__') and not isString(l)) - or (type(l) in (types.ListType, types.TupleType))) - -def isString(s): - """Convenience method that works with all 2.x versions of Python - to determine whether or not something is stringlike.""" - try: - return isinstance(s, unicode) or isinstance(s, basestring) - except NameError: - return isinstance(s, str) - def buildTagMap(default, *args): """Turns a list of maps, lists, or scalars into a single map. Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and @@ -991,7 +1022,7 @@ #It's a map. Merge it. for k,v in portion.items(): built[k] = v - elif isList(portion) and not isString(portion): + elif hasattr(portion, '__iter__'): # is a list #It's a list. Map each item to the default. for k in portion: built[k] = default @@ -1002,123 +1033,8 @@ # Now, the parser classes. -class HTMLParserBuilder(HTMLParser): +class BeautifulStoneSoup(Tag, SGMLParser): - def __init__(self, soup): - HTMLParser.__init__(self) - self.soup = soup - - # We inherit feed() and reset(). 
- - def handle_starttag(self, name, attrs): - if name == 'meta': - self.soup.extractCharsetFromMeta(attrs) - else: - self.soup.unknown_starttag(name, attrs) - - def handle_endtag(self, name): - self.soup.unknown_endtag(name) - - def handle_data(self, content): - self.soup.handle_data(content) - - def _toStringSubclass(self, text, subclass): - """Adds a certain piece of text to the tree as a NavigableString - subclass.""" - self.soup.endData() - self.handle_data(text) - self.soup.endData(subclass) - - def handle_pi(self, text): - """Handle a processing instruction as a ProcessingInstruction - object, possibly one with a %SOUP-ENCODING% slot into which an - encoding will be plugged later.""" - if text[:3] == "xml": - text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" - self._toStringSubclass(text, ProcessingInstruction) - - def handle_comment(self, text): - "Handle comments as Comment objects." - self._toStringSubclass(text, Comment) - - def handle_charref(self, ref): - "Handle character references as data." - if self.soup.convertEntities: - data = unichr(int(ref)) - else: - data = '&#%s;' % ref - self.handle_data(data) - - def handle_entityref(self, ref): - """Handle entity references as data, possibly converting known - HTML and/or XML entity references to the corresponding Unicode - characters.""" - data = None - if self.soup.convertHTMLEntities: - try: - data = unichr(name2codepoint[ref]) - except KeyError: - pass - - if not data and self.soup.convertXMLEntities: - data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) - - if not data and self.soup.convertHTMLEntities and \ - not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): - # TODO: We've got a problem here. We're told this is - # an entity reference, but it's not an XML entity - # reference or an HTML entity reference. Nonetheless, - # the logical thing to do is to pass it through as an - # unrecognized entity reference. 
- # - # Except: when the input is "&carol;" this function - # will be called with input "carol". When the input is - # "AT&T", this function will be called with input - # "T". We have no way of knowing whether a semicolon - # was present originally, so we don't know whether - # this is an unknown entity or just a misplaced - # ampersand. - # - # The more common case is a misplaced ampersand, so I - # escape the ampersand and omit the trailing semicolon. - data = "&%s" % ref - if not data: - # This case is different from the one above, because we - # haven't already gone through a supposedly comprehensive - # mapping of entities to Unicode characters. We might not - # have gone through any mapping at all. So the chances are - # very high that this is a real entity, and not a - # misplaced ampersand. - data = "&%s;" % ref - self.handle_data(data) - - def handle_decl(self, data): - "Handle DOCTYPEs and the like as Declaration objects." - self._toStringSubclass(data, Declaration) - - def parse_declaration(self, i): - """Treat a bogus SGML declaration as raw data. Treat a CDATA - declaration as a CData object.""" - j = None - if self.rawdata[i:i+9] == '<![CDATA[': - k = self.rawdata.find(']]>', i) - if k == -1: - k = len(self.rawdata) - data = self.rawdata[i+9:k] - j = k+3 - self._toStringSubclass(data, CData) - else: - try: - j = HTMLParser.parse_declaration(self, i) - except HTMLParseError: - toHandle = self.rawdata[i:] - self.handle_data(toHandle) - j = i + len(toHandle) - return j - - -class BeautifulStoneSoup(Tag): - """This class contains the basic parser and search code. 
It defines a parser that knows nothing about tag behavior except for the following: @@ -1163,15 +1079,14 @@ def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, markupMassage=True, smartQuotesTo=XML_ENTITIES, - convertEntities=None, selfClosingTags=None, isHTML=False, - builder=HTMLParserBuilder): + convertEntities=None, selfClosingTags=None, isHTML=False): """The Soup object is initialized as the 'root tag', and the provided markup (which can be a string or a file-like object) is fed into the underlying parser. - HTMLParser will process most bad HTML, and the BeautifulSoup + sgmllib will process most bad HTML, and the BeautifulSoup class has some tricks for dealing with some HTML that kills - HTMLParser, but Beautiful Soup can nonetheless choke or lose data + sgmllib, but Beautiful Soup can nonetheless choke or lose data if your data uses self-closing tags or declarations incorrectly. @@ -1181,7 +1096,7 @@ you'll get better performance. The default parser massage techniques fix the two most common - instances of invalid HTML that choke HTMLParser: + instances of invalid HTML that choke sgmllib: <br/> (No space between name of closing tag and tag close) <! --Comment--> (Extraneous whitespace in declaration) @@ -1219,8 +1134,7 @@ self.escapeUnrecognizedEntities = False self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) - self.builder = builder(self) - self.reset() + SGMLParser.__init__(self) if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() @@ -1230,9 +1144,18 @@ self._feed(isHTML=isHTML) except StopParsing: pass - self.markup = None # The markup can now be GCed. - self.builder = None # So can the builder. 
+ self.markup = None # The markup can now be GCed + def convert_charref(self, name): + """This method fixes a bug in Python's SGMLParser.""" + try: + n = int(name) + except ValueError: + return + if not 0 <= n <= 127 : # ASCII ends at 127, not 255 + return + return self.convert_codepoint(n) + def _feed(self, inDocumentEncoding=None, isHTML=False): # Convert the document to Unicode. markup = self.markup @@ -1248,7 +1171,7 @@ self.declaredHTMLEncoding = dammit.declaredHTMLEncoding if markup: if self.markupMassage: - if not isList(self.markupMassage): + if not hasattr(self.markupMassage, "__iter__"): self.markupMassage = self.MARKUP_MASSAGE for fix, m in self.markupMassage: markup = fix.sub(m, markup) @@ -1258,14 +1181,27 @@ # was relying on the existence of markupMassage, this # might cause problems. del(self.markupMassage) - self.builder.reset() + self.reset() - self.builder.feed(markup) + SGMLParser.feed(self, markup) # Close out any unfinished strings and close all the open tags. self.endData() while self.currentTag.name != self.ROOT_TAG_NAME: self.popTag() + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + def isSelfClosingTag(self, name): """Returns true iff the given string is the name of a self-closing tag according to this parser.""" @@ -1275,7 +1211,7 @@ def reset(self): Tag.__init__(self, self, self.ROOT_TAG_NAME) self.hidden = 1 - self.builder.reset() + SGMLParser.reset(self) self.currentData = [] self.currentTag = None self.tagStack = [] @@ -1284,12 +1220,6 @@ def popTag(self): tag = 
self.tagStack.pop() - # Tags with just one string-owning child get the child as a - # 'string' property, so that soup.tag.string is shorthand for - # soup.tag.contents[0] - if len(self.currentTag.contents) == 1 and \ - isinstance(self.currentTag.contents[0], NavigableString): - self.currentTag.string = self.currentTag.contents[0] #print "Pop", tag.name if self.tagStack: @@ -1378,9 +1308,9 @@ #last occurance. popTo = name break - if (nestingResetTriggers != None + if (nestingResetTriggers is not None and p.name in nestingResetTriggers) \ - or (nestingResetTriggers == None and isResetNesting + or (nestingResetTriggers is None and isResetNesting and self.RESET_NESTING_TAGS.has_key(p.name)): #If we encounter one of the nesting reset triggers @@ -1399,7 +1329,7 @@ if self.quoteStack: #This is not a real tag. #print "<%s> is not real!" % name - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) + attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs]) self.handle_data('<%s%s>' % (name, attrs)) return self.endData() @@ -1440,10 +1370,100 @@ def handle_data(self, data): self.currentData.append(data) - def extractCharsetFromMeta(self, attrs): - self.unknown_starttag('meta', attrs) + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.endData() + self.handle_data(text) + self.endData(subclass) + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." 
+ if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. 
Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + class BeautifulSoup(BeautifulStoneSoup): """This parser knows the following facts about HTML: @@ -1499,8 +1519,8 @@ BeautifulStoneSoup.__init__(self, *args, **kwargs) SELF_CLOSING_TAGS = buildTagMap(None, - ['br' , 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base']) + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) @@ -1509,13 +1529,13 @@ #According to the HTML standard, each of these inline tags can #contain another tag of the same type. Furthermore, it's common #to actually use these tags this way. - NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', - 'center'] + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') #According to the HTML standard, these block tags can contain #another tag of the same type. Furthermore, it's common #to actually use these tags this way. - NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') #Lists can contain other lists, but there are restrictions. NESTABLE_LIST_TAGS = { 'ol' : [], @@ -1535,7 +1555,7 @@ 'tfoot' : ['table'], } - NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') #If one of these tags is encountered, all tags up to the next tag of #this type are popped. 
@@ -1550,7 +1570,7 @@ # Used to detect the charset in a META tag; see start_meta CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) - def extractCharsetFromMeta(self, attrs): + def start_meta(self, attrs): """Beautiful Soup can detect a charset included in a META tag, try to convert the document to that charset, and re-parse the document from the beginning.""" @@ -1597,7 +1617,6 @@ if tag and tagNeedsEncodingSubstitution: tag.containsSubstitutions = True - class StopParsing(Exception): pass @@ -1627,11 +1646,11 @@ wouldn't be.""" I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ - ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', - 'big'] + 'big') - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, @@ -1778,18 +1797,15 @@ self.unicode = u if not u: self.originalEncoding = None - def _subMSChar(self, match): + def _subMSChar(self, orig): """Changes a MS smart quote character to an XML or HTML entity.""" - orig = match.group(1) sub = self.MS_CHARS.get(orig) - if type(sub) == types.TupleType: + if isinstance(sub, tuple): if self.smartQuotesTo == 'xml': - sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + sub = '&#x%s;' % sub[1] else: - sub = '&'.encode() + sub[0].encode() + ';'.encode() - else: - sub = sub.encode() + sub = '&%s;' % sub[0] return sub def _convertFrom(self, proposed): @@ -1804,9 +1820,9 @@ if self.smartQuotesTo and proposed.lower() in("windows-1252", "iso-8859-1", "iso-8859-2"): - smart_quotes_re = "([\x80-\x9f])" - smart_quotes_compiled = re.compile(smart_quotes_re) - markup = smart_quotes_compiled.sub(self._subMSChar, markup) + markup = re.compile("([\x80-\x9f])").sub \ + (lambda(x): self._subMSChar(x.group(1)), + markup) try: # print 
"Trying to convert document to %s" % proposed @@ -1895,15 +1911,13 @@ pass except: xml_encoding_match = None - xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode() - xml_encoding_match = re.compile(xml_encoding_re).match(xml_data) + xml_encoding_match = re.compile( + '^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) if not xml_encoding_match and isHTML: - meta_re = '<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode() - regexp = re.compile(meta_re, re.I) + regexp = re.compile('<\s*meta[^>]+charset=([^>]*?)[;\'">]', re.I) xml_encoding_match = regexp.search(xml_data) if xml_encoding_match is not None: - xml_encoding = xml_encoding_match.groups()[0].decode( - 'ascii').lower() + xml_encoding = xml_encoding_match.groups()[0].lower() if isHTML: self.declaredHTMLEncoding = xml_encoding if sniffed_xml_encoding and \

1 0

SVN: [9170] branches/rewrite/scripts/cosmetic_changes.py
by xqt＠svn.wikimedia.org 12 Apr '11

12 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9170 Revision: 9170 Author: xqt Date: 2011-04-13 04:41:33 +0000 (Wed, 13 Apr 2011) Log Message: ----------- Do not putSpacesInLists inside template namespaces (update from trunk r9169) Modified Paths: -------------- branches/rewrite/scripts/cosmetic_changes.py Modified: branches/rewrite/scripts/cosmetic_changes.py =================================================================== --- branches/rewrite/scripts/cosmetic_changes.py 2011-04-13 04:38:23 UTC (rev 9169) +++ branches/rewrite/scripts/cosmetic_changes.py 2011-04-13 04:41:33 UTC (rev 9170) @@ -369,13 +369,17 @@ For better readability of bullet list and enumeration wiki source code, puts a space between the * or # and the text. - NOTE: This space is recommended in the syntax help on the English, German, - and French Wikipedia. It might be that it is not wanted on other wikis. - If there are any complaints, please file a bug report. + NOTE: This space is recommended in the syntax help on the English, + German, and French Wikipedia. It might be that it is not wanted on other + wikis. If there are any complaints, please file a bug report. """ exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline'] - if not self.redirect and pywikibot.calledModuleName() <> 'capitalize_redirects': - text = pywikibot.replaceExcept(text, r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)', '\g<bullet> \g<char>', exceptions) + if not (self.redirect or self.template) and \ + pywikibot.calledModuleName() != 'capitalize_redirects': + text = pywikibot.replaceExcept( + text, + r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)', '\g<bullet> \g<char>', + exceptions) return text def replaceDeprecatedTemplates(self, text):

1 0

SVN: [9169] trunk/pywikipedia/cosmetic_changes.py
by xqt＠svn.wikimedia.org 12 Apr '11

12 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9169 Revision: 9169 Author: xqt Date: 2011-04-13 04:38:23 +0000 (Wed, 13 Apr 2011) Log Message: ----------- Do not putSpacesInLists inside template namespaces like http://hsb.wikipedia.org/w/index.php?title=P%C5%99ed%C5%82oha%3AInfoka%C5%A… Modified Paths: -------------- trunk/pywikipedia/cosmetic_changes.py Modified: trunk/pywikipedia/cosmetic_changes.py =================================================================== --- trunk/pywikipedia/cosmetic_changes.py 2011-04-12 23:08:51 UTC (rev 9168) +++ trunk/pywikipedia/cosmetic_changes.py 2011-04-13 04:38:23 UTC (rev 9169) @@ -637,13 +637,17 @@ For better readability of bullet list and enumeration wiki source code, puts a space between the * or # and the text. - NOTE: This space is recommended in the syntax help on the English, German, - and French Wikipedia. It might be that it is not wanted on other wikis. - If there are any complaints, please file a bug report. + NOTE: This space is recommended in the syntax help on the English, + German, and French Wikipedia. It might be that it is not wanted on other + wikis. If there are any complaints, please file a bug report. """ exceptions = ['comment', 'math', 'nowiki', 'pre', 'source', 'timeline'] - if not self.redirect and pywikibot.calledModuleName() <> 'capitalize_redirects': - text = pywikibot.replaceExcept(text, r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)', '\g<bullet> \g<char>', exceptions) + if not (self.redirect or self.template) and \ + pywikibot.calledModuleName() != 'capitalize_redirects': + text = pywikibot.replaceExcept( + text, + r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)', '\g<bullet> \g<char>', + exceptions) return text def replaceDeprecatedTemplates(self, text):

1 0

SVN: [9168] trunk/pywikipedia/cfd.py
by cydeweys＠svn.wikimedia.org 12 Apr '11

12 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9168 Revision: 9168 Author: cydeweys Date: 2011-04-12 23:08:51 +0000 (Tue, 12 Apr 2011) Log Message: ----------- Improved linking in edit summaries. Modified Paths: -------------- trunk/pywikipedia/cfd.py Modified: trunk/pywikipedia/cfd.py =================================================================== --- trunk/pywikipedia/cfd.py 2011-04-12 09:28:03 UTC (rev 9167) +++ trunk/pywikipedia/cfd.py 2011-04-12 23:08:51 UTC (rev 9168) @@ -93,9 +93,9 @@ dest = m.result.group(2) thisDay = findDay(src, day) if (mode == "Move" and thisDay != "None"): - summary = "Robot - Moving category " + src + " to " + dest + " per [[WP:CFD|CFD]] at " + thisDay + "." + summary = "Robot - Moving category " + src + " to [[:Category:" + dest + "]] per [[WP:CFD|CFD]] at " + thisDay + "." elif (mode == "Speedy"): - summary = "Robot - Speedily moving category " + src + " to " + dest + " per [[WP:CFDS|CFDS]]." + summary = "Robot - Speedily moving category " + src + " to [[:Category:" + dest + "]] per [[WP:CFDS|CFDS]]." else: continue # If the category is redirect, we do NOT want to move articles to

1 0

SVN: [9167] trunk/pywikipedia/solve_disambiguation.py
by xqt＠svn.wikimedia.org 12 Apr '11

12 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9167 Revision: 9167 Author: xqt Date: 2011-04-12 09:28:03 +0000 (Tue, 12 Apr 2011) Log Message: ----------- update messages from rewrite r9157 Modified Paths: -------------- trunk/pywikipedia/solve_disambiguation.py Modified: trunk/pywikipedia/solve_disambiguation.py =================================================================== --- trunk/pywikipedia/solve_disambiguation.py 2011-04-12 09:02:30 UTC (rev 9166) +++ trunk/pywikipedia/solve_disambiguation.py 2011-04-12 09:28:03 UTC (rev 9167) @@ -131,42 +131,61 @@ # Summary message when working on disambiguation pages and the link is removed msg_unlink = { + 'af': u'Robot-ondersteunde aanstuur: %s - skakel(s) verwyder.', 'als': u'Bot-unterstitzti Begriffsklärig: %s - Link uusegnuu', 'ar': u'توضيح بمساعدة روبوت: %s - أزال الوصلة أو الوصلات.', + 'ast': u'Dixebra asistida por robot: %s - Enllaz desaniciáu.', 'be-tarask': u'Аўтаматычнае выпраўленьне неадназначнасьцяў: %s — выдаленая спасылка(і).', 'br': u'Kudenn diforc\'hañ diskoulmet dre ar robot : %s - liamm(où) tennet.', + 'bs': u'Čvor podržan od bota: %s - Uklonjen(i) link(ovi).', 'ca': u'Desambiguació assistida: %s - Eliminant enllaç(os).', 'cs': u'Odstranění linku na rozcestník [[%s]] s použitím robota - Odstraněn(y) odkaz(y)', + 'cy': u'Gwahaniaethu gyda chymorth robot: %s - Dolenni a dynnwyd.', 'da': u'Retter flertydigt link til: %s - Fjernede link(s)', 'de': u'Bot-unterstützte Begriffsklärung: %s - Link(s) entfernt', + 'el': u'Υποβοηθούμενη αποσαφήνιση από Ρομπότ: %s - Σύνδεσμος(οι) αφαιρέθηκε(αν).', 'en': u'Robot-assisted disambiguation: %s - Removed link(s).', 'eo': u'Robota unusencigo: %s - Forigis ligo(j)n', 'fa': u'ابهام زدایی به کمک ربات: حذف %s', 'fi': u'Täsmennystä botin avulla: %s - poistettiin linkkejä.', 'fr': u'Homonymie résolue à l’aide du robot: %s - Retrait du (des) lien(s)', 'frp': u'Homonimia solucionâ avouéc un robot : %s - Retrèt du (des) lim(s).', + 'gl': u'Páxina de homónimos resolta 
polo bot: eliminou unha ou varias ligazóns de "%s"', + 'gsw': u'Bot-unterstitzti Begriffsklärig: %s - Link uusegnuu', 'he': u'הסרת קישור לדף פירושונים באמצעות בוט: %s', 'hu': u'Bottal végzett egyértelműsítés: %s – hivatkozások eltávolítása', 'ia': u'Disambiguation assistite per robot: %s - Removed link(s).', + 'id': u'Disambiguasi berbantuan bot: %s - Tautan dihapus', 'it': u'Sistemazione automatica della disambigua: %s - Collegamenti rimossi', 'ja': u'ロボット補助による曖昧さ回避：　%s - リンクを除去しました', 'kk': u'Айрықты мағыналарды бот көмегімен шешу: %s - Removed link(s).', 'ko': u'로봇의 도움을 받아 동음이의 처리: [[%s]] - 링크 제거', + 'ksh': u'Bot: Watt-ėßß-datt?-Ömleidongs-Sigg %s — Lengk udder Lengks eruß jenumme.', 'lb': u'Bot-ënnerstetzten Homonymie: %s - Link(en) ewechgeholl', 'lt': u'Nuorodų į nukrepiamąjį straipsnį keitimas: %s - Pašalintos nuorodos', 'mk': u'Роботизирано појаснување: %s - Отстранување на врска/ки', + 'ms': u'Penyahkekaburan bantuan bot: %s - Pautan digugurkan', + 'mt': u'Sistemazzjoni awtomatika tar-rindirizz: %s - Ħolqa/Ħoloq imneħħija', + 'nds': u'Bot-stütt MB: %s - Lenken rutnahmen', 'ne': u'रोबोट-सहायक अस्पष्टता: %s लाइ - लिङ्क(हरु) हटाइयो', - 'nl': u'Botgeholpen doorverwijzing: [[%s]] - Verwijzing(en) verwijderd', + 'nl': u'Robotgeholpen doorverwijzing: [[%s]] - Verwijzing(en) verwijderd', 'no': u'bot: Retter lenke til peker: %s - Fjernet lenke(r)', 'pl': u'Wspomagane przez robota ujednoznacznienie: %s - Usunięto link(i)', - 'pt': u'Desambiguação assistida por bot: %s link(s) removido(s)', + 'pt': u'Desambiguação auxiliada por robô: %s - Links removidos.', + 'pt-br': u'Desambiguação auxiliada por robô: %s link(s) removido(s)', 'ru': u'Разрешение значений с помощью бота: %s - Removed link(s)', + 'sl': u'Razločitev s pomočjo robota: %s – Odstranitev povezav', 'sr': u'Решавање вишезначних одредница помоћу бота: %s - Removed link(s)', + 'sr-ec': u'Роботова вишезначна одредница: %s – уклоњене везе.', + 'sr-el': u'Robotova višeznačna odrednica: %s – uklonjene veze.', 'sv': 
u'Länkar direkt till rätt artikel för: %s - Tog bort länk(ar)', - 'tr': u'Robot yardımıyla anlam ayrımı: %s - Kaldırılan bağlantı(lar).', + 'tl': u'Paglilinaw na may tulong ng robot: %s - Tinanggal na (mga) kawing.', + 'tr': u'Robot destekli anlam ayrımı: %s - Bağlantı(lar) kaldırıldı.', 'tt-cyrl': u'Бот ярдәмендә мәгънәләр киңәйтелмәсе: %s - Removed link(s)', 'uk': u'Виправлення посилання на багатозначність за допомогою бота: %s вилучено', - 'vi': u'Rôbốt giúp định hướng: %s – Dời liên kết', + 'vi': u'Robot giúp định hướng: %s – Dời liên kết', + 'zh-hans': u'机器人辅助消歧义：%s - 移除链接', + 'zh-hant': u'機器人輔助消除歧義：%s - 移除鏈接。', } # Summary message when working on redirects @@ -212,40 +231,59 @@ msg_redir_unlink = { 'als': u'Bot-unterstitzti Wyterleitigsuflesig: %s - Link uusegnuu', 'ar': u'توضيح بمساعدة روبوت: %s - أزال الوصلة أو الوصلات', + 'ast': u'Dixebra asistida por robot: %s - Enllaz desaniciáu', 'be-tarask': u'Аўтаматычнае выпраўленьне неадназначнасьцяў: %s — выдаленая спасылка(і)', 'br': u'Kudenn diforc\'hañ diskoulmet dre ar robot : %s - liamm(où) tennet', + 'bs': u'Čvor podržan od bota: %s - Uklonjen(i) link(ovi)', 'ca': u'Desambiguació assistida: %s - Eliminant enllaç(os)', 'cs': u'Robot opravil přesměrování na %s - Odstraněn(y) odkaz(y)', + 'cy': u'Gwahaniaethu gyda chymorth robot: %s - Dolenni a dynnwyd', 'da': u'Retter flertydigt link til: %s - Fjernede link(s)', 'de': u'Bot-unterstützte Weiterleitungsauflösung: %s - Link(s) entfernt', + 'el': u'Υποβοηθούμενη αποσαφήνιση από ρομπότ: %s - Σύνδεσμος(οι) αφαιρέθηκε(αν)', 'en': u'Robot-assisted disambiguation: %s - Removed link(s)', 'eo': u'Robota unusencigo: %s - Forigis ligo(j)n', 'fa': u'ابهام زدایی به کمک ربات: حذف %s', 'fi': u'Täsmennystä botin avulla: %s - poistettiin linkkejä', 'fr': u'Correction de lien vers redirect: %s - Retrait du (des) lien(s)', 'frp': u'Homonimia solucionâ avouéc un robot : %s - Retrèt du (des) lim(s)', + 'gl': u'Páxina de homónimos resolta polo bot: eliminou unha ou varias ligazóns de 
"%s"', + 'gsw': u'Bot-unterstitzti Wyterleitigsuflesig: %s - Link uusegnuu', 'he': u'הסרת קישור לדף פירושונים באמצעות בוט: %s', 'hu': u'Bottal támogatott egyértelműsítés: %s – hivatkozások eltávolítása', 'ia': u'Resolution de redirectiones assistite per robot: %s - Removed link(s).', + 'id': u'Disambiguasi berbantuan bot: %s - Tautan dihapus', 'it': u'Sistemazione automatica del redirect: %s - Collegamenti rimossi', 'ja': u'ロボット補助による曖昧さ回避：　%s - リンクを除去しました', 'kk': u'Айрықты мағыналарды бот көмегімен шешу: %s - Removed link(s).', 'ko': u'로봇의 도움을 받아 동음이의 처리: [[%s]] - 링크 제거', + 'ksh': u'Bot: Watt-ėßß-datt?-Ömleidongs-Sigg %s — Lengk udder Lengks eruß jenumme.', 'lb': u'Bot-ënnerstetzten Homonymie: %s - Link(en) ewechgeholl', 'lt': u'Nuorodų į peradresavimo straipsnį keitimas: %s - Pašalintos nuorodos', + 'mg': u'Fanitsiana rohy mankany amin\'ny fihodinana %s - rohy voala', 'mk': u'Роботизирано појаснување: %s - Отстранување на врска/ки', + 'ms': u'Penyahkekaburan bantuan bot: %s - Pautan digugurkan', + 'mt': u'Sistemazzjoni awtomatika tar-rindirizz: %s - Ħolqa/Ħoloq imneħħija', + 'nds': u'Bot-stütt MB: %s - Lenken rutnahmen', 'ne': u'रोबोट-सहायक अस्पष्ट: %s लाइ - लिङ्क(हरु) हटाइयो', - 'nl': u'Botgeholpen oplossing voor doorverwijzing: [[%s]] - Verwijzing(en) verwijderd', + 'nl': u'Robotgeholpen oplossing voor doorverwijzing: [[%s]] - Verwijzing(en) verwijderd', 'no': u'bot: Endrer omdirigeringslenke: %s - Fjernet lenke(r)', 'pl': u'Wspomagane przez robota ujednoznacznienie: %s - Usunięto link(i)', - 'pt': u'Desambiguação assistida por bot: %s link(s) removidos', + 'pt': u'Desambiguação auxiliada por robô: %s - Link(s) removidos', + 'pt-br': u'Desambiguação auxiliada por robô: %s - link(s) removido(s)', 'ru': u'Разрешение значений с помощью бота: %s - Removed link(s)', + 'sl': u'Razločitev s pomočjo robota: %s – Odstranitev povezav', 'sr': u'Решавање вишезначних одредница помоћу бота: %s - Removed link(s)', + 'sr-ec': u'Роботова вишезначна одредница: %s – уклоњене 
везе', + 'sr-el': u'Robotova višeznačna odrednica: %s – uklonjene veze', 'sv': u'Länkar direkt till rätt artikel för: %s - Tog bort länk(ar)', - 'tr': u'Robot yardımıyla anlam ayrımı: %s - Kaldırılan bağlantı(lar).', + 'tl': u'Paglilinaw na tinutulungan ng robot: %s - Tinanggal na (mga) kawing', + 'tr': u'Robot destekli anlam ayrımı: %s - Bağlantı(lar) kaldırıldı.', 'tt-cyrl': u'Бот ярдәмендә мәгънәләр киңәйтелмәсе: %s - Removed link(s)', 'uk': u'Виправлення посилання на багатозначність за допомогою бота: %s вилучено', - 'vi': u'Rôbốt giúp định hướng: %s – Dời liên kết', + 'vi': u'Robot giúp định hướng: %s – Dời liên kết', + 'zh-hans': u'机器人辅助消歧义：%s - 移除链接', + 'zh-hant': u'機器人輔助消除歧義：%s - 刪除鏈接', } # Disambiguation Needed template @@ -257,93 +295,179 @@ msg_dn = { 'als': u'Bot-unterstitzti Begriffsklärig: %s - brucht Ufmerksamkeit vun eme Expert', 'ar': u'توضيح بمساعدة روبوت: %s - التعليم كمحتاجة لانتباه خبير', + 'ast': u'Dixebra asistida por robot: %s - Marcada pa pidir l\'atención d\'un espertu', 'be-tarask': u'Аўтаматычнае выпраўленьне неадназначнасьцяў: %s — пазначаная як патрабуючая увагі экспэртаў', 'br': u'Kudenn diforc\'hañ diskoulmet dre ar robot : %s - merket evel da vezañ pledet ganti gant ur mailh', + 'bs': u'Čvor podržan od bota: %s - Označen za pregled od strane stručnjaka', + 'ca': u'Desambiguació assistida per robot: %s - Necessita la revisió d\'un expert', + 'cs': u'Robotem asistovaný rozcestník: %s - Označeno, že vyžaduje pozornost odborníka', + 'cy': u'Gwahaniaethu gyda chymorth robot: %s - Marcir ar gyfer sylw arbenigwr', + 'de': u'Bot-unterstützte Begriffsklärung: %s - markiert, benötigt kundige Wartung', 'en': u'Robot-assisted disambiguation: %s - Marked as needing expert attention', 'eo': u'Robota unusencigo: %s - Markis ke ĝi bezonas atenton de eksperto.', - 'fa': u'ربات نیمه خودکار علامت زدن %s به عنوان نیازمند بررسی بیشتر', - 'fr': u"Homonymie résolue à l’aide du robot : %s - marquée comme demandant l'attention d'un expert", + 'fa': u'ربات 
نیمه خودکار: علامت زدن %s به عنوان نیازمند بررسی بیشتر', + 'fr': u'Homonymie résolue à l’aide du robot : %s - marquée comme demandant l\'attention d\'un expert', 'frp': u'Homonimia solucionâ avouéc un robot : %s - Marcâ coment demandent l’atencion d’un èxpèrt', + 'gl': u'Páxina de homónimos resolta polo bot: "%s" necesita atención dun experto', + 'gsw': u'Bot-unterstitzti Begriffsklärig: %s - brucht Ufmerksamkeit vun eme Expert', 'he': u'טיפול בפירושונים בעזרת רובוט: %s - סומן כדורש תשומת לב ממומחה', 'ia': u'Disambiguation con robot: %s - Marcate como necessitante le attention de un experto', + 'id': u'Disambiguasi berbantuan bot: %s - Perlu perhatian pakar', 'ja': u'ロボット補助による曖昧さ回避：　%s - 専門家のチェックが必要として印付けしました', + 'ksh': u'Bot: Han de Watt-ėßß-datt?-Sigg %s makeet, doh moß ene Minsch noh looere.', 'lb': u'Bot-assistéiert Homonymie: %s - markéiert fir duerch en Expert nogekuckt ze ginn', 'mk': u'Роботизирано појаснување: %s - Означено како „потребно внимание од стручњак“', + 'ms': u'Penyahkekaburan bantuan bot: %s - Ditandai kerana memerlukan perhatian pakar', + 'mt': u'Sistemazzjoni awtomatika tar-rindirizz: %s - Immarkata bħala li teħtieġ attenzjoni minn espert/i', + 'nds': u'Bot-stütt MB: %s - Hülp von en Experten nödig', 'ne': u'रोबोट-सहायक अस्पष्ट: %s लाइ - विशेषज्ञ को ध्यानाकर्षण गराउँदै', 'nl': u'Robotgeholpen disambiguatie: %s - heeft aandacht van een expert nodig', + 'no': u'Robothjulpet løsing av flertydig lenke: %s – trenger eksperthjelp', 'pl': u'Wspomagane robotem ujednoznacznienie – %s – oznaczone jako wymagające uwagi eksperta', - 'pt': u'Desambiguação assistida por bot: %s - Marcada como necessitando de atenção especializada', + 'pt': u'Desambiguação auxiliada por robô: %s - Marcada como necessitando de atenção especializada', + 'pt-br': u'Desambiguação assistida por bot: %s - Marcada como necessitando de atenção especializada', 'ru': u'Неоднозначность с помощью робота: %s — помечена как требующая внимания эксперта', + 'sl': u'Razločitev s pomočjo 
robota: %s – Označeno kot potrebno pozornosti strokovnjaka', 'sr': u'Роботова вишезначна одредница: %s – означено као „потребна стручна пажња“', + 'sr-ec': u'Роботова вишезначна одредница: %s – означено као „потребна стручна пажња“', + 'sr-el': u'Robotova višeznačna odrednica: %s – označeno kao „potrebna stručna pažnja“', + 'sv': u'Robot-assisterad olika betydelser: %s - Märkt som i behov av uppmärksamhet från expert', + 'tl': u'Paglilinaw na may tulong ng robot: %s - Tinatakan bilang kailangan ng pagpansin ng dalubhasa', + 'tr': u'Robot destekli anlam ayrımı: %s - Uzman ilgisine ihtiyaç duyduğuna dair işaretleme gerçekleştirildi', 'tt-cyrl': u'Робот ярдәмендә: %s — экспертның игътибарын сораучы дип билгеләнде', - 'vi': u'Rôbốt giúp định hướng: %s – Đánh dấu là cần chuyên gia chú ý', + 'uk': u'Усунення неоднозначності за допомогою робота: %s — Позначена як така, що потребує уваги експерта', + 'vi': u'Robot giúp định hướng: %s – Đánh dấu là cần chuyên gia chú ý', + 'zh-hans': u'机器人辅助消歧义：%s - 已标记为需要专家关注', + 'zh-hant': u'機器人輔助消除歧義：%s - 標記為需要專家關注', } # Summary message when adding Disambiguation Needed template to a redirect link msg_redir_dn = { 'als': u'Bot-unterstitzti Begriffsklärig: %s - brucht Ufmerksamkeit vun eme Expert', 'ar': u'توضيح بمساعدة روبوت: %s - التعليم كمحتاجة لانتباه خبير', + 'ast': u'Dixebra asistida por robot: %s - Marcada pa pidir l\'atención d\'un espertu', 'be-tarask': u'Аўтаматычнае выпраўленьне неадназначнасьцяў: %s — пазначаная як патрабуючая ўвагі экспэртаў', 'br': u'Kudenn diforc\'hañ diskoulmet dre ar robot : %s - merket evel da vezañ pledet ganti gant ur mailh', + 'bs': u'Čvor podržan od bota: %s - Označen za pregled od strane stručnjaka', + 'ca': u'Desambiguació assistida per robot: %s - Necessita la revisió d\'un expert', + 'cs': u'Robotem asistovaný rozcestník: %s - Označeno, že vyžaduje pozornost odborníka', + 'cy': u'Gwahaniaethu gyda chymorth robot: %s - Marcir ar gyfer sylw arbenigwr', + 'da': u'Robotassisteret flertydig: %s - 
markeret til at kræve en eksperts opmærksomhed', + 'de': u'Bot-unterstützte Begriffsklärung: %s - markiert, benötigt kundige Wartung', + 'el': u'Υποβοηθούμενη αποσαφήνιση από Ρομπότ: %s - Επισημάνθηκε ως χρήζουσα της προσοχής ενος ειδικού', 'en': u'Robot-assisted disambiguation: %s - Marked as needing expert attention', 'eo': u'Robota unusencigo: %s - Markis ke ĝi bezonas atenton de eksperto.', - 'fa': u'ربات نیمه خودکار علامت زدن %s به عنوان نیازمند بررسی بیشتر', - 'fr': u"Homonymie résolue à l’aide du robot : %s - marquée comme demandant l'attention d'un expert", + 'fa': u'ربات نیمه خودکار: علامت زدن %s به عنوان نیازمند بررسی بیشتر', + 'fr': u'Homonymie résolue à l’aide du robot : %s - marquée comme demandant l\'attention d\'un expert', 'frp': u'Homonimia solucionâ avouéc un robot : %s - Marcâ coment demandent l’atencion d’un èxpèrt', + 'gl': u'Páxina de homónimos resolta polo bot: "%s" necesita atención dun experto', + 'gsw': u'Bot-unterstitzti Begriffsklärig: %s - brucht Ufmerksamkeit vun eme Expert', 'he': u'טיפול בפירושונים בעזרת רובוט: %s - סומן כדורש תשומת לב ממומחה', 'ia': u'Disambiguation con robot: %s - Marcate como necessitante le attention de un experto', + 'id': u'Disambiguasi berbantuan bot: %s - Perlu perhatian pakar', 'ja': u'ロボット補助による曖昧さ回避：　%s - 専門家のチェックが必要として印付けしました', + 'ksh': u'Bot: Han de Watt-ėßß-datt?-Ömleidongs-Sigg %s makeet, doh moß ene Minsch noh looere.', 'lb': u'Bot-assistéiert Homonymie: %s - markéiert fir duerch en Expert nogekuckt ze ginn', + 'mg': u'Fitovizana anarana voavahaolana tamin\'ny alàlan\'ny rôbô : %s - mila fitandreman\'ny mpahay.', 'mk': u'Роботизирано појаснување: %s - Означено како „потребно внимание од стручњак“', + 'ms': u'Penyahkekaburan bantuan bot: %s - Ditandai kerana memerlukan perhatian pakar', + 'mt': u'Sistemazzjoni awtomatika tar-rindirizz: %s - Immarkata bħala li teħtieġ attenzjoni minn espert/i', + 'nds': u'Bot-stütt MB: %s - Hülp von en Experten nödig', 'ne': u'रोबोट-सहायक अस्पष्ट: %s लाइ - विशेषज्ञ को 
ध्यानाकर्षण गराउँदै', 'nl': u'Robotgeholpen disambiguatie: %s - heeft aandacht van een expert nodig', + 'no': u'Robothjulpet løsing av flertydig lenke: %s – trenger eksperthjelp', 'pl': u'Wspomagane robotem ujednoznacznienie – %s – oznaczone jako wymagające uwagi eksperta', - 'pt': u'Desambiguação assistida por bot: %s - Marcada como necessitando de atenção especializada', + 'pt': u'Desambiguação auxiliada por robô: %s - Marcada como necessitando de atenção especializada', + 'pt-br': u'Desambiguação auxiliada por robô: %s - Marcado como precisando de atenção de especialistas', 'ru': u'Неоднозначность с помощью робота: %s — помечена как требующая внимания эксперта', + 'sl': u'Razločitev s pomočjo robota: %s – Označeno kot potrebno pozornosti strokovnjaka', + 'sr-ec': u'Роботова вишезначна одредница: %s – означено као „потребна стручна пажња“', + 'sr-el': u'Robotova višeznačna odrednica: %s – označeno kao „potrebna stručna pažnja“', + 'sv': u'Robot-assisterad olika betydelser: %s - Märkt som i behov av uppmärksamhet från expert', + 'tl': u'Paglilinaw na may tulong ng robot: %s - Minarkahan bilang nangangailangan ng pagpansin ng dalubhasa', + 'tr': u'Robot destekli anlam ayrımı: %s - Uzman ilgisine ihtiyaç duyduğuna dair işaretleme gerçekleştirildi', 'tt-cyrl': u'Робот ярдәмендә: %s — экспертның игътибарын сораучы дип билгеләнде', - 'vi': u'Rôbốt giúp định hướng: %s – Đánh dấu là cần chuyên gia chú ý', + 'uk': u'Усунення неоднозначності за допомогою робота: %s - Позначена як така, що потребує уваги експерта', + 'vi': u'Robot giúp định hướng: %s – Đánh dấu là cần chuyên gia chú ý', + 'zh-hans': u'机器人辅助消歧义：%s - 已标记为需要专家关注', + 'zh-hant': u'機器人輔助消除歧義：%s - 標記為需要專家關注', } # Summary message to (unknown) unknown_msg = { + 'af': u'(onbekend)', 'als': u'(nit bekannt)', 'ar': u'(غير معروف)', + 'ast': u'(desconocíu)', 'be-tarask': u'(невядома)', 'bn': u'(অজানা)', 'br': u'(dianav)', + 'bs': u'(nepoznato)', 'ca': u'(desconegut)', + 'ckb': u'(نەناسراو)', + 'cs': u'(neznámé)', + 'cy': 
u'(anhysbys)', + 'da': u'(ukendt)', + 'de': u'(unbekannt)', + 'el': u'(άγνωστο)', 'en': u'(unknown)', 'eo': u'(nesciata)', + 'es': u'(desconocido)', + 'eu': u'(ezezaguna)', 'fa': u'(نامعلوم)', 'fi': u'(tuntematon)', 'fr': u'(inconnu)', 'frp': u'(encognu)', + 'fur': u'(no cognossût)', + 'gl': u'(descoñecido)', + 'gsw': u'(nit bekannt)', 'he': u'(לא ידוע)', 'hsb': u'[njeznaty]', 'hu': u'(ismeretlen)', 'ia': u'(incognite)', + 'id': u'(tidak dikenal)', + 'it': u'(sconosciuto)', 'ja': u'（不明）', 'ksh': u'(onbekannt)', 'ku-latn': u'(nenas)', 'lb': u'(onbekannt)', + 'lt': u'(nežinomas)', + 'ltg': u'(nazynoms)', + 'lv': u'(nezināms)', + 'mg': u'(tsy fantatra)', 'mk': u'(непозната)', 'ml': u'(അപരിചിതം)', + 'ms': u'(tidak diketahui)', + 'mt': u'(mhux magħruf)', + 'my': u'(အမည်မသိ)', + 'nah': u'(âmò ìxmatkàyö)', + 'nds': u'(nich kennt)', 'ne': u'[अज्ञात]', 'nl': u'(onbekend)', + 'nn': u'(ukjend)', 'no': u'(ukjent)', 'pl': u'(nieznana)', 'ps': u'(ناڅرګند)', 'pt': u'(desconhecido)', + 'pt-br': u'(desconhecido)', 'ro': u'(necunoscut)', 'ru': u'(неизвестно)', 'rue': u'(незнаме)', + 'si': u'(නොදත්)', + 'sl': u'(neznano)', 'sr': u'(непознато)', + 'sr-ec': u'(непознато)', + 'sr-el': u'(nepoznato)', 'sv': u'(okänd)', + 'tl': u'(hindi nalalaman)', 'tr': u'(bilinmiyor)', 'tt-cyrl': u'(билгесез)', + 'uk': u'(невідома)', 'vi': u'(không rõ)', 'vo': u'(nesevädik)', - 'zh-hans': u'（未知）', + 'yi': u'(אומבאַקאַנט)', + 'zh-hans': u'(未知)', + 'zh-hant': u'(未知)', } # disambiguation page name format for "primary topic" disambiguations

1 0

SVN: [9166] trunk/pywikipedia/replace.py
by xqt＠svn.wikimedia.org 12 Apr '11

12 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9166 Revision: 9166 Author: xqt Date: 2011-04-12 09:02:30 +0000 (Tue, 12 Apr 2011) Log Message: ----------- update messages from rewrite r9157 Modified Paths: -------------- trunk/pywikipedia/replace.py Modified: trunk/pywikipedia/replace.py =================================================================== --- trunk/pywikipedia/replace.py 2011-04-12 08:56:32 UTC (rev 9165) +++ trunk/pywikipedia/replace.py 2011-04-12 09:02:30 UTC (rev 9166) @@ -164,24 +164,31 @@ # NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes' # below. msg = { + 'af': u'Robot: geoutomatiseerde teks vervanging %s', 'als': u'Bot: het dr Text automatisch uustuscht: %s', 'ar': u'%s بوت: استبدال تلقائي للنص', + 'ast': u'Bot: Troquéu automáticu de testu %s', 'be-tarask': u'Робат: аўтаматызаваная замена тэксту %s', 'br': u'Robot : Erlec\'hiañ testenn emgefre %s', 'bs': u'Bot: Automatska zamjena teksta %s', 'ca': u'Robot: Reemplaçament automàtic de text %s', 'cs': u'Robot automaticky nahradil text: %s', + 'cy': u'Bot: Amnewid testun awtomataidd %s', + 'da': u'Bot: Automatisk teksterstatning: %s', 'de': u'Bot: Automatisierte Textersetzung %s', 'el': u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s', 'en': u'Bot: Automated text replacement %s', 'eo': u'Roboto: Automata tekst-anstataŭigo: %s', 'es': u'Robot: Reemplazo automático de texto %s', + 'eu': u'Robota: Testu aldaketa automatikoa %s', 'fa': u'ربات: تغییر خودکار متن %s', 'fi': u'Botti korvasi automaattisesti tekstin %s', 'fr': u'Robot : Remplacement de texte automatisé %s', 'frp': u'Bot : remplacement de tèxto ôtomatisâ %s', 'frr': u'Bot: Automatisiaret ütjwakselt tekst %s', + 'fur': u'Robot: Sostituzion automatiche di test %s', 'gl': u'Bot: Substitución automática de texto %s', + 'gsw': u'Bot: het dr Text automatisch uustuscht: %s', 'he': u'בוט: החלפת טקסט אוטומטית %s', 'hsb': u'Boćik: Awtomatiske narunanje teksta %s', 'hu': u'Robot: Automatikus szövegcsere %s', @@ 
-196,9 +203,12 @@ 'ksh': u'Bot: hät outomatesch Täx jetuusch: %s', 'la': u'automaton: mutans textum automatice: %s', 'lb': u'Bot: Automatescht Ersetze vun Text %s', + 'li': u'Robot: autematis teks vervange %s', 'lt': u'robotas: Automatinis teksto keitimas %s', 'mk': u'Бот: Автоматизирана замена на текст %s', 'ms': u'Bot: Penggantian teks automatik %s', + 'mt': u'Bot: Sostituzzjoni awtomatika %s', + 'my': u'ဘော့ - စာသားများကို အလိုအလျောက် အစားထိုးခြင်း %s', 'nds': u'Bot: Text automaatsch utwesselt: %s', 'nds-nl': u'Bot: autematisch tekse vervungen %s', 'ne': u'बोट: स्वचालित रुपमा हरफहरु परिवर्तन गरिएको %s', @@ -206,18 +216,24 @@ 'nn': u'robot: automatisk teksterstatning: %s', 'no': u'robot: automatisk teksterstatning: %s', 'pl': u'Robot automatycznie zamienia tekst %s', - 'pt': u'Bot: Mudança automática %s', + 'pt': u'Robô: Substituição de texto automática %s', + 'pt-br': u'Robô: Substituição automática de texto %s', 'ro': u'Robot. Înlocuire automată de text %s', 'ru': u'Робот: Автоматизированная замена текста %s', 'rue': u'Робот: Автоматізована заміна тексту: %s', 'sl': u'Bot: Samodejna zamenjava besedila %s', 'sr': u'Бот: Аутоматска замена текста %s', + 'sr-ec': u'Бот: самостална замена текста %s', + 'sr-el': u'Bot: samostalna zamena teksta %s', 'sv': u'Bot: Automatisk textersättning: %s', + 'tl': u'Bot: Kusang pagpapalit ng teksto %s', 'tr': u'Bot: Otomatik metin değiştirme %s', 'tt-cyrl': u'Робот: %s текстын автомат алмаштыру', 'uk': u'Бот: Автоматизована заміна тексту: %s', - 'vi': u'Rôbốt: Tự động thay thế văn bản %s', + 'vi': u'Robot: Tự động thay thế văn bản %s', 'zh': u'機器人:執行文字代換作業 %s', + 'zh-hans': u'机器人：自动文本替换%s', + 'zh-hant': u'機器人：自動替換文字%s', }

1 0

SVN: [9165] trunk/pywikipedia/cosmetic_changes.py
by xqt＠svn.wikimedia.org 12 Apr '11

12 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9165 Revision: 9165 Author: xqt Date: 2011-04-12 08:56:32 +0000 (Tue, 12 Apr 2011) Log Message: ----------- update messages from rewrite r9157 Modified Paths: -------------- trunk/pywikipedia/cosmetic_changes.py Modified: trunk/pywikipedia/cosmetic_changes.py =================================================================== --- trunk/pywikipedia/cosmetic_changes.py 2011-04-12 06:28:27 UTC (rev 9164) +++ trunk/pywikipedia/cosmetic_changes.py 2011-04-12 08:56:32 UTC (rev 9165) @@ -69,13 +69,17 @@ # Summary message when using this module as a stand-alone script msg_standalone = { 'commons': u'Bot: [[Commons talk:Tools/pywiki file description cleanup|desc page fmt]]', - 'als':u'Bötli: chleineri Änderige', + 'als': u'Bötli: chleineri Änderige', 'ar': u'روبوت: تغييرات تجميلية', + 'ast': u'Robot: Cambéos cosméticos', + 'be-tarask': u'Робат: касмэтычныя зьмены', 'be-x-old': u'Робат: касмэтычныя зьмены', 'bg': u'Робот козметични промени', + 'bn': u'বট: কসমেটিক পরিবর্তন', 'br': u'Bot: Kemm dister', + 'bs': u'Robot: kozmetičke promjene', 'ca': u'Robot: Canvis cosmètics', - 'ckb':u'بۆت: دەستکاریی جوانکاری', + 'ckb': u'بۆت: دەستکاریی جوانکاری', 'cs': u'Robotické: kosmetické úpravy', 'da': u'Bot: Kosmetiske ændringer', 'de': u'Bot: Kosmetische Änderungen', @@ -83,64 +87,78 @@ 'en': u'Robot: Cosmetic changes', 'es': u'Robot: Cambios triviales', 'et': u'robot: kosmeetilised muudatused', + 'eu': u'Robota: Aldaketa kosmetikoak', 'fa': u'ربات: زیباسازی', 'fi': u'Botti kosmeettisia muutoksia', 'fr': u'Robot : Changement de type cosmétique', - 'frr':u'Bot: Kosmeetisk feranerangen', + 'frr': u'Bot: Kosmeetisk feranerangen', 'fy': u'bot tekstwiziging', 'ga': u'Róbat: Athruithe cosmaideacha', 'gl': u'bot Cambios estética', 'he': u'בוט: שינויים קוסמטיים', 'hi': u'Bot: अंगराग परिवर्तन', 'hr': u'robot kozmetičke promjene', + 'hsb': u'Bot: Kosmetiske změny', 'hu': u'Bot: kozmetikai változtatások', 'ia': u'Robot: Cambios cosmetic', 'id': 
u'bot kosmetik perubahan', 'it': u'Bot: Modifiche estetiche', 'ja': u'ロボットによる: 細部の編集', 'ko': u'로봇: 예쁘게 바꿈', + 'ksh': u'Bot: Änderonge för de Schönheit', 'la': u'automaton: mutationes minores', + 'lb': u'Bot: Kosmetesch Ännerungen', + 'li': u'Robot: cosmetische verangeringe', 'lt': u'robotas: smulkūs taisymai', 'lv': u'robots kosmētiskās izmaiņas', 'mk': u'Бот: козметички промени', 'ms': u'Bot: perubahan kosmetik', 'mt': u'Bot: kosmetiċi bidliet', - 'nl': u'Bot: cosmetische wijzigingen', + 'nds': u'Bot: kosmeetsch Ännern', + 'nl': u'Robot: cosmetische wijzigingen', + 'nn': u'Robot: Kosmetiske endringar', 'no': u'Bot: Kosmetiske endringer', - 'nn': u'Robot: Kosmetiske endringar', - 'pdc':u'Waddefresser: gleenere Enneringe', - 'pfl':u'Bot: Klännere Ännerunge', + 'pdc': u'Waddefresser: gleenere Enneringe', + 'pfl': u'Bot: Klännere Ännerunge', 'pl': u'Robot dokonuje poprawek kosmetycznych', - 'pt': u'Bot: Mudanças triviais', + 'pt': u'Robô: Mudanças triviais', 'ro': u'robot modificări cosmetice', 'ru': u'робот косметические изменения', 'sk': u'robot kozmetické zmeny', 'sl': u'robot kozmetične spremembe', 'sr': u'Бот козметичке промене', + 'sr-ec': u'Робот: козметичке измене', + 'sr-el': u'Robot: kozmetičke izmene', 'sv': u'Bot: Kosmetiska ändringar', 'th': u'บอต ปรับแต่งให้อ่านง่าย', 'tk': u'Bot: kosmetik üýtgeşme', 'tl': u'robot Kosmetiko pagbabago', 'tr': u'Bot Kozmetik değişiklikler', + 'tt-cyrl': u'Робот: Косметик үзгәртүләр', 'uk': u'робот косметичні зміни', - 'vec':u'Bot: Modifiche estetiche', - 'vi': u'robot: Sửa cách trình bày', - 'war':u'Robot: Kosmetiko nga mga pagbag-o', + 'vec': u'Bot: Modifiche estetiche', + 'vi': u'Robot: Sửa cách trình bày', + 'war': u'Robot: Kosmetiko nga mga pagbag-o', 'yi': u'באט: קאסמעטישע ענדערונגען', 'zh': u'機器人: 細部更改', + 'zh-hans': u'机器人：化妆品的变化', } # Summary message that will be appended to the normal message when # cosmetic changes are made on the fly msg_append = { 'commons': u'; [[Commons talk:Tools/pywiki file description 
cleanup|desc page fmt]]', - 'als':u'; chleineri Änderige', + 'als': u'; chleineri Änderige', 'ar': u'; تغييرات تجميلية', + 'ast': u'; cambéos cosméticos', + 'be-tarask': u'; касмэтычныя зьмены', 'be-x-old': u'; касмэтычныя зьмены', 'bg': u'; козметични промени', + 'bn': u'; কসমেটিক পরিবর্তন', 'br': u'; Kemm dister', + 'bs': u'; kozmetičke promjene', 'ca': u'; canvis cosmètics', - 'ckb':u'; دەستکاریی جوانکاری', + 'ckb': u'; دەستکاریی جوانکاری', 'cs': u'; kosmetické úpravy', 'da': u'; kosmetiske ændringer', 'de': u'; kosmetische Änderungen', @@ -148,33 +166,39 @@ 'en': u'; cosmetic changes', 'es': u'; cambios triviales', 'et': u'; kosmeetilised muudatused', + 'eu': u'; aldaketa kosmetikoak', 'fa': u'; زیباسازی', 'fi': u'; kosmeettisia muutoksia', 'fr': u'; changement de type cosmétique', - 'frr':u'; kosmeetisk feranerangen', + 'frr': u'; kosmeetisk feranerangen', 'fy': u'; tekstwiziging', 'ga': u'; athruithe cosmaideacha', 'gl': u'; cambios estética', 'he': u'; שינויים קוסמטיים', 'hi': u'; अंगराग परिवर्तन', 'hr': u'; kozmetičke promjene', + 'hsb': u'; kosmetiske změny', 'hu': u'; kozmetikai változtatások', 'ia': u'; cambios cosmetic', 'id': u'; kosmetik perubahan', 'it': u'; modifiche estetiche', 'ja': u'; 細部の編集', 'ko': u'; 예쁘게 바꿈', + 'ksh': u'; Änderonge för de Schönheit', 'la': u'; mutationes minores', + 'lb': u'; kosmetesch Ännerungen', + 'li': u'; cosmetische verangeringe', 'lt': u'; smulkūs taisymai', 'lv': u'; kosmētiskās izmaiņas', - 'mt': u'; kosmetiċi bidliet', 'mk': u'; козметички промени', 'ms': u'; perubahan kosmetik', + 'mt': u'; kosmetiċi bidliet', + 'nds': u'; kosmeetsch Ännern', 'nl': u'; cosmetische veranderingen', + 'nn': u'; kosmetiske endringar', 'no': u'; kosmetiske endringer', - 'nn': u'; kosmetiske endringar', - 'pdc':u'; gleenere Enneringe', - 'pfl':u'; klännere Ännerunge', + 'pdc': u'; gleenere Enneringe', + 'pfl': u'; klännere Ännerunge', 'pl': u'; zmiany kosmetyczne', 'pt': u'; mudanças triviais', 'ro': u'; modificări cosmetice', @@ -182,17 
+206,21 @@ 'sk': u'; kozmetické zmeny', 'sl': u'; kozmetične spremembe', 'sr': u'; козметичке промене', + 'sr-ec': u'; козметичке измене', + 'sr-el': u'; kozmetičke izmene', 'sv': u'; kosmetiska ändringar', 'th': u'; ปรับแต่งให้อ่านง่าย', 'tk': u'; kosmetik üýtgeşme', 'tl': u'; Kosmetiko pagbabago', 'tr': u'; Kozmetik değişiklikler', + 'tt-cyrl': u'; косметик үзгәртүләр', 'uk': u'; косметичні зміни', - 'vec':u'; modifiche estetiche', + 'vec': u'; modifiche estetiche', 'vi': u'; sửa cách trình bày', - 'war':u'; kosmetiko nga mga pagbag-o', + 'war': u'; kosmetiko nga mga pagbag-o', 'yi': u'; קאסמעטישע ענדערונגען', 'zh': u'; 細部更改', + 'zh-hans': u';化妆品的变化', } nn_iw_msg = u''

1 0

SVN: [9164] trunk/pywikipedia/catlib.py
by a_engels＠svn.wikimedia.org 11 Apr '11

11 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9164 Revision: 9164 Author: a_engels Date: 2011-04-12 06:28:27 +0000 (Tue, 12 Apr 2011) Log Message: ----------- category sort keys are always in upper case Modified Paths: -------------- trunk/pywikipedia/catlib.py Modified: trunk/pywikipedia/catlib.py =================================================================== --- trunk/pywikipedia/catlib.py 2011-04-12 06:07:23 UTC (rev 9163) +++ trunk/pywikipedia/catlib.py 2011-04-12 06:28:27 UTC (rev 9164) @@ -214,6 +214,7 @@ wikipedia.output('Getting [[%s]] list from %s...' % (self.title(), "%s=%s" % currentPageOffset.popitem())) elif startFrom: + startFrom = startFrom.upper() # category sort keys are uppercase params['cmstartsortkey'] = startFrom wikipedia.output('Getting [[%s]] list starting at %s...' % (self.title(), startFrom))

1 0

SVN: [9163] trunk/pywikipedia/wikipedia.py
by xqt＠svn.wikimedia.org 11 Apr '11

11 Apr '11

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9163 Revision: 9163 Author: xqt Date: 2011-04-12 06:07:23 +0000 (Tue, 12 Apr 2011) Log Message: ----------- page object has no versionnumber() use page.site() instead (fix for r9160) Modified Paths: -------------- trunk/pywikipedia/wikipedia.py Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2011-04-12 06:04:32 UTC (rev 9162) +++ trunk/pywikipedia/wikipedia.py 2011-04-12 06:07:23 UTC (rev 9163) @@ -1113,7 +1113,7 @@ """ found = False - if self.isRedirectPage() and self.versionnumber() > 13: + if self.isRedirectPage() and self.site().versionnumber() > 13: staticKeys = self.site().getmagicwords('staticredirect') text = self.get(get_redirect=True, force=force) if staticKeys:

1 0

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

Pywikipedia-svn April 2011