Xqt has submitted this change and it was merged.
Change subject: remove patch-BeautifulSoup since not needed and causes issues with setup of core components
......................................................................
remove patch-BeautifulSoup since not needed and causes issues with setup of core components
Change-Id: I4b5462737fbf54c7521786bf03e71a90421926fc
---
M externals/__init__.py
D externals/patch-BeautifulSoup
2 files changed, 1 insertion(+), 845 deletions(-)
Approvals:
  DrTrigon: Checked; Looks good to me, but someone else must approve
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/externals/__init__.py b/externals/__init__.py index 13ea4c4..83045f5 100644 --- a/externals/__init__.py +++ b/externals/__init__.py @@ -57,9 +57,7 @@ 'BeautifulSoup.py': ({'linux-fedora': ['python-BeautifulSoup'], 'linux-ubuntu': ['']}, { 'url': 'https://pypi.python.org/packages/source/B/BeautifulSoup/BeautifulSoup-3.2.0....', - 'path': 'BeautifulSoup-3.2.0/BeautifulSoup.py', - #$ diff -Nau TEST_BeautifulSoup.py BeautifulSoup.py > patch-BeautifulSoup - 'patch': 'patch-BeautifulSoup'}, + 'path': 'BeautifulSoup-3.2.0/BeautifulSoup.py'}, {}), # OK 'irclib': ({'linux-fedora': ['python-irclib'], 'linux-ubuntu': ['']}, diff --git a/externals/patch-BeautifulSoup b/externals/patch-BeautifulSoup deleted file mode 100644 index 271c061..0000000 --- a/externals/patch-BeautifulSoup +++ /dev/null @@ -1,842 +0,0 @@ ---- TEST_BeautifulSoup.py 2010-11-21 14:35:28.000000000 +0100 -+++ BeautifulSoup.py 2013-03-02 14:56:38.000000000 +0100 -@@ -76,7 +76,6 @@ - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. - - """ --from __future__ import generators - - __author__ = "Leonard Richardson (leonardr@segfault.org)" - __version__ = "3.2.0" -@@ -90,9 +89,9 @@ - import re - import sgmllib - try: -- from htmlentitydefs import name2codepoint -+ from htmlentitydefs import name2codepoint - except ImportError: -- name2codepoint = {} -+ name2codepoint = {} - try: - set - except NameError: -@@ -104,12 +103,13 @@ - - DEFAULT_OUTPUT_ENCODING = "utf-8" - -+ - def _match_css_class(str): - """Build a RE to match the given CSS class.""" - return re.compile(r"(^|.*\s)%s($|\s)" % str) - --# First, the classes that represent markup elements. - -+# First, the classes that represent markup elements. 
- class PageElement(object): - """Contains the navigational information for some part of the page - (either a tag or a piece of text)""" -@@ -129,8 +129,8 @@ - def replaceWith(self, replaceWith): - oldParent = self.parent - myIndex = self.parent.index(self) -- if hasattr(replaceWith, "parent")\ -- and replaceWith.parent is self.parent: -+ if hasattr(replaceWith, "parent") and \ -+ replaceWith.parent is self.parent: - # We're replacing this element with one of its siblings. - index = replaceWith.parent.index(replaceWith) - if index and index < myIndex: -@@ -187,11 +187,11 @@ - return lastChild - - def insert(self, position, newChild): -- if isinstance(newChild, basestring) \ -- and not isinstance(newChild, NavigableString): -+ if isinstance(newChild, basestring) and not \ -+ isinstance(newChild, NavigableString): - newChild = NavigableString(newChild) - -- position = min(position, len(self.contents)) -+ position = min(position, len(self.contents)) - if hasattr(newChild, 'parent') and newChild.parent is not None: - # We're 'inserting' an element that's already one - # of this object's children. -@@ -228,7 +228,7 @@ - while not parentsNextSibling: - parentsNextSibling = parent.nextSibling - parent = parent.parent -- if not parent: # This is the last element in the document. -+ if not parent: # This is the last element in the document. 
- break - if parentsNextSibling: - newChildsLastElement.next = parentsNextSibling -@@ -273,7 +273,8 @@ - criteria and appear after this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.nextSiblingGenerator, **kwargs) -- fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x -+ -+ fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x - - def findPrevious(self, name=None, attrs={}, text=None, **kwargs): - """Returns the first item that matches the given criteria and -@@ -285,8 +286,8 @@ - """Returns all items that match the given criteria and appear - before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, self.previousGenerator, -- **kwargs) -- fetchPrevious = findAllPrevious # Compatibility with pre-3.x -+ **kwargs) -+ fetchPrevious = findAllPrevious # Compatibility with pre-3.x - - def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): - """Returns the closest sibling to this Tag that matches the -@@ -300,7 +301,7 @@ - criteria and appear before this Tag in the document.""" - return self._findAll(name, attrs, text, limit, - self.previousSiblingGenerator, **kwargs) -- fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x -+ fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x - - def findParent(self, name=None, attrs={}, **kwargs): - """Returns the closest parent of this Tag that matches the given -@@ -319,7 +320,8 @@ - - return self._findAll(name, attrs, None, limit, self.parentGenerator, - **kwargs) -- fetchParents = findParents # Compatibility with pre-3.x -+ -+ fetchParents = findParents # Compatibility with pre-3.x - - #These methods do the real heavy lifting. 
- -@@ -416,11 +418,12 @@ - s = unicode(s) - else: - if encoding: -- s = self.toEncoding(str(s), encoding) -+ s = self.toEncoding(str(s), encoding) - else: - s = unicode(s) - return s - -+ - class NavigableString(unicode, PageElement): - - def __new__(cls, value): -@@ -445,7 +448,8 @@ - if attr == 'string': - return self - else: -- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) -+ raise AttributeError("'%s' object has no attribute '%s'" -+ % (self.__class__.__name__, attr)) - - def __unicode__(self): - return str(self).decode(DEFAULT_OUTPUT_ENCODING) -@@ -456,11 +460,13 @@ - else: - return self - -+ - class CData(NavigableString): - - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding) - -+ - class ProcessingInstruction(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - output = self -@@ -468,14 +474,17 @@ - output = self.substituteEncoding(output, encoding) - return "<?%s?>" % self.toEncoding(output, encoding) - -+ - class Comment(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "<!--%s-->" % NavigableString.__str__(self, encoding) - -+ - class Declaration(NavigableString): - def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): - return "<!%s>" % NavigableString.__str__(self, encoding) - -+ - class Tag(PageElement): - - """Represents a found HTML tag with its attributes and contents.""" -@@ -483,15 +492,15 @@ - def _invert(h): - "Cheap function to invert a hash." 
- i = {} -- for k,v in h.items(): -+ for k, v in h.items(): - i[v] = k - return i - -- XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", -- "quot" : '"', -- "amp" : "&", -- "lt" : "<", -- "gt" : ">" } -+ XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'", -+ "quot": '"', -+ "amp": "&", -+ "lt": "<", -+ "gt": ">"} - - XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) - -@@ -550,8 +559,8 @@ - self.attrs = map(convert, self.attrs) - - def getString(self): -- if (len(self.contents) == 1 -- and isinstance(self.contents[0], NavigableString)): -+ if (len(self.contents) == 1 and isinstance(self.contents[0], -+ NavigableString)): - return self.contents[0] - - def setString(self, string): -@@ -593,7 +602,7 @@ - raise ValueError("Tag.index: element not in tag") - - def has_key(self, key): -- return self._getAttrMap().has_key(key) -+ return key in self._getAttrMap() - - def __getitem__(self, key): - """tag[key] returns the value of the 'key' attribute for the tag, -@@ -637,7 +646,7 @@ - #We don't break because bad HTML can define the same - #attribute multiple times. - self._getAttrMap() -- if self.attrMap.has_key(key): -+ if key in self.attrMap: - del self.attrMap[key] - - def __call__(self, *args, **kwargs): -@@ -652,7 +661,8 @@ - return self.find(tag[:-3]) - elif tag.find('__') != 0: - return self.find(tag) -- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) -+ raise AttributeError("'%s' object has no attribute '%s'" -+ % (self.__class__, tag)) - - def __eq__(self, other): - """Returns true iff this tag has the same name, the same attributes, -@@ -662,7 +672,9 @@ - same attributes in a different order. 
Should this be fixed?""" - if other is self: - return True -- if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): -+ if not hasattr(other, 'name') or not hasattr(other, 'attrs') or \ -+ not hasattr(other, 'contents') or self.name != other.name or \ -+ self.attrs != other.attrs or len(self) != len(other): - return False - for i in range(0, len(self.contents)): - if self.contents[i] != other.contents[i]: -@@ -735,7 +747,8 @@ - # value might also contain angle brackets, or - # ampersands that aren't part of entities. We need - # to escape those to XML entities too. -- val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) -+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, -+ val) - - attrs.append(fmt % (self.toEncoding(key, encoding), - self.toEncoding(val, encoding))) -@@ -799,7 +812,7 @@ - prettyPrint=False, indentLevel=0): - """Renders the contents of this tag as a string in the given - encoding. 
If encoding is None, returns a Unicode string..""" -- s=[] -+ s = [] - for c in self: - text = None - if isinstance(c, NavigableString): -@@ -913,13 +926,13 @@ - if isinstance(markupName, Tag): - markup = markupName - markupAttrs = markup -- callFunctionWithTagData = callable(self.name) \ -- and not isinstance(markupName, Tag) -+ callFunctionWithTagData = callable(self.name) and \ -+ not isinstance(markupName, Tag) - - if (not self.name) \ -- or callFunctionWithTagData \ -- or (markup and self._matches(markup, self.name)) \ -- or (not markup and self._matches(markupName, self.name)): -+ or callFunctionWithTagData \ -+ or (markup and self._matches(markup, self.name)) \ -+ or (not markup and self._matches(markupName, self.name)): - if callFunctionWithTagData: - match = self.name(markupName, markupAttrs) - else: -@@ -927,11 +940,11 @@ - markupAttrMap = None - for attr, matchAgainst in self.attrs.items(): - if not markupAttrMap: -- if hasattr(markupAttrs, 'get'): -+ if hasattr(markupAttrs, 'get'): - markupAttrMap = markupAttrs -- else: -+ else: - markupAttrMap = {} -- for k,v in markupAttrs: -+ for k, v in markupAttrs: - markupAttrMap[k] = v - attrValue = markupAttrMap.get(attr) - if not self._matches(attrValue, matchAgainst): -@@ -949,11 +962,10 @@ - found = None - # If given a list of items, scan it for a text element that - # matches. -- if hasattr(markup, "__iter__") \ -- and not isinstance(markup, Tag): -+ if hasattr(markup, "__iter__") and not isinstance(markup, Tag): - for element in markup: -- if isinstance(element, NavigableString) \ -- and self.search(element): -+ if isinstance(element, NavigableString) and \ -+ self.search(element): - found = element - break - # If it's a Tag, make sure its name or attributes match. -@@ -962,13 +974,13 @@ - if not self.text: - found = self.searchTag(markup) - # If it's text, make sure the text matches. 
-- elif isinstance(markup, NavigableString) or \ -- isinstance(markup, basestring): -+ elif isinstance(markup, NavigableString) or isinstance(markup, -+ basestring): - if self._matches(markup, self.text): - found = markup - else: -- raise Exception, "I don't know how to match against a %s" \ -- % markup.__class__ -+ raise Exception("I don't know how to match against a %s" -+ % markup.__class__) - return found - - def _matches(self, markup, matchAgainst): -@@ -989,10 +1001,10 @@ - if hasattr(matchAgainst, 'match'): - # It's a regexp object. - result = markup and matchAgainst.search(markup) -- elif hasattr(matchAgainst, '__iter__'): # list-like -+ elif hasattr(matchAgainst, '__iter__'): # list-like - result = markup in matchAgainst - elif hasattr(matchAgainst, 'items'): -- result = markup.has_key(matchAgainst) -+ result = matchAgainst in markup - elif matchAgainst and isinstance(markup, basestring): - if isinstance(markup, unicode): - matchAgainst = unicode(matchAgainst) -@@ -1003,6 +1015,7 @@ - result = matchAgainst == markup - return result - -+ - class ResultSet(list): - """A ResultSet is just a list that keeps track of the SoupStrainer - that created it.""" -@@ -1010,6 +1023,7 @@ - list.__init__([]) - self.source = source - -+ - # Now, some helper functions. - - def buildTagMap(default, *args): -@@ -1020,9 +1034,9 @@ - for portion in args: - if hasattr(portion, 'items'): - #It's a map. Merge it. -- for k,v in portion.items(): -+ for k, v in portion.items(): - built[k] = v -- elif hasattr(portion, '__iter__'): # is a list -+ elif hasattr(portion, '__iter__'): # is a list - #It's a list. Map each item to the default. - for k in portion: - built[k] = default -@@ -1031,6 +1045,7 @@ - built[portion] = default - return built - -+ - # Now, the parser classes. - - class BeautifulStoneSoup(Tag, SGMLParser): -@@ -1075,7 +1090,7 @@ - # can be replaced with a single space. A text node that contains - # fancy Unicode spaces (usually non-breaking) should be left - # alone. 
-- STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } -+ STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, } - - def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, - markupMassage=True, smartQuotesTo=XML_ENTITIES, -@@ -1152,7 +1167,7 @@ - n = int(name) - except ValueError: - return -- if not 0 <= n <= 127 : # ASCII ends at 127, not 255 -+ if not 0 <= n <= 127: # ASCII ends at 127, not 255 - return - return self.convert_codepoint(n) - -@@ -1163,9 +1178,10 @@ - if not hasattr(self, 'originalEncoding'): - self.originalEncoding = None - else: -- dammit = UnicodeDammit\ -- (markup, [self.fromEncoding, inDocumentEncoding], -- smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) -+ dammit = UnicodeDammit(markup, -+ [self.fromEncoding, inDocumentEncoding], -+ smartQuotesTo=self.smartQuotesTo, -+ isHTML=isHTML) - markup = dammit.unicode - self.originalEncoding = dammit.originalEncoding - self.declaredHTMLEncoding = dammit.declaredHTMLEncoding -@@ -1195,7 +1211,7 @@ - #print "__getattr__ called on %s.%s" % (self.__class__, methodName) - - if methodName.startswith('start_') or methodName.startswith('end_') \ -- or methodName.startswith('do_'): -+ or methodName.startswith('do_'): - return SGMLParser.__getattr__(self, methodName) - elif not methodName.startswith('__'): - return Tag.__getattr__(self, methodName) -@@ -1205,8 +1221,8 @@ - def isSelfClosingTag(self, name): - """Returns true iff the given string is the name of a - self-closing tag according to this parser.""" -- return self.SELF_CLOSING_TAGS.has_key(name) \ -- or self.instanceSelfClosingTags.has_key(name) -+ return name in self.SELF_CLOSING_TAGS or \ -+ name in self.instanceSelfClosingTags - - def reset(self): - Tag.__init__(self, self, self.ROOT_TAG_NAME) -@@ -1245,8 +1261,8 @@ - currentData = ' ' - self.currentData = [] - if self.parseOnlyThese and len(self.tagStack) <= 1 and \ -- (not self.parseOnlyThese.text or \ -- not 
self.parseOnlyThese.search(currentData)): -+ (not self.parseOnlyThese.text or not -+ self.parseOnlyThese.search(currentData)): - return - o = containerClass(currentData) - o.setup(self.currentTag, self.previous) -@@ -1255,7 +1271,6 @@ - self.previous = o - self.currentTag.contents.append(o) - -- - def _popToTag(self, name, inclusivePop=True): - """Pops the tag stack up to and including the most recent - instance of the given tag. If inclusivePop is false, pops the tag -@@ -1297,8 +1312,8 @@ - """ - - nestingResetTriggers = self.NESTABLE_TAGS.get(name) -- isNestable = nestingResetTriggers != None -- isResetNesting = self.RESET_NESTING_TAGS.has_key(name) -+ isNestable = nestingResetTriggers is not None -+ isResetNesting = name in self.RESET_NESTING_TAGS - popTo = None - inclusive = True - for i in range(len(self.tagStack)-1, 0, -1): -@@ -1311,7 +1326,7 @@ - if (nestingResetTriggers is not None - and p.name in nestingResetTriggers) \ - or (nestingResetTriggers is None and isResetNesting -- and self.RESET_NESTING_TAGS.has_key(p.name)): -+ and p.name in self.RESET_NESTING_TAGS): - - #If we encounter one of the nesting reset triggers - #peculiar to this tag, or we encounter another tag -@@ -1338,7 +1353,8 @@ - self._smartPop(name) - - if self.parseOnlyThese and len(self.tagStack) <= 1 \ -- and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): -+ and (self.parseOnlyThese.text or -+ not self.parseOnlyThese.searchTag(name, attrs)): - return - - tag = Tag(self, name, attrs, self.currentTag, self.previous) -@@ -1412,7 +1428,7 @@ - data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) - - if not data and self.convertHTMLEntities and \ -- not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): -+ not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): - # TODO: We've got a problem here. We're told this is - # an entity reference, but it's not an XML entity - # reference or an HTML entity reference. 
Nonetheless, -@@ -1449,12 +1465,12 @@ - declaration as a CData object.""" - j = None - if self.rawdata[i:i+9] == '<![CDATA[': -- k = self.rawdata.find(']]>', i) -- if k == -1: -- k = len(self.rawdata) -- data = self.rawdata[i+9:k] -- j = k+3 -- self._toStringSubclass(data, CData) -+ k = self.rawdata.find(']]>', i) -+ if k == -1: -+ k = len(self.rawdata) -+ data = self.rawdata[i+9:k] -+ j = k + 3 -+ self._toStringSubclass(data, CData) - else: - try: - j = SGMLParser.parse_declaration(self, i) -@@ -1464,6 +1480,7 @@ - j = i + len(toHandle) - return j - -+ - class BeautifulSoup(BeautifulStoneSoup): - - """This parser knows the following facts about HTML: -@@ -1513,18 +1530,18 @@ - BeautifulStoneSoup before writing your own subclass.""" - - def __init__(self, *args, **kwargs): -- if not kwargs.has_key('smartQuotesTo'): -+ if not 'smartQuotesTo' in kwargs: - kwargs['smartQuotesTo'] = self.HTML_ENTITIES - kwargs['isHTML'] = True - BeautifulStoneSoup.__init__(self, *args, **kwargs) - - SELF_CLOSING_TAGS = buildTagMap(None, -- ('br' , 'hr', 'input', 'img', 'meta', -+ ('br', 'hr', 'input', 'img', 'meta', - 'spacer', 'link', 'frame', 'base', 'col')) - - PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) - -- QUOTE_TAGS = {'script' : None, 'textarea' : None} -+ QUOTE_TAGS = {'script': None, 'textarea': None} - - #According to the HTML standard, each of these inline tags can - #contain another tag of the same type. Furthermore, it's common -@@ -1538,21 +1555,21 @@ - NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') - - #Lists can contain other lists, but there are restrictions. -- NESTABLE_LIST_TAGS = { 'ol' : [], -- 'ul' : [], -- 'li' : ['ul', 'ol'], -- 'dl' : [], -- 'dd' : ['dl'], -- 'dt' : ['dl'] } -+ NESTABLE_LIST_TAGS = {'ol': [], -+ 'ul': [], -+ 'li': ['ul', 'ol'], -+ 'dl': [], -+ 'dd': ['dl'], -+ 'dt': ['dl']} - - #Tables can contain other tables, but there are restrictions. 
-- NESTABLE_TABLE_TAGS = {'table' : [], -- 'tr' : ['table', 'tbody', 'tfoot', 'thead'], -- 'td' : ['tr'], -- 'th' : ['tr'], -- 'thead' : ['table'], -- 'tbody' : ['table'], -- 'tfoot' : ['table'], -+ NESTABLE_TABLE_TAGS = {'table': [], -+ 'tr': ['table', 'tbody', 'tfoot', 'thead'], -+ 'td': ['tr'], -+ 'th': ['tr'], -+ 'thead': ['table'], -+ 'tbody': ['table'], -+ 'tfoot': ['table'], - } - - NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') -@@ -1588,11 +1605,11 @@ - contentType = value - contentTypeIndex = i - -- if httpEquiv and contentType: # It's an interesting meta tag. -+ if httpEquiv and contentType: # It's an interesting meta tag. - match = self.CHARSET_RE.search(contentType) - if match: - if (self.declaredHTMLEncoding is not None or -- self.originalEncoding == self.fromEncoding): -+ self.originalEncoding == self.fromEncoding): - # An HTML encoding was sniffed while converting - # the document to Unicode, or an HTML encoding was - # sniffed during a previous pass through the -@@ -1617,9 +1634,11 @@ - if tag and tagNeedsEncodingSubstitution: - tag.containsSubstitutions = True - -+ - class StopParsing(Exception): - pass - -+ - class ICantBelieveItsBeautifulSoup(BeautifulSoup): - - """The BeautifulSoup class is oriented towards skipping over -@@ -1645,10 +1664,10 @@ - it's valid HTML and BeautifulSoup screwed up by assuming it - wouldn't be.""" - -- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ -- ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', -- 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', -- 'big') -+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = ( -+ 'em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', 'cite', -+ 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', 'big' -+ ) - - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) - -@@ -1656,6 +1675,7 @@ - I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, - I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) - -+ - class MinimalSoup(BeautifulSoup): - """The MinimalSoup 
class is for parsing HTML that contains - pathologically bad markup. It makes no assumptions about tag -@@ -1669,6 +1689,7 @@ - RESET_NESTING_TAGS = buildTagMap('noscript') - NESTABLE_TAGS = {} - -+ - class BeautifulSOAP(BeautifulStoneSoup): - """This class will push a tag with only a single string child into - the tag's parent as an attribute. The attribute's name is the tag -@@ -1696,10 +1717,11 @@ - parent._getAttrMap() - if (isinstance(tag, Tag) and len(tag.contents) == 1 and - isinstance(tag.contents[0], NavigableString) and -- not parent.attrMap.has_key(tag.name)): -+ not tag.name in parent.attrMap): - parent[tag.name] = tag.contents[0] - BeautifulStoneSoup.popTag(self) - -+ - #Enterprise class names! It has come to our attention that some people - #think the names of the Beautiful Soup parser classes are too silly - #and "unprofessional" for use in enterprise screen-scraping. We feel -@@ -1750,6 +1772,7 @@ - except ImportError: - pass - -+ - class UnicodeDammit: - """A class for detecting the encoding of a *ML document and - converting it to a Unicode string. If the source encoding is -@@ -1760,14 +1783,14 @@ - # meta tags to the corresponding Python codec names. It only covers - # values that aren't in Python's aliases and can't be determined - # by the heuristics in find_codec. 
-- CHARSET_ALIASES = { "macintosh" : "mac-roman", -- "x-sjis" : "shift-jis" } -+ CHARSET_ALIASES = {"macintosh": "mac-roman", -+ "x-sjis": "shift-jis"} - - def __init__(self, markup, overrideEncodings=[], - smartQuotesTo='xml', isHTML=False): - self.declaredHTMLEncoding = None - self.markup, documentEncoding, sniffedEncoding = \ -- self._detectEncoding(markup, isHTML) -+ self._detectEncoding(markup, isHTML) - self.smartQuotesTo = smartQuotesTo - self.triedEncodings = [] - if markup == '' or isinstance(markup, unicode): -@@ -1820,9 +1843,8 @@ - if self.smartQuotesTo and proposed.lower() in("windows-1252", - "iso-8859-1", - "iso-8859-2"): -- markup = re.compile("([\x80-\x9f])").sub \ -- (lambda(x): self._subMSChar(x.group(1)), -- markup) -+ markup = re.compile("([\x80-\x9f])").sub( -+ lambda(x): self._subMSChar(x.group(1)), markup) - - try: - # print "Trying to convert document to %s" % proposed -@@ -1842,11 +1864,11 @@ - - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ -- and (data[2:4] != '\x00\x00'): -+ and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] -- elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ -- and (data[2:4] != '\x00\x00'): -+ elif (len(data) >= 4) and \ -+ (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == '\xef\xbb\xbf': -@@ -1872,8 +1894,8 @@ - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') -- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ -- and (xml_data[2:4] != '\x00\x00'): -+ elif (len(xml_data) >= 4) and \ -+ (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'): - # UTF-16BE with BOM - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') -@@ -1882,7 +1904,7 @@ - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and 
(xml_data[:2] == '\xff\xfe') and \ -- (xml_data[2:4] != '\x00\x00'): -+ (xml_data[2:4] != '\x00\x00'): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') -@@ -1928,7 +1950,6 @@ - xml_encoding = sniffed_xml_encoding - return xml_data, xml_encoding, sniffed_xml_encoding - -- - def find_codec(self, charset): - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ - or (charset and self._codec(charset.replace("-", ""))) \ -@@ -1946,63 +1967,70 @@ - return codec - - EBCDIC_TO_ASCII_MAP = None -+ - def _ebcdic_to_ascii(self, s): - c = self.__class__ - if not c.EBCDIC_TO_ASCII_MAP: -- emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, -- 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, -- 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, -- 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, -- 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, -- 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, -- 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, -- 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, -- 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, -- 201,202,106,107,108,109,110,111,112,113,114,203,204,205, -- 206,207,208,209,126,115,116,117,118,119,120,121,122,210, -- 211,212,213,214,215,216,217,218,219,220,221,222,223,224, -- 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, -- 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, -- 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, -- 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, -- 250,251,252,253,254,255) -+ emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, -+ 15, 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, -+ 29, 30, 31, 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, -+ 138, 139, 140, 5, 6, 7, 144, 145, 22, 147, 148, 149, 150, 4, -+ 152, 153, 154, 155, 20, 21, 158, 26, 32, 160, 161, 162, 163, -+ 164, 165, 166, 
167, 168, 91, 46, 60, 40, 43, 33, 38, 169, -+ 170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59, -+ 94, 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 124, 44, -+ 37, 95, 62, 63, 186, 187, 188, 189, 190, 191, 192, 193, 194, -+ 96, 58, 35, 64, 39, 61, 34, 195, 97, 98, 99, 100, 101, 102, -+ 103, 104, 105, 196, 197, 198, 199, 200, 201, 202, 106, 107, -+ 108, 109, 110, 111, 112, 113, 114, 203, 204, 205, 206, 207, -+ 208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210, -+ 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, -+ 223, 224, 225, 226, 227, 228, 229, 230, 231, 123, 65, 66, -+ 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237, -+ 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241, -+ 242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89, 90, 244, 245, -+ 246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -+ 250, 251, 252, 253, 254, 255) - import string -- c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ -- ''.join(map(chr, range(256))), ''.join(map(chr, emap))) -+ c.EBCDIC_TO_ASCII_MAP = string.maketrans(''.join(map(chr, -+ range(256))), -+ ''.join(map(chr, emap))) - return s.translate(c.EBCDIC_TO_ASCII_MAP) - -- MS_CHARS = { '\x80' : ('euro', '20AC'), -- '\x81' : ' ', -- '\x82' : ('sbquo', '201A'), -- '\x83' : ('fnof', '192'), -- '\x84' : ('bdquo', '201E'), -- '\x85' : ('hellip', '2026'), -- '\x86' : ('dagger', '2020'), -- '\x87' : ('Dagger', '2021'), -- '\x88' : ('circ', '2C6'), -- '\x89' : ('permil', '2030'), -- '\x8A' : ('Scaron', '160'), -- '\x8B' : ('lsaquo', '2039'), -- '\x8C' : ('OElig', '152'), -- '\x8D' : '?', -- '\x8E' : ('#x17D', '17D'), -- '\x8F' : '?', -- '\x90' : '?', -- '\x91' : ('lsquo', '2018'), -- '\x92' : ('rsquo', '2019'), -- '\x93' : ('ldquo', '201C'), -- '\x94' : ('rdquo', '201D'), -- '\x95' : ('bull', '2022'), -- '\x96' : ('ndash', '2013'), -- '\x97' : ('mdash', '2014'), -- '\x98' : ('tilde', '2DC'), -- '\x99' : ('trade', '2122'), -- '\x9a' : ('scaron', '161'), -- '\x9b' : ('rsaquo', '203A'), -- 
'\x9c' : ('oelig', '153'), -- '\x9d' : '?', -- '\x9e' : ('#x17E', '17E'), -- '\x9f' : ('Yuml', ''),} -+ MS_CHARS = { -+ '\x80': ('euro', '20AC'), -+ '\x81': ' ', -+ '\x82': ('sbquo', '201A'), -+ '\x83': ('fnof', '192'), -+ '\x84': ('bdquo', '201E'), -+ '\x85': ('hellip', '2026'), -+ '\x86': ('dagger', '2020'), -+ '\x87': ('Dagger', '2021'), -+ '\x88': ('circ', '2C6'), -+ '\x89': ('permil', '2030'), -+ '\x8A': ('Scaron', '160'), -+ '\x8B': ('lsaquo', '2039'), -+ '\x8C': ('OElig', '152'), -+ '\x8D': '?', -+ '\x8E': ('#x17D', '17D'), -+ '\x8F': '?', -+ '\x90': '?', -+ '\x91': ('lsquo', '2018'), -+ '\x92': ('rsquo', '2019'), -+ '\x93': ('ldquo', '201C'), -+ '\x94': ('rdquo', '201D'), -+ '\x95': ('bull', '2022'), -+ '\x96': ('ndash', '2013'), -+ '\x97': ('mdash', '2014'), -+ '\x98': ('tilde', '2DC'), -+ '\x99': ('trade', '2122'), -+ '\x9a': ('scaron', '161'), -+ '\x9b': ('rsaquo', '203A'), -+ '\x9c': ('oelig', '153'), -+ '\x9d': '?', -+ '\x9e': ('#x17E', '17E'), -+ '\x9f': ('Yuml', ''), -+ } - - ####################################################################### -
pywikibot-commits@lists.wikimedia.org