Xqt has submitted this change and it was merged.
Change subject: remove patch-BeautifulSoup since not needed and causes issues with setup of core components
......................................................................
remove patch-BeautifulSoup since not needed and causes issues with setup of core components
Change-Id: I4b5462737fbf54c7521786bf03e71a90421926fc
---
M externals/__init__.py
D externals/patch-BeautifulSoup
2 files changed, 1 insertion(+), 845 deletions(-)
Approvals:
DrTrigon: Checked; Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/externals/__init__.py b/externals/__init__.py
index 13ea4c4..83045f5 100644
--- a/externals/__init__.py
+++ b/externals/__init__.py
@@ -57,9 +57,7 @@
'BeautifulSoup.py': ({'linux-fedora': ['python-BeautifulSoup'],
'linux-ubuntu': ['']},
{ 'url': 'https://pypi.python.org/packages/source/B/BeautifulSoup/BeautifulSoup-3.2.0.tar.gz',
- 'path': 'BeautifulSoup-3.2.0/BeautifulSoup.py',
- #$ diff -Nau TEST_BeautifulSoup.py BeautifulSoup.py > patch-BeautifulSoup
- 'patch': 'patch-BeautifulSoup'},
+ 'path': 'BeautifulSoup-3.2.0/BeautifulSoup.py'},
{}), # OK
'irclib': ({'linux-fedora': ['python-irclib'],
'linux-ubuntu': ['']},
diff --git a/externals/patch-BeautifulSoup b/externals/patch-BeautifulSoup
deleted file mode 100644
index 271c061..0000000
--- a/externals/patch-BeautifulSoup
+++ /dev/null
@@ -1,842 +0,0 @@
---- TEST_BeautifulSoup.py 2010-11-21 14:35:28.000000000 +0100
-+++ BeautifulSoup.py 2013-03-02 14:56:38.000000000 +0100
-@@ -76,7 +76,6 @@
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
-
- """
--from __future__ import generators
-
- __author__ = "Leonard Richardson (leonardr(a)segfault.org)"
- __version__ = "3.2.0"
-@@ -90,9 +89,9 @@
- import re
- import sgmllib
- try:
-- from htmlentitydefs import name2codepoint
-+ from htmlentitydefs import name2codepoint
- except ImportError:
-- name2codepoint = {}
-+ name2codepoint = {}
- try:
- set
- except NameError:
-@@ -104,12 +103,13 @@
-
- DEFAULT_OUTPUT_ENCODING = "utf-8"
-
-+
- def _match_css_class(str):
- """Build a RE to match the given CSS class."""
- return re.compile(r"(^|.*\s)%s($|\s)" % str)
-
--# First, the classes that represent markup elements.
-
-+# First, the classes that represent markup elements.
- class PageElement(object):
- """Contains the navigational information for some part of the page
- (either a tag or a piece of text)"""
-@@ -129,8 +129,8 @@
- def replaceWith(self, replaceWith):
- oldParent = self.parent
- myIndex = self.parent.index(self)
-- if hasattr(replaceWith, "parent")\
-- and replaceWith.parent is self.parent:
-+ if hasattr(replaceWith, "parent") and \
-+ replaceWith.parent is self.parent:
- # We're replacing this element with one of its siblings.
- index = replaceWith.parent.index(replaceWith)
- if index and index < myIndex:
-@@ -187,11 +187,11 @@
- return lastChild
-
- def insert(self, position, newChild):
-- if isinstance(newChild, basestring) \
-- and not isinstance(newChild, NavigableString):
-+ if isinstance(newChild, basestring) and not \
-+ isinstance(newChild, NavigableString):
- newChild = NavigableString(newChild)
-
-- position = min(position, len(self.contents))
-+ position = min(position, len(self.contents))
- if hasattr(newChild, 'parent') and newChild.parent is not None:
- # We're 'inserting' an element that's already one
- # of this object's children.
-@@ -228,7 +228,7 @@
- while not parentsNextSibling:
- parentsNextSibling = parent.nextSibling
- parent = parent.parent
-- if not parent: # This is the last element in the document.
-+ if not parent: # This is the last element in the document.
- break
- if parentsNextSibling:
- newChildsLastElement.next = parentsNextSibling
-@@ -273,7 +273,8 @@
- criteria and appear after this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.nextSiblingGenerator, **kwargs)
-- fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
-+
-+ fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
-
- def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the first item that matches the given criteria and
-@@ -285,8 +286,8 @@
- """Returns all items that match the given criteria and appear
- before this Tag in the document."""
- return self._findAll(name, attrs, text, limit, self.previousGenerator,
-- **kwargs)
-- fetchPrevious = findAllPrevious # Compatibility with pre-3.x
-+ **kwargs)
-+ fetchPrevious = findAllPrevious # Compatibility with pre-3.x
-
- def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
- """Returns the closest sibling to this Tag that matches the
-@@ -300,7 +301,7 @@
- criteria and appear before this Tag in the document."""
- return self._findAll(name, attrs, text, limit,
- self.previousSiblingGenerator, **kwargs)
-- fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
-+ fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
-
- def findParent(self, name=None, attrs={}, **kwargs):
- """Returns the closest parent of this Tag that matches the given
-@@ -319,7 +320,8 @@
-
- return self._findAll(name, attrs, None, limit, self.parentGenerator,
- **kwargs)
-- fetchParents = findParents # Compatibility with pre-3.x
-+
-+ fetchParents = findParents # Compatibility with pre-3.x
-
- #These methods do the real heavy lifting.
-
-@@ -416,11 +418,12 @@
- s = unicode(s)
- else:
- if encoding:
-- s = self.toEncoding(str(s), encoding)
-+ s = self.toEncoding(str(s), encoding)
- else:
- s = unicode(s)
- return s
-
-+
- class NavigableString(unicode, PageElement):
-
- def __new__(cls, value):
-@@ -445,7 +448,8 @@
- if attr == 'string':
- return self
- else:
-- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
-+ raise AttributeError("'%s' object has no attribute '%s'"
-+ % (self.__class__.__name__, attr))
-
- def __unicode__(self):
- return str(self).decode(DEFAULT_OUTPUT_ENCODING)
-@@ -456,11 +460,13 @@
- else:
- return self
-
-+
- class CData(NavigableString):
-
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
-
-+
- class ProcessingInstruction(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- output = self
-@@ -468,14 +474,17 @@
- output = self.substituteEncoding(output, encoding)
- return "<?%s?>" % self.toEncoding(output, encoding)
-
-+
- class Comment(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "<!--%s-->" % NavigableString.__str__(self, encoding)
-
-+
- class Declaration(NavigableString):
- def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
- return "<!%s>" % NavigableString.__str__(self, encoding)
-
-+
- class Tag(PageElement):
-
- """Represents a found HTML tag with its attributes and contents."""
-@@ -483,15 +492,15 @@
- def _invert(h):
- "Cheap function to invert a hash."
- i = {}
-- for k,v in h.items():
-+ for k, v in h.items():
- i[v] = k
- return i
-
-- XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
-- "quot" : '"',
-- "amp" : "&",
-- "lt" : "<",
-- "gt" : ">" }
-+ XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'",
-+ "quot": '"',
-+ "amp": "&",
-+ "lt": "<",
-+ "gt": ">"}
-
- XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-
-@@ -550,8 +559,8 @@
- self.attrs = map(convert, self.attrs)
-
- def getString(self):
-- if (len(self.contents) == 1
-- and isinstance(self.contents[0], NavigableString)):
-+ if (len(self.contents) == 1 and isinstance(self.contents[0],
-+ NavigableString)):
- return self.contents[0]
-
- def setString(self, string):
-@@ -593,7 +602,7 @@
- raise ValueError("Tag.index: element not in tag")
-
- def has_key(self, key):
-- return self._getAttrMap().has_key(key)
-+ return key in self._getAttrMap()
-
- def __getitem__(self, key):
- """tag[key] returns the value of the 'key' attribute for the tag,
-@@ -637,7 +646,7 @@
- #We don't break because bad HTML can define the same
- #attribute multiple times.
- self._getAttrMap()
-- if self.attrMap.has_key(key):
-+ if key in self.attrMap:
- del self.attrMap[key]
-
- def __call__(self, *args, **kwargs):
-@@ -652,7 +661,8 @@
- return self.find(tag[:-3])
- elif tag.find('__') != 0:
- return self.find(tag)
-- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
-+ raise AttributeError("'%s' object has no attribute '%s'"
-+ % (self.__class__, tag))
-
- def __eq__(self, other):
- """Returns true iff this tag has the same name, the same attributes,
-@@ -662,7 +672,9 @@
- same attributes in a different order. Should this be fixed?"""
- if other is self:
- return True
-- if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
-+ if not hasattr(other, 'name') or not hasattr(other, 'attrs') or \
-+ not hasattr(other, 'contents') or self.name != other.name or \
-+ self.attrs != other.attrs or len(self) != len(other):
- return False
- for i in range(0, len(self.contents)):
- if self.contents[i] != other.contents[i]:
-@@ -735,7 +747,8 @@
- # value might also contain angle brackets, or
- # ampersands that aren't part of entities. We need
- # to escape those to XML entities too.
-- val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
-+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity,
-+ val)
-
- attrs.append(fmt % (self.toEncoding(key, encoding),
- self.toEncoding(val, encoding)))
-@@ -799,7 +812,7 @@
- prettyPrint=False, indentLevel=0):
- """Renders the contents of this tag as a string in the given
- encoding. If encoding is None, returns a Unicode string.."""
-- s=[]
-+ s = []
- for c in self:
- text = None
- if isinstance(c, NavigableString):
-@@ -913,13 +926,13 @@
- if isinstance(markupName, Tag):
- markup = markupName
- markupAttrs = markup
-- callFunctionWithTagData = callable(self.name) \
-- and not isinstance(markupName, Tag)
-+ callFunctionWithTagData = callable(self.name) and \
-+ not isinstance(markupName, Tag)
-
- if (not self.name) \
-- or callFunctionWithTagData \
-- or (markup and self._matches(markup, self.name)) \
-- or (not markup and self._matches(markupName, self.name)):
-+ or callFunctionWithTagData \
-+ or (markup and self._matches(markup, self.name)) \
-+ or (not markup and self._matches(markupName, self.name)):
- if callFunctionWithTagData:
- match = self.name(markupName, markupAttrs)
- else:
-@@ -927,11 +940,11 @@
- markupAttrMap = None
- for attr, matchAgainst in self.attrs.items():
- if not markupAttrMap:
-- if hasattr(markupAttrs, 'get'):
-+ if hasattr(markupAttrs, 'get'):
- markupAttrMap = markupAttrs
-- else:
-+ else:
- markupAttrMap = {}
-- for k,v in markupAttrs:
-+ for k, v in markupAttrs:
- markupAttrMap[k] = v
- attrValue = markupAttrMap.get(attr)
- if not self._matches(attrValue, matchAgainst):
-@@ -949,11 +962,10 @@
- found = None
- # If given a list of items, scan it for a text element that
- # matches.
-- if hasattr(markup, "__iter__") \
-- and not isinstance(markup, Tag):
-+ if hasattr(markup, "__iter__") and not isinstance(markup, Tag):
- for element in markup:
-- if isinstance(element, NavigableString) \
-- and self.search(element):
-+ if isinstance(element, NavigableString) and \
-+ self.search(element):
- found = element
- break
- # If it's a Tag, make sure its name or attributes match.
-@@ -962,13 +974,13 @@
- if not self.text:
- found = self.searchTag(markup)
- # If it's text, make sure the text matches.
-- elif isinstance(markup, NavigableString) or \
-- isinstance(markup, basestring):
-+ elif isinstance(markup, NavigableString) or isinstance(markup,
-+ basestring):
- if self._matches(markup, self.text):
- found = markup
- else:
-- raise Exception, "I don't know how to match against a %s" \
-- % markup.__class__
-+ raise Exception("I don't know how to match against a %s"
-+ % markup.__class__)
- return found
-
- def _matches(self, markup, matchAgainst):
-@@ -989,10 +1001,10 @@
- if hasattr(matchAgainst, 'match'):
- # It's a regexp object.
- result = markup and matchAgainst.search(markup)
-- elif hasattr(matchAgainst, '__iter__'): # list-like
-+ elif hasattr(matchAgainst, '__iter__'): # list-like
- result = markup in matchAgainst
- elif hasattr(matchAgainst, 'items'):
-- result = markup.has_key(matchAgainst)
-+ result = matchAgainst in markup
- elif matchAgainst and isinstance(markup, basestring):
- if isinstance(markup, unicode):
- matchAgainst = unicode(matchAgainst)
-@@ -1003,6 +1015,7 @@
- result = matchAgainst == markup
- return result
-
-+
- class ResultSet(list):
- """A ResultSet is just a list that keeps track of the SoupStrainer
- that created it."""
-@@ -1010,6 +1023,7 @@
- list.__init__([])
- self.source = source
-
-+
- # Now, some helper functions.
-
- def buildTagMap(default, *args):
-@@ -1020,9 +1034,9 @@
- for portion in args:
- if hasattr(portion, 'items'):
- #It's a map. Merge it.
-- for k,v in portion.items():
-+ for k, v in portion.items():
- built[k] = v
-- elif hasattr(portion, '__iter__'): # is a list
-+ elif hasattr(portion, '__iter__'): # is a list
- #It's a list. Map each item to the default.
- for k in portion:
- built[k] = default
-@@ -1031,6 +1045,7 @@
- built[portion] = default
- return built
-
-+
- # Now, the parser classes.
-
- class BeautifulStoneSoup(Tag, SGMLParser):
-@@ -1075,7 +1090,7 @@
- # can be replaced with a single space. A text node that contains
- # fancy Unicode spaces (usually non-breaking) should be left
- # alone.
-- STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
-+ STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
-
- def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
- markupMassage=True, smartQuotesTo=XML_ENTITIES,
-@@ -1152,7 +1167,7 @@
- n = int(name)
- except ValueError:
- return
-- if not 0 <= n <= 127 : # ASCII ends at 127, not 255
-+ if not 0 <= n <= 127: # ASCII ends at 127, not 255
- return
- return self.convert_codepoint(n)
-
-@@ -1163,9 +1178,10 @@
- if not hasattr(self, 'originalEncoding'):
- self.originalEncoding = None
- else:
-- dammit = UnicodeDammit\
-- (markup, [self.fromEncoding, inDocumentEncoding],
-- smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
-+ dammit = UnicodeDammit(markup,
-+ [self.fromEncoding, inDocumentEncoding],
-+ smartQuotesTo=self.smartQuotesTo,
-+ isHTML=isHTML)
- markup = dammit.unicode
- self.originalEncoding = dammit.originalEncoding
- self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
-@@ -1195,7 +1211,7 @@
- #print "__getattr__ called on %s.%s" % (self.__class__, methodName)
-
- if methodName.startswith('start_') or methodName.startswith('end_') \
-- or methodName.startswith('do_'):
-+ or methodName.startswith('do_'):
- return SGMLParser.__getattr__(self, methodName)
- elif not methodName.startswith('__'):
- return Tag.__getattr__(self, methodName)
-@@ -1205,8 +1221,8 @@
- def isSelfClosingTag(self, name):
- """Returns true iff the given string is the name of a
- self-closing tag according to this parser."""
-- return self.SELF_CLOSING_TAGS.has_key(name) \
-- or self.instanceSelfClosingTags.has_key(name)
-+ return name in self.SELF_CLOSING_TAGS or \
-+ name in self.instanceSelfClosingTags
-
- def reset(self):
- Tag.__init__(self, self, self.ROOT_TAG_NAME)
-@@ -1245,8 +1261,8 @@
- currentData = ' '
- self.currentData = []
- if self.parseOnlyThese and len(self.tagStack) <= 1 and \
-- (not self.parseOnlyThese.text or \
-- not self.parseOnlyThese.search(currentData)):
-+ (not self.parseOnlyThese.text or not
-+ self.parseOnlyThese.search(currentData)):
- return
- o = containerClass(currentData)
- o.setup(self.currentTag, self.previous)
-@@ -1255,7 +1271,6 @@
- self.previous = o
- self.currentTag.contents.append(o)
-
--
- def _popToTag(self, name, inclusivePop=True):
- """Pops the tag stack up to and including the most recent
- instance of the given tag. If inclusivePop is false, pops the tag
-@@ -1297,8 +1312,8 @@
- """
-
- nestingResetTriggers = self.NESTABLE_TAGS.get(name)
-- isNestable = nestingResetTriggers != None
-- isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
-+ isNestable = nestingResetTriggers is not None
-+ isResetNesting = name in self.RESET_NESTING_TAGS
- popTo = None
- inclusive = True
- for i in range(len(self.tagStack)-1, 0, -1):
-@@ -1311,7 +1326,7 @@
- if (nestingResetTriggers is not None
- and p.name in nestingResetTriggers) \
- or (nestingResetTriggers is None and isResetNesting
-- and self.RESET_NESTING_TAGS.has_key(p.name)):
-+ and p.name in self.RESET_NESTING_TAGS):
-
- #If we encounter one of the nesting reset triggers
- #peculiar to this tag, or we encounter another tag
-@@ -1338,7 +1353,8 @@
- self._smartPop(name)
-
- if self.parseOnlyThese and len(self.tagStack) <= 1 \
-- and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
-+ and (self.parseOnlyThese.text or
-+ not self.parseOnlyThese.searchTag(name, attrs)):
- return
-
- tag = Tag(self, name, attrs, self.currentTag, self.previous)
-@@ -1412,7 +1428,7 @@
- data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
-
- if not data and self.convertHTMLEntities and \
-- not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
-+ not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
- # TODO: We've got a problem here. We're told this is
- # an entity reference, but it's not an XML entity
- # reference or an HTML entity reference. Nonetheless,
-@@ -1449,12 +1465,12 @@
- declaration as a CData object."""
- j = None
- if self.rawdata[i:i+9] == '<![CDATA[':
-- k = self.rawdata.find(']]>', i)
-- if k == -1:
-- k = len(self.rawdata)
-- data = self.rawdata[i+9:k]
-- j = k+3
-- self._toStringSubclass(data, CData)
-+ k = self.rawdata.find(']]>', i)
-+ if k == -1:
-+ k = len(self.rawdata)
-+ data = self.rawdata[i+9:k]
-+ j = k + 3
-+ self._toStringSubclass(data, CData)
- else:
- try:
- j = SGMLParser.parse_declaration(self, i)
-@@ -1464,6 +1480,7 @@
- j = i + len(toHandle)
- return j
-
-+
- class BeautifulSoup(BeautifulStoneSoup):
-
- """This parser knows the following facts about HTML:
-@@ -1513,18 +1530,18 @@
- BeautifulStoneSoup before writing your own subclass."""
-
- def __init__(self, *args, **kwargs):
-- if not kwargs.has_key('smartQuotesTo'):
-+ if not 'smartQuotesTo' in kwargs:
- kwargs['smartQuotesTo'] = self.HTML_ENTITIES
- kwargs['isHTML'] = True
- BeautifulStoneSoup.__init__(self, *args, **kwargs)
-
- SELF_CLOSING_TAGS = buildTagMap(None,
-- ('br' , 'hr', 'input', 'img', 'meta',
-+ ('br', 'hr', 'input', 'img', 'meta',
- 'spacer', 'link', 'frame', 'base', 'col'))
-
- PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
-
-- QUOTE_TAGS = {'script' : None, 'textarea' : None}
-+ QUOTE_TAGS = {'script': None, 'textarea': None}
-
- #According to the HTML standard, each of these inline tags can
- #contain another tag of the same type. Furthermore, it's common
-@@ -1538,21 +1555,21 @@
- NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
-
- #Lists can contain other lists, but there are restrictions.
-- NESTABLE_LIST_TAGS = { 'ol' : [],
-- 'ul' : [],
-- 'li' : ['ul', 'ol'],
-- 'dl' : [],
-- 'dd' : ['dl'],
-- 'dt' : ['dl'] }
-+ NESTABLE_LIST_TAGS = {'ol': [],
-+ 'ul': [],
-+ 'li': ['ul', 'ol'],
-+ 'dl': [],
-+ 'dd': ['dl'],
-+ 'dt': ['dl']}
-
- #Tables can contain other tables, but there are restrictions.
-- NESTABLE_TABLE_TAGS = {'table' : [],
-- 'tr' : ['table', 'tbody', 'tfoot', 'thead'],
-- 'td' : ['tr'],
-- 'th' : ['tr'],
-- 'thead' : ['table'],
-- 'tbody' : ['table'],
-- 'tfoot' : ['table'],
-+ NESTABLE_TABLE_TAGS = {'table': [],
-+ 'tr': ['table', 'tbody', 'tfoot', 'thead'],
-+ 'td': ['tr'],
-+ 'th': ['tr'],
-+ 'thead': ['table'],
-+ 'tbody': ['table'],
-+ 'tfoot': ['table'],
- }
-
- NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
-@@ -1588,11 +1605,11 @@
- contentType = value
- contentTypeIndex = i
-
-- if httpEquiv and contentType: # It's an interesting meta tag.
-+ if httpEquiv and contentType: # It's an interesting meta tag.
- match = self.CHARSET_RE.search(contentType)
- if match:
- if (self.declaredHTMLEncoding is not None or
-- self.originalEncoding == self.fromEncoding):
-+ self.originalEncoding == self.fromEncoding):
- # An HTML encoding was sniffed while converting
- # the document to Unicode, or an HTML encoding was
- # sniffed during a previous pass through the
-@@ -1617,9 +1634,11 @@
- if tag and tagNeedsEncodingSubstitution:
- tag.containsSubstitutions = True
-
-+
- class StopParsing(Exception):
- pass
-
-+
- class ICantBelieveItsBeautifulSoup(BeautifulSoup):
-
- """The BeautifulSoup class is oriented towards skipping over
-@@ -1645,10 +1664,10 @@
- it's valid HTML and BeautifulSoup screwed up by assuming it
- wouldn't be."""
-
-- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
-- ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
-- 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
-- 'big')
-+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = (
-+ 'em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', 'cite',
-+ 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', 'big'
-+ )
-
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
-
-@@ -1656,6 +1675,7 @@
- I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
-
-+
- class MinimalSoup(BeautifulSoup):
- """The MinimalSoup class is for parsing HTML that contains
- pathologically bad markup. It makes no assumptions about tag
-@@ -1669,6 +1689,7 @@
- RESET_NESTING_TAGS = buildTagMap('noscript')
- NESTABLE_TAGS = {}
-
-+
- class BeautifulSOAP(BeautifulStoneSoup):
- """This class will push a tag with only a single string child into
- the tag's parent as an attribute. The attribute's name is the tag
-@@ -1696,10 +1717,11 @@
- parent._getAttrMap()
- if (isinstance(tag, Tag) and len(tag.contents) == 1 and
- isinstance(tag.contents[0], NavigableString) and
-- not parent.attrMap.has_key(tag.name)):
-+ not tag.name in parent.attrMap):
- parent[tag.name] = tag.contents[0]
- BeautifulStoneSoup.popTag(self)
-
-+
- #Enterprise class names! It has come to our attention that some people
- #think the names of the Beautiful Soup parser classes are too silly
- #and "unprofessional" for use in enterprise screen-scraping. We feel
-@@ -1750,6 +1772,7 @@
- except ImportError:
- pass
-
-+
- class UnicodeDammit:
- """A class for detecting the encoding of a *ML document and
- converting it to a Unicode string. If the source encoding is
-@@ -1760,14 +1783,14 @@
- # meta tags to the corresponding Python codec names. It only covers
- # values that aren't in Python's aliases and can't be determined
- # by the heuristics in find_codec.
-- CHARSET_ALIASES = { "macintosh" : "mac-roman",
-- "x-sjis" : "shift-jis" }
-+ CHARSET_ALIASES = {"macintosh": "mac-roman",
-+ "x-sjis": "shift-jis"}
-
- def __init__(self, markup, overrideEncodings=[],
- smartQuotesTo='xml', isHTML=False):
- self.declaredHTMLEncoding = None
- self.markup, documentEncoding, sniffedEncoding = \
-- self._detectEncoding(markup, isHTML)
-+ self._detectEncoding(markup, isHTML)
- self.smartQuotesTo = smartQuotesTo
- self.triedEncodings = []
- if markup == '' or isinstance(markup, unicode):
-@@ -1820,9 +1843,8 @@
- if self.smartQuotesTo and proposed.lower() in("windows-1252",
- "iso-8859-1",
- "iso-8859-2"):
-- markup = re.compile("([\x80-\x9f])").sub \
-- (lambda(x): self._subMSChar(x.group(1)),
-- markup)
-+ markup = re.compile("([\x80-\x9f])").sub(
-+ lambda(x): self._subMSChar(x.group(1)), markup)
-
- try:
- # print "Trying to convert document to %s" % proposed
-@@ -1842,11 +1864,11 @@
-
- # strip Byte Order Mark (if present)
- if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
-- and (data[2:4] != '\x00\x00'):
-+ and (data[2:4] != '\x00\x00'):
- encoding = 'utf-16be'
- data = data[2:]
-- elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
-- and (data[2:4] != '\x00\x00'):
-+ elif (len(data) >= 4) and \
-+ (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'):
- encoding = 'utf-16le'
- data = data[2:]
- elif data[:3] == '\xef\xbb\xbf':
-@@ -1872,8 +1894,8 @@
- # UTF-16BE
- sniffed_xml_encoding = 'utf-16be'
- xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
-- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
-- and (xml_data[2:4] != '\x00\x00'):
-+ elif (len(xml_data) >= 4) and \
-+ (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'):
- # UTF-16BE with BOM
- sniffed_xml_encoding = 'utf-16be'
- xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
-@@ -1882,7 +1904,7 @@
- sniffed_xml_encoding = 'utf-16le'
- xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
-- (xml_data[2:4] != '\x00\x00'):
-+ (xml_data[2:4] != '\x00\x00'):
- # UTF-16LE with BOM
- sniffed_xml_encoding = 'utf-16le'
- xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
-@@ -1928,7 +1950,6 @@
- xml_encoding = sniffed_xml_encoding
- return xml_data, xml_encoding, sniffed_xml_encoding
-
--
- def find_codec(self, charset):
- return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
- or (charset and self._codec(charset.replace("-", ""))) \
-@@ -1946,63 +1967,70 @@
- return codec
-
- EBCDIC_TO_ASCII_MAP = None
-+
- def _ebcdic_to_ascii(self, s):
- c = self.__class__
- if not c.EBCDIC_TO_ASCII_MAP:
-- emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
-- 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
-- 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
-- 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
-- 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
-- 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
-- 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
-- 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
-- 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
-- 201,202,106,107,108,109,110,111,112,113,114,203,204,205,
-- 206,207,208,209,126,115,116,117,118,119,120,121,122,210,
-- 211,212,213,214,215,216,217,218,219,220,221,222,223,224,
-- 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
-- 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
-- 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
-- 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
-- 250,251,252,253,254,255)
-+ emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14,
-+ 15, 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28,
-+ 29, 30, 31, 128, 129, 130, 131, 132, 10, 23, 27, 136, 137,
-+ 138, 139, 140, 5, 6, 7, 144, 145, 22, 147, 148, 149, 150, 4,
-+ 152, 153, 154, 155, 20, 21, 158, 26, 32, 160, 161, 162, 163,
-+ 164, 165, 166, 167, 168, 91, 46, 60, 40, 43, 33, 38, 169,
-+ 170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59,
-+ 94, 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 124, 44,
-+ 37, 95, 62, 63, 186, 187, 188, 189, 190, 191, 192, 193, 194,
-+ 96, 58, 35, 64, 39, 61, 34, 195, 97, 98, 99, 100, 101, 102,
-+ 103, 104, 105, 196, 197, 198, 199, 200, 201, 202, 106, 107,
-+ 108, 109, 110, 111, 112, 113, 114, 203, 204, 205, 206, 207,
-+ 208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210,
-+ 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
-+ 223, 224, 225, 226, 227, 228, 229, 230, 231, 123, 65, 66,
-+ 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237,
-+ 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241,
-+ 242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89, 90, 244, 245,
-+ 246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
-+ 250, 251, 252, 253, 254, 255)
- import string
-- c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
-- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
-+ c.EBCDIC_TO_ASCII_MAP = string.maketrans(''.join(map(chr,
-+ range(256))),
-+ ''.join(map(chr, emap)))
- return s.translate(c.EBCDIC_TO_ASCII_MAP)
-
-- MS_CHARS = { '\x80' : ('euro', '20AC'),
-- '\x81' : ' ',
-- '\x82' : ('sbquo', '201A'),
-- '\x83' : ('fnof', '192'),
-- '\x84' : ('bdquo', '201E'),
-- '\x85' : ('hellip', '2026'),
-- '\x86' : ('dagger', '2020'),
-- '\x87' : ('Dagger', '2021'),
-- '\x88' : ('circ', '2C6'),
-- '\x89' : ('permil', '2030'),
-- '\x8A' : ('Scaron', '160'),
-- '\x8B' : ('lsaquo', '2039'),
-- '\x8C' : ('OElig', '152'),
-- '\x8D' : '?',
-- '\x8E' : ('#x17D', '17D'),
-- '\x8F' : '?',
-- '\x90' : '?',
-- '\x91' : ('lsquo', '2018'),
-- '\x92' : ('rsquo', '2019'),
-- '\x93' : ('ldquo', '201C'),
-- '\x94' : ('rdquo', '201D'),
-- '\x95' : ('bull', '2022'),
-- '\x96' : ('ndash', '2013'),
-- '\x97' : ('mdash', '2014'),
-- '\x98' : ('tilde', '2DC'),
-- '\x99' : ('trade', '2122'),
-- '\x9a' : ('scaron', '161'),
-- '\x9b' : ('rsaquo', '203A'),
-- '\x9c' : ('oelig', '153'),
-- '\x9d' : '?',
-- '\x9e' : ('#x17E', '17E'),
-- '\x9f' : ('Yuml', ''),}
-+ MS_CHARS = {
-+ '\x80': ('euro', '20AC'),
-+ '\x81': ' ',
-+ '\x82': ('sbquo', '201A'),
-+ '\x83': ('fnof', '192'),
-+ '\x84': ('bdquo', '201E'),
-+ '\x85': ('hellip', '2026'),
-+ '\x86': ('dagger', '2020'),
-+ '\x87': ('Dagger', '2021'),
-+ '\x88': ('circ', '2C6'),
-+ '\x89': ('permil', '2030'),
-+ '\x8A': ('Scaron', '160'),
-+ '\x8B': ('lsaquo', '2039'),
-+ '\x8C': ('OElig', '152'),
-+ '\x8D': '?',
-+ '\x8E': ('#x17D', '17D'),
-+ '\x8F': '?',
-+ '\x90': '?',
-+ '\x91': ('lsquo', '2018'),
-+ '\x92': ('rsquo', '2019'),
-+ '\x93': ('ldquo', '201C'),
-+ '\x94': ('rdquo', '201D'),
-+ '\x95': ('bull', '2022'),
-+ '\x96': ('ndash', '2013'),
-+ '\x97': ('mdash', '2014'),
-+ '\x98': ('tilde', '2DC'),
-+ '\x99': ('trade', '2122'),
-+ '\x9a': ('scaron', '161'),
-+ '\x9b': ('rsaquo', '203A'),
-+ '\x9c': ('oelig', '153'),
-+ '\x9d': '?',
-+ '\x9e': ('#x17E', '17E'),
-+ '\x9f': ('Yuml', ''),
-+ }
-
- #######################################################################
-
--
To view, visit https://gerrit.wikimedia.org/r/77325
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4b5462737fbf54c7521786bf03e71a90421926fc
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: DrTrigon <dr.trigon(a)surfeu.ch>
Gerrit-Reviewer: DrTrigon <dr.trigon(a)surfeu.ch>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot