Updated bunded version of BS4
This commit is contained in:
parent
5d30398bc1
commit
2182d5a0fd
14 changed files with 832 additions and 159 deletions
|
@ -81,6 +81,11 @@ class HTMLTreeBuilderSmokeTest(object):
|
|||
self.assertDoctypeHandled(
|
||||
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
|
||||
|
||||
def test_empty_doctype(self):
|
||||
soup = self.soup("<!DOCTYPE>")
|
||||
doctype = soup.contents[0]
|
||||
self.assertEqual("", doctype.strip())
|
||||
|
||||
def test_public_doctype_with_url(self):
|
||||
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
|
||||
self.assertDoctypeHandled(doctype)
|
||||
|
@ -159,6 +164,12 @@ class HTMLTreeBuilderSmokeTest(object):
|
|||
comment = soup.find(text="foobar")
|
||||
self.assertEqual(comment.__class__, Comment)
|
||||
|
||||
# The comment is properly integrated into the tree.
|
||||
foo = soup.find(text="foo")
|
||||
self.assertEqual(comment, foo.next_element)
|
||||
baz = soup.find(text="baz")
|
||||
self.assertEqual(comment, baz.previous_element)
|
||||
|
||||
def test_preserved_whitespace_in_pre_and_textarea(self):
|
||||
"""Whitespace must be preserved in <pre> and <textarea> tags."""
|
||||
self.assertSoupEquals("<pre> </pre>")
|
||||
|
@ -217,12 +228,14 @@ class HTMLTreeBuilderSmokeTest(object):
|
|||
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
self.assertSoupEquals('<p id="piñata"></p>', expect)
|
||||
|
||||
def test_entities_in_text_converted_to_unicode(self):
|
||||
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
self.assertSoupEquals("<p>piñata</p>", expect)
|
||||
|
||||
def test_quot_entity_converted_to_quotation_mark(self):
|
||||
|
@ -235,6 +248,12 @@ class HTMLTreeBuilderSmokeTest(object):
|
|||
self.assertSoupEquals("�", expect)
|
||||
self.assertSoupEquals("�", expect)
|
||||
|
||||
def test_multipart_strings(self):
|
||||
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
|
||||
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
|
||||
self.assertEqual("p", soup.h2.string.next_element.name)
|
||||
self.assertEqual("p", soup.p.name)
|
||||
|
||||
def test_basic_namespaces(self):
|
||||
"""Parsers don't need to *understand* namespaces, but at the
|
||||
very least they should not choke on namespaces or lose
|
||||
|
@ -453,6 +472,18 @@ class XMLTreeBuilderSmokeTest(object):
|
|||
self.assertEqual(
|
||||
soup.encode("utf-8"), markup)
|
||||
|
||||
def test_formatter_processes_script_tag_for_xml_documents(self):
|
||||
doc = """
|
||||
<script type="text/javascript">
|
||||
</script>
|
||||
"""
|
||||
soup = BeautifulSoup(doc, "xml")
|
||||
# lxml would have stripped this while parsing, but we can add
|
||||
# it later.
|
||||
soup.script.string = 'console.log("< < hey > > ");'
|
||||
encoded = soup.encode()
|
||||
self.assertTrue(b"< < hey > >" in encoded)
|
||||
|
||||
def test_popping_namespaced_tag(self):
|
||||
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
|
||||
soup = self.soup(markup)
|
||||
|
@ -495,6 +526,11 @@ class XMLTreeBuilderSmokeTest(object):
|
|||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
def test_namespaced_attributes_xml_namespace(self):
|
||||
markup = '<foo xml:lang="fr">bar</foo>'
|
||||
soup = self.soup(markup)
|
||||
self.assertEqual(unicode(soup.foo), markup)
|
||||
|
||||
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
||||
"""Smoke test for a tree builder that supports HTML5."""
|
||||
|
||||
|
@ -523,6 +559,12 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
|
|||
self.assertEqual(namespace, soup.math.namespace)
|
||||
self.assertEqual(namespace, soup.msqrt.namespace)
|
||||
|
||||
def test_xml_declaration_becomes_comment(self):
|
||||
markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
|
||||
soup = self.soup(markup)
|
||||
self.assertTrue(isinstance(soup.contents[0], Comment))
|
||||
self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
|
||||
self.assertEqual("html", soup.contents[0].next_element.name)
|
||||
|
||||
def skipIf(condition, reason):
|
||||
def nothing(test, *args, **kwargs):
|
||||
|
|
Reference in a new issue