Updated bundled version of BS4
This commit is contained in:
parent
5d30398bc1
commit
2182d5a0fd
14 changed files with 832 additions and 159 deletions
|
@ -20,6 +20,7 @@ from bs4.builder import (
|
|||
)
|
||||
from bs4.element import (
|
||||
CData,
|
||||
Comment,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
SoupStrainer,
|
||||
|
@ -425,6 +426,7 @@ class TestParentOperations(TreeTest):
|
|||
|
||||
def test_find_parent(self):
|
||||
self.assertEqual(self.start.find_parent('ul')['id'], 'bottom')
|
||||
self.assertEqual(self.start.find_parent('ul', id='top')['id'], 'top')
|
||||
|
||||
def test_parent_of_text_element(self):
|
||||
text = self.tree.find(text="Start here")
|
||||
|
@ -687,6 +689,12 @@ class TestTagCreation(SoupTest):
|
|||
self.assertEqual("foo", s)
|
||||
self.assertTrue(isinstance(s, NavigableString))
|
||||
|
||||
def test_new_string_can_create_navigablestring_subclass(self):
|
||||
soup = self.soup("")
|
||||
s = soup.new_string("foo", Comment)
|
||||
self.assertEqual("foo", s)
|
||||
self.assertTrue(isinstance(s, Comment))
|
||||
|
||||
class TestTreeModification(SoupTest):
|
||||
|
||||
def test_attribute_modification(self):
|
||||
|
@ -1048,7 +1056,7 @@ class TestTreeModification(SoupTest):
|
|||
# clear using decompose()
|
||||
em = a.em
|
||||
a.clear(decompose=True)
|
||||
self.assertFalse(hasattr(em, "contents"))
|
||||
self.assertEqual(0, len(em.contents))
|
||||
|
||||
def test_string_set(self):
|
||||
"""Tag.string = 'string'"""
|
||||
|
@ -1166,6 +1174,19 @@ class TestElementObjects(SoupTest):
|
|||
self.assertEqual(soup.a.get_text(","), "a,r, , t ")
|
||||
self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
|
||||
|
||||
def test_get_text_ignores_comments(self):
|
||||
soup = self.soup("foo<!--IGNORE-->bar")
|
||||
self.assertEqual(soup.get_text(), "foobar")
|
||||
|
||||
self.assertEqual(
|
||||
soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
|
||||
self.assertEqual(
|
||||
soup.get_text(types=None), "fooIGNOREbar")
|
||||
|
||||
def test_all_strings_ignores_comments(self):
|
||||
soup = self.soup("foo<!--IGNORE-->bar")
|
||||
self.assertEqual(['foo', 'bar'], list(soup.strings))
|
||||
|
||||
class TestCDAtaListAttributes(SoupTest):
|
||||
|
||||
"""Testing cdata-list attributes like 'class'.
|
||||
|
@ -1310,6 +1331,32 @@ class TestSubstitutions(SoupTest):
|
|||
expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
|
||||
self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
|
||||
|
||||
def test_formatter_skips_script_tag_for_html_documents(self):
|
||||
doc = """
|
||||
<script type="text/javascript">
|
||||
console.log("< < hey > > ");
|
||||
</script>
|
||||
"""
|
||||
encoded = BeautifulSoup(doc).encode()
|
||||
self.assertTrue(b"< < hey > >" in encoded)
|
||||
|
||||
def test_formatter_skips_style_tag_for_html_documents(self):
|
||||
doc = """
|
||||
<style type="text/css">
|
||||
console.log("< < hey > > ");
|
||||
</style>
|
||||
"""
|
||||
encoded = BeautifulSoup(doc).encode()
|
||||
self.assertTrue(b"< < hey > >" in encoded)
|
||||
|
||||
def test_prettify_leaves_preformatted_text_alone(self):
|
||||
soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz ")
|
||||
# Everything outside the <pre> tag is reformatted, but everything
|
||||
# inside is left alone.
|
||||
self.assertEqual(
|
||||
u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
|
||||
soup.div.prettify())
|
||||
|
||||
def test_prettify_accepts_formatter(self):
|
||||
soup = BeautifulSoup("<html><body>foo</body></html>")
|
||||
pretty = soup.prettify(formatter = lambda x: x.upper())
|
||||
|
@ -1459,7 +1506,7 @@ class TestSoupSelector(TreeTest):
|
|||
</head>
|
||||
<body>
|
||||
|
||||
<div id="main">
|
||||
<div id="main" class="fancy">
|
||||
<div id="inner">
|
||||
<h1 id="header1">An H1</h1>
|
||||
<p>Some text</p>
|
||||
|
@ -1531,7 +1578,7 @@ class TestSoupSelector(TreeTest):
|
|||
self.assertEqual(len(self.soup.select('del')), 0)
|
||||
|
||||
def test_invalid_tag(self):
|
||||
self.assertEqual(len(self.soup.select('tag%t')), 0)
|
||||
self.assertRaises(ValueError, self.soup.select, 'tag%t')
|
||||
|
||||
def test_header_tags(self):
|
||||
self.assertSelectMultiple(
|
||||
|
@ -1564,7 +1611,7 @@ class TestSoupSelector(TreeTest):
|
|||
for el in els:
|
||||
self.assertEqual(el.name, 'p')
|
||||
self.assertEqual(els[1]['class'], ['onep'])
|
||||
self.assertFalse(els[0].has_key('class'))
|
||||
self.assertFalse(els[0].has_attr('class'))
|
||||
|
||||
def test_a_bunch_of_emptys(self):
|
||||
for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
|
||||
|
@ -1584,6 +1631,9 @@ class TestSoupSelector(TreeTest):
|
|||
self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
|
||||
self.assertSelects('.s1 > a span', ['s1a2s1'])
|
||||
|
||||
def test_child_selector_id(self):
|
||||
self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1'])
|
||||
|
||||
def test_attribute_equals(self):
|
||||
self.assertSelectMultiple(
|
||||
('p[class="onep"]', ['p1']),
|
||||
|
@ -1690,6 +1740,33 @@ class TestSoupSelector(TreeTest):
|
|||
('p[blah]', []),
|
||||
)
|
||||
|
||||
def test_nth_of_type(self):
|
||||
# Try to select first paragraph
|
||||
els = self.soup.select('div#inner p:nth-of-type(1)')
|
||||
self.assertEqual(len(els), 1)
|
||||
self.assertEqual(els[0].string, u'Some text')
|
||||
|
||||
# Try to select third paragraph
|
||||
els = self.soup.select('div#inner p:nth-of-type(3)')
|
||||
self.assertEqual(len(els), 1)
|
||||
self.assertEqual(els[0].string, u'Another')
|
||||
|
||||
# Try to select (non-existent!) fourth paragraph
|
||||
els = self.soup.select('div#inner p:nth-of-type(4)')
|
||||
self.assertEqual(len(els), 0)
|
||||
|
||||
# Pass in an invalid value.
|
||||
self.assertRaises(
|
||||
ValueError, self.soup.select, 'div p:nth-of-type(0)')
|
||||
|
||||
def test_nth_of_type_direct_descendant(self):
|
||||
els = self.soup.select('div#inner > p:nth-of-type(1)')
|
||||
self.assertEqual(len(els), 1)
|
||||
self.assertEqual(els[0].string, u'Some text')
|
||||
|
||||
def test_id_child_selector_nth_of_type(self):
|
||||
self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
|
||||
|
||||
def test_select_on_element(self):
|
||||
# Other tests operate on the tree; this operates on an element
|
||||
# within the tree.
|
||||
|
@ -1698,3 +1775,26 @@ class TestSoupSelector(TreeTest):
|
|||
# The <div id="inner"> tag was selected. The <div id="footer">
|
||||
# tag was not.
|
||||
self.assertSelectsIDs(selected, ['inner'])
|
||||
|
||||
def test_overspecified_child_id(self):
|
||||
self.assertSelects(".fancy #inner", ['inner'])
|
||||
self.assertSelects(".normal #inner", [])
|
||||
|
||||
def test_adjacent_sibling_selector(self):
|
||||
self.assertSelects('#p1 + h2', ['header2'])
|
||||
self.assertSelects('#p1 + h2 + p', ['pmulti'])
|
||||
self.assertSelects('#p1 + #header2 + .class1', ['pmulti'])
|
||||
self.assertEqual([], self.soup.select('#p1 + p'))
|
||||
|
||||
def test_general_sibling_selector(self):
|
||||
self.assertSelects('#p1 ~ h2', ['header2', 'header3'])
|
||||
self.assertSelects('#p1 ~ #header2', ['header2'])
|
||||
self.assertSelects('#p1 ~ h2 + a', ['me'])
|
||||
self.assertSelects('#p1 ~ h2 + [rel="me"]', ['me'])
|
||||
self.assertEqual([], self.soup.select('#inner ~ h2'))
|
||||
|
||||
def test_dangling_combinator(self):
|
||||
self.assertRaises(ValueError, self.soup.select, 'h1 >')
|
||||
|
||||
def test_sibling_combinator_wont_select_same_tag_twice(self):
|
||||
self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])
|
||||
|
|
Reference in a new issue