diff --git a/lib/bs4/AUTHORS.txt b/lib/bs4/AUTHORS.txt
deleted file mode 100644
index 2ac8fcc..0000000
--- a/lib/bs4/AUTHORS.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-Behold, mortal, the origins of Beautiful Soup...
-================================================
-
-Leonard Richardson is the primary programmer.
-
-Aaron DeVore is awesome.
-
-Mark Pilgrim provided the encoding detection code that forms the base
-of UnicodeDammit.
-
-Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
-Soup 4 working under Python 3.
-
-Simon Willison wrote soupselect, which was used to make Beautiful Soup
-support CSS selectors.
-
-Sam Ruby helped with a lot of edge cases.
-
-Jonathan Ellis was awarded the prestigous Beau Potage D'Or for his
-work in solving the nestable tags conundrum.
-
-An incomplete list of people have contributed patches to Beautiful
-Soup:
-
- Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
- Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
- Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
- Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
- Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
- Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
- Webster, Paul Wright, Danny Yoo
-
-An incomplete list of people who made suggestions or found bugs or
-found ways to break Beautiful Soup:
-
- Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
- Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
- Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
- warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
- Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
- Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
- Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
- Sousa Rocha, Yichun Wei, Per Vognsen
diff --git a/lib/bs4/COPYING.txt b/lib/bs4/COPYING.txt
deleted file mode 100644
index d668d13..0000000
--- a/lib/bs4/COPYING.txt
+++ /dev/null
@@ -1,26 +0,0 @@
-Beautiful Soup is made available under the MIT license:
-
- Copyright (c) 2004-2012 Leonard Richardson
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE, DAMMIT.
-
-Beautiful Soup incorporates code from the html5lib library, which is
-also made available under the MIT license.
diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py
deleted file mode 100644
index 03b2416..0000000
--- a/lib/bs4/__init__.py
+++ /dev/null
@@ -1,365 +0,0 @@
-"""Beautiful Soup
-Elixir and Tonic
-"The Screen-Scraper's Friend"
-http://www.crummy.com/software/BeautifulSoup/
-
-Beautiful Soup uses a pluggable XML or HTML parser to parse a
-(possibly invalid) document into a tree representation. Beautiful Soup
-provides provides methods and Pythonic idioms that make it easy to
-navigate, search, and modify the parse tree.
-
-Beautiful Soup works with Python 2.6 and up. It works better if lxml
-and/or html5lib is installed.
-
-For more than you ever wanted to know about Beautiful Soup, see the
-documentation:
-http://www.crummy.com/software/BeautifulSoup/bs4/doc/
-"""
-
-__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.2.1"
-__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
-__license__ = "MIT"
-
-__all__ = ['BeautifulSoup']
-
-import re
-import warnings
-
-from .builder import builder_registry
-from .dammit import UnicodeDammit
-from .element import (
- CData,
- Comment,
- DEFAULT_OUTPUT_ENCODING,
- Declaration,
- Doctype,
- NavigableString,
- PageElement,
- ProcessingInstruction,
- ResultSet,
- SoupStrainer,
- Tag,
- )
-
-# The very first thing we do is give a useful error if someone is
-# running this code under Python 3 without converting it.
-syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
-
-class BeautifulSoup(Tag):
- """
- This class defines the basic interface called by the tree builders.
-
- These methods will be called by the parser:
- reset()
- feed(markup)
-
- The tree builder may call these methods from its feed() implementation:
- handle_starttag(name, attrs) # See note about return value
- handle_endtag(name)
- handle_data(data) # Appends to the current data node
- endData(containerClass=NavigableString) # Ends the current data node
-
- No matter how complicated the underlying parser is, you should be
- able to build a tree using 'start tag' events, 'end tag' events,
- 'data' events, and "done with data" events.
-
- If you encounter an empty-element tag (aka a self-closing tag,
- like HTML's
tag), call handle_starttag and then
- handle_endtag.
- """
- ROOT_TAG_NAME = u'[document]'
-
- # If the end-user gives no indication which tree builder they
- # want, look for one with these features.
- DEFAULT_BUILDER_FEATURES = ['html', 'fast']
-
- # Used when determining whether a text node is all whitespace and
- # can be replaced with a single space. A text node that contains
- # fancy Unicode spaces (usually non-breaking) should be left
- # alone.
- STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
-
- def __init__(self, markup="", features=None, builder=None,
- parse_only=None, from_encoding=None, **kwargs):
- """The Soup object is initialized as the 'root tag', and the
- provided markup (which can be a string or a file-like object)
- is fed into the underlying parser."""
-
- if 'convertEntities' in kwargs:
- warnings.warn(
- "BS4 does not respect the convertEntities argument to the "
- "BeautifulSoup constructor. Entities are always converted "
- "to Unicode characters.")
-
- if 'markupMassage' in kwargs:
- del kwargs['markupMassage']
- warnings.warn(
- "BS4 does not respect the markupMassage argument to the "
- "BeautifulSoup constructor. The tree builder is responsible "
- "for any necessary markup massage.")
-
- if 'smartQuotesTo' in kwargs:
- del kwargs['smartQuotesTo']
- warnings.warn(
- "BS4 does not respect the smartQuotesTo argument to the "
- "BeautifulSoup constructor. Smart quotes are always converted "
- "to Unicode characters.")
-
- if 'selfClosingTags' in kwargs:
- del kwargs['selfClosingTags']
- warnings.warn(
- "BS4 does not respect the selfClosingTags argument to the "
- "BeautifulSoup constructor. The tree builder is responsible "
- "for understanding self-closing tags.")
-
- if 'isHTML' in kwargs:
- del kwargs['isHTML']
- warnings.warn(
- "BS4 does not respect the isHTML argument to the "
- "BeautifulSoup constructor. You can pass in features='html' "
- "or features='xml' to get a builder capable of handling "
- "one or the other.")
-
- def deprecated_argument(old_name, new_name):
- if old_name in kwargs:
- warnings.warn(
- 'The "%s" argument to the BeautifulSoup constructor '
- 'has been renamed to "%s."' % (old_name, new_name))
- value = kwargs[old_name]
- del kwargs[old_name]
- return value
- return None
-
- parse_only = parse_only or deprecated_argument(
- "parseOnlyThese", "parse_only")
-
- from_encoding = from_encoding or deprecated_argument(
- "fromEncoding", "from_encoding")
-
- if len(kwargs) > 0:
- arg = kwargs.keys().pop()
- raise TypeError(
- "__init__() got an unexpected keyword argument '%s'" % arg)
-
- if builder is None:
- if isinstance(features, basestring):
- features = [features]
- if features is None or len(features) == 0:
- features = self.DEFAULT_BUILDER_FEATURES
- builder_class = builder_registry.lookup(*features)
- if builder_class is None:
- raise FeatureNotFound(
- "Couldn't find a tree builder with the features you "
- "requested: %s. Do you need to install a parser library?"
- % ",".join(features))
- builder = builder_class()
- self.builder = builder
- self.is_xml = builder.is_xml
- self.builder.soup = self
-
- self.parse_only = parse_only
-
- self.reset()
-
- if hasattr(markup, 'read'): # It's a file-type object.
- markup = markup.read()
- (self.markup, self.original_encoding, self.declared_html_encoding,
- self.contains_replacement_characters) = (
- self.builder.prepare_markup(markup, from_encoding))
-
- try:
- self._feed()
- except StopParsing:
- pass
-
- # Clear out the markup and remove the builder's circular
- # reference to this object.
- self.markup = None
- self.builder.soup = None
-
- def _feed(self):
- # Convert the document to Unicode.
- self.builder.reset()
-
- self.builder.feed(self.markup)
- # Close out any unfinished strings and close all the open tags.
- self.endData()
- while self.currentTag.name != self.ROOT_TAG_NAME:
- self.popTag()
-
- def reset(self):
- Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
- self.hidden = 1
- self.builder.reset()
- self.currentData = []
- self.currentTag = None
- self.tagStack = []
- self.pushTag(self)
-
- def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
- """Create a new tag associated with this soup."""
- return Tag(None, self.builder, name, namespace, nsprefix, attrs)
-
- def new_string(self, s, subclass=NavigableString):
- """Create a new NavigableString associated with this soup."""
- navigable = subclass(s)
- navigable.setup()
- return navigable
-
- def insert_before(self, successor):
- raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
-
- def insert_after(self, successor):
- raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
-
- def popTag(self):
- tag = self.tagStack.pop()
- #print "Pop", tag.name
- if self.tagStack:
- self.currentTag = self.tagStack[-1]
- return self.currentTag
-
- def pushTag(self, tag):
- #print "Push", tag.name
- if self.currentTag:
- self.currentTag.contents.append(tag)
- self.tagStack.append(tag)
- self.currentTag = self.tagStack[-1]
-
- def endData(self, containerClass=NavigableString):
- if self.currentData:
- currentData = u''.join(self.currentData)
- if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
- not set([tag.name for tag in self.tagStack]).intersection(
- self.builder.preserve_whitespace_tags)):
- if '\n' in currentData:
- currentData = '\n'
- else:
- currentData = ' '
- self.currentData = []
- if self.parse_only and len(self.tagStack) <= 1 and \
- (not self.parse_only.text or \
- not self.parse_only.search(currentData)):
- return
- o = containerClass(currentData)
- self.object_was_parsed(o)
-
- def object_was_parsed(self, o, parent=None, most_recent_element=None):
- """Add an object to the parse tree."""
- parent = parent or self.currentTag
- most_recent_element = most_recent_element or self._most_recent_element
- o.setup(parent, most_recent_element)
- if most_recent_element is not None:
- most_recent_element.next_element = o
- self._most_recent_element = o
- parent.contents.append(o)
-
- def _popToTag(self, name, nsprefix=None, inclusivePop=True):
- """Pops the tag stack up to and including the most recent
- instance of the given tag. If inclusivePop is false, pops the tag
- stack up to but *not* including the most recent instqance of
- the given tag."""
- #print "Popping to %s" % name
- if name == self.ROOT_TAG_NAME:
- return
-
- numPops = 0
- mostRecentTag = None
-
- for i in range(len(self.tagStack) - 1, 0, -1):
- if (name == self.tagStack[i].name
- and nsprefix == self.tagStack[i].prefix):
- numPops = len(self.tagStack) - i
- break
- if not inclusivePop:
- numPops = numPops - 1
-
- for i in range(0, numPops):
- mostRecentTag = self.popTag()
- return mostRecentTag
-
- def handle_starttag(self, name, namespace, nsprefix, attrs):
- """Push a start tag on to the stack.
-
- If this method returns None, the tag was rejected by the
- SoupStrainer. You should proceed as if the tag had not occured
- in the document. For instance, if this was a self-closing tag,
- don't call handle_endtag.
- """
-
- # print "Start tag %s: %s" % (name, attrs)
- self.endData()
-
- if (self.parse_only and len(self.tagStack) <= 1
- and (self.parse_only.text
- or not self.parse_only.search_tag(name, attrs))):
- return None
-
- tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
- self.currentTag, self._most_recent_element)
- if tag is None:
- return tag
- if self._most_recent_element:
- self._most_recent_element.next_element = tag
- self._most_recent_element = tag
- self.pushTag(tag)
- return tag
-
- def handle_endtag(self, name, nsprefix=None):
- #print "End tag: " + name
- self.endData()
- self._popToTag(name, nsprefix)
-
- def handle_data(self, data):
- self.currentData.append(data)
-
- def decode(self, pretty_print=False,
- eventual_encoding=DEFAULT_OUTPUT_ENCODING,
- formatter="minimal"):
- """Returns a string or Unicode representation of this document.
- To get Unicode, pass None for encoding."""
-
- if self.is_xml:
- # Print the XML declaration
- encoding_part = ''
- if eventual_encoding != None:
- encoding_part = ' encoding="%s"' % eventual_encoding
- prefix = u'\n' % encoding_part
- else:
- prefix = u''
- if not pretty_print:
- indent_level = None
- else:
- indent_level = 0
- return prefix + super(BeautifulSoup, self).decode(
- indent_level, eventual_encoding, formatter)
-
-# Alias to make it easier to type import: 'from bs4 import _soup'
-_s = BeautifulSoup
-_soup = BeautifulSoup
-
-class BeautifulStoneSoup(BeautifulSoup):
- """Deprecated interface to an XML parser."""
-
- def __init__(self, *args, **kwargs):
- kwargs['features'] = 'xml'
- warnings.warn(
- 'The BeautifulStoneSoup class is deprecated. Instead of using '
- 'it, pass features="xml" into the BeautifulSoup constructor.')
- super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
-
-
-class StopParsing(Exception):
- pass
-
-
-class FeatureNotFound(ValueError):
- pass
-
-
-#By default, act as an HTML pretty-printer.
-if __name__ == '__main__':
- import sys
- soup = BeautifulSoup(sys.stdin)
- print soup.prettify()
diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py
deleted file mode 100644
index bae453e..0000000
--- a/lib/bs4/builder/__init__.py
+++ /dev/null
@@ -1,316 +0,0 @@
-from collections import defaultdict
-import itertools
-import sys
-from bs4.element import (
- CharsetMetaAttributeValue,
- ContentMetaAttributeValue,
- whitespace_re
- )
-
-__all__ = [
- 'HTMLTreeBuilder',
- 'SAXTreeBuilder',
- 'TreeBuilder',
- 'TreeBuilderRegistry',
- ]
-
-# Some useful features for a TreeBuilder to have.
-FAST = 'fast'
-PERMISSIVE = 'permissive'
-STRICT = 'strict'
-XML = 'xml'
-HTML = 'html'
-HTML_5 = 'html5'
-
-
-class TreeBuilderRegistry(object):
-
- def __init__(self):
- self.builders_for_feature = defaultdict(list)
- self.builders = []
-
- def register(self, treebuilder_class):
- """Register a treebuilder based on its advertised features."""
- for feature in treebuilder_class.features:
- self.builders_for_feature[feature].insert(0, treebuilder_class)
- self.builders.insert(0, treebuilder_class)
-
- def lookup(self, *features):
- if len(self.builders) == 0:
- # There are no builders at all.
- return None
-
- if len(features) == 0:
- # They didn't ask for any features. Give them the most
- # recently registered builder.
- return self.builders[0]
-
- # Go down the list of features in order, and eliminate any builders
- # that don't match every feature.
- features = list(features)
- features.reverse()
- candidates = None
- candidate_set = None
- while len(features) > 0:
- feature = features.pop()
- we_have_the_feature = self.builders_for_feature.get(feature, [])
- if len(we_have_the_feature) > 0:
- if candidates is None:
- candidates = we_have_the_feature
- candidate_set = set(candidates)
- else:
- # Eliminate any candidates that don't have this feature.
- candidate_set = candidate_set.intersection(
- set(we_have_the_feature))
-
- # The only valid candidates are the ones in candidate_set.
- # Go through the original list of candidates and pick the first one
- # that's in candidate_set.
- if candidate_set is None:
- return None
- for candidate in candidates:
- if candidate in candidate_set:
- return candidate
- return None
-
-# The BeautifulSoup class will take feature lists from developers and use them
-# to look up builders in this registry.
-builder_registry = TreeBuilderRegistry()
-
-class TreeBuilder(object):
- """Turn a document into a Beautiful Soup object tree."""
-
- features = []
-
- is_xml = False
- preserve_whitespace_tags = set()
- empty_element_tags = None # A tag will be considered an empty-element
- # tag when and only when it has no contents.
-
- # A value for these tag/attribute combinations is a space- or
- # comma-separated list of CDATA, rather than a single CDATA.
- cdata_list_attributes = {}
-
-
- def __init__(self):
- self.soup = None
-
- def reset(self):
- pass
-
- def can_be_empty_element(self, tag_name):
- """Might a tag with this name be an empty-element tag?
-
- The final markup may or may not actually present this tag as
- self-closing.
-
- For instance: an HTMLBuilder does not consider a
tag to be - an empty-element tag (it's not in - HTMLBuilder.empty_element_tags). This means an empty
tag - will be presented as "
", not "". - - The default implementation has no opinion about which tags are - empty-element tags, so a tag will be presented as an - empty-element tag if and only if it has no contents. - "", "
") - self.assertSoupEquals("foo
") - - def test_namespaces_are_preserved(self): - markup = 'A bold statement.
" - with warnings.catch_warnings(record=True) as w: - soup = self.soup(markup, parse_only=strainer) - self.assertEqual( - soup.decode(), self.document_for(markup)) - - self.assertTrue( - "the html5lib tree builder doesn't support parse_only" in - str(w[0].message)) - - def test_correctly_nested_tables(self): - """html5lib inserts tags where other parsers don't.""" - markup = ('