Removed lib folder

Luke Rogers 2013-10-02 11:30:51 +13:00
parent b923cf34a4
commit bdf5e98c5c
67 changed files with 3 additions and 18452 deletions


@@ -1,43 +0,0 @@
Behold, mortal, the origins of Beautiful Soup...
================================================
Leonard Richardson is the primary programmer.
Aaron DeVore is awesome.
Mark Pilgrim provided the encoding detection code that forms the base
of UnicodeDammit.
Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
Soup 4 working under Python 3.
Simon Willison wrote soupselect, which was used to make Beautiful Soup
support CSS selectors.
Sam Ruby helped with a lot of edge cases.
Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
work in solving the nestable tags conundrum.
An incomplete list of people who have contributed patches to Beautiful
Soup:
Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
Webster, Paul Wright, Danny Yoo
An incomplete list of people who made suggestions or found bugs or
found ways to break Beautiful Soup:
Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
Sousa Rocha, Yichun Wei, Per Vognsen


@@ -1,26 +0,0 @@
Beautiful Soup is made available under the MIT license:
Copyright (c) 2004-2012 Leonard Richardson
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE, DAMMIT.
Beautiful Soup incorporates code from the html5lib library, which is
also made available under the MIT license.


@@ -1,365 +0,0 @@
"""Beautiful Soup
Elixir and Tonic
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
Beautiful Soup uses a pluggable XML or HTML parser to parse a
(possibly invalid) document into a tree representation. Beautiful Soup
provides methods and Pythonic idioms that make it easy to
navigate, search, and modify the parse tree.
Beautiful Soup works with Python 2.6 and up. It works better if lxml
and/or html5lib is installed.
For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "4.2.1"
__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson"
__license__ = "MIT"
__all__ = ['BeautifulSoup']
import re
import warnings
from .builder import builder_registry
from .dammit import UnicodeDammit
from .element import (
CData,
Comment,
DEFAULT_OUTPUT_ENCODING,
Declaration,
Doctype,
NavigableString,
PageElement,
ProcessingInstruction,
ResultSet,
SoupStrainer,
Tag,
)
# The very first thing we do is give a useful error if someone is
# running this code under Python 3 without converting it.
# (The '<>' operator below is deliberate: it is valid only in Python 2, so
# under Python 3 this line is a syntax error and the message shows up in
# the traceback.)
syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
class BeautifulSoup(Tag):
"""
This class defines the basic interface called by the tree builders.
These methods will be called by the parser:
reset()
feed(markup)
The tree builder may call these methods from its feed() implementation:
handle_starttag(name, attrs) # See note about return value
handle_endtag(name)
handle_data(data) # Appends to the current data node
endData(containerClass=NavigableString) # Ends the current data node
No matter how complicated the underlying parser is, you should be
able to build a tree using 'start tag' events, 'end tag' events,
'data' events, and "done with data" events.
If you encounter an empty-element tag (aka a self-closing tag,
like HTML's <br> tag), call handle_starttag and then
handle_endtag.
"""
ROOT_TAG_NAME = u'[document]'
# If the end-user gives no indication which tree builder they
# want, look for one with these features.
DEFAULT_BUILDER_FEATURES = ['html', 'fast']
# Used when determining whether a text node is all whitespace and
# can be replaced with a single space. A text node that contains
# fancy Unicode spaces (usually non-breaking) should be left
# alone.
STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
def __init__(self, markup="", features=None, builder=None,
parse_only=None, from_encoding=None, **kwargs):
"""The Soup object is initialized as the 'root tag', and the
provided markup (which can be a string or a file-like object)
is fed into the underlying parser."""
if 'convertEntities' in kwargs:
warnings.warn(
"BS4 does not respect the convertEntities argument to the "
"BeautifulSoup constructor. Entities are always converted "
"to Unicode characters.")
if 'markupMassage' in kwargs:
del kwargs['markupMassage']
warnings.warn(
"BS4 does not respect the markupMassage argument to the "
"BeautifulSoup constructor. The tree builder is responsible "
"for any necessary markup massage.")
if 'smartQuotesTo' in kwargs:
del kwargs['smartQuotesTo']
warnings.warn(
"BS4 does not respect the smartQuotesTo argument to the "
"BeautifulSoup constructor. Smart quotes are always converted "
"to Unicode characters.")
if 'selfClosingTags' in kwargs:
del kwargs['selfClosingTags']
warnings.warn(
"BS4 does not respect the selfClosingTags argument to the "
"BeautifulSoup constructor. The tree builder is responsible "
"for understanding self-closing tags.")
if 'isHTML' in kwargs:
del kwargs['isHTML']
warnings.warn(
"BS4 does not respect the isHTML argument to the "
"BeautifulSoup constructor. You can pass in features='html' "
"or features='xml' to get a builder capable of handling "
"one or the other.")
def deprecated_argument(old_name, new_name):
if old_name in kwargs:
warnings.warn(
'The "%s" argument to the BeautifulSoup constructor '
'has been renamed to "%s."' % (old_name, new_name))
value = kwargs[old_name]
del kwargs[old_name]
return value
return None
parse_only = parse_only or deprecated_argument(
"parseOnlyThese", "parse_only")
from_encoding = from_encoding or deprecated_argument(
"fromEncoding", "from_encoding")
if len(kwargs) > 0:
arg = kwargs.keys().pop()
raise TypeError(
"__init__() got an unexpected keyword argument '%s'" % arg)
if builder is None:
if isinstance(features, basestring):
features = [features]
if features is None or len(features) == 0:
features = self.DEFAULT_BUILDER_FEATURES
builder_class = builder_registry.lookup(*features)
if builder_class is None:
raise FeatureNotFound(
"Couldn't find a tree builder with the features you "
"requested: %s. Do you need to install a parser library?"
% ",".join(features))
builder = builder_class()
self.builder = builder
self.is_xml = builder.is_xml
self.builder.soup = self
self.parse_only = parse_only
self.reset()
if hasattr(markup, 'read'): # It's a file-type object.
markup = markup.read()
(self.markup, self.original_encoding, self.declared_html_encoding,
self.contains_replacement_characters) = (
self.builder.prepare_markup(markup, from_encoding))
try:
self._feed()
except StopParsing:
pass
# Clear out the markup and remove the builder's circular
# reference to this object.
self.markup = None
self.builder.soup = None
def _feed(self):
# Convert the document to Unicode.
self.builder.reset()
self.builder.feed(self.markup)
# Close out any unfinished strings and close all the open tags.
self.endData()
while self.currentTag.name != self.ROOT_TAG_NAME:
self.popTag()
def reset(self):
Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
self.hidden = 1
self.builder.reset()
self.currentData = []
self.currentTag = None
self.tagStack = []
self.pushTag(self)
def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
"""Create a new tag associated with this soup."""
return Tag(None, self.builder, name, namespace, nsprefix, attrs)
def new_string(self, s, subclass=NavigableString):
"""Create a new NavigableString associated with this soup."""
navigable = subclass(s)
navigable.setup()
return navigable
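    # For example, a sketch of building a new element (the attribute
    # values here are arbitrary):
    #
    #     tag = soup.new_tag('a', href='http://example.com/')
    #     tag.append(soup.new_string('a link'))
    #
    # The new tag can then be attached to the tree with insert(),
    # append(), etc.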
def insert_before(self, successor):
raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
def insert_after(self, successor):
raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
def popTag(self):
tag = self.tagStack.pop()
#print "Pop", tag.name
if self.tagStack:
self.currentTag = self.tagStack[-1]
return self.currentTag
def pushTag(self, tag):
#print "Push", tag.name
if self.currentTag:
self.currentTag.contents.append(tag)
self.tagStack.append(tag)
self.currentTag = self.tagStack[-1]
def endData(self, containerClass=NavigableString):
if self.currentData:
currentData = u''.join(self.currentData)
if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
not set([tag.name for tag in self.tagStack]).intersection(
self.builder.preserve_whitespace_tags)):
if '\n' in currentData:
currentData = '\n'
else:
currentData = ' '
self.currentData = []
if self.parse_only and len(self.tagStack) <= 1 and \
(not self.parse_only.text or \
not self.parse_only.search(currentData)):
return
o = containerClass(currentData)
self.object_was_parsed(o)
def object_was_parsed(self, o, parent=None, most_recent_element=None):
"""Add an object to the parse tree."""
parent = parent or self.currentTag
most_recent_element = most_recent_element or self._most_recent_element
o.setup(parent, most_recent_element)
if most_recent_element is not None:
most_recent_element.next_element = o
self._most_recent_element = o
parent.contents.append(o)
def _popToTag(self, name, nsprefix=None, inclusivePop=True):
"""Pops the tag stack up to and including the most recent
instance of the given tag. If inclusivePop is false, pops the tag
        stack up to but *not* including the most recent instance of
the given tag."""
#print "Popping to %s" % name
if name == self.ROOT_TAG_NAME:
return
numPops = 0
mostRecentTag = None
for i in range(len(self.tagStack) - 1, 0, -1):
if (name == self.tagStack[i].name
and nsprefix == self.tagStack[i].prefix):
numPops = len(self.tagStack) - i
break
if not inclusivePop:
numPops = numPops - 1
for i in range(0, numPops):
mostRecentTag = self.popTag()
return mostRecentTag
def handle_starttag(self, name, namespace, nsprefix, attrs):
"""Push a start tag on to the stack.
If this method returns None, the tag was rejected by the
        SoupStrainer. You should proceed as if the tag had not occurred
in the document. For instance, if this was a self-closing tag,
don't call handle_endtag.
"""
# print "Start tag %s: %s" % (name, attrs)
self.endData()
if (self.parse_only and len(self.tagStack) <= 1
and (self.parse_only.text
or not self.parse_only.search_tag(name, attrs))):
return None
tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
self.currentTag, self._most_recent_element)
if tag is None:
return tag
if self._most_recent_element:
self._most_recent_element.next_element = tag
self._most_recent_element = tag
self.pushTag(tag)
return tag
def handle_endtag(self, name, nsprefix=None):
#print "End tag: " + name
self.endData()
self._popToTag(name, nsprefix)
def handle_data(self, data):
self.currentData.append(data)
def decode(self, pretty_print=False,
eventual_encoding=DEFAULT_OUTPUT_ENCODING,
formatter="minimal"):
"""Returns a string or Unicode representation of this document.
To get Unicode, pass None for encoding."""
if self.is_xml:
# Print the XML declaration
encoding_part = ''
            if eventual_encoding is not None:
encoding_part = ' encoding="%s"' % eventual_encoding
prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
else:
prefix = u''
if not pretty_print:
indent_level = None
else:
indent_level = 0
return prefix + super(BeautifulSoup, self).decode(
indent_level, eventual_encoding, formatter)
# Alias to make it easier to type import: 'from bs4 import _soup'
_s = BeautifulSoup
_soup = BeautifulSoup
class BeautifulStoneSoup(BeautifulSoup):
"""Deprecated interface to an XML parser."""
def __init__(self, *args, **kwargs):
kwargs['features'] = 'xml'
warnings.warn(
'The BeautifulStoneSoup class is deprecated. Instead of using '
'it, pass features="xml" into the BeautifulSoup constructor.')
super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
class StopParsing(Exception):
pass
class FeatureNotFound(ValueError):
pass
#By default, act as an HTML pretty-printer.
if __name__ == '__main__':
import sys
soup = BeautifulSoup(sys.stdin)
print soup.prettify()


@@ -1,316 +0,0 @@
from collections import defaultdict
import itertools
import sys
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
whitespace_re
)
__all__ = [
'HTMLTreeBuilder',
'SAXTreeBuilder',
'TreeBuilder',
'TreeBuilderRegistry',
]
# Some useful features for a TreeBuilder to have.
FAST = 'fast'
PERMISSIVE = 'permissive'
STRICT = 'strict'
XML = 'xml'
HTML = 'html'
HTML_5 = 'html5'
class TreeBuilderRegistry(object):
def __init__(self):
self.builders_for_feature = defaultdict(list)
self.builders = []
def register(self, treebuilder_class):
"""Register a treebuilder based on its advertised features."""
for feature in treebuilder_class.features:
self.builders_for_feature[feature].insert(0, treebuilder_class)
self.builders.insert(0, treebuilder_class)
def lookup(self, *features):
if len(self.builders) == 0:
# There are no builders at all.
return None
if len(features) == 0:
# They didn't ask for any features. Give them the most
# recently registered builder.
return self.builders[0]
# Go down the list of features in order, and eliminate any builders
# that don't match every feature.
features = list(features)
features.reverse()
candidates = None
candidate_set = None
while len(features) > 0:
feature = features.pop()
we_have_the_feature = self.builders_for_feature.get(feature, [])
if len(we_have_the_feature) > 0:
if candidates is None:
candidates = we_have_the_feature
candidate_set = set(candidates)
else:
# Eliminate any candidates that don't have this feature.
candidate_set = candidate_set.intersection(
set(we_have_the_feature))
# The only valid candidates are the ones in candidate_set.
# Go through the original list of candidates and pick the first one
# that's in candidate_set.
if candidate_set is None:
return None
for candidate in candidates:
if candidate in candidate_set:
return candidate
return None
# The BeautifulSoup class will take feature lists from developers and use them
# to look up builders in this registry.
builder_registry = TreeBuilderRegistry()
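# For example, a lookup such as
#
#     builder_class = builder_registry.lookup('html', 'fast')
#
# returns the most recently registered builder that advertises both
# features (lxml's HTML builder, when lxml is installed), or None if no
# registered builder has them all.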
class TreeBuilder(object):
"""Turn a document into a Beautiful Soup object tree."""
features = []
is_xml = False
preserve_whitespace_tags = set()
empty_element_tags = None # A tag will be considered an empty-element
# tag when and only when it has no contents.
# A value for these tag/attribute combinations is a space- or
# comma-separated list of CDATA, rather than a single CDATA.
cdata_list_attributes = {}
def __init__(self):
self.soup = None
def reset(self):
pass
def can_be_empty_element(self, tag_name):
"""Might a tag with this name be an empty-element tag?
The final markup may or may not actually present this tag as
self-closing.
        For instance: an HTMLTreeBuilder does not consider a <p> tag to be
        an empty-element tag (it's not in
        HTMLTreeBuilder.empty_element_tags). This means an empty <p> tag
will be presented as "<p></p>", not "<p />".
The default implementation has no opinion about which tags are
empty-element tags, so a tag will be presented as an
empty-element tag if and only if it has no contents.
"<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
be left alone.
"""
if self.empty_element_tags is None:
return True
return tag_name in self.empty_element_tags
def feed(self, markup):
raise NotImplementedError()
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
return markup, None, None, False
def test_fragment_to_document(self, fragment):
"""Wrap an HTML fragment to make it look like a document.
Different parsers do this differently. For instance, lxml
introduces an empty <head> tag, and html5lib
doesn't. Abstracting this away lets us write simple tests
which run HTML fragments through the parser and compare the
results against other HTML fragments.
This method should not be used outside of tests.
"""
return fragment
def set_up_substitutions(self, tag):
return False
def _replace_cdata_list_attribute_values(self, tag_name, attrs):
"""Replaces class="foo bar" with class=["foo", "bar"]
Modifies its input in place.
"""
if self.cdata_list_attributes:
universal = self.cdata_list_attributes.get('*', [])
tag_specific = self.cdata_list_attributes.get(
tag_name.lower(), [])
for cdata_list_attr in itertools.chain(universal, tag_specific):
if cdata_list_attr in attrs:
# Basically, we have a "class" attribute whose
# value is a whitespace-separated list of CSS
# classes. Split it into a list.
value = attrs[cdata_list_attr]
if isinstance(value, basestring):
values = whitespace_re.split(value)
else:
# html5lib sometimes calls setAttributes twice
# for the same tag when rearranging the parse
# tree. On the second call the attribute value
# here is already a list. If this happens,
# leave the value alone rather than trying to
# split it again.
values = value
attrs[cdata_list_attr] = values
return attrs
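    # For example, with the cdata_list_attributes defined on
    # HTMLTreeBuilder below:
    #
    #     attrs = {'class': 'foo bar', 'id': 'x'}
    #     builder._replace_cdata_list_attribute_values('p', attrs)
    #     # attrs is now {'class': ['foo', 'bar'], 'id': 'x'}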
class SAXTreeBuilder(TreeBuilder):
"""A Beautiful Soup treebuilder that listens for SAX events."""
def feed(self, markup):
raise NotImplementedError()
def close(self):
pass
def startElement(self, name, attrs):
attrs = dict((key[1], value) for key, value in list(attrs.items()))
#print "Start %s, %r" % (name, attrs)
self.soup.handle_starttag(name, attrs)
def endElement(self, name):
#print "End %s" % name
self.soup.handle_endtag(name)
def startElementNS(self, nsTuple, nodeName, attrs):
# Throw away (ns, nodeName) for now.
self.startElement(nodeName, attrs)
def endElementNS(self, nsTuple, nodeName):
# Throw away (ns, nodeName) for now.
self.endElement(nodeName)
#handler.endElementNS((ns, node.nodeName), node.nodeName)
def startPrefixMapping(self, prefix, nodeValue):
# Ignore the prefix for now.
pass
def endPrefixMapping(self, prefix):
# Ignore the prefix for now.
# handler.endPrefixMapping(prefix)
pass
def characters(self, content):
self.soup.handle_data(content)
def startDocument(self):
pass
def endDocument(self):
pass
class HTMLTreeBuilder(TreeBuilder):
"""This TreeBuilder knows facts about HTML.
Such as which tags are empty-element tags.
"""
preserve_whitespace_tags = set(['pre', 'textarea'])
    empty_element_tags = set(['br', 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base'])
# The HTML standard defines these attributes as containing a
# space-separated list of values, not a single value. That is,
# class="foo bar" means that the 'class' attribute has two values,
# 'foo' and 'bar', not the single value 'foo bar'. When we
# encounter one of these attributes, we will parse its value into
# a list of values if possible. Upon output, the list will be
# converted back into a string.
cdata_list_attributes = {
"*" : ['class', 'accesskey', 'dropzone'],
"a" : ['rel', 'rev'],
"link" : ['rel', 'rev'],
"td" : ["headers"],
"th" : ["headers"],
"td" : ["headers"],
"form" : ["accept-charset"],
"object" : ["archive"],
# These are HTML5 specific, as are *.accesskey and *.dropzone above.
"area" : ["rel"],
"icon" : ["sizes"],
"iframe" : ["sandbox"],
"output" : ["for"],
}
def set_up_substitutions(self, tag):
# We are only interested in <meta> tags
if tag.name != 'meta':
return False
http_equiv = tag.get('http-equiv')
content = tag.get('content')
charset = tag.get('charset')
# We are interested in <meta> tags that say what encoding the
# document was originally in. This means HTML 5-style <meta>
# tags that provide the "charset" attribute. It also means
# HTML 4-style <meta> tags that provide the "content"
# attribute and have "http-equiv" set to "content-type".
#
# In both cases we will replace the value of the appropriate
        # attribute with a stand-in object that can take on any
# encoding.
meta_encoding = None
if charset is not None:
# HTML 5 style:
# <meta charset="utf8">
meta_encoding = charset
tag['charset'] = CharsetMetaAttributeValue(charset)
elif (content is not None and http_equiv is not None
and http_equiv.lower() == 'content-type'):
# HTML 4 style:
# <meta http-equiv="content-type" content="text/html; charset=utf8">
tag['content'] = ContentMetaAttributeValue(content)
return (meta_encoding is not None)
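    # A sketch of the intended effect: once the substitution is set up,
    # re-encoding the document rewrites the declared charset to match.
    #
    #     soup = BeautifulSoup('<meta charset="utf8">')
    #     soup.meta.encode('euc-jp')
    #     # '<meta charset="euc-jp"/>'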
def register_treebuilders_from(module):
"""Copy TreeBuilders from the given module into this module."""
# I'm fairly sure this is not the best way to do this.
this_module = sys.modules['bs4.builder']
for name in module.__all__:
obj = getattr(module, name)
if issubclass(obj, TreeBuilder):
setattr(this_module, name, obj)
this_module.__all__.append(name)
# Register the builder while we're at it.
this_module.builder_registry.register(obj)
# Builders are registered in reverse order of priority, so that custom
# builder registrations will take precedence. In general, we want lxml
# to take precedence over html5lib, because it's faster. And we only
# want to use HTMLParser as a last resort.
from . import _htmlparser
register_treebuilders_from(_htmlparser)
try:
from . import _html5lib
register_treebuilders_from(_html5lib)
except ImportError:
# They don't have html5lib installed.
pass
try:
from . import _lxml
register_treebuilders_from(_lxml)
except ImportError:
# They don't have lxml installed.
pass


@@ -1,222 +0,0 @@
__all__ = [
'HTML5TreeBuilder',
]
import warnings
from bs4.builder import (
PERMISSIVE,
HTML,
HTML_5,
HTMLTreeBuilder,
)
from bs4.element import NamespacedAttribute
import html5lib
from html5lib.constants import namespaces
from bs4.element import (
Comment,
Doctype,
NavigableString,
Tag,
)
class HTML5TreeBuilder(HTMLTreeBuilder):
"""Use html5lib to build a tree."""
features = ['html5lib', PERMISSIVE, HTML_5, HTML]
def prepare_markup(self, markup, user_specified_encoding):
# Store the user-specified encoding for use later on.
self.user_specified_encoding = user_specified_encoding
return markup, None, None, False
# These methods are defined by Beautiful Soup.
def feed(self, markup):
if self.soup.parse_only is not None:
warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
parser = html5lib.HTMLParser(tree=self.create_treebuilder)
doc = parser.parse(markup, encoding=self.user_specified_encoding)
# Set the character encoding detected by the tokenizer.
if isinstance(markup, unicode):
# We need to special-case this because html5lib sets
# charEncoding to UTF-8 if it gets Unicode input.
doc.original_encoding = None
else:
doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
def create_treebuilder(self, namespaceHTMLElements):
self.underlying_builder = TreeBuilderForHtml5lib(
self.soup, namespaceHTMLElements)
return self.underlying_builder
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><head></head><body>%s</body></html>' % fragment
class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
def __init__(self, soup, namespaceHTMLElements):
self.soup = soup
super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
def documentClass(self):
self.soup.reset()
return Element(self.soup, self.soup, None)
def insertDoctype(self, token):
name = token["name"]
publicId = token["publicId"]
systemId = token["systemId"]
doctype = Doctype.for_name_and_ids(name, publicId, systemId)
self.soup.object_was_parsed(doctype)
def elementClass(self, name, namespace):
tag = self.soup.new_tag(name, namespace)
return Element(tag, self.soup, namespace)
def commentClass(self, data):
return TextNode(Comment(data), self.soup)
def fragmentClass(self):
self.soup = BeautifulSoup("")
self.soup.name = "[document_fragment]"
return Element(self.soup, self.soup, None)
def appendChild(self, node):
# XXX This code is not covered by the BS4 tests.
self.soup.append(node.element)
def getDocument(self):
return self.soup
def getFragment(self):
return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
class AttrList(object):
def __init__(self, element):
self.element = element
self.attrs = dict(self.element.attrs)
def __iter__(self):
return list(self.attrs.items()).__iter__()
def __setitem__(self, name, value):
"set attr", name, value
self.element[name] = value
def items(self):
return list(self.attrs.items())
def keys(self):
return list(self.attrs.keys())
def __len__(self):
return len(self.attrs)
def __getitem__(self, name):
return self.attrs[name]
def __contains__(self, name):
return name in list(self.attrs.keys())
class Element(html5lib.treebuilders._base.Node):
def __init__(self, element, soup, namespace):
html5lib.treebuilders._base.Node.__init__(self, element.name)
self.element = element
self.soup = soup
self.namespace = namespace
def appendChild(self, node):
if (node.element.__class__ == NavigableString and self.element.contents
and self.element.contents[-1].__class__ == NavigableString):
# Concatenate new text onto old text node
# XXX This has O(n^2) performance, for input like
# "a</a>a</a>a</a>..."
old_element = self.element.contents[-1]
new_element = self.soup.new_string(old_element + node.element)
old_element.replace_with(new_element)
self.soup._most_recent_element = new_element
else:
self.soup.object_was_parsed(node.element, parent=self.element)
def getAttributes(self):
return AttrList(self.element)
def setAttributes(self, attributes):
if attributes is not None and len(attributes) > 0:
converted_attributes = []
for name, value in list(attributes.items()):
if isinstance(name, tuple):
new_name = NamespacedAttribute(*name)
del attributes[name]
attributes[new_name] = value
self.soup.builder._replace_cdata_list_attribute_values(
self.name, attributes)
for name, value in attributes.items():
self.element[name] = value
# The attributes may contain variables that need substitution.
# Call set_up_substitutions manually.
#
# The Tag constructor called this method when the Tag was created,
# but we just set/changed the attributes, so call it again.
self.soup.builder.set_up_substitutions(self.element)
attributes = property(getAttributes, setAttributes)
def insertText(self, data, insertBefore=None):
text = TextNode(self.soup.new_string(data), self.soup)
if insertBefore:
self.insertBefore(text, insertBefore)
else:
self.appendChild(text)
def insertBefore(self, node, refNode):
index = self.element.index(refNode.element)
if (node.element.__class__ == NavigableString and self.element.contents
and self.element.contents[index-1].__class__ == NavigableString):
# (See comments in appendChild)
old_node = self.element.contents[index-1]
new_str = self.soup.new_string(old_node + node.element)
old_node.replace_with(new_str)
else:
self.element.insert(index, node.element)
node.parent = self
def removeChild(self, node):
node.element.extract()
def reparentChildren(self, newParent):
while self.element.contents:
child = self.element.contents[0]
child.extract()
if isinstance(child, Tag):
newParent.appendChild(
Element(child, self.soup, namespaces["html"]))
else:
newParent.appendChild(
TextNode(child, self.soup))
def cloneNode(self):
tag = self.soup.new_tag(self.element.name, self.namespace)
node = Element(tag, self.soup, self.namespace)
for key,value in self.attributes:
node.attributes[key] = value
return node
def hasContent(self):
return self.element.contents
def getNameTuple(self):
        if self.namespace is None:
return namespaces["html"], self.name
else:
return self.namespace, self.name
nameTuple = property(getNameTuple)
class TextNode(Element):
def __init__(self, element, soup):
html5lib.treebuilders._base.Node.__init__(self, None)
self.element = element
self.soup = soup
def cloneNode(self):
raise NotImplementedError


@@ -1,249 +0,0 @@
"""Use the HTMLParser library to parse HTML files that aren't too bad."""
__all__ = [
'HTMLParserTreeBuilder',
]
from HTMLParser import (
HTMLParser,
HTMLParseError,
)
import sys
import warnings
# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
# argument, which we'd like to set to False. Unfortunately,
# http://bugs.python.org/issue13273 makes strict=True a better bet
# before Python 3.2.3.
#
# At the end of this file, we monkeypatch HTMLParser so that
# strict=True works well on Python 3.2.2.
major, minor, release = sys.version_info[:3]
CONSTRUCTOR_TAKES_STRICT = (
major > 3
or (major == 3 and minor > 2)
or (major == 3 and minor == 2 and release >= 3))
from bs4.element import (
CData,
Comment,
Declaration,
Doctype,
ProcessingInstruction,
)
from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
HTML,
HTMLTreeBuilder,
STRICT,
)
HTMLPARSER = 'html.parser'
class BeautifulSoupHTMLParser(HTMLParser):
def handle_starttag(self, name, attrs):
# XXX namespace
self.soup.handle_starttag(name, None, None, dict(attrs))
def handle_endtag(self, name):
self.soup.handle_endtag(name)
def handle_data(self, data):
self.soup.handle_data(data)
def handle_charref(self, name):
# XXX workaround for a bug in HTMLParser. Remove this once
# it's fixed.
if name.startswith('x'):
real_name = int(name.lstrip('x'), 16)
elif name.startswith('X'):
real_name = int(name.lstrip('X'), 16)
else:
real_name = int(name)
try:
data = unichr(real_name)
except (ValueError, OverflowError), e:
data = u"\N{REPLACEMENT CHARACTER}"
self.handle_data(data)
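    # For example, both handle_charref('x20AC') and handle_charref('8364')
    # hand u'\u20ac' (EURO SIGN) to handle_data.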
def handle_entityref(self, name):
character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
if character is not None:
data = character
else:
data = "&%s;" % name
self.handle_data(data)
def handle_comment(self, data):
self.soup.endData()
self.soup.handle_data(data)
self.soup.endData(Comment)
def handle_decl(self, data):
self.soup.endData()
if data.startswith("DOCTYPE "):
data = data[len("DOCTYPE "):]
elif data == 'DOCTYPE':
# i.e. "<!DOCTYPE>"
data = ''
self.soup.handle_data(data)
self.soup.endData(Doctype)
def unknown_decl(self, data):
if data.upper().startswith('CDATA['):
cls = CData
data = data[len('CDATA['):]
else:
cls = Declaration
self.soup.endData()
self.soup.handle_data(data)
self.soup.endData(cls)
def handle_pi(self, data):
self.soup.endData()
if data.endswith("?") and data.lower().startswith("xml"):
# "An XHTML processing instruction using the trailing '?'
# will cause the '?' to be included in data." - HTMLParser
# docs.
#
# Strip the question mark so we don't end up with two
# question marks.
data = data[:-1]
self.soup.handle_data(data)
self.soup.endData(ProcessingInstruction)
class HTMLParserTreeBuilder(HTMLTreeBuilder):
is_xml = False
features = [HTML, STRICT, HTMLPARSER]
def __init__(self, *args, **kwargs):
if CONSTRUCTOR_TAKES_STRICT:
kwargs['strict'] = False
self.parser_args = (args, kwargs)
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
"""
:return: A 4-tuple (markup, original encoding, encoding
declared within markup, whether any characters had to be
replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
return markup, None, None, False
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
return (dammit.markup, dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters)
def feed(self, markup):
args, kwargs = self.parser_args
parser = BeautifulSoupHTMLParser(*args, **kwargs)
parser.soup = self.soup
try:
parser.feed(markup)
except HTMLParseError, e:
warnings.warn(RuntimeWarning(
"Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
raise e
# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
# string.
#
# XXX This code can be removed once most Python 3 users are on 3.2.3.
if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
import re
attrfind_tolerant = re.compile(
r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)
BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
from html.parser import tagfind, attrfind
def parse_starttag(self, i):
self.__starttag_text = None
endpos = self.check_for_whole_start_tag(i)
if endpos < 0:
return endpos
rawdata = self.rawdata
self.__starttag_text = rawdata[i:endpos]
# Now parse the data between i+1 and j into a tag and attrs
attrs = []
match = tagfind.match(rawdata, i+1)
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = rawdata[i+1:k].lower()
while k < endpos:
if self.strict:
m = attrfind.match(rawdata, k)
else:
m = attrfind_tolerant.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)
if not rest:
attrvalue = None
elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
attrvalue[:1] == '"' == attrvalue[-1:]:
attrvalue = attrvalue[1:-1]
if attrvalue:
attrvalue = self.unescape(attrvalue)
attrs.append((attrname.lower(), attrvalue))
k = m.end()
end = rawdata[k:endpos].strip()
if end not in (">", "/>"):
lineno, offset = self.getpos()
if "\n" in self.__starttag_text:
lineno = lineno + self.__starttag_text.count("\n")
offset = len(self.__starttag_text) \
- self.__starttag_text.rfind("\n")
else:
offset = offset + len(self.__starttag_text)
if self.strict:
self.error("junk characters in start tag: %r"
% (rawdata[k:endpos][:20],))
self.handle_data(rawdata[i:endpos])
return endpos
if end.endswith('/>'):
# XHTML-style empty tag: <span attr="value" />
self.handle_startendtag(tag, attrs)
else:
self.handle_starttag(tag, attrs)
if tag in self.CDATA_CONTENT_ELEMENTS:
self.set_cdata_mode(tag)
return endpos
def set_cdata_mode(self, elem):
self.cdata_elem = elem.lower()
self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
BeautifulSoupHTMLParser.parse_starttag = parse_starttag
BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
CONSTRUCTOR_TAKES_STRICT = True


@@ -1,199 +0,0 @@
__all__ = [
'LXMLTreeBuilderForXML',
'LXMLTreeBuilder',
]
from io import BytesIO
from StringIO import StringIO
import collections
from lxml import etree
from bs4.element import Comment, Doctype, NamespacedAttribute
from bs4.builder import (
FAST,
HTML,
HTMLTreeBuilder,
PERMISSIVE,
TreeBuilder,
XML)
from bs4.dammit import UnicodeDammit
LXML = 'lxml'
class LXMLTreeBuilderForXML(TreeBuilder):
DEFAULT_PARSER_CLASS = etree.XMLParser
is_xml = True
# Well, it's permissive by XML parser standards.
features = [LXML, XML, FAST, PERMISSIVE]
CHUNK_SIZE = 512
# This namespace mapping is specified in the XML Namespace
# standard.
DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"}
@property
def default_parser(self):
# This can either return a parser object or a class, which
# will be instantiated with default arguments.
return etree.XMLParser(target=self, strip_cdata=False, recover=True)
def __init__(self, parser=None, empty_element_tags=None):
if empty_element_tags is not None:
self.empty_element_tags = set(empty_element_tags)
if parser is None:
# Use the default parser.
parser = self.default_parser
if isinstance(parser, collections.Callable):
# Instantiate the parser with default arguments
parser = parser(target=self, strip_cdata=False)
self.parser = parser
self.soup = None
self.nsmaps = [self.DEFAULT_NSMAPS]
def _getNsTag(self, tag):
# Split the namespace URL out of a fully-qualified lxml tag
# name. Copied from lxml's src/lxml/sax.py.
if tag[0] == '{':
return tuple(tag[1:].split('}', 1))
else:
return (None, tag)
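    # For example:
    #
    #     self._getNsTag('{http://www.w3.org/1999/xhtml}body')
    #     # -> ('http://www.w3.org/1999/xhtml', 'body')
    #     self._getNsTag('body')
    #     # -> (None, 'body')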
def prepare_markup(self, markup, user_specified_encoding=None,
document_declared_encoding=None):
"""
        :return: A 4-tuple (markup, original encoding, encoding
          declared within markup, whether any characters had to be
          replaced with REPLACEMENT CHARACTER).
"""
if isinstance(markup, unicode):
return markup, None, None, False
try_encodings = [user_specified_encoding, document_declared_encoding]
dammit = UnicodeDammit(markup, try_encodings, is_html=True)
return (dammit.markup, dammit.original_encoding,
dammit.declared_html_encoding,
dammit.contains_replacement_characters)
def feed(self, markup):
if isinstance(markup, bytes):
markup = BytesIO(markup)
elif isinstance(markup, unicode):
markup = StringIO(markup)
# Call feed() at least once, even if the markup is empty,
# or the parser won't be initialized.
data = markup.read(self.CHUNK_SIZE)
self.parser.feed(data)
while data != '':
# Now call feed() on the rest of the data, chunk by chunk.
data = markup.read(self.CHUNK_SIZE)
if data != '':
self.parser.feed(data)
self.parser.close()
def close(self):
self.nsmaps = [self.DEFAULT_NSMAPS]
def start(self, name, attrs, nsmap={}):
# Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
attrs = dict(attrs)
nsprefix = None
# Invert each namespace map as it comes in.
        if len(nsmap) == 0 and len(self.nsmaps) > 1:
# There are no new namespaces for this tag, but
# non-default namespaces are in play, so we need a
# separate tag stack to know when they end.
self.nsmaps.append(None)
elif len(nsmap) > 0:
# A new namespace mapping has come into play.
inverted_nsmap = dict((value, key) for key, value in nsmap.items())
self.nsmaps.append(inverted_nsmap)
# Also treat the namespace mapping as a set of attributes on the
# tag, so we can recreate it later.
attrs = attrs.copy()
for prefix, namespace in nsmap.items():
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
# Namespaces are in play. Find any attributes that came in
# from lxml with namespaces attached to their names, and
            # turn them into NamespacedAttribute objects.
new_attrs = {}
for attr, value in attrs.items():
namespace, attr = self._getNsTag(attr)
if namespace is None:
new_attrs[attr] = value
else:
nsprefix = self._prefix_for_namespace(namespace)
attr = NamespacedAttribute(nsprefix, attr, namespace)
new_attrs[attr] = value
attrs = new_attrs
namespace, name = self._getNsTag(name)
nsprefix = self._prefix_for_namespace(namespace)
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
def _prefix_for_namespace(self, namespace):
"""Find the currently active prefix for the given namespace."""
if namespace is None:
return None
for inverted_nsmap in reversed(self.nsmaps):
if inverted_nsmap is not None and namespace in inverted_nsmap:
return inverted_nsmap[namespace]
return None
def end(self, name):
self.soup.endData()
completed_tag = self.soup.tagStack[-1]
namespace, name = self._getNsTag(name)
nsprefix = None
if namespace is not None:
for inverted_nsmap in reversed(self.nsmaps):
if inverted_nsmap is not None and namespace in inverted_nsmap:
nsprefix = inverted_nsmap[namespace]
break
self.soup.handle_endtag(name, nsprefix)
if len(self.nsmaps) > 1:
# This tag, or one of its parents, introduced a namespace
# mapping, so pop it off the stack.
self.nsmaps.pop()
def pi(self, target, data):
pass
def data(self, content):
self.soup.handle_data(content)
def doctype(self, name, pubid, system):
self.soup.endData()
doctype = Doctype.for_name_and_ids(name, pubid, system)
self.soup.object_was_parsed(doctype)
def comment(self, content):
"Handle comments as Comment objects."
self.soup.endData()
self.soup.handle_data(content)
self.soup.endData(Comment)
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
features = [LXML, HTML, FAST, PERMISSIVE]
is_xml = False
@property
def default_parser(self):
return etree.HTMLParser
def feed(self, markup):
self.parser.feed(markup)
self.parser.close()
def test_fragment_to_document(self, fragment):
"""See `TreeBuilder`."""
return u'<html><body>%s</body></html>' % fragment


@@ -1,827 +0,0 @@
# -*- coding: utf-8 -*-
"""Beautiful Soup bonus library: Unicode, Dammit
This class forces XML data into a standard format (usually to UTF-8 or
Unicode). It is heavily based on code from Mark Pilgrim's Universal
Feed Parser. It does not rewrite the XML or HTML to reflect a new
encoding; that's the tree builder's job.
"""
import codecs
from htmlentitydefs import codepoint2name
import re
import logging
# Import a library to autodetect character encodings.
chardet_type = None
try:
# First try the fast C implementation.
# PyPI package: cchardet
import cchardet
def chardet_dammit(s):
return cchardet.detect(s)['encoding']
except ImportError:
try:
# Fall back to the pure Python implementation
# Debian package: python-chardet
# PyPI package: chardet
import chardet
def chardet_dammit(s):
return chardet.detect(s)['encoding']
#import chardet.constants
#chardet.constants._debug = 1
except ImportError:
# No chardet available.
def chardet_dammit(s):
return None
# Available from http://cjkpython.i18n.org/.
try:
import iconv_codec
except ImportError:
pass
xml_encoding_re = re.compile(
'^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
html_meta_re = re.compile(
'<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
class EntitySubstitution(object):
"""Substitute XML or HTML entities for the corresponding characters."""
def _populate_class_variables():
lookup = {}
reverse_lookup = {}
characters_for_re = []
for codepoint, name in list(codepoint2name.items()):
character = unichr(codepoint)
if codepoint != 34:
# There's no point in turning the quotation mark into
# &quot;, unless it happens within an attribute value, which
# is handled elsewhere.
characters_for_re.append(character)
lookup[character] = name
# But we do want to turn &quot; into the quotation mark.
reverse_lookup[name] = character
re_definition = "[%s]" % "".join(characters_for_re)
return lookup, reverse_lookup, re.compile(re_definition)
(CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
CHARACTER_TO_XML_ENTITY = {
"'": "apos",
'"': "quot",
"&": "amp",
"<": "lt",
">": "gt",
}
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
"&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
")")
AMPERSAND_OR_BRACKET = re.compile("([<>&])")
@classmethod
def _substitute_html_entity(cls, matchobj):
entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
return "&%s;" % entity
@classmethod
def _substitute_xml_entity(cls, matchobj):
"""Used with a regular expression to substitute the
appropriate XML entity for an XML special character."""
entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
return "&%s;" % entity
@classmethod
    def quoted_attribute_value(cls, value):
"""Make a value into a quoted XML attribute, possibly escaping it.
Most strings will be quoted using double quotes.
Bob's Bar -> "Bob's Bar"
If a string contains double quotes, it will be quoted using
single quotes.
Welcome to "my bar" -> 'Welcome to "my bar"'
If a string contains both single and double quotes, the
double quotes will be escaped, and the string will be quoted
using double quotes.
Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;
"""
quote_with = '"'
if '"' in value:
if "'" in value:
# The string contains both single and double
# quotes. Turn the double quotes into
# entities. We quote the double quotes rather than
# the single quotes because the entity name is
# "&quot;" whether this is HTML or XML. If we
# quoted the single quotes, we'd have to decide
# between &apos; and &squot;.
replace_with = "&quot;"
value = value.replace('"', replace_with)
else:
# There are double quotes but no single quotes.
# We can use single quotes to quote the attribute.
quote_with = "'"
return quote_with + value + quote_with
@classmethod
def substitute_xml(cls, value, make_quoted_attribute=False):
"""Substitute XML entities for special XML characters.
:param value: A string to be substituted. The less-than sign
will become &lt;, the greater-than sign will become &gt;,
and any ampersands will become &amp;. If you want ampersands
that appear to be part of an entity definition to be left
alone, use substitute_xml_containing_entities() instead.
:param make_quoted_attribute: If True, then the string will be
quoted, as befits an attribute value.
"""
# Escape angle brackets and ampersands.
value = cls.AMPERSAND_OR_BRACKET.sub(
cls._substitute_xml_entity, value)
if make_quoted_attribute:
value = cls.quoted_attribute_value(value)
return value
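    # For example:
    #
    #     EntitySubstitution.substitute_xml(u'Tom & <Jerry>')
    #     # -> u'Tom &amp; &lt;Jerry&gt;'
    #     EntitySubstitution.substitute_xml(u'Tom & <Jerry>', True)
    #     # -> u'"Tom &amp; &lt;Jerry&gt;"'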
@classmethod
def substitute_xml_containing_entities(
cls, value, make_quoted_attribute=False):
"""Substitute XML entities for special XML characters.
:param value: A string to be substituted. The less-than sign will
become &lt;, the greater-than sign will become &gt;, and any
          ampersands that are not part of an entity definition will
become &amp;.
:param make_quoted_attribute: If True, then the string will be
quoted, as befits an attribute value.
"""
# Escape angle brackets, and ampersands that aren't part of
# entities.
value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
cls._substitute_xml_entity, value)
if make_quoted_attribute:
value = cls.quoted_attribute_value(value)
return value
@classmethod
def substitute_html(cls, s):
"""Replace certain Unicode characters with named HTML entities.
This differs from data.encode(encoding, 'xmlcharrefreplace')
in that the goal is to make the result more readable (to those
with ASCII displays) rather than to recover from
errors. There's absolutely nothing wrong with a UTF-8 string
        containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
character with "&eacute;" will make it more readable to some
people.
"""
return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
cls._substitute_html_entity, s)
class UnicodeDammit:
"""A class for detecting the encoding of a *ML document and
converting it to a Unicode string. If the source encoding is
windows-1252, can replace MS smart quotes with their HTML or XML
equivalents."""
# This dictionary maps commonly seen values for "charset" in HTML
# meta tags to the corresponding Python codec names. It only covers
# values that aren't in Python's aliases and can't be determined
# by the heuristics in find_codec.
CHARSET_ALIASES = {"macintosh": "mac-roman",
"x-sjis": "shift-jis"}
ENCODINGS_WITH_SMART_QUOTES = [
"windows-1252",
"iso-8859-1",
"iso-8859-2",
]
def __init__(self, markup, override_encodings=[],
smart_quotes_to=None, is_html=False):
self.declared_html_encoding = None
self.smart_quotes_to = smart_quotes_to
self.tried_encodings = []
self.contains_replacement_characters = False
if markup == '' or isinstance(markup, unicode):
self.markup = markup
self.unicode_markup = unicode(markup)
self.original_encoding = None
return
new_markup, document_encoding, sniffed_encoding = \
self._detectEncoding(markup, is_html)
self.markup = new_markup
u = None
if new_markup != markup:
# _detectEncoding modified the markup, then converted it to
# Unicode and then to UTF-8. So convert it from UTF-8.
u = self._convert_from("utf8")
self.original_encoding = sniffed_encoding
if not u:
for proposed_encoding in (
override_encodings + [document_encoding, sniffed_encoding]):
if proposed_encoding is not None:
u = self._convert_from(proposed_encoding)
if u:
break
        # If no luck and we have an auto-detection library, try that:
if not u and not isinstance(self.markup, unicode):
u = self._convert_from(chardet_dammit(self.markup))
# As a last resort, try utf-8 and windows-1252:
if not u:
for proposed_encoding in ("utf-8", "windows-1252"):
u = self._convert_from(proposed_encoding)
if u:
break
# As an absolute last resort, try the encodings again with
# character replacement.
if not u:
for proposed_encoding in (
override_encodings + [
document_encoding, sniffed_encoding, "utf-8", "windows-1252"]):
if proposed_encoding != "ascii":
u = self._convert_from(proposed_encoding, "replace")
if u is not None:
logging.warning(
"Some characters could not be decoded, and were "
"replaced with REPLACEMENT CHARACTER.")
self.contains_replacement_characters = True
break
# We could at this point force it to ASCII, but that would
# destroy so much data that I think giving up is better
self.unicode_markup = u
if not u:
self.original_encoding = None
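    # A usage sketch (the encoding actually detected depends on the
    # override list and on which detection libraries are installed):
    #
    #     dammit = UnicodeDammit("Sacr\xe9 bleu!", ["latin-1", "iso-8859-1"])
    #     dammit.unicode_markup    # u'Sacr\xe9 bleu!'
    #     dammit.original_encoding # 'latin-1'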
def _sub_ms_char(self, match):
"""Changes a MS smart quote character to an XML or HTML
entity, or an ASCII character."""
orig = match.group(1)
if self.smart_quotes_to == 'ascii':
sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
else:
sub = self.MS_CHARS.get(orig)
if type(sub) == tuple:
if self.smart_quotes_to == 'xml':
sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
else:
sub = '&'.encode() + sub[0].encode() + ';'.encode()
else:
sub = sub.encode()
return sub
def _convert_from(self, proposed, errors="strict"):
proposed = self.find_codec(proposed)
if not proposed or (proposed, errors) in self.tried_encodings:
return None
self.tried_encodings.append((proposed, errors))
markup = self.markup
# Convert smart quotes to HTML if coming from an encoding
# that might have them.
if (self.smart_quotes_to is not None
and proposed.lower() in self.ENCODINGS_WITH_SMART_QUOTES):
smart_quotes_re = b"([\x80-\x9f])"
smart_quotes_compiled = re.compile(smart_quotes_re)
markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
try:
#print "Trying to convert document to %s (errors=%s)" % (
# proposed, errors)
u = self._to_unicode(markup, proposed, errors)
self.markup = u
self.original_encoding = proposed
except Exception as e:
#print "That didn't work!"
#print e
return None
#print "Correct encoding: %s" % proposed
return self.markup
def _to_unicode(self, data, encoding, errors="strict"):
'''Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases'''
# strip Byte Order Mark (if present)
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
and (data[2:4] != '\x00\x00'):
encoding = 'utf-16be'
data = data[2:]
elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
and (data[2:4] != '\x00\x00'):
encoding = 'utf-16le'
data = data[2:]
elif data[:3] == '\xef\xbb\xbf':
encoding = 'utf-8'
data = data[3:]
elif data[:4] == '\x00\x00\xfe\xff':
encoding = 'utf-32be'
data = data[4:]
elif data[:4] == '\xff\xfe\x00\x00':
encoding = 'utf-32le'
data = data[4:]
newdata = unicode(data, encoding, errors)
return newdata
def _detectEncoding(self, xml_data, is_html=False):
"""Given a document, tries to detect its XML encoding."""
xml_encoding = sniffed_xml_encoding = None
try:
if xml_data[:4] == b'\x4c\x6f\xa7\x94':
# EBCDIC
xml_data = self._ebcdic_to_ascii(xml_data)
elif xml_data[:4] == b'\x00\x3c\x00\x3f':
# UTF-16BE
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xfe\xff') \
and (xml_data[2:4] != b'\x00\x00'):
# UTF-16BE with BOM
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
elif xml_data[:4] == b'\x3c\x00\x3f\x00':
# UTF-16LE
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xff\xfe') and \
(xml_data[2:4] != b'\x00\x00'):
# UTF-16LE with BOM
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
elif xml_data[:4] == b'\x00\x00\x00\x3c':
# UTF-32BE
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
elif xml_data[:4] == b'\x3c\x00\x00\x00':
# UTF-32LE
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
elif xml_data[:4] == b'\x00\x00\xfe\xff':
# UTF-32BE with BOM
sniffed_xml_encoding = 'utf-32be'
xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
elif xml_data[:4] == b'\xff\xfe\x00\x00':
# UTF-32LE with BOM
sniffed_xml_encoding = 'utf-32le'
xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
elif xml_data[:3] == b'\xef\xbb\xbf':
# UTF-8 with BOM
sniffed_xml_encoding = 'utf-8'
xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
else:
sniffed_xml_encoding = 'ascii'
pass
except:
xml_encoding_match = None
xml_encoding_match = xml_encoding_re.match(xml_data)
if not xml_encoding_match and is_html:
xml_encoding_match = html_meta_re.search(xml_data)
if xml_encoding_match is not None:
xml_encoding = xml_encoding_match.groups()[0].decode(
'ascii').lower()
if is_html:
self.declared_html_encoding = xml_encoding
if sniffed_xml_encoding and \
(xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
'iso-10646-ucs-4', 'ucs-4', 'csucs4',
'utf-16', 'utf-32', 'utf_16', 'utf_32',
'utf16', 'u16')):
xml_encoding = sniffed_xml_encoding
return xml_data, xml_encoding, sniffed_xml_encoding
def find_codec(self, charset):
return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
or (charset and self._codec(charset.replace("-", ""))) \
or (charset and self._codec(charset.replace("-", "_"))) \
or charset
def _codec(self, charset):
if not charset:
return charset
codec = None
try:
codecs.lookup(charset)
codec = charset
except (LookupError, ValueError):
pass
return codec
EBCDIC_TO_ASCII_MAP = None
def _ebcdic_to_ascii(self, s):
c = self.__class__
if not c.EBCDIC_TO_ASCII_MAP:
emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
201,202,106,107,108,109,110,111,112,113,114,203,204,205,
206,207,208,209,126,115,116,117,118,119,120,121,122,210,
211,212,213,214,215,216,217,218,219,220,221,222,223,224,
225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
250,251,252,253,254,255)
import string
c.EBCDIC_TO_ASCII_MAP = string.maketrans(
''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
return s.translate(c.EBCDIC_TO_ASCII_MAP)
# A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities.
MS_CHARS = {b'\x80': ('euro', '20AC'),
b'\x81': ' ',
b'\x82': ('sbquo', '201A'),
b'\x83': ('fnof', '192'),
b'\x84': ('bdquo', '201E'),
b'\x85': ('hellip', '2026'),
b'\x86': ('dagger', '2020'),
b'\x87': ('Dagger', '2021'),
b'\x88': ('circ', '2C6'),
b'\x89': ('permil', '2030'),
b'\x8A': ('Scaron', '160'),
b'\x8B': ('lsaquo', '2039'),
b'\x8C': ('OElig', '152'),
b'\x8D': '?',
b'\x8E': ('#x17D', '17D'),
b'\x8F': '?',
b'\x90': '?',
b'\x91': ('lsquo', '2018'),
b'\x92': ('rsquo', '2019'),
b'\x93': ('ldquo', '201C'),
b'\x94': ('rdquo', '201D'),
b'\x95': ('bull', '2022'),
b'\x96': ('ndash', '2013'),
b'\x97': ('mdash', '2014'),
b'\x98': ('tilde', '2DC'),
b'\x99': ('trade', '2122'),
b'\x9a': ('scaron', '161'),
b'\x9b': ('rsaquo', '203A'),
b'\x9c': ('oelig', '153'),
b'\x9d': '?',
b'\x9e': ('#x17E', '17E'),
                b'\x9f': ('Yuml', '178'),}
# A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
# horrors like stripping diacritical marks to turn á into a, but also
# contains non-horrors like turning “ into ".
MS_CHARS_TO_ASCII = {
b'\x80' : 'EUR',
b'\x81' : ' ',
b'\x82' : ',',
b'\x83' : 'f',
b'\x84' : ',,',
b'\x85' : '...',
b'\x86' : '+',
b'\x87' : '++',
b'\x88' : '^',
b'\x89' : '%',
b'\x8a' : 'S',
b'\x8b' : '<',
b'\x8c' : 'OE',
b'\x8d' : '?',
b'\x8e' : 'Z',
b'\x8f' : '?',
b'\x90' : '?',
b'\x91' : "'",
b'\x92' : "'",
b'\x93' : '"',
b'\x94' : '"',
b'\x95' : '*',
b'\x96' : '-',
b'\x97' : '--',
b'\x98' : '~',
b'\x99' : '(TM)',
b'\x9a' : 's',
b'\x9b' : '>',
b'\x9c' : 'oe',
b'\x9d' : '?',
b'\x9e' : 'z',
b'\x9f' : 'Y',
b'\xa0' : ' ',
b'\xa1' : '!',
b'\xa2' : 'c',
b'\xa3' : 'GBP',
b'\xa4' : '$', #This approximation is especially parochial--this is the
#generic currency symbol.
b'\xa5' : 'YEN',
b'\xa6' : '|',
b'\xa7' : 'S',
b'\xa8' : '..',
b'\xa9' : '',
b'\xaa' : '(th)',
b'\xab' : '<<',
b'\xac' : '!',
b'\xad' : ' ',
b'\xae' : '(R)',
b'\xaf' : '-',
b'\xb0' : 'o',
b'\xb1' : '+-',
b'\xb2' : '2',
b'\xb3' : '3',
b'\xb4' : ("'", 'acute'),
b'\xb5' : 'u',
b'\xb6' : 'P',
b'\xb7' : '*',
b'\xb8' : ',',
b'\xb9' : '1',
b'\xba' : '(th)',
b'\xbb' : '>>',
b'\xbc' : '1/4',
b'\xbd' : '1/2',
b'\xbe' : '3/4',
b'\xbf' : '?',
b'\xc0' : 'A',
b'\xc1' : 'A',
b'\xc2' : 'A',
b'\xc3' : 'A',
b'\xc4' : 'A',
b'\xc5' : 'A',
b'\xc6' : 'AE',
b'\xc7' : 'C',
b'\xc8' : 'E',
b'\xc9' : 'E',
b'\xca' : 'E',
b'\xcb' : 'E',
b'\xcc' : 'I',
b'\xcd' : 'I',
b'\xce' : 'I',
b'\xcf' : 'I',
b'\xd0' : 'D',
b'\xd1' : 'N',
b'\xd2' : 'O',
b'\xd3' : 'O',
b'\xd4' : 'O',
b'\xd5' : 'O',
b'\xd6' : 'O',
b'\xd7' : '*',
b'\xd8' : 'O',
b'\xd9' : 'U',
b'\xda' : 'U',
b'\xdb' : 'U',
b'\xdc' : 'U',
b'\xdd' : 'Y',
b'\xde' : 'b',
b'\xdf' : 'B',
b'\xe0' : 'a',
b'\xe1' : 'a',
b'\xe2' : 'a',
b'\xe3' : 'a',
b'\xe4' : 'a',
b'\xe5' : 'a',
b'\xe6' : 'ae',
b'\xe7' : 'c',
b'\xe8' : 'e',
b'\xe9' : 'e',
b'\xea' : 'e',
b'\xeb' : 'e',
b'\xec' : 'i',
b'\xed' : 'i',
b'\xee' : 'i',
b'\xef' : 'i',
b'\xf0' : 'o',
b'\xf1' : 'n',
b'\xf2' : 'o',
b'\xf3' : 'o',
b'\xf4' : 'o',
b'\xf5' : 'o',
b'\xf6' : 'o',
b'\xf7' : '/',
b'\xf8' : 'o',
b'\xf9' : 'u',
b'\xfa' : 'u',
b'\xfb' : 'u',
b'\xfc' : 'u',
b'\xfd' : 'y',
b'\xfe' : 'b',
b'\xff' : 'y',
}
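# For example (a doctest-style sketch mirroring the smart-quote tests
# later in this commit), this is the table that backs
# smart_quotes_to="ascii":
#
# >>> dammit = UnicodeDammit(b"<foo>\x91\x92\x93\x94</foo>",
# ...                        smart_quotes_to="ascii")
# >>> print dammit.unicode_markup
# <foo>''""</foo>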
# A map used when removing rogue Windows-1252/ISO-8859-1
# characters in otherwise UTF-8 documents.
#
# Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
# Windows-1252.
WINDOWS_1252_TO_UTF8 = {
0x80 : b'\xe2\x82\xac', # €
0x82 : b'\xe2\x80\x9a', # ‚
0x83 : b'\xc6\x92', # ƒ
0x84 : b'\xe2\x80\x9e', # „
0x85 : b'\xe2\x80\xa6', # …
0x86 : b'\xe2\x80\xa0', # †
0x87 : b'\xe2\x80\xa1', # ‡
0x88 : b'\xcb\x86', # ˆ
0x89 : b'\xe2\x80\xb0', # ‰
0x8a : b'\xc5\xa0', # Š
0x8b : b'\xe2\x80\xb9', # ‹
0x8c : b'\xc5\x92', # Œ
0x8e : b'\xc5\xbd', # Ž
0x91 : b'\xe2\x80\x98', # ‘
0x92 : b'\xe2\x80\x99', # ’
0x93 : b'\xe2\x80\x9c', # “
0x94 : b'\xe2\x80\x9d', # ”
0x95 : b'\xe2\x80\xa2', # •
0x96 : b'\xe2\x80\x93', # –
0x97 : b'\xe2\x80\x94', # —
0x98 : b'\xcb\x9c', # ˜
0x99 : b'\xe2\x84\xa2', # ™
0x9a : b'\xc5\xa1', # š
0x9b : b'\xe2\x80\xba', # ›
0x9c : b'\xc5\x93', # œ
0x9e : b'\xc5\xbe', # ž
0x9f : b'\xc5\xb8', # Ÿ
0xa0 : b'\xc2\xa0', #  
0xa1 : b'\xc2\xa1', # ¡
0xa2 : b'\xc2\xa2', # ¢
0xa3 : b'\xc2\xa3', # £
0xa4 : b'\xc2\xa4', # ¤
0xa5 : b'\xc2\xa5', # ¥
0xa6 : b'\xc2\xa6', # ¦
0xa7 : b'\xc2\xa7', # §
0xa8 : b'\xc2\xa8', # ¨
0xa9 : b'\xc2\xa9', # ©
0xaa : b'\xc2\xaa', # ª
0xab : b'\xc2\xab', # «
0xac : b'\xc2\xac', # ¬
0xad : b'\xc2\xad', # ­
0xae : b'\xc2\xae', # ®
0xaf : b'\xc2\xaf', # ¯
0xb0 : b'\xc2\xb0', # °
0xb1 : b'\xc2\xb1', # ±
0xb2 : b'\xc2\xb2', # ²
0xb3 : b'\xc2\xb3', # ³
0xb4 : b'\xc2\xb4', # ´
0xb5 : b'\xc2\xb5', # µ
0xb6 : b'\xc2\xb6', # ¶
0xb7 : b'\xc2\xb7', # ·
0xb8 : b'\xc2\xb8', # ¸
0xb9 : b'\xc2\xb9', # ¹
0xba : b'\xc2\xba', # º
0xbb : b'\xc2\xbb', # »
0xbc : b'\xc2\xbc', # ¼
0xbd : b'\xc2\xbd', # ½
0xbe : b'\xc2\xbe', # ¾
0xbf : b'\xc2\xbf', # ¿
0xc0 : b'\xc3\x80', # À
0xc1 : b'\xc3\x81', # Á
0xc2 : b'\xc3\x82', # Â
0xc3 : b'\xc3\x83', # Ã
0xc4 : b'\xc3\x84', # Ä
0xc5 : b'\xc3\x85', # Å
0xc6 : b'\xc3\x86', # Æ
0xc7 : b'\xc3\x87', # Ç
0xc8 : b'\xc3\x88', # È
0xc9 : b'\xc3\x89', # É
0xca : b'\xc3\x8a', # Ê
0xcb : b'\xc3\x8b', # Ë
0xcc : b'\xc3\x8c', # Ì
0xcd : b'\xc3\x8d', # Í
0xce : b'\xc3\x8e', # Î
0xcf : b'\xc3\x8f', # Ï
0xd0 : b'\xc3\x90', # Ð
0xd1 : b'\xc3\x91', # Ñ
0xd2 : b'\xc3\x92', # Ò
0xd3 : b'\xc3\x93', # Ó
0xd4 : b'\xc3\x94', # Ô
0xd5 : b'\xc3\x95', # Õ
0xd6 : b'\xc3\x96', # Ö
0xd7 : b'\xc3\x97', # ×
0xd8 : b'\xc3\x98', # Ø
0xd9 : b'\xc3\x99', # Ù
0xda : b'\xc3\x9a', # Ú
0xdb : b'\xc3\x9b', # Û
0xdc : b'\xc3\x9c', # Ü
0xdd : b'\xc3\x9d', # Ý
0xde : b'\xc3\x9e', # Þ
0xdf : b'\xc3\x9f', # ß
0xe0 : b'\xc3\xa0', # à
0xe1 : b'\xc3\xa1', # á
0xe2 : b'\xc3\xa2', # â
0xe3 : b'\xc3\xa3', # ã
0xe4 : b'\xc3\xa4', # ä
0xe5 : b'\xc3\xa5', # å
0xe6 : b'\xc3\xa6', # æ
0xe7 : b'\xc3\xa7', # ç
0xe8 : b'\xc3\xa8', # è
0xe9 : b'\xc3\xa9', # é
0xea : b'\xc3\xaa', # ê
0xeb : b'\xc3\xab', # ë
0xec : b'\xc3\xac', # ì
0xed : b'\xc3\xad', # í
0xee : b'\xc3\xae', # î
0xef : b'\xc3\xaf', # ï
0xf0 : b'\xc3\xb0', # ð
0xf1 : b'\xc3\xb1', # ñ
0xf2 : b'\xc3\xb2', # ò
0xf3 : b'\xc3\xb3', # ó
0xf4 : b'\xc3\xb4', # ô
0xf5 : b'\xc3\xb5', # õ
0xf6 : b'\xc3\xb6', # ö
0xf7 : b'\xc3\xb7', # ÷
0xf8 : b'\xc3\xb8', # ø
0xf9 : b'\xc3\xb9', # ù
0xfa : b'\xc3\xba', # ú
0xfb : b'\xc3\xbb', # û
0xfc : b'\xc3\xbc', # ü
0xfd : b'\xc3\xbd', # ý
0xfe : b'\xc3\xbe', # þ
}
MULTIBYTE_MARKERS_AND_SIZES = [
(0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
(0xe0, 0xef, 3), # 3-byte characters start with E0-EF
(0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
]
FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
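# For instance, u"\N{SNOWMAN}".encode("utf8") is b'\xe2\x98\x83': the
# lead byte 0xE2 falls in the E0-EF range, so detwingle() below skips
# three bytes instead of mistaking \x98 for a Windows-1252 smart quote.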
@classmethod
def detwingle(cls, in_bytes, main_encoding="utf8",
embedded_encoding="windows-1252"):
"""Fix characters from one encoding embedded in some other encoding.
Currently the only situation supported is Windows-1252 (or its
subset ISO-8859-1), embedded in UTF-8.
The input must be a bytestring. If you've already converted
the document to Unicode, you're too late.
The output is a bytestring in which `embedded_encoding`
characters have been converted to their `main_encoding`
equivalents.
"""
if embedded_encoding.replace('_', '-').lower() not in (
'windows-1252', 'windows_1252'):
raise NotImplementedError(
"Windows-1252 and ISO-8859-1 are the only currently supported "
"embedded encodings.")
if main_encoding.lower() not in ('utf8', 'utf-8'):
raise NotImplementedError(
"UTF-8 is the only currently supported main encoding.")
byte_chunks = []
chunk_start = 0
pos = 0
while pos < len(in_bytes):
byte = in_bytes[pos]
if not isinstance(byte, int):
# Python 2.x
byte = ord(byte)
if (byte >= cls.FIRST_MULTIBYTE_MARKER
and byte <= cls.LAST_MULTIBYTE_MARKER):
# This is the start of a UTF-8 multibyte character. Skip
# to the end.
for start, end, size in cls.MULTIBYTE_MARKERS_AND_SIZES:
if byte >= start and byte <= end:
pos += size
break
elif byte >= 0x80 and byte in cls.WINDOWS_1252_TO_UTF8:
# We found a Windows-1252 character!
# Save the string up to this point as a chunk.
byte_chunks.append(in_bytes[chunk_start:pos])
# Now translate the Windows-1252 character into UTF-8
# and add it as another, one-byte chunk.
byte_chunks.append(cls.WINDOWS_1252_TO_UTF8[byte])
pos += 1
chunk_start = pos
else:
# Go on to the next character.
pos += 1
if chunk_start == 0:
# The string is unchanged.
return in_bytes
else:
# Store the final chunk.
byte_chunks.append(in_bytes[chunk_start:])
return b''.join(byte_chunks)
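# A doctest-style sketch of detwingle() in action, mirroring the
# detwingle tests later in this commit:
#
# >>> utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
# >>> quotes = u"\N{LEFT DOUBLE QUOTATION MARK}Hi!\N{RIGHT DOUBLE QUOTATION MARK}"
# >>> doc = utf8 + quotes.encode("windows-1252") + utf8
# >>> doc.decode("utf8")      # the mixture is not valid UTF-8
# Traceback (most recent call last):
#     ...
# UnicodeDecodeError: ...
# >>> print UnicodeDammit.detwingle(doc).decode("utf8")
# ☃☃☃“Hi!”☃☃☃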

View file

@@ -1,178 +0,0 @@
"""Diagnostic functions, mainly for use when doing tech support."""
from StringIO import StringIO
from HTMLParser import HTMLParser
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
import os
import random
import time
import traceback
import sys
import cProfile
def diagnose(data):
"""Diagnostic suite for isolating common problems."""
print "Diagnostic running on Beautiful Soup %s" % __version__
print "Python version %s" % sys.version
basic_parsers = ["html.parser", "html5lib", "lxml"]
for name in list(basic_parsers):  # iterate over a copy; the loop may remove entries
for builder in builder_registry.builders:
if name in builder.features:
break
else:
basic_parsers.remove(name)
print (
"I noticed that %s is not installed. Installing it may help." %
name)
if 'lxml' in basic_parsers:
basic_parsers.append(["lxml", "xml"])
from lxml import etree
print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
if 'html5lib' in basic_parsers:
import html5lib
print "Found html5lib version %s" % html5lib.__version__
if hasattr(data, 'read'):
data = data.read()
elif os.path.exists(data):
print '"%s" looks like a filename. Reading data from the file.' % data
data = open(data).read()
elif data.startswith("http:") or data.startswith("https:"):
print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
return
print
for parser in basic_parsers:
print "Trying to parse your markup with %s" % parser
success = False
try:
soup = BeautifulSoup(data, parser)
success = True
except Exception, e:
print "%s could not parse the markup." % parser
traceback.print_exc()
if success:
print "Here's what %s did with the markup:" % parser
print soup.prettify()
print "-" * 80
def lxml_trace(data, html=True):
"""Print out the lxml events that occur during parsing.
This lets you see how lxml parses a document when no Beautiful
Soup code is running.
"""
from lxml import etree
for event, element in etree.iterparse(StringIO(data), html=html):
print("%s, %4s, %s" % (event, element.tag, element.text))
class AnnouncingParser(HTMLParser):
"""Announces HTMLParser parse events, without doing anything else."""
def _p(self, s):
print(s)
def handle_starttag(self, name, attrs):
self._p("%s START" % name)
def handle_endtag(self, name):
self._p("%s END" % name)
def handle_data(self, data):
self._p("%s DATA" % data)
def handle_charref(self, name):
self._p("%s CHARREF" % name)
def handle_entityref(self, name):
self._p("%s ENTITYREF" % name)
def handle_comment(self, data):
self._p("%s COMMENT" % data)
def handle_decl(self, data):
self._p("%s DECL" % data)
def unknown_decl(self, data):
self._p("%s UNKNOWN-DECL" % data)
def handle_pi(self, data):
self._p("%s PI" % data)
def htmlparser_trace(data):
"""Print out the HTMLParser events that occur during parsing.
This lets you see how HTMLParser parses a document when no
Beautiful Soup code is running.
"""
parser = AnnouncingParser()
parser.feed(data)
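# Example trace (a sketch):
#
# >>> htmlparser_trace("<b>bold</b>")
# b START
# bold DATA
# b END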
_vowels = "aeiou"
_consonants = "bcdfghjklmnpqrstvwxyz"
def rword(length=5):
"Generate a random word-like string."
s = ''
for i in range(length):
if i % 2 == 0:
t = _consonants
else:
t = _vowels
s += random.choice(t)
return s
def rsentence(length=4):
"Generate a random sentence-like string."
return " ".join(rword(random.randint(4,9)) for i in range(length))
def rdoc(num_elements=1000):
"""Randomly generate an invalid HTML document."""
tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table']
elements = []
for i in range(num_elements):
choice = random.randint(0,3)
if choice == 0:
# New tag.
tag_name = random.choice(tag_names)
elements.append("<%s>" % tag_name)
elif choice == 1:
elements.append(rsentence(random.randint(1,4)))
elif choice == 2:
# Close a tag.
tag_name = random.choice(tag_names)
elements.append("</%s>" % tag_name)
return "<html>" + "\n".join(elements) + "</html>"
def benchmark_parsers(num_elements=100000):
"""Very basic head-to-head performance benchmark."""
print "Comparative parser benchmark on Beautiful Soup %s" % __version__
data = rdoc(num_elements)
print "Generated a large invalid HTML document (%d bytes)." % len(data)
for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
success = False
try:
a = time.time()
soup = BeautifulSoup(data, parser)
b = time.time()
success = True
except Exception, e:
print "%s could not parse the markup." % parser
traceback.print_exc()
if success:
print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
from lxml import etree
a = time.time()
etree.HTML(data)
b = time.time()
print "Raw lxml parsed the markup in %.2fs." % (b-a)
if __name__ == '__main__':
diagnose(sys.stdin.read())

File diff suppressed because it is too large

View file

@@ -1,579 +0,0 @@
"""Helper classes for tests."""
import copy
import functools
import unittest
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
Doctype,
SoupStrainer,
)
from bs4.builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder
class SoupTest(unittest.TestCase):
@property
def default_builder(self):
return default_builder()
def soup(self, markup, **kwargs):
"""Build a Beautiful Soup object from markup."""
builder = kwargs.pop('builder', self.default_builder)
return BeautifulSoup(markup, builder=builder, **kwargs)
def document_for(self, markup):
"""Turn an HTML fragment into a document.
The details depend on the builder.
"""
return self.default_builder.test_fragment_to_document(markup)
def assertSoupEquals(self, to_parse, compare_parsed_to=None):
builder = self.default_builder
obj = BeautifulSoup(to_parse, builder=builder)
if compare_parsed_to is None:
compare_parsed_to = to_parse
self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
class HTMLTreeBuilderSmokeTest(object):
"""A basic test of a treebuilder's competence.
Any HTML treebuilder, present or future, should be able to pass
these tests. With invalid markup, there's room for interpretation,
and different parsers can handle it differently. But with the
markup in these tests, there's not much room for interpretation.
"""
def assertDoctypeHandled(self, doctype_fragment):
"""Assert that a given doctype string is handled correctly."""
doctype_str, soup = self._document_with_doctype(doctype_fragment)
# Make sure a Doctype object was created.
doctype = soup.contents[0]
self.assertEqual(doctype.__class__, Doctype)
self.assertEqual(doctype, doctype_fragment)
self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
# Make sure that the doctype was correctly associated with the
# parse tree and that the rest of the document parsed.
self.assertEqual(soup.p.contents[0], 'foo')
def _document_with_doctype(self, doctype_fragment):
"""Generate and parse a document with the given doctype."""
doctype = '<!DOCTYPE %s>' % doctype_fragment
markup = doctype + '\n<p>foo</p>'
soup = self.soup(markup)
return doctype, soup
def test_normal_doctypes(self):
"""Make sure normal, everyday HTML doctypes are handled correctly."""
self.assertDoctypeHandled("html")
self.assertDoctypeHandled(
'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_public_doctype_with_url(self):
doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
self.assertDoctypeHandled(doctype)
def test_system_doctype(self):
self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
def test_namespaced_system_doctype(self):
# We can handle a namespaced doctype with a system ID.
self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
def test_namespaced_public_doctype(self):
# Test a namespaced doctype with a public id.
self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
def test_real_xhtml_document(self):
"""A real XHTML document should come out more or less the same as it went in."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8").replace(b"\n", b""),
markup.replace(b"\n", b""))
def test_deepcopy(self):
"""Make sure you can copy the tree builder.
This is important because the builder is part of a
BeautifulSoup object, and we want to be able to copy that.
"""
copy.deepcopy(self.default_builder)
def test_p_tag_is_never_empty_element(self):
"""A <p> tag is never designated as an empty-element tag.
Even if the markup shows it as an empty-element tag, it
shouldn't be presented that way.
"""
soup = self.soup("<p/>")
self.assertFalse(soup.p.is_empty_element)
self.assertEqual(str(soup.p), "<p></p>")
def test_unclosed_tags_get_closed(self):
"""A tag that's not closed by the end of the document should be closed.
This applies to all tags except empty-element tags.
"""
self.assertSoupEquals("<p>", "<p></p>")
self.assertSoupEquals("<b>", "<b></b>")
self.assertSoupEquals("<br>", "<br/>")
def test_br_is_always_empty_element_tag(self):
"""A <br> tag is designated as an empty-element tag.
Some parsers treat <br></br> as one <br/> tag, some parsers as
two tags, but it should always be an empty-element tag.
"""
soup = self.soup("<br></br>")
self.assertTrue(soup.br.is_empty_element)
self.assertEqual(str(soup.br), "<br/>")
def test_nested_formatting_elements(self):
self.assertSoupEquals("<em><em></em></em>")
def test_comment(self):
# Comments are represented as Comment objects.
markup = "<p>foo<!--foobar-->baz</p>"
self.assertSoupEquals(markup)
soup = self.soup(markup)
comment = soup.find(text="foobar")
self.assertEqual(comment.__class__, Comment)
# The comment is properly integrated into the tree.
foo = soup.find(text="foo")
self.assertEqual(comment, foo.next_element)
baz = soup.find(text="baz")
self.assertEqual(comment, baz.previous_element)
def test_preserved_whitespace_in_pre_and_textarea(self):
"""Whitespace must be preserved in <pre> and <textarea> tags."""
self.assertSoupEquals("<pre> </pre>")
self.assertSoupEquals("<textarea> woo </textarea>")
def test_nested_inline_elements(self):
"""Inline elements can be nested indefinitely."""
b_tag = "<b>Inside a B tag</b>"
self.assertSoupEquals(b_tag)
nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
self.assertSoupEquals(nested_b_tag)
double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
self.assertSoupEquals(double_nested_b_tag)
def test_nested_block_level_elements(self):
"""Block elements can be nested."""
soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
blockquote = soup.blockquote
self.assertEqual(blockquote.p.b.string, 'Foo')
self.assertEqual(blockquote.b.string, 'Foo')
def test_correctly_nested_tables(self):
"""One table can go inside another one."""
markup = ('<table id="1">'
'<tr>'
"<td>Here's another table:"
'<table id="2">'
'<tr><td>foo</td></tr>'
'</table></td>')
self.assertSoupEquals(
markup,
'<table id="1"><tr><td>Here\'s another table:'
'<table id="2"><tr><td>foo</td></tr></table>'
'</td></tr></table>')
self.assertSoupEquals(
"<table><thead><tr><td>Foo</td></tr></thead>"
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_deeply_nested_multivalued_attribute(self):
# html5lib can set the attributes of the same tag many times
# as it rearranges the tree. This has caused problems with
# multivalued attributes.
markup = '<table><div><div class="css"></div></div></table>'
soup = self.soup(markup)
self.assertEqual(["css"], soup.div.div['class'])
def test_angle_brackets_in_attribute_values_are_escaped(self):
self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
def test_entities_in_attributes_converted_to_unicode(self):
expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
def test_entities_in_text_converted_to_unicode(self):
expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
def test_quot_entity_converted_to_quotation_mark(self):
self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
'<p>I said "good day!"</p>')
def test_out_of_range_entity(self):
expect = u"\N{REPLACEMENT CHARACTER}"
self.assertSoupEquals("&#10000000000000;", expect)
self.assertSoupEquals("&#x10000000000000;", expect)
self.assertSoupEquals("&#1000000000;", expect)
def test_multipart_strings(self):
"Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
self.assertEqual("p", soup.h2.string.next_element.name)
self.assertEqual("p", soup.p.name)
def test_basic_namespaces(self):
"""Parsers don't need to *understand* namespaces, but at the
very least they should not choke on namespaces or lose
data."""
markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
soup = self.soup(markup)
self.assertEqual(markup, soup.encode())
html = soup.html
self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
self.assertEqual(
'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
self.assertEqual(
'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
def test_multivalued_attribute_value_becomes_list(self):
markup = b'<a class="foo bar">'
soup = self.soup(markup)
self.assertEqual(['foo', 'bar'], soup.a['class'])
#
# Generally speaking, tests below this point are more tests of
# Beautiful Soup than tests of the tree builders. But parsers are
# weird, so we run these tests separately for every tree builder
# to detect any differences between them.
#
def test_soupstrainer(self):
"""Parsers should be able to work with SoupStrainers."""
strainer = SoupStrainer("b")
soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
parse_only=strainer)
self.assertEqual(soup.decode(), "<b>bold</b>")
def test_single_quote_attribute_values_become_double_quotes(self):
self.assertSoupEquals("<foo attr='bar'></foo>",
'<foo attr="bar"></foo>')
def test_attribute_values_with_nested_quotes_are_left_alone(self):
text = """<foo attr='bar "brawls" happen'>a</foo>"""
self.assertSoupEquals(text)
def test_attribute_values_with_double_nested_quotes_get_quoted(self):
text = """<foo attr='bar "brawls" happen'>a</foo>"""
soup = self.soup(text)
soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
self.assertSoupEquals(
soup.foo.decode(),
"""<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
def test_ampersand_in_attribute_value_gets_escaped(self):
self.assertSoupEquals('<this is="really messed up & stuff"></this>',
'<this is="really messed up &amp; stuff"></this>')
self.assertSoupEquals(
'<a href="http://example.org?a=1&b=2;3">foo</a>',
'<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
def test_entities_in_strings_converted_during_parsing(self):
# Both XML and HTML entities are converted to Unicode characters
# during parsing.
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
self.assertSoupEquals(text, expected)
def test_smart_quotes_converted_on_the_way_in(self):
# Microsoft smart quotes are converted to Unicode characters during
# parsing.
quote = b"<p>\x91Foo\x92</p>"
soup = self.soup(quote)
self.assertEqual(
soup.p.string,
u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
def test_non_breaking_spaces_converted_on_the_way_in(self):
soup = self.soup("<a>&nbsp;&nbsp;</a>")
self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
def test_entities_converted_on_the_way_out(self):
text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
soup = self.soup(text)
self.assertEqual(soup.p.encode("utf-8"), expected)
def test_real_iso_latin_document(self):
# Smoke test of interrelated functionality, using an
# easy-to-understand document.
# Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
# That's because we're going to encode it into ISO-Latin-1, and use
# that to test.
iso_latin_html = unicode_html.encode("iso-8859-1")
# Parse the ISO-Latin-1 HTML.
soup = self.soup(iso_latin_html)
# Encode it to UTF-8.
result = soup.encode("utf-8")
# What do we expect the result to look like? Well, it would
# look like unicode_html, except that the META tag would say
# UTF-8 instead of ISO-Latin-1.
expected = unicode_html.replace("ISO-Latin-1", "utf-8")
# And, of course, it would be in UTF-8, not Unicode.
expected = expected.encode("utf-8")
# Ta-da!
self.assertEqual(result, expected)
def test_real_shift_jis_document(self):
# Smoke test to make sure the parser can handle a document in
# Shift-JIS encoding, without choking.
shift_jis_html = (
b'<html><head></head><body><pre>'
b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
b'</pre></body></html>')
unicode_html = shift_jis_html.decode("shift-jis")
soup = self.soup(unicode_html)
# Make sure the parse tree is correctly encoded to various
# encodings.
self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
def test_real_hebrew_document(self):
# A real-world test to make sure we can convert ISO-8859-8 (a
# Hebrew encoding) to UTF-8.
hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
soup = self.soup(
hebrew_document, from_encoding="iso8859-8")
self.assertEqual(soup.original_encoding, 'iso8859-8')
self.assertEqual(
soup.encode('utf-8'),
hebrew_document.decode("iso8859-8").encode("utf-8"))
def test_meta_tag_reflects_current_encoding(self):
# Here's the <meta> tag saying that a document is
# encoded in Shift-JIS.
meta_tag = ('<meta content="text/html; charset=x-sjis" '
'http-equiv="Content-type"/>')
# Here's a document incorporating that meta tag.
shift_jis_html = (
'<html><head>\n%s\n'
'<meta http-equiv="Content-language" content="ja"/>'
'</head><body>Shift-JIS markup goes here.') % meta_tag
soup = self.soup(shift_jis_html)
# Parse the document, and the charset is seemingly unaffected.
parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
content = parsed_meta['content']
self.assertEqual('text/html; charset=x-sjis', content)
# But that value is actually a ContentMetaAttributeValue object.
self.assertTrue(isinstance(content, ContentMetaAttributeValue))
# And it will take on a value that reflects its current
# encoding.
self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
# For the rest of the story, see TestSubstitutions in
# test_tree.py.
def test_html5_style_meta_tag_reflects_current_encoding(self):
# Here's the <meta> tag saying that a document is
# encoded in Shift-JIS.
meta_tag = ('<meta id="encoding" charset="x-sjis" />')
# Here's a document incorporating that meta tag.
shift_jis_html = (
'<html><head>\n%s\n'
'<meta http-equiv="Content-language" content="ja"/>'
'</head><body>Shift-JIS markup goes here.') % meta_tag
soup = self.soup(shift_jis_html)
# Parse the document, and the charset is seemingly unaffected.
parsed_meta = soup.find('meta', id="encoding")
charset = parsed_meta['charset']
self.assertEqual('x-sjis', charset)
# But that value is actually a CharsetMetaAttributeValue object.
self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
# And it will take on a value that reflects its current
# encoding.
self.assertEqual('utf8', charset.encode("utf8"))
def test_tag_with_no_attributes_can_have_attributes_added(self):
data = self.soup("<a>text</a>")
data.a['foo'] = 'bar'
self.assertEqual('<a foo="bar">text</a>', data.a.decode())
class XMLTreeBuilderSmokeTest(object):
def test_docstring_generated(self):
soup = self.soup("<root/>")
self.assertEqual(
soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
def test_real_xhtml_document(self):
"""A real XHTML document should come out *exactly* the same as it went in."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8"), markup)
def test_formatter_processes_script_tag_for_xml_documents(self):
doc = """
<script type="text/javascript">
</script>
"""
soup = BeautifulSoup(doc, "xml")
# lxml would have stripped this while parsing, but we can add
# it later.
soup.script.string = 'console.log("< < hey > > ");'
encoded = soup.encode()
self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
def test_popping_namespaced_tag(self):
markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
soup = self.soup(markup)
self.assertEqual(
unicode(soup.rss), markup)
def test_docstring_includes_correct_encoding(self):
soup = self.soup("<root/>")
self.assertEqual(
soup.encode("latin1"),
b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
def test_large_xml_document(self):
"""A large XML document should come out the same as it went in."""
markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
+ b'0' * (2**12)
+ b'</root>')
soup = self.soup(markup)
self.assertEqual(soup.encode("utf-8"), markup)
def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
self.assertSoupEquals("<p>", "<p/>")
self.assertSoupEquals("<p>foo</p>")
def test_namespaces_are_preserved(self):
markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
soup = self.soup(markup)
root = soup.root
self.assertEqual("http://example.com/", root['xmlns:a'])
self.assertEqual("http://example.net/", root['xmlns:b'])
def test_closing_namespaced_tag(self):
markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.p), markup)
def test_namespaced_attributes(self):
markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
def test_namespaced_attributes_xml_namespace(self):
markup = '<foo xml:lang="fr">bar</foo>'
soup = self.soup(markup)
self.assertEqual(unicode(soup.foo), markup)
class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
"""Smoke test for a tree builder that supports HTML5."""
def test_real_xhtml_document(self):
# Since XHTML is not HTML5, HTML5 parsers are not tested to handle
# XHTML documents in any particular way.
pass
def test_html_tags_have_namespace(self):
markup = "<a>"
soup = self.soup(markup)
self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
def test_svg_tags_have_namespace(self):
markup = '<svg><circle/></svg>'
soup = self.soup(markup)
namespace = "http://www.w3.org/2000/svg"
self.assertEqual(namespace, soup.svg.namespace)
self.assertEqual(namespace, soup.circle.namespace)
def test_mathml_tags_have_namespace(self):
markup = '<math><msqrt>5</msqrt></math>'
soup = self.soup(markup)
namespace = 'http://www.w3.org/1998/Math/MathML'
self.assertEqual(namespace, soup.math.namespace)
self.assertEqual(namespace, soup.msqrt.namespace)
def test_xml_declaration_becomes_comment(self):
markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
soup = self.soup(markup)
self.assertTrue(isinstance(soup.contents[0], Comment))
self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
self.assertEqual("html", soup.contents[0].next_element.name)
def skipIf(condition, reason):
def nothing(test, *args, **kwargs):
return None
def decorator(test_item):
if condition:
return nothing
else:
return test_item
return decorator

View file

@@ -1 +0,0 @@
"The beautifulsoup tests."

View file

@@ -1,141 +0,0 @@
"""Tests of the builder registry."""
import unittest
from bs4 import BeautifulSoup
from bs4.builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from bs4.builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)
LXML_PRESENT = True
except ImportError:
LXML_PRESENT = False
class BuiltInRegistryTest(unittest.TestCase):
"""Test the built-in registry with the default builders registered."""
def test_combination(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('fast', 'html'),
LXMLTreeBuilder)
if LXML_PRESENT:
self.assertEqual(registry.lookup('permissive', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('strict', 'html'),
HTMLParserTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib', 'html'),
HTML5TreeBuilder)
def test_lookup_by_markup_type(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
else:
self.assertEqual(registry.lookup('xml'), None)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
else:
self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
def test_named_library(self):
if LXML_PRESENT:
self.assertEqual(registry.lookup('lxml', 'xml'),
LXMLTreeBuilderForXML)
self.assertEqual(registry.lookup('lxml', 'html'),
LXMLTreeBuilder)
if HTML5LIB_PRESENT:
self.assertEqual(registry.lookup('html5lib'),
HTML5TreeBuilder)
self.assertEqual(registry.lookup('html.parser'),
HTMLParserTreeBuilder)
def test_beautifulsoup_constructor_does_lookup(self):
# You can pass in a string.
BeautifulSoup("", features="html")
# Or a list of strings.
BeautifulSoup("", features=["html", "fast"])
# You'll get an exception if BS can't find an appropriate
# builder.
self.assertRaises(ValueError, BeautifulSoup,
"", features="no-such-feature")
class RegistryTest(unittest.TestCase):
"""Test the TreeBuilderRegistry class in general."""
def setUp(self):
self.registry = TreeBuilderRegistry()
def builder_for_features(self, *feature_list):
cls = type('Builder_' + '_'.join(feature_list),
(object,), {'features' : feature_list})
self.registry.register(cls)
return cls
def test_register_with_no_features(self):
builder = self.builder_for_features()
# Since the builder advertises no features, you can't find it
# by looking up features.
self.assertEqual(self.registry.lookup('foo'), None)
# But you can find it by doing a lookup with no features, if
# this happens to be the only registered builder.
self.assertEqual(self.registry.lookup(), builder)
def test_register_with_features_makes_lookup_succeed(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('foo'), builder)
self.assertEqual(self.registry.lookup('bar'), builder)
def test_lookup_fails_when_no_builder_implements_feature(self):
builder = self.builder_for_features('foo', 'bar')
self.assertEqual(self.registry.lookup('baz'), None)
def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
builder1 = self.builder_for_features('foo')
builder2 = self.builder_for_features('bar')
self.assertEqual(self.registry.lookup(), builder2)
def test_lookup_fails_when_no_tree_builders_registered(self):
self.assertEqual(self.registry.lookup(), None)
def test_lookup_gets_most_recent_builder_supporting_all_features(self):
has_one = self.builder_for_features('foo')
has_the_other = self.builder_for_features('bar')
has_both_early = self.builder_for_features('foo', 'bar', 'baz')
has_both_late = self.builder_for_features('foo', 'bar', 'quux')
lacks_one = self.builder_for_features('bar')
has_the_other = self.builder_for_features('foo')
# There are two builders featuring 'foo' and 'bar', but
# the one that also features 'quux' was registered later.
self.assertEqual(self.registry.lookup('foo', 'bar'),
has_both_late)
# There is only one builder featuring 'foo', 'bar', and 'baz'.
self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
has_both_early)
def test_lookup_fails_when_cannot_reconcile_requested_features(self):
builder1 = self.builder_for_features('foo', 'bar')
builder2 = self.builder_for_features('foo', 'baz')
self.assertEqual(self.registry.lookup('bar', 'baz'), None)

View file

@@ -1,36 +0,0 @@
"Test harness for doctests."
# pylint: disable-msg=E0611,W0142
__metaclass__ = type
__all__ = [
'additional_tests',
]
import atexit
import doctest
import os
#from pkg_resources import (
# resource_filename, resource_exists, resource_listdir, cleanup_resources)
import unittest
DOCTEST_FLAGS = (
doctest.ELLIPSIS |
doctest.NORMALIZE_WHITESPACE |
doctest.REPORT_NDIFF)
# def additional_tests():
# "Run the doc tests (README.txt and docs/*, if any exist)"
# doctest_files = [
# os.path.abspath(resource_filename('bs4', 'README.txt'))]
# if resource_exists('bs4', 'docs'):
# for name in resource_listdir('bs4', 'docs'):
# if name.endswith('.txt'):
# doctest_files.append(
# os.path.abspath(
# resource_filename('bs4', 'docs/%s' % name)))
# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
# atexit.register(cleanup_resources)
# return unittest.TestSuite((
# doctest.DocFileSuite(*doctest_files, **kwargs)))

View file

@@ -1,72 +0,0 @@
"""Tests to ensure that the html5lib tree builder generates good trees."""
import warnings
try:
from bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError, e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not HTML5LIB_PRESENT,
"html5lib seems not to be present, not testing its tree builder.")
class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
"""See ``HTML5TreeBuilderSmokeTest``."""
@property
def default_builder(self):
return HTML5TreeBuilder()
def test_soupstrainer(self):
# The html5lib tree builder does not support SoupStrainers.
strainer = SoupStrainer("b")
markup = "<p>A <b>bold</b> statement.</p>"
with warnings.catch_warnings(record=True) as w:
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(
soup.decode(), self.document_for(markup))
self.assertTrue(
"the html5lib tree builder doesn't support parse_only" in
str(w[0].message))
def test_correctly_nested_tables(self):
"""html5lib inserts <tbody> tags where other parsers don't."""
markup = ('<table id="1">'
'<tr>'
"<td>Here's another table:"
'<table id="2">'
'<tr><td>foo</td></tr>'
'</table></td>')
self.assertSoupEquals(
markup,
'<table id="1"><tbody><tr><td>Here\'s another table:'
'<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
'</td></tr></tbody></table>')
self.assertSoupEquals(
"<table><thead><tr><td>Foo</td></tr></thead>"
"<tbody><tr><td>Bar</td></tr></tbody>"
"<tfoot><tr><td>Baz</td></tr></tfoot></table>")
def test_xml_declaration_followed_by_doctype(self):
markup = '''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<p>foo</p>
</body>
</html>'''
soup = self.soup(markup)
# Verify that we can reach the <p> tag; this means the tree is connected.
self.assertEqual(b"<p>foo</p>", soup.p.encode())

View file

@@ -1,19 +0,0 @@
"""Tests to ensure that the html.parser tree builder generates good
trees."""
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
@property
def default_builder(self):
return HTMLParserTreeBuilder()
def test_namespaced_system_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass
def test_namespaced_public_doctype(self):
# html.parser can't handle namespaced doctypes, so skip this one.
pass

View file

@@ -1,88 +0,0 @@
"""Tests to ensure that the lxml tree builder generates good trees."""
import re
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
import lxml.etree
LXML_VERSION = lxml.etree.LXML_VERSION
except ImportError, e:
LXML_PRESENT = False
LXML_VERSION = (0,)
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,
skipIf,
)
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its tree builder.")
class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilder()
def test_out_of_range_entity(self):
self.assertSoupEquals(
"<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
self.assertSoupEquals(
"<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
# In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
# test if an old version of lxml is installed.
@skipIf(
not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
"Skipping doctype test for old version of lxml to avoid segfault.")
def test_empty_doctype(self):
soup = self.soup("<!DOCTYPE>")
doctype = soup.contents[0]
self.assertEqual("", doctype.strip())
def test_beautifulstonesoup_is_xml_parser(self):
# Make sure that the deprecated BSS class uses an xml builder
# if one is installed.
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("<b />")
self.assertEqual(u"<b/>", unicode(soup.b))
def test_real_xhtml_document(self):
"""lxml strips the XML definition from an XHTML doc, which is fine."""
markup = b"""<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>Hello.</title></head>
<body>Goodbye.</body>
</html>"""
soup = self.soup(markup)
self.assertEqual(
soup.encode("utf-8").replace(b"\n", b''),
markup.replace(b'\n', b'').replace(
b'<?xml version="1.0" encoding="utf-8"?>', b''))
@skipIf(
not LXML_PRESENT,
"lxml seems not to be present, not testing its XML tree builder.")
class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
"""See ``HTMLTreeBuilderSmokeTest``."""
@property
def default_builder(self):
return LXMLTreeBuilderForXML()

View file

@@ -1,383 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""
import logging
import unittest
import sys
from bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
SoupStrainer,
NamespacedAttribute,
)
import bs4.dammit
from bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.testing import (
SoupTest,
skipIf,
)
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError, e:
LXML_PRESENT = False
PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
class TestDeprecatedConstructorArguments(SoupTest):
def test_parseOnlyThese_renamed_to_parse_only(self):
with warnings.catch_warnings(record=True) as w:
soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
msg = str(w[0].message)
self.assertTrue("parseOnlyThese" in msg)
self.assertTrue("parse_only" in msg)
self.assertEqual(b"<b></b>", soup.encode())
def test_fromEncoding_renamed_to_from_encoding(self):
with warnings.catch_warnings(record=True) as w:
utf8 = b"\xc3\xa9"
soup = self.soup(utf8, fromEncoding="utf8")
msg = str(w[0].message)
self.assertTrue("fromEncoding" in msg)
self.assertTrue("from_encoding" in msg)
self.assertEqual("utf8", soup.original_encoding)
def test_unrecognized_keyword_argument(self):
self.assertRaises(
TypeError, self.soup, "<a>", no_such_argument=True)
@skipIf(
not LXML_PRESENT,
"lxml not present, not testing BeautifulStoneSoup.")
def test_beautifulstonesoup(self):
with warnings.catch_warnings(record=True) as w:
soup = BeautifulStoneSoup("<markup>")
self.assertTrue(isinstance(soup, BeautifulSoup))
self.assertTrue("BeautifulStoneSoup class is deprecated")
class TestSelectiveParsing(SoupTest):
def test_parse_with_soupstrainer(self):
markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
strainer = SoupStrainer("b")
soup = self.soup(markup, parse_only=strainer)
self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
class TestEntitySubstitution(unittest.TestCase):
"""Standalone tests of the EntitySubstitution class."""
def setUp(self):
self.sub = EntitySubstitution
def test_simple_html_substitution(self):
# Unicode characters corresponding to named HTML entities
# are substituted, and no others.
s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
self.assertEqual(self.sub.substitute_html(s),
u"foo&forall;\N{SNOWMAN}&otilde;bar")
def test_smart_quote_substitution(self):
# MS smart quotes are a common source of frustration, so we
# give them a special test.
quotes = b"\x91\x92foo\x93\x94"
dammit = UnicodeDammit(quotes)
self.assertEqual(self.sub.substitute_html(dammit.markup),
"&lsquo;&rsquo;foo&ldquo;&rdquo;")
def test_xml_conversion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, False), s)
def test_xml_attribute_quoting_normally_uses_double_quotes(self):
self.assertEqual(self.sub.substitute_xml("Welcome", True),
'"Welcome"')
self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
'"Bob\'s Bar"')
def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
s = 'Welcome to "my bar"'
self.assertEqual(self.sub.substitute_xml(s, True),
"'Welcome to \"my bar\"'")
def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
s = 'Welcome to "Bob\'s Bar"'
self.assertEqual(
self.sub.substitute_xml(s, True),
'"Welcome to &quot;Bob\'s Bar&quot;"')
def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
quoted = 'Welcome to "Bob\'s Bar"'
self.assertEqual(self.sub.substitute_xml(quoted), quoted)
def test_xml_quoting_handles_angle_brackets(self):
self.assertEqual(
self.sub.substitute_xml("foo<bar>"),
"foo&lt;bar&gt;")
def test_xml_quoting_handles_ampersands(self):
self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml("&Aacute;T&T"),
"&amp;Aacute;T&amp;T")
def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
self.assertEqual(
self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
"&Aacute;T&amp;T")
def test_quotes_not_html_substituted(self):
"""There's no need to do this except inside attribute values."""
text = 'Bob\'s "bar"'
self.assertEqual(self.sub.substitute_html(text), text)
class TestEncodingConversion(SoupTest):
# Test Beautiful Soup's ability to decode and encode from various
# encodings.
def setUp(self):
super(TestEncodingConversion, self).setUp()
self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
self.utf8_data = self.unicode_data.encode("utf-8")
# Just so you know what it looks like.
self.assertEqual(
self.utf8_data,
b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
def test_ascii_in_unicode_out(self):
# ASCII input is converted to Unicode. The original_encoding
# attribute is set.
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
self.assertTrue(isinstance(unicode_output, unicode))
self.assertEqual(unicode_output, self.document_for(ascii.decode()))
self.assertEqual(soup_from_ascii.original_encoding.lower(), "ascii")
def test_unicode_in_unicode_out(self):
# Unicode input is left alone. The original_encoding attribute
# is not set.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
self.assertEqual(soup_from_unicode.original_encoding, None)
def test_utf8_in_unicode_out(self):
# UTF-8 input is converted to Unicode. The original_encoding
# attribute is set.
soup_from_utf8 = self.soup(self.utf8_data)
self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
def test_utf8_out(self):
# The internal data structures can be encoded as UTF-8.
soup_from_unicode = self.soup(self.unicode_data)
self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
@skipIf(
PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
"Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
def test_attribute_name_containing_unicode_characters(self):
markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
class TestUnicodeDammit(unittest.TestCase):
"""Standalone tests of Unicode, Dammit."""
def test_smart_quotes_to_unicode(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup)
self.assertEqual(
dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
def test_smart_quotes_to_xml_entities(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="xml")
self.assertEqual(
dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
def test_smart_quotes_to_html_entities(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="html")
self.assertEqual(
dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
def test_smart_quotes_to_ascii(self):
markup = b"<foo>\x91\x92\x93\x94</foo>"
dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
self.assertEqual(
dammit.unicode_markup, """<foo>''""</foo>""")
def test_detect_utf8(self):
utf8 = b"\xc3\xa9"
dammit = UnicodeDammit(utf8)
self.assertEqual(dammit.unicode_markup, u'\xe9')
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_convert_hebrew(self):
hebrew = b"\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
def test_dont_see_smart_quotes_where_there_are_none(self):
utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
dammit = UnicodeDammit(utf_8)
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
def test_ignore_inappropriate_codecs(self):
utf8_data = u"Räksmörgås".encode("utf-8")
dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_ignore_invalid_codecs(self):
utf8_data = u"Räksmörgås".encode("utf-8")
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
dammit = UnicodeDammit(utf8_data, [bad_encoding])
self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
def test_detect_html5_style_meta_tag(self):
for data in (
b'<html><meta charset="euc-jp" /></html>',
b"<html><meta charset='euc-jp' /></html>",
b"<html><meta charset=euc-jp /></html>",
b"<html><meta charset=euc-jp/></html>"):
dammit = UnicodeDammit(data, is_html=True)
self.assertEqual(
"euc-jp", dammit.original_encoding)
def test_last_ditch_entity_replacement(self):
# This is a UTF-8 document that contains bytestrings
# completely incompatible with UTF-8 (ie. encoded with some other
# encoding).
#
# Since there is no consistent encoding for the document,
# Unicode, Dammit will eventually encode the document as UTF-8
# and encode the incompatible characters as REPLACEMENT
# CHARACTER.
#
# If chardet is installed, it will detect that the document
# can be converted into ISO-8859-1 without errors. This happens
# to be the wrong encoding, but it is a consistent encoding, so the
# code we're testing here won't run.
#
# So we temporarily disable chardet if it's present.
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
chardet = bs4.dammit.chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
bs4.dammit.chardet_dammit = noop
dammit = UnicodeDammit(doc)
self.assertEqual(True, dammit.contains_replacement_characters)
self.assertTrue(u"\ufffd" in dammit.unicode_markup)
soup = BeautifulSoup(doc, "html.parser")
self.assertTrue(soup.contains_replacement_characters)
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
def test_sniffed_xml_encoding(self):
# A document written in UTF-16LE will be converted by a different
# code path that sniffs the byte order markers.
data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
dammit = UnicodeDammit(data)
self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
self.assertEqual("utf-16le", dammit.original_encoding)
def test_detwingle(self):
# Here's a UTF8 document.
utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
# Here's a Windows-1252 document.
windows_1252 = (
u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
# Through some unholy alchemy, they've been stuck together.
doc = utf8 + windows_1252 + utf8
# The document can't be turned into UTF-8:
self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
# Unicode, Dammit thinks the whole document is Windows-1252,
# and decodes it into "â˜ƒâ˜ƒâ˜ƒ“Hi, I like Windows!”â˜ƒâ˜ƒâ˜ƒ"
# But if we run it through detwingle(), it's fixed:
fixed = UnicodeDammit.detwingle(doc)
self.assertEqual(
u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
def test_detwingle_ignores_multibyte_characters(self):
# Each of these characters has a UTF-8 representation ending
# in \x93. \x93 is a smart quote if interpreted as
# Windows-1252. But our code knows to skip over multibyte
# UTF-8 characters, so they'll survive the process unscathed.
for tricky_unicode_char in (
u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
):
input = tricky_unicode_char.encode("utf8")
self.assertTrue(input.endswith(b'\x93'))
output = UnicodeDammit.detwingle(input)
self.assertEqual(output, input)
class TestNamespacedAttribute(SoupTest):
def test_name_may_be_none(self):
a = NamespacedAttribute("xmlns", None)
self.assertEqual(a, "xmlns")
def test_attribute_is_equivalent_to_colon_separated_string(self):
a = NamespacedAttribute("a", "b")
self.assertEqual("a:b", a)
def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
a = NamespacedAttribute("a", "b", "c")
b = NamespacedAttribute("a", "b", "c")
self.assertEqual(a, b)
# The actual namespace is not considered.
c = NamespacedAttribute("a", "b", None)
self.assertEqual(a, c)
# But name and prefix are important.
d = NamespacedAttribute("a", "z", "c")
self.assertNotEqual(a, d)
e = NamespacedAttribute("z", "b", "c")
self.assertNotEqual(a, e)
class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
def test_charset_meta_attribute_value(self):
value = CharsetMetaAttributeValue("euc-jp")
self.assertEqual("euc-jp", value)
self.assertEqual("euc-jp", value.original_value)
self.assertEqual("utf8", value.encode("utf8"))
def test_content_meta_attribute_value(self):
value = ContentMetaAttributeValue("text/html; charset=euc-jp")
self.assertEqual("text/html; charset=euc-jp", value)
self.assertEqual("text/html; charset=euc-jp", value.original_value)
self.assertEqual("text/html; charset=utf8", value.encode("utf8"))

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -1,714 +0,0 @@
# Certificate Authority certificates for validating SSL connections.
#
# This file contains PEM format certificates generated from
# http://mxr.mozilla.org/seamonkey/source/security/nss/lib/ckfw/builtins/certdata.txt
#
# ***** BEGIN LICENSE BLOCK *****
# Version: MPL 1.1/GPL 2.0/LGPL 2.1
#
# The contents of this file are subject to the Mozilla Public License Version
# 1.1 (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
# http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
# for the specific language governing rights and limitations under the
# License.
#
# The Original Code is the Netscape security libraries.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1994-2000
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#
# Alternatively, the contents of this file may be used under the terms of
# either the GNU General Public License Version 2 or later (the "GPL"), or
# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
# in which case the provisions of the GPL or the LGPL are applicable instead
# of those above. If you wish to allow use of your version of this file only
# under the terms of either the GPL or the LGPL, and not to allow others to
# use your version of this file under the terms of the MPL, indicate your
# decision by deleting the provisions above and replace them with the notice
# and other provisions required by the GPL or the LGPL. If you do not delete
# the provisions above, a recipient may use your version of this file under
# the terms of any one of the MPL, the GPL or the LGPL.
#
# ***** END LICENSE BLOCK *****
Verisign/RSA Secure Server CA
=============================
-----BEGIN CERTIFICATE-----
MIICNDCCAaECEAKtZn5ORf5eV288mBle3cAwDQYJKoZIhvcNAQECBQAwXzELMAkG
A1UEBhMCVVMxIDAeBgNVBAoTF1JTQSBEYXRhIFNlY3VyaXR5LCBJbmMuMS4wLAYD
VQQLEyVTZWN1cmUgU2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk0
MTEwOTAwMDAwMFoXDTEwMDEwNzIzNTk1OVowXzELMAkGA1UEBhMCVVMxIDAeBgNV
BAoTF1JTQSBEYXRhIFNlY3VyaXR5LCBJbmMuMS4wLAYDVQQLEyVTZWN1cmUgU2Vy
dmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGbMA0GCSqGSIb3DQEBAQUAA4GJ
ADCBhQJ+AJLOesGugz5aqomDV6wlAXYMra6OLDfO6zV4ZFQD5YRAUcm/jwjiioII
0haGN1XpsSECrXZogZoFokvJSyVmIlZsiAeP94FZbYQHZXATcXY+m3dM41CJVphI
uR2nKRoTLkoRWZweFdVJVCxzOmmCsZc5nG1wZ0jl3S3WyB57AgMBAAEwDQYJKoZI
hvcNAQECBQADfgBl3X7hsuyw4jrg7HFGmhkRuNPHoLQDQCYCPgmc4RKz0Vr2N6W3
YQO2WxZpO8ZECAyIUwxrl0nHPjXcbLm7qt9cuzovk2C2qUtN8iD3zV9/ZHuO3ABc
1/p3yjkWWW8O6tO1g39NTUJWdrTJXwT4OPjr0l91X817/OWOgHz8UA==
-----END CERTIFICATE-----
Thawte Personal Basic CA
========================
-----BEGIN CERTIFICATE-----
MIIDITCCAoqgAwIBAgIBADANBgkqhkiG9w0BAQQFADCByzELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMRowGAYD
VQQKExFUaGF3dGUgQ29uc3VsdGluZzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBT
ZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFBlcnNvbmFsIEJhc2lj
IENBMSgwJgYJKoZIhvcNAQkBFhlwZXJzb25hbC1iYXNpY0B0aGF3dGUuY29tMB4X
DTk2MDEwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgcsxCzAJBgNVBAYTAlpBMRUw
EwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEaMBgGA1UE
ChMRVGhhd3RlIENvbnN1bHRpbmcxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy
dmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQZXJzb25hbCBCYXNpYyBD
QTEoMCYGCSqGSIb3DQEJARYZcGVyc29uYWwtYmFzaWNAdGhhd3RlLmNvbTCBnzAN
BgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAvLyTU23AUE+CFeZIlDWmWr5vQvoPR+53
dXLdjUmbllegeNTKP1GzaQuRdhciB5dqxFGTS+CN7zeVoQxN2jSQHReJl+A1OFdK
wPQIcOk8RHtQfmGakOMj04gRRif1CwcOu93RfyAKiLlWCy4cgNrx454p7xS9CkT7
G1sY0b8jkyECAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQQF
AAOBgQAt4plrsD16iddZopQBHyvdEktTwq1/qqcAXJFAVyVKOKqEcLnZgA+le1z7
c8a914phXAPjLSeoF+CEhULcXpvGt7Jtu3Sv5D/Lp7ew4F2+eIMllNLbgQ95B21P
9DkVWlIBe94y1k049hJcBlDfBVu9FEuh3ym6O0GN92NWod8isQ==
-----END CERTIFICATE-----
Thawte Personal Premium CA
==========================
-----BEGIN CERTIFICATE-----
MIIDKTCCApKgAwIBAgIBADANBgkqhkiG9w0BAQQFADCBzzELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMRowGAYD
VQQKExFUaGF3dGUgQ29uc3VsdGluZzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBT
ZXJ2aWNlcyBEaXZpc2lvbjEjMCEGA1UEAxMaVGhhd3RlIFBlcnNvbmFsIFByZW1p
dW0gQ0ExKjAoBgkqhkiG9w0BCQEWG3BlcnNvbmFsLXByZW1pdW1AdGhhd3RlLmNv
bTAeFw05NjAxMDEwMDAwMDBaFw0yMDEyMzEyMzU5NTlaMIHPMQswCQYDVQQGEwJa
QTEVMBMGA1UECBMMV2VzdGVybiBDYXBlMRIwEAYDVQQHEwlDYXBlIFRvd24xGjAY
BgNVBAoTEVRoYXd0ZSBDb25zdWx0aW5nMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9u
IFNlcnZpY2VzIERpdmlzaW9uMSMwIQYDVQQDExpUaGF3dGUgUGVyc29uYWwgUHJl
bWl1bSBDQTEqMCgGCSqGSIb3DQEJARYbcGVyc29uYWwtcHJlbWl1bUB0aGF3dGUu
Y29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDJZtn4B0TPuYwu8KHvE0Vs
Bd/eJxZRNkERbGw77f4QfRKe5ZtCmv5gMcNmt3M6SK5O0DI3lIi1DbbZ8/JE2dWI
Et12TfIa/G8jHnrx2JhFTgcQ7xZC0EN1bUre4qrJMf8fAHB8Zs8QJQi6+u4A6UYD
ZicRFTuqW/KY3TZCstqIdQIDAQABoxMwETAPBgNVHRMBAf8EBTADAQH/MA0GCSqG
SIb3DQEBBAUAA4GBAGk2ifc0KjNyL2071CKyuG+axTZmDhs8obF1Wub9NdP4qPIH
b4Vnjt4rueIXsDqg8A6iAJrf8xQVbrvIhVqYgPn/vnQdPfP+MCXRNzRn+qVxeTBh
KXLA4CxM+1bkOqhv5TJZUtt1KFBZDPgLGeSs2a+WjS9Q2wfD6h+rM+D1KzGJ
-----END CERTIFICATE-----
Thawte Personal Freemail CA
===========================
-----BEGIN CERTIFICATE-----
MIIDLTCCApagAwIBAgIBADANBgkqhkiG9w0BAQQFADCB0TELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMRowGAYD
VQQKExFUaGF3dGUgQ29uc3VsdGluZzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBT
ZXJ2aWNlcyBEaXZpc2lvbjEkMCIGA1UEAxMbVGhhd3RlIFBlcnNvbmFsIEZyZWVt
YWlsIENBMSswKQYJKoZIhvcNAQkBFhxwZXJzb25hbC1mcmVlbWFpbEB0aGF3dGUu
Y29tMB4XDTk2MDEwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgdExCzAJBgNVBAYT
AlpBMRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEa
MBgGA1UEChMRVGhhd3RlIENvbnN1bHRpbmcxKDAmBgNVBAsTH0NlcnRpZmljYXRp
b24gU2VydmljZXMgRGl2aXNpb24xJDAiBgNVBAMTG1RoYXd0ZSBQZXJzb25hbCBG
cmVlbWFpbCBDQTErMCkGCSqGSIb3DQEJARYccGVyc29uYWwtZnJlZW1haWxAdGhh
d3RlLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA1GnX1LCUZFtx6UfY
DFG26nKRsIRefS0Nj3sS34UldSh0OkIsYyeflXtL734Zhx2G6qPduc6WZBrCFG5E
rHzmj+hND3EfQDimAKOHePb5lIZererAXnbr2RSjXW56fAylS1V/Bhkpf56aJtVq
uzgkCGqYx7Hao5iR/Xnb5VrEHLkCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zAN
BgkqhkiG9w0BAQQFAAOBgQDH7JJ+Tvj1lqVnYiqk8E0RYNBvjWBYYawmu1I1XAjP
MPuoSpaKH2JCI4wXD/S6ZJwXrEcp352YXtJsYHFcoqzceePnbgBHH7UNKOgCneSa
/RP0ptl8sfjcXyMmCZGAc9AUG95DqYMl8uacLxXK/qarigd1iwzdUYRr5PjRznei
gQ==
-----END CERTIFICATE-----
Thawte Server CA
================
-----BEGIN CERTIFICATE-----
MIIDEzCCAnygAwIBAgIBATANBgkqhkiG9w0BAQQFADCBxDELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEm
MCQGCSqGSIb3DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wHhcNOTYwODAx
MDAwMDAwWhcNMjAxMjMxMjM1OTU5WjCBxDELMAkGA1UEBhMCWkExFTATBgNVBAgT
DFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYDVQQKExRUaGF3
dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNl
cyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEmMCQGCSqGSIb3
DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD
gY0AMIGJAoGBANOkUG7I/1Zr5s9dtuoMaHVHoqrC2oQl/Kj0R1HahbUgdJSGHg91
yekIYfUGbTBuFRkC6VLAYttNmZ7iagxEOM3+vuNkCXDF/rFrKbYvScg71CcEJRCX
L+eQbcAoQpnXTEPew/UhbVSfXcNY4cDk2VuwuNy0e982OsK1ZiIS1ocNAgMBAAGj
EzARMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAB/pMaVz7lcxG
7oWDTSEwjsrZqG9JGubaUeNgcGyEYRGhGshIPllDfU+VPaGLtwtimHp1it2ITk6e
QNuozDJ0uW8NxuOzRAvZim+aKZuZGCg70eNAKJpaPNW15yAbi8qkq43pUdniTCxZ
qdq5snUb9kLy78fyGPmJvKP/iiMucEc=
-----END CERTIFICATE-----
Thawte Premium Server CA
========================
-----BEGIN CERTIFICATE-----
MIIDJzCCApCgAwIBAgIBATANBgkqhkiG9w0BAQQFADCBzjELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD
VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv
biBTZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFByZW1pdW0gU2Vy
dmVyIENBMSgwJgYJKoZIhvcNAQkBFhlwcmVtaXVtLXNlcnZlckB0aGF3dGUuY29t
MB4XDTk2MDgwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgc4xCzAJBgNVBAYTAlpB
MRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsG
A1UEChMUVGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRp
b24gU2VydmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNl
cnZlciBDQTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNv
bTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0jY2aovXwlue2oFBYo847kkE
VdbQ7xwblRZH7xhINTpS9CtqBo87L+pW46+GjZ4X9560ZXUCTe/LCaIhUdib0GfQ
ug2SBhRz1JPLlyoAnFxODLz6FVL88kRu2hFKbgifLy3j+ao6hnO2RlNYyIkFvYMR
uHM/qgeN9EJN50CdHDcCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG
9w0BAQQFAAOBgQAmSCwWwlj66BZ0DKqqX1Q/8tfJeGBeXm43YyJ3Nn6yF8Q0ufUI
hfzJATj/Tb7yFkJD57taRvvBxhEf8UqwKEbJw8RCfbz6q1lu1bdRiBHjpIUZa4JM
pAwSremkrj/xw0llmozFyD4lt5SZu5IycQfwhl7tUCemDaYj+bvLpgcUQg==
-----END CERTIFICATE-----
Equifax Secure CA
=================
-----BEGIN CERTIFICATE-----
MIIDIDCCAomgAwIBAgIENd70zzANBgkqhkiG9w0BAQUFADBOMQswCQYDVQQGEwJV
UzEQMA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2Vy
dGlmaWNhdGUgQXV0aG9yaXR5MB4XDTk4MDgyMjE2NDE1MVoXDTE4MDgyMjE2NDE1
MVowTjELMAkGA1UEBhMCVVMxEDAOBgNVBAoTB0VxdWlmYXgxLTArBgNVBAsTJEVx
dWlmYXggU2VjdXJlIENlcnRpZmljYXRlIEF1dGhvcml0eTCBnzANBgkqhkiG9w0B
AQEFAAOBjQAwgYkCgYEAwV2xWGcIYu6gmi0fCG2RFGiYCh7+2gRvE4RiIcPRfM6f
BeC4AfBONOziipUEZKzxa1NfBbPLZ4C/QgKO/t0BCezhABRP/PvwDN1Dulsr4R+A
cJkVV5MW8Q+XarfCaCMczE1ZMKxRHjuvK9buY0V7xdlfUNLjUA86iOe/FP3gx7kC
AwEAAaOCAQkwggEFMHAGA1UdHwRpMGcwZaBjoGGkXzBdMQswCQYDVQQGEwJVUzEQ
MA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2VydGlm
aWNhdGUgQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMBoGA1UdEAQTMBGBDzIwMTgw
ODIyMTY0MTUxWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAUSOZo+SvSspXXR9gj
IBBPM5iQn9QwHQYDVR0OBBYEFEjmaPkr0rKV10fYIyAQTzOYkJ/UMAwGA1UdEwQF
MAMBAf8wGgYJKoZIhvZ9B0EABA0wCxsFVjMuMGMDAgbAMA0GCSqGSIb3DQEBBQUA
A4GBAFjOKer89961zgK5F7WF0bnj4JXMJTENAKaSbn+2kmOeUJXRmm/kEd5jhW6Y
7qj/WsjTVbJmcVfewCHrPSqnI0kBBIZCe/zuf6IWUrVnZ9NA2zsmWLIodz2uFHdh
1voqZiegDfqnc1zqcPGUIWVEX/r87yloqaKHee9570+sB3c4
-----END CERTIFICATE-----
Verisign Class 1 Public Primary Certification Authority
=======================================================
-----BEGIN CERTIFICATE-----
MIICPTCCAaYCEQDNun9W8N/kvFT+IqyzcqpVMA0GCSqGSIb3DQEBAgUAMF8xCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xh
c3MgMSBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05
NjAxMjkwMDAwMDBaFw0yODA4MDEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYD
VQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMSBQdWJsaWMgUHJp
bWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOB
jQAwgYkCgYEA5Rm/baNWYS2ZSHH2Z965jeu3noaACpEO+jglr0aIguVzqKCbJF0N
H8xlbgyw0FaEGIeaBpsQoXPftFg5a27B9hXVqKg/qhIGjTGsf7A01480Z4gJzRQR
4k5FVmkfeAKA2txHkSm7NsljXMXg1y2He6G3MrB7MLoqLzGq7qNn2tsCAwEAATAN
BgkqhkiG9w0BAQIFAAOBgQBMP7iLxmjf7kMzDl3ppssHhE16M/+SG/Q2rdiVIjZo
EWx8QszznC7EBz8UsA9P/5CSdvnivErpj82ggAr3xSnxgiJduLHdgSOjeyUVRjB5
FvjqBUuUfx3CHMjjt/QQQDwTw18fU+hI5Ia0e6E1sHslurjTjqs/OJ0ANACY89Fx
lA==
-----END CERTIFICATE-----
Verisign Class 2 Public Primary Certification Authority
=======================================================
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEC0b/EoXjaOR6+f/9YtFvgswDQYJKoZIhvcNAQECBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAyIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAyIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQC2WoujDWojg4BrzzmH9CETMwZMJaLtVRKXxaeAufqDwSCg+i8VDXyh
YGt+eSz6Bg86rvYbb7HS/y8oUl+DfUvEerf4Zh+AVPy3wo5ZShRXRtGak75BkQO7
FYCTXOvnzAhsPz6zSvz/S2wj1VCCJkQZjiPDceoZJEcEnnW/yKYAHwIDAQABMA0G
CSqGSIb3DQEBAgUAA4GBAIobK/o5wXTXXtgZZKJYSi034DNHD6zt96rbHuSLBlxg
J8pFUs4W7z8GZOeUaHxgMxURaa+dYo2jA1Rrpr7l7gUYYAS/QoD90KioHgE796Nc
r6Pc5iaAIzy4RHT3Cq5Ji2F4zCS/iIqnDupzGUH9TQPwiNHleI2lKk/2lw0Xd8rY
-----END CERTIFICATE-----
Verisign Class 3 Public Primary Certification Authority
=======================================================
-----BEGIN CERTIFICATE-----
MIICPDCCAaUCEHC65B0Q2Sk0tjjKewPMur8wDQYJKoZIhvcNAQECBQAwXzELMAkG
A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz
cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2
MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV
BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt
YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN
ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE
BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is
I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G
CSqGSIb3DQEBAgUAA4GBALtMEivPLCYATxQT3ab7/AoRhIzzKBxnki98tsX63/Do
lbwdj2wsqFHMc9ikwFPwTtYmwHYBV4GSXiHx0bH/59AhWM1pF+NEHJwZRDmJXNyc
AA9WjQKZ7aKQRUzkuxCkPfAyAw7xzvjoyVGM5mKf5p/AfbdynMk2OmufTqj/ZA1k
-----END CERTIFICATE-----
Verisign Class 1 Public Primary Certification Authority - G2
============================================================
-----BEGIN CERTIFICATE-----
MIIDAjCCAmsCEEzH6qqYPnHTkxD4PTqJkZIwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
c3MgMSBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMSBQdWJsaWMg
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
AQUAA4GNADCBiQKBgQCq0Lq+Fi24g9TK0g+8djHKlNgdk4xWArzZbxpvUjZudVYK
VdPfQ4chEWWKfo+9Id5rMj8bhDSVBZ1BNeuS65bdqlk/AVNtmU/t5eIqWpDBucSm
Fc/IReumXY6cPvBkJHalzasab7bYe1FhbqZ/h8jit+U03EGI6glAvnOSPWvndQID
AQABMA0GCSqGSIb3DQEBBQUAA4GBAKlPww3HZ74sy9mozS11534Vnjty637rXC0J
h9ZrbWB85a7FkCMMXErQr7Fd88e2CtvgFZMN3QO8x3aKtd1Pw5sTdbgBwObJW2ul
uIncrKTdcu1OofdPvAbT6shkdHvClUGcZXNY8ZCaPGqxmMnEh7zPRW1F4m4iP/68
DzFc6PLZ
-----END CERTIFICATE-----
Verisign Class 2 Public Primary Certification Authority - G2
============================================================
-----BEGIN CERTIFICATE-----
MIIDAzCCAmwCEQC5L2DMiJ+hekYJuFtwbIqvMA0GCSqGSIb3DQEBBQUAMIHBMQsw
CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xPDA6BgNVBAsTM0Ns
YXNzIDIgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgLSBH
MjE6MDgGA1UECxMxKGMpIDE5OTggVmVyaVNpZ24sIEluYy4gLSBGb3IgYXV0aG9y
aXplZCB1c2Ugb25seTEfMB0GA1UECxMWVmVyaVNpZ24gVHJ1c3QgTmV0d29yazAe
Fw05ODA1MTgwMDAwMDBaFw0yODA4MDEyMzU5NTlaMIHBMQswCQYDVQQGEwJVUzEX
MBUGA1UEChMOVmVyaVNpZ24sIEluYy4xPDA6BgNVBAsTM0NsYXNzIDIgUHVibGlj
IFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgLSBHMjE6MDgGA1UECxMx
KGMpIDE5OTggVmVyaVNpZ24sIEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25s
eTEfMB0GA1UECxMWVmVyaVNpZ24gVHJ1c3QgTmV0d29yazCBnzANBgkqhkiG9w0B
AQEFAAOBjQAwgYkCgYEAp4gBIXQs5xoD8JjhlzwPIQjxnNuX6Zr8wgQGE75fUsjM
HiwSViy4AWkszJkfrbCWrnkE8hM5wXuYuggs6MKEEyyqaekJ9MepAqRCwiNPStjw
DqL7MWzJ5m+ZJwf15vRMeJ5t60aG+rmGyVTyssSv1EYcWskVMP8NbPUtDm3Of3cC
AwEAATANBgkqhkiG9w0BAQUFAAOBgQByLvl/0fFx+8Se9sVeUYpAmLho+Jscg9ji
nb3/7aHmZuovCfTK1+qlK5X2JGCGTUQug6XELaDTrnhpb3LabK4I8GOSN+a7xDAX
rXfMSTWqz9iP0b63GJZHc2pUIjRkLbYWm1lbtFFZOrMLFPQS32eg9K0yZF6xRnIn
jBJ7xUS0rg==
-----END CERTIFICATE-----
Verisign Class 3 Public Primary Certification Authority - G2
============================================================
-----BEGIN CERTIFICATE-----
MIIDAjCCAmsCEH3Z/gfPqB63EHln+6eJNMYwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
c3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMyBQdWJsaWMg
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
AQUAA4GNADCBiQKBgQDMXtERXVxp0KvTuWpMmR9ZmDCOFoUgRm1HP9SFIIThbbP4
pO0M8RcPO/mn+SXXwc+EY/J8Y8+iR/LGWzOOZEAEaMGAuWQcRXfH2G71lSk8UOg0
13gfqLptQ5GVj0VXXn7F+8qkBOvqlzdUMG+7AUcyM83cV5tkaWH4mx0ciU9cZwID
AQABMA0GCSqGSIb3DQEBBQUAA4GBAFFNzb5cy5gZnBWyATl4Lk0PZ3BwmcYQWpSk
U01UbSuvDV1Ai2TT1+7eVmGSX6bEHRBhNtMsJzzoKQm5EWR0zLVznxxIqbxhAe7i
F6YM40AIOw7n60RzKprxaZLvcRTDOaxxp5EJb+RxBrO6WVcmeQD2+A2iMzAo1KpY
oJ2daZH9
-----END CERTIFICATE-----
Verisign Class 4 Public Primary Certification Authority - G2
============================================================
-----BEGIN CERTIFICATE-----
MIIDAjCCAmsCEDKIjprS9esTR/h/xCA3JfgwDQYJKoZIhvcNAQEFBQAwgcExCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh
c3MgNCBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy
MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp
emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X
DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw
FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgNCBQdWJsaWMg
UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo
YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5
MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB
AQUAA4GNADCBiQKBgQC68OTP+cSuhVS5B1f5j8V/aBH4xBewRNzjMHPVKmIquNDM
HO0oW369atyzkSTKQWI8/AIBvxwWMZQFl3Zuoq29YRdsTjCG8FE3KlDHqGKB3FtK
qsGgtG7rL+VXxbErQHDbWk2hjh+9Ax/YA9SPTJlxvOKCzFjomDqG04Y48wApHwID
AQABMA0GCSqGSIb3DQEBBQUAA4GBAIWMEsGnuVAVess+rLhDityq3RS6iYF+ATwj
cSGIL4LcY/oCRaxFWdcqWERbt5+BO5JoPeI3JPV7bI92NZYJqFmduc4jq3TWg/0y
cyfYaT5DdPauxYma51N86Xv2S/PBZYPejYqcPIiNOVn8qj8ijaHBZlCBckztImRP
T8qAkbYp
-----END CERTIFICATE-----
Verisign Class 1 Public Primary Certification Authority - G3
============================================================
-----BEGIN CERTIFICATE-----
MIIEGjCCAwICEQCLW3VWhFSFCwDPrzhIzrGkMA0GCSqGSIb3DQEBBQUAMIHKMQsw
CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl
cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu
LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT
aWduIENsYXNzIDEgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp
dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD
VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT
aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ
bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu
IENsYXNzIDEgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg
LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAN2E1Lm0+afY8wR4
nN493GwTFtl63SRRZsDHJlkNrAYIwpTRMx/wgzUfbhvI3qpuFU5UJ+/EbRrsC+MO
8ESlV8dAWB6jRx9x7GD2bZTIGDnt/kIYVt/kTEkQeE4BdjVjEjbdZrwBBDajVWjV
ojYJrKshJlQGrT/KFOCsyq0GHZXi+J3x4GD/wn91K0zM2v6HmSHquv4+VNfSWXjb
PG7PoBMAGrgnoeS+Z5bKoMWznN3JdZ7rMJpfo83ZrngZPyPpXNspva1VyBtUjGP2
6KbqxzcSXKMpHgLZ2x87tNcPVkeBFQRKr4Mn0cVYiMHd9qqnoxjaaKptEVHhv2Vr
n5Z20T0CAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAq2aN17O6x5q25lXQBfGfMY1a
qtmqRiYPce2lrVNWYgFHKkTp/j90CxObufRNG7LRX7K20ohcs5/Ny9Sn2WCVhDr4
wTcdYcrnsMXlkdpUpqwxga6X3s0IrLjAl4B/bnKk52kTlWUfxJM8/XmPBNQ+T+r3
ns7NZ3xPZQL/kYVUc8f/NveGLezQXk//EZ9yBta4GvFMDSZl4kSAHsef493oCtrs
pSCAaWihT37ha88HQfqDjrw43bAuEbFrskLMmrz5SCJ5ShkPshw+IHTZasO+8ih4
E1Z5T21Q6huwtVexN2ZYI/PcD98Kh8TvhgXVOBRgmaNL3gaWcSzy27YfpO8/7g==
-----END CERTIFICATE-----
Verisign Class 2 Public Primary Certification Authority - G3
============================================================
-----BEGIN CERTIFICATE-----
MIIEGTCCAwECEGFwy0mMX5hFKeewptlQW3owDQYJKoZIhvcNAQEFBQAwgcoxCzAJ
BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjEfMB0GA1UECxMWVmVy
aVNpZ24gVHJ1c3QgTmV0d29yazE6MDgGA1UECxMxKGMpIDE5OTkgVmVyaVNpZ24s
IEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25seTFFMEMGA1UEAxM8VmVyaVNp
Z24gQ2xhc3MgMiBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0
eSAtIEczMB4XDTk5MTAwMTAwMDAwMFoXDTM2MDcxNjIzNTk1OVowgcoxCzAJBgNV
BAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjEfMB0GA1UECxMWVmVyaVNp
Z24gVHJ1c3QgTmV0d29yazE6MDgGA1UECxMxKGMpIDE5OTkgVmVyaVNpZ24sIElu
Yy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25seTFFMEMGA1UEAxM8VmVyaVNpZ24g
Q2xhc3MgMiBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAt
IEczMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArwoNwtUs22e5LeWU
J92lvuCwTY+zYVY81nzD9M0+hsuiiOLh2KRpxbXiv8GmR1BeRjmL1Za6tW8UvxDO
JxOeBUebMXoT2B/Z0wI3i60sR/COgQanDTAM6/c8DyAd3HJG7qUCyFvDyVZpTMUY
wZF7C9UTAJu878NIPkZgIIUq1ZC2zYugzDLdt/1AVbJQHFauzI13TccgTacxdu9o
koqQHgiBVrKtaaNS0MscxCM9H5n+TOgWY47GCI72MfbS+uV23bUckqNJzc0BzWjN
qWm6o+sdDZykIKbBoMXRRkwXbdKsZj+WjOCE1Db/IlnF+RFgqF8EffIa9iVCYQ/E
Srg+iQIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQA0JhU8wI1NQ0kdvekhktdmnLfe
xbjQ5F1fdiLAJvmEOjr5jLX77GDx6M4EsMjdpwOPMPOY36TmpDHf0xwLRtxyID+u
7gU8pDM/CzmscHhzS5kr3zDCVLCoO1Wh/hYozUK9dG6A2ydEp85EXdQbkJgNHkKU
sQAsBNB0owIFImNjzYO1+8FtYmtpdf1dcEG59b98377BMnMiIYtYgXsVkXq642RI
sH/7NiXaldDxJBQX3RiAa0YjOVT1jmIJBB2UkKab5iXiQkWquJCtvgiPqQtCGJTP
cjnhsUPgKM+351psE2tJs//jGHyJizNdrDPXp/naOlXJWBD5qu9ats9LS98q
-----END CERTIFICATE-----
Verisign Class 3 Public Primary Certification Authority - G3
============================================================
-----BEGIN CERTIFICATE-----
MIIEGjCCAwICEQCbfgZJoz5iudXukEhxKe9XMA0GCSqGSIb3DQEBBQUAMIHKMQsw
CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl
cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu
LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT
aWduIENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp
dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD
VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT
aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ
bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu
IENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg
LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMu6nFL8eB8aHm8b
N3O9+MlrlBIwT/A2R/XQkQr1F8ilYcEWQE37imGQ5XYgwREGfassbqb1EUGO+i2t
KmFZpGcmTNDovFJbcCAEWNF6yaRpvIMXZK0Fi7zQWM6NjPXr8EJJC52XJ2cybuGu
kxUccLwgTS8Y3pKI6GyFVxEa6X7jJhFUokWWVYPKMIno3Nij7SqAP395ZVc+FSBm
CC+Vk7+qRy+oRpfwEuL+wgorUeZ25rdGt+INpsyow0xZVYnm6FNcHOqd8GIWC6fJ
Xwzw3sJ2zq/3avL6QaaiMxTJ5Xpj055iN9WFZZ4O5lMkdBteHRJTW8cs54NJOxWu
imi5V5cCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAERSWwauSCPc/L8my/uRan2Te
2yFPhpk0djZX3dAVL8WtfxUfN2JzPtTnX84XA9s1+ivbrmAJXx5fj267Cz3qWhMe
DGBvtcC1IyIuBwvLqXTLR7sdwdela8wv0kL9Sd2nic9TutoAWii/gt/4uhMdUIaC
/Y4wjylGsB49Ndo4YhYYSq3mtlFs3q9i6wHQHiT+eo8SGhJouPtmmRQURVyu565p
F4ErWjfJXir0xuKhXFSbplQAz/DxwceYMBo7Nhbbo27q/a2ywtrvAkcTisDxszGt
TxzhT5yvDwyd93gN2PQ1VoDat20Xj50egWTh/sVFuq1ruQp6Tk9LhO5L8X3dEQ==
-----END CERTIFICATE-----
Verisign Class 4 Public Primary Certification Authority - G3
============================================================
-----BEGIN CERTIFICATE-----
MIIEGjCCAwICEQDsoKeLbnVqAc/EfMwvlF7XMA0GCSqGSIb3DQEBBQUAMIHKMQsw
CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl
cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu
LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT
aWduIENsYXNzIDQgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp
dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD
VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT
aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ
bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu
IENsYXNzIDQgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg
LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAK3LpRFpxlmr8Y+1
GQ9Wzsy1HyDkniYlS+BzZYlZ3tCD5PUPtbut8XzoIfzk6AzufEUiGXaStBO3IFsJ
+mGuqPKljYXCKtbeZjbSmwL0qJJgfJxptI8kHtCGUvYynEFYHiK9zUVilQhu0Gbd
U6LM8BDcVHOLBKFGMzNcF0C5nk3T875Vg+ixiY5afJqWIpA7iCXy0lOIAgwLePLm
NxdLMEYH5IBtptiWLugs+BGzOA1mppvqySNb247i8xOOGlktqgLw7KSHZtzBP/XY
ufTsgsbSPZUd5cBPhMnZo0QoBmrXRazwa2rvTl/4EYIeOGM0ZlDUPpNz+jDDZq3/
ky2X7wMCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAj/ola09b5KROJ1WrIhVZPMq1
CtRK26vdoV9TxaBXOcLORyu+OshWv8LZJxA6sQU8wHcxuzrTBXttmhwwjIDLk5Mq
g6sFUYICABFna/OIYUdfA5PVWw3g8dShMjWFsjrbsIKr0csKvE+MW8VLADsfKoKm
fjaF3H48ZwC15DtS4KjrXRX5xm3wrR0OhbepmnMUWluPQSjA1egtTaRezarZ7c7c
2NU8Qh0XwRJdRTjDOPP8hS6DRkiy1yBfkjaP53kPmF6Z6PDQpLv1U70qzlmwr25/
bLvSHgCwIe34QWKCudiyxLtGUPMxxY8BqHTr9Xgn2uf3ZkPznoM+IKrDNWCRzg==
-----END CERTIFICATE-----
Equifax Secure Global eBusiness CA
==================================
-----BEGIN CERTIFICATE-----
MIICkDCCAfmgAwIBAgIBATANBgkqhkiG9w0BAQQFADBaMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEtMCsGA1UEAxMkRXF1aWZheCBT
ZWN1cmUgR2xvYmFsIGVCdXNpbmVzcyBDQS0xMB4XDTk5MDYyMTA0MDAwMFoXDTIw
MDYyMTA0MDAwMFowWjELMAkGA1UEBhMCVVMxHDAaBgNVBAoTE0VxdWlmYXggU2Vj
dXJlIEluYy4xLTArBgNVBAMTJEVxdWlmYXggU2VjdXJlIEdsb2JhbCBlQnVzaW5l
c3MgQ0EtMTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAuucXkAJlsTRVPEnC
UdXfp9E3j9HngXNBUmCbnaEXJnitx7HoJpQytd4zjTov2/KaelpzmKNc6fuKcxtc
58O/gGzNqfTWK8D3+ZmqY6KxRwIP1ORROhI8bIpaVIRw28HFkM9yRcuoWcDNM50/
o5brhTMhHD4ePmBudpxnhcXIw2ECAwEAAaNmMGQwEQYJYIZIAYb4QgEBBAQDAgAH
MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUvqigdHJQa0S3ySPY+6j/s1dr
aGwwHQYDVR0OBBYEFL6ooHRyUGtEt8kj2Puo/7NXa2hsMA0GCSqGSIb3DQEBBAUA
A4GBADDiAVGqx+pf2rnQZQ8w1j7aDRRJbpGTJxQx78T3LUX47Me/okENI7SS+RkA
Z70Br83gcfxaz2TE4JaY0KNA4gGK7ycH8WUBikQtBmV1UsCGECAhX2xrD2yuCRyv
8qIYNMR1pHMc8Y3c7635s3a0kr/clRAevsvIO1qEYBlWlKlV
-----END CERTIFICATE-----
Equifax Secure eBusiness CA 1
=============================
-----BEGIN CERTIFICATE-----
MIICgjCCAeugAwIBAgIBBDANBgkqhkiG9w0BAQQFADBTMQswCQYDVQQGEwJVUzEc
MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEmMCQGA1UEAxMdRXF1aWZheCBT
ZWN1cmUgZUJ1c2luZXNzIENBLTEwHhcNOTkwNjIxMDQwMDAwWhcNMjAwNjIxMDQw
MDAwWjBTMQswCQYDVQQGEwJVUzEcMBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5j
LjEmMCQGA1UEAxMdRXF1aWZheCBTZWN1cmUgZUJ1c2luZXNzIENBLTEwgZ8wDQYJ
KoZIhvcNAQEBBQADgY0AMIGJAoGBAM4vGbwXt3fek6lfWg0XTzQaDJj0ItlZ1MRo
RvC0NcWFAyDGr0WlIVFFQesWWDYyb+JQYmT5/VGcqiTZ9J2DKocKIdMSODRsjQBu
WqDZQu4aIZX5UkxVWsUPOE9G+m34LjXWHXzr4vCwdYDIqROsvojvOm6rXyo4YgKw
Env+j6YDAgMBAAGjZjBkMBEGCWCGSAGG+EIBAQQEAwIABzAPBgNVHRMBAf8EBTAD
AQH/MB8GA1UdIwQYMBaAFEp4MlIR21kWNl7fwRQ2QGpHfEyhMB0GA1UdDgQWBBRK
eDJSEdtZFjZe38EUNkBqR3xMoTANBgkqhkiG9w0BAQQFAAOBgQB1W6ibAxHm6VZM
zfmpTMANmvPMZWnmJXbMWbfWVMMdzZmsGd20hdXgPfxiIKeES1hl8eL5lSE/9dR+
WB5Hh1Q+WKG1tfgq73HnvMP2sUlG4tega+VWeponmHxGYhTnyfxuAxJ5gDgdSIKN
/Bf+KpYrtWKmpj29f5JZzVoqgrI3eQ==
-----END CERTIFICATE-----
Equifax Secure eBusiness CA 2
=============================
-----BEGIN CERTIFICATE-----
MIIDIDCCAomgAwIBAgIEN3DPtTANBgkqhkiG9w0BAQUFADBOMQswCQYDVQQGEwJV
UzEXMBUGA1UEChMORXF1aWZheCBTZWN1cmUxJjAkBgNVBAsTHUVxdWlmYXggU2Vj
dXJlIGVCdXNpbmVzcyBDQS0yMB4XDTk5MDYyMzEyMTQ0NVoXDTE5MDYyMzEyMTQ0
NVowTjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDkVxdWlmYXggU2VjdXJlMSYwJAYD
VQQLEx1FcXVpZmF4IFNlY3VyZSBlQnVzaW5lc3MgQ0EtMjCBnzANBgkqhkiG9w0B
AQEFAAOBjQAwgYkCgYEA5Dk5kx5SBhsoNviyoynF7Y6yEb3+6+e0dMKP/wXn2Z0G
vxLIPw7y1tEkshHe0XMJitSxLJgJDR5QRrKDpkWNYmi7hRsgcDKqQM2mll/EcTc/
BPO3QSQ5BxoeLmFYoBIL5aXfxavqN3HMHMg3OrmXUqesxWoklE6ce8/AatbfIb0C
AwEAAaOCAQkwggEFMHAGA1UdHwRpMGcwZaBjoGGkXzBdMQswCQYDVQQGEwJVUzEX
MBUGA1UEChMORXF1aWZheCBTZWN1cmUxJjAkBgNVBAsTHUVxdWlmYXggU2VjdXJl
IGVCdXNpbmVzcyBDQS0yMQ0wCwYDVQQDEwRDUkwxMBoGA1UdEAQTMBGBDzIwMTkw
NjIzMTIxNDQ1WjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAUUJ4L6q9euSBIplBq
y/3YIHqngnYwHQYDVR0OBBYEFFCeC+qvXrkgSKZQasv92CB6p4J2MAwGA1UdEwQF
MAMBAf8wGgYJKoZIhvZ9B0EABA0wCxsFVjMuMGMDAgbAMA0GCSqGSIb3DQEBBQUA
A4GBAAyGgq3oThr1jokn4jVYPSm0B482UJW/bsGe68SQsoWou7dC4A8HOd/7npCy
0cE+U58DRLB+S/Rv5Hwf5+Kx5Lia78O9zt4LMjTZ3ijtM2vE1Nc9ElirfQkty3D1
E4qUoSek1nDFbZS1yX2doNLGCEnZZpum0/QL3MUmV+GRMOrN
-----END CERTIFICATE-----
Thawte Time Stamping CA
=======================
-----BEGIN CERTIFICATE-----
MIICoTCCAgqgAwIBAgIBADANBgkqhkiG9w0BAQQFADCBizELMAkGA1UEBhMCWkEx
FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTEUMBIGA1UEBxMLRHVyYmFudmlsbGUxDzAN
BgNVBAoTBlRoYXd0ZTEdMBsGA1UECxMUVGhhd3RlIENlcnRpZmljYXRpb24xHzAd
BgNVBAMTFlRoYXd0ZSBUaW1lc3RhbXBpbmcgQ0EwHhcNOTcwMTAxMDAwMDAwWhcN
MjAxMjMxMjM1OTU5WjCBizELMAkGA1UEBhMCWkExFTATBgNVBAgTDFdlc3Rlcm4g
Q2FwZTEUMBIGA1UEBxMLRHVyYmFudmlsbGUxDzANBgNVBAoTBlRoYXd0ZTEdMBsG
A1UECxMUVGhhd3RlIENlcnRpZmljYXRpb24xHzAdBgNVBAMTFlRoYXd0ZSBUaW1l
c3RhbXBpbmcgQ0EwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANYrWHhhRYZT
6jR7UZztsOYuGA7+4F+oJ9O0yeB8WU4WDnNUYMF/9p8u6TqFJBU820cEY8OexJQa
Wt9MevPZQx08EHp5JduQ/vBR5zDWQQD9nyjfeb6Uu522FOMjhdepQeBMpHmwKxqL
8vg7ij5FrHGSALSQQZj7X+36ty6K+Ig3AgMBAAGjEzARMA8GA1UdEwEB/wQFMAMB
Af8wDQYJKoZIhvcNAQEEBQADgYEAZ9viwuaHPUCDhjc1fR/OmsMMZiCouqoEiYbC
9RAIDb/LogWK0E02PvTX72nGXuSwlG9KuefeW4i2e9vjJ+V2w/A1wcu1J5szedyQ
pgCed/r8zSeUQhac0xxo7L9c3eWpexAKMnRUEzGLhQOEkbdYATAUOK8oyvyxUBkZ
CayJSdM=
-----END CERTIFICATE-----
thawte Primary Root CA
======================
-----BEGIN CERTIFICATE-----
MIIEIDCCAwigAwIBAgIQNE7VVyDV7exJ9C/ON9srbTANBgkqhkiG9w0BAQUFADCB
qTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjEoMCYGA1UECxMf
Q2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYGA1UECxMvKGMpIDIw
MDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxHzAdBgNV
BAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwHhcNMDYxMTE3MDAwMDAwWhcNMzYw
NzE2MjM1OTU5WjCBqTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5j
LjEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYG
A1UECxMvKGMpIDIwMDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNl
IG9ubHkxHzAdBgNVBAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwggEiMA0GCSqG
SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCsoPD7gFnUnMekz52hWXMJEEUMDSxuaPFs
W0hoSVk3/AszGcJ3f8wQLZU0HObrTQmnHNK4yZc2AreJ1CRfBsDMRJSUjQJib+ta
3RGNKJpchJAQeg29dGYvajig4tVUROsdB58Hum/u6f1OCyn1PoSgAfGcq/gcfomk
6KHYcWUNo1F77rzSImANuVud37r8UVsLr5iy6S7pBOhih94ryNdOwUxkHt3Ph1i6
Sk/KaAcdHJ1KxtUvkcx8cXIcxcBn6zL9yZJclNqFwJu/U30rCfSMnZEfl2pSy94J
NqR32HuHUETVPm4pafs5SSYeCaWAe0At6+gnhcn+Yf1+5nyXHdWdAgMBAAGjQjBA
MA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBR7W0XP
r87Lev0xkhpqtvNG61dIUDANBgkqhkiG9w0BAQUFAAOCAQEAeRHAS7ORtvzw6WfU
DW5FvlXok9LOAz/t2iWwHVfLHjp2oEzsUHboZHIMpKnxuIvW1oeEuzLlQRHAd9mz
YJ3rG9XRbkREqaYB7FViHXe4XI5ISXycO1cRrK1zN44veFyQaEfZYGDm/Ac9IiAX
xPcW6cTYcvnIc3zfFi8VqT79aie2oetaupgf1eNNZAqdE8hhuvU5HIe6uL17In/2
/qxAeeWsEG89jxt5dovEN7MhGITlNgDrYyCZuen+MwS7QcjBAvlEYyCegc5C09Y/
LHbTY5xZ3Y+m4Q6gLkH3LpVHz7z9M/P2C2F+fpErgUfCJzDupxBdN49cOSvkBPB7
jVaMaA==
-----END CERTIFICATE-----
VeriSign Class 3 Public Primary Certification Authority - G5
============================================================
-----BEGIN CERTIFICATE-----
MIIE0zCCA7ugAwIBAgIQGNrRniZ96LtKIVjNzGs7SjANBgkqhkiG9w0BAQUFADCB
yjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL
ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJp
U2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxW
ZXJpU2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0
aG9yaXR5IC0gRzUwHhcNMDYxMTA4MDAwMDAwWhcNMzYwNzE2MjM1OTU5WjCByjEL
MAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZW
ZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJpU2ln
biwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJp
U2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9y
aXR5IC0gRzUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCvJAgIKXo1
nmAMqudLO07cfLw8RRy7K+D+KQL5VwijZIUVJ/XxrcgxiV0i6CqqpkKzj/i5Vbex
t0uz/o9+B1fs70PbZmIVYc9gDaTY3vjgw2IIPVQT60nKWVSFJuUrjxuf6/WhkcIz
SdhDY2pSS9KP6HBRTdGJaXvHcPaz3BJ023tdS1bTlr8Vd6Gw9KIl8q8ckmcY5fQG
BO+QueQA5N06tRn/Arr0PO7gi+s3i+z016zy9vA9r911kTMZHRxAy3QkGSGT2RT+
rCpSx4/VBEnkjWNHiDxpg8v+R70rfk/Fla4OndTRQ8Bnc+MUCH7lP59zuDMKz10/
NIeWiu5T6CUVAgMBAAGjgbIwga8wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8E
BAMCAQYwbQYIKwYBBQUHAQwEYTBfoV2gWzBZMFcwVRYJaW1hZ2UvZ2lmMCEwHzAH
BgUrDgMCGgQUj+XTGoasjY5rw8+AatRIGCx7GS4wJRYjaHR0cDovL2xvZ28udmVy
aXNpZ24uY29tL3ZzbG9nby5naWYwHQYDVR0OBBYEFH/TZafC3ey78DAJ80M5+gKv
MzEzMA0GCSqGSIb3DQEBBQUAA4IBAQCTJEowX2LP2BqYLz3q3JktvXf2pXkiOOzE
p6B4Eq1iDkVwZMXnl2YtmAl+X6/WzChl8gGqCBpH3vn5fJJaCGkgDdk+bW48DW7Y
5gaRQBi5+MHt39tBquCWIMnNZBU4gcmU7qKEKQsTb47bDN0lAtukixlE0kF6BWlK
WE9gyn6CagsCqiUXObXbf+eEZSqVir2G3l6BFoMtEMze/aiCKm0oHw0LxOXnGiYZ
4fQRbxC1lfznQgUy286dUV4otp6F01vvpX1FQHKOtw5rDgb7MzVIcbidJ4vEZV8N
hnacRHr2lVz2XTIIM6RUthg/aFzyQkqFOFSDX9HoLPKsEdao7WNq
-----END CERTIFICATE-----
Entrust.net Secure Server Certification Authority
=================================================
-----BEGIN CERTIFICATE-----
MIIE2DCCBEGgAwIBAgIEN0rSQzANBgkqhkiG9w0BAQUFADCBwzELMAkGA1UEBhMC
VVMxFDASBgNVBAoTC0VudHJ1c3QubmV0MTswOQYDVQQLEzJ3d3cuZW50cnVzdC5u
ZXQvQ1BTIGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxpYWIuKTElMCMGA1UECxMc
KGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDE6MDgGA1UEAxMxRW50cnVzdC5u
ZXQgU2VjdXJlIFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05OTA1
MjUxNjA5NDBaFw0xOTA1MjUxNjM5NDBaMIHDMQswCQYDVQQGEwJVUzEUMBIGA1UE
ChMLRW50cnVzdC5uZXQxOzA5BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5j
b3JwLiBieSByZWYuIChsaW1pdHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBF
bnRydXN0Lm5ldCBMaW1pdGVkMTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUg
U2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGdMA0GCSqGSIb3DQEBAQUA
A4GLADCBhwKBgQDNKIM0VBuJ8w+vN5Ex/68xYMmo6LIQaO2f55M28Qpku0f1BBc/
I0dNxScZgSYMVHINiC3ZH5oSn7yzcdOAGT9HZnuMNSjSuQrfJNqc1lB5gXpa0zf3
wkrYKZImZNHkmGw6AIr1NJtl+O3jEP/9uElY3KDegjlrgbEWGWG5VLbmQwIBA6OC
AdcwggHTMBEGCWCGSAGG+EIBAQQEAwIABzCCARkGA1UdHwSCARAwggEMMIHeoIHb
oIHYpIHVMIHSMQswCQYDVQQGEwJVUzEUMBIGA1UEChMLRW50cnVzdC5uZXQxOzA5
BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5jb3JwLiBieSByZWYuIChsaW1p
dHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBFbnRydXN0Lm5ldCBMaW1pdGVk
MTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUgU2VydmVyIENlcnRpZmljYXRp
b24gQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMCmgJ6AlhiNodHRwOi8vd3d3LmVu
dHJ1c3QubmV0L0NSTC9uZXQxLmNybDArBgNVHRAEJDAigA8xOTk5MDUyNTE2MDk0
MFqBDzIwMTkwNTI1MTYwOTQwWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAU8Bdi
E1U9s/8KAGv7UISX8+1i0BowHQYDVR0OBBYEFPAXYhNVPbP/CgBr+1CEl/PtYtAa
MAwGA1UdEwQFMAMBAf8wGQYJKoZIhvZ9B0EABAwwChsEVjQuMAMCBJAwDQYJKoZI
hvcNAQEFBQADgYEAkNwwAvpkdMKnCqV8IY00F6j7Rw7/JXyNEwr75Ji174z4xRAN
95K+8cPV1ZVqBLssziY2ZcgxxufuP+NXdYR6Ee9GTxj005i7qIcyunL2POI9n9cd
2cNgQ4xYDiKWL2KjLB+6rQXvqzJ4h6BUcxm1XAX5Uj5tLUUL9wqT6u0G+bI=
-----END CERTIFICATE-----
Go Daddy Certification Authority Root Certificate Bundle
========================================================
-----BEGIN CERTIFICATE-----
MIIE3jCCA8agAwIBAgICAwEwDQYJKoZIhvcNAQEFBQAwYzELMAkGA1UEBhMCVVMx
ITAfBgNVBAoTGFRoZSBHbyBEYWRkeSBHcm91cCwgSW5jLjExMC8GA1UECxMoR28g
RGFkZHkgQ2xhc3MgMiBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wNjExMTYw
MTU0MzdaFw0yNjExMTYwMTU0MzdaMIHKMQswCQYDVQQGEwJVUzEQMA4GA1UECBMH
QXJpem9uYTETMBEGA1UEBxMKU2NvdHRzZGFsZTEaMBgGA1UEChMRR29EYWRkeS5j
b20sIEluYy4xMzAxBgNVBAsTKmh0dHA6Ly9jZXJ0aWZpY2F0ZXMuZ29kYWRkeS5j
b20vcmVwb3NpdG9yeTEwMC4GA1UEAxMnR28gRGFkZHkgU2VjdXJlIENlcnRpZmlj
YXRpb24gQXV0aG9yaXR5MREwDwYDVQQFEwgwNzk2OTI4NzCCASIwDQYJKoZIhvcN
AQEBBQADggEPADCCAQoCggEBAMQt1RWMnCZM7DI161+4WQFapmGBWTtwY6vj3D3H
KrjJM9N55DrtPDAjhI6zMBS2sofDPZVUBJ7fmd0LJR4h3mUpfjWoqVTr9vcyOdQm
VZWt7/v+WIbXnvQAjYwqDL1CBM6nPwT27oDyqu9SoWlm2r4arV3aLGbqGmu75RpR
SgAvSMeYddi5Kcju+GZtCpyz8/x4fKL4o/K1w/O5epHBp+YlLpyo7RJlbmr2EkRT
cDCVw5wrWCs9CHRK8r5RsL+H0EwnWGu1NcWdrxcx+AuP7q2BNgWJCJjPOq8lh8BJ
6qf9Z/dFjpfMFDniNoW1fho3/Rb2cRGadDAW/hOUoz+EDU8CAwEAAaOCATIwggEu
MB0GA1UdDgQWBBT9rGEyk2xF1uLuhV+auud2mWjM5zAfBgNVHSMEGDAWgBTSxLDS
kdRMEXGzYcs9of7dqGrU4zASBgNVHRMBAf8ECDAGAQH/AgEAMDMGCCsGAQUFBwEB
BCcwJTAjBggrBgEFBQcwAYYXaHR0cDovL29jc3AuZ29kYWRkeS5jb20wRgYDVR0f
BD8wPTA7oDmgN4Y1aHR0cDovL2NlcnRpZmljYXRlcy5nb2RhZGR5LmNvbS9yZXBv
c2l0b3J5L2dkcm9vdC5jcmwwSwYDVR0gBEQwQjBABgRVHSAAMDgwNgYIKwYBBQUH
AgEWKmh0dHA6Ly9jZXJ0aWZpY2F0ZXMuZ29kYWRkeS5jb20vcmVwb3NpdG9yeTAO
BgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEFBQADggEBANKGwOy9+aG2Z+5mC6IG
OgRQjhVyrEp0lVPLN8tESe8HkGsz2ZbwlFalEzAFPIUyIXvJxwqoJKSQ3kbTJSMU
A2fCENZvD117esyfxVgqwcSeIaha86ykRvOe5GPLL5CkKSkB2XIsKd83ASe8T+5o
0yGPwLPk9Qnt0hCqU7S+8MxZC9Y7lhyVJEnfzuz9p0iRFEUOOjZv2kWzRaJBydTX
RE4+uXR21aITVSzGh6O1mawGhId/dQb8vxRMDsxuxN89txJx9OjxUUAiKEngHUuH
qDTMBqLdElrRhjZkAzVvb3du6/KFUJheqwNTrZEjYx8WnM25sgVjOuH0aBsXBTWV
U+4=
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIE+zCCBGSgAwIBAgICAQ0wDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1Zh
bGlDZXJ0IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIElu
Yy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24g
QXV0aG9yaXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAe
BgkqhkiG9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTA0MDYyOTE3MDYyMFoX
DTI0MDYyOTE3MDYyMFowYzELMAkGA1UEBhMCVVMxITAfBgNVBAoTGFRoZSBHbyBE
YWRkeSBHcm91cCwgSW5jLjExMC8GA1UECxMoR28gRGFkZHkgQ2xhc3MgMiBDZXJ0
aWZpY2F0aW9uIEF1dGhvcml0eTCCASAwDQYJKoZIhvcNAQEBBQADggENADCCAQgC
ggEBAN6d1+pXGEmhW+vXX0iG6r7d/+TvZxz0ZWizV3GgXne77ZtJ6XCAPVYYYwhv
2vLM0D9/AlQiVBDYsoHUwHU9S3/Hd8M+eKsaA7Ugay9qK7HFiH7Eux6wwdhFJ2+q
N1j3hybX2C32qRe3H3I2TqYXP2WYktsqbl2i/ojgC95/5Y0V4evLOtXiEqITLdiO
r18SPaAIBQi2XKVlOARFmR6jYGB0xUGlcmIbYsUfb18aQr4CUWWoriMYavx4A6lN
f4DD+qta/KFApMoZFv6yyO9ecw3ud72a9nmYvLEHZ6IVDd2gWMZEewo+YihfukEH
U1jPEX44dMX4/7VpkI+EdOqXG68CAQOjggHhMIIB3TAdBgNVHQ4EFgQU0sSw0pHU
TBFxs2HLPaH+3ahq1OMwgdIGA1UdIwSByjCBx6GBwaSBvjCBuzEkMCIGA1UEBxMb
VmFsaUNlcnQgVmFsaWRhdGlvbiBOZXR3b3JrMRcwFQYDVQQKEw5WYWxpQ2VydCwg
SW5jLjE1MDMGA1UECxMsVmFsaUNlcnQgQ2xhc3MgMiBQb2xpY3kgVmFsaWRhdGlv
biBBdXRob3JpdHkxITAfBgNVBAMTGGh0dHA6Ly93d3cudmFsaWNlcnQuY29tLzEg
MB4GCSqGSIb3DQEJARYRaW5mb0B2YWxpY2VydC5jb22CAQEwDwYDVR0TAQH/BAUw
AwEB/zAzBggrBgEFBQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmdv
ZGFkZHkuY29tMEQGA1UdHwQ9MDswOaA3oDWGM2h0dHA6Ly9jZXJ0aWZpY2F0ZXMu
Z29kYWRkeS5jb20vcmVwb3NpdG9yeS9yb290LmNybDBLBgNVHSAERDBCMEAGBFUd
IAAwODA2BggrBgEFBQcCARYqaHR0cDovL2NlcnRpZmljYXRlcy5nb2RhZGR5LmNv
bS9yZXBvc2l0b3J5MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQUFAAOBgQC1
QPmnHfbq/qQaQlpE9xXUhUaJwL6e4+PrxeNYiY+Sn1eocSxI0YGyeR+sBjUZsE4O
WBsUs5iB0QQeyAfJg594RAoYC5jcdnplDQ1tgMQLARzLrUc+cb53S8wGd9D0Vmsf
SxOaFIqII6hR8INMqzW/Rn453HWkrugp++85j09VZw==
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0
IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz
BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y
aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG
9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMTk1NFoXDTE5MDYy
NjAwMTk1NFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y
azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs
YXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw
Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl
cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOOnHK5avIWZJV16vY
dA757tn2VUdZZUcOBVXc65g2PFxTXdMwzzjsvUGJ7SVCCSRrCl6zfN1SLUzm1NZ9
WlmpZdRJEy0kTRxQb7XBhVQ7/nHk01xC+YDgkRoKWzk2Z/M/VXwbP7RfZHM047QS
v4dk+NoS/zcnwbNDu+97bi5p9wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBADt/UG9v
UJSZSWI4OB9L+KXIPqeCgfYrx+jFzug6EILLGACOTb2oWH+heQC1u+mNr0HZDzTu
IYEZoDJJKPTEjlbVUjP9UNV+mWwD5MlM/Mtsq2azSiGM5bUMMj4QssxsodyamEwC
W/POuZ6lcg5Ktz885hZo+L7tdEy8W9ViH0Pd
-----END CERTIFICATE-----

View file

@@ -1,110 +0,0 @@
"""
iri2uri
Converts an IRI to a URI.
"""
__author__ = "Joe Gregorio (joe@bitworking.org)"
__copyright__ = "Copyright 2006, Joe Gregorio"
__contributors__ = []
__version__ = "1.0.0"
__license__ = "MIT"
__history__ = """
"""
import urlparse
# Convert an IRI to a URI following the rules in RFC 3987
#
# The characters we need to encode and escape are defined in the spec:
#
# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
# / %xD0000-DFFFD / %xE1000-EFFFD
escape_range = [
(0xA0, 0xD7FF),
(0xE000, 0xF8FF),
(0xF900, 0xFDCF),
(0xFDF0, 0xFFEF),
(0x10000, 0x1FFFD),
(0x20000, 0x2FFFD),
(0x30000, 0x3FFFD),
(0x40000, 0x4FFFD),
(0x50000, 0x5FFFD),
(0x60000, 0x6FFFD),
(0x70000, 0x7FFFD),
(0x80000, 0x8FFFD),
(0x90000, 0x9FFFD),
(0xA0000, 0xAFFFD),
(0xB0000, 0xBFFFD),
(0xC0000, 0xCFFFD),
(0xD0000, 0xDFFFD),
(0xE1000, 0xEFFFD),
(0xF0000, 0xFFFFD),
(0x100000, 0x10FFFD),
]
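# Worked example (illustrative): u"\N{COMET}" is U+2604, which falls inside
# the first ucschar range (0xA0, 0xD7FF) above, so encode() below will
# percent-escape its UTF-8 bytes: encode(u"\N{COMET}") == "%E2%98%84".
# Plain ASCII like u"a" (0x61) sits below 0xA0 and is returned unchanged.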
def encode(c):
retval = c
i = ord(c)
for low, high in escape_range:
if i < low:
break
if i >= low and i <= high:
retval = "".join(["%%%2X" % ord(o) for o in c.encode('utf-8')])
break
return retval
def iri2uri(uri):
"""Convert an IRI to a URI. Note that IRIs must be
passed in a unicode strings. That is, do not utf-8 encode
the IRI before passing it into the function."""
if isinstance(uri ,unicode):
(scheme, authority, path, query, fragment) = urlparse.urlsplit(uri)
authority = authority.encode('idna')
# For each character in 'ucschar' or 'iprivate'
# 1. encode as utf-8
# 2. then %-encode each octet of that utf-8
uri = urlparse.urlunsplit((scheme, authority, path, query, fragment))
uri = "".join([encode(c) for c in uri])
return uri
if __name__ == "__main__":
import unittest
class Test(unittest.TestCase):
def test_uris(self):
"""Test that URIs are invariant under the transformation."""
invariant = [
u"ftp://ftp.is.co.za/rfc/rfc1808.txt",
u"http://www.ietf.org/rfc/rfc2396.txt",
u"ldap://[2001:db8::7]/c=GB?objectClass?one",
u"mailto:John.Doe@example.com",
u"news:comp.infosystems.www.servers.unix",
u"tel:+1-816-555-1212",
u"telnet://192.0.2.16:80/",
u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ]
for uri in invariant:
self.assertEqual(uri, iri2uri(uri))
def test_iri(self):
""" Test that the right type of escaping is done for each part of the URI."""
self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}"))
self.assertEqual("http://bitworking.org/?fred=%E2%98%84", iri2uri(u"http://bitworking.org/?fred=\N{COMET}"))
self.assertEqual("http://bitworking.org/#%E2%98%84", iri2uri(u"http://bitworking.org/#\N{COMET}"))
self.assertEqual("#%E2%98%84", iri2uri(u"#\N{COMET}"))
self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}"))
self.assertEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}")))
self.assertNotEqual("/fred?bar=%E2%98%9A#%E2%98%84", iri2uri(u"/fred?bar=\N{BLACK LEFT POINTING INDEX}#\N{COMET}".encode('utf-8')))
unittest.main()

View file

@@ -1,438 +0,0 @@
"""SocksiPy - Python SOCKS module.
Version 1.00
Copyright 2006 Dan-Haim. All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of Dan Haim nor the names of his contributors may be used
to endorse or promote products derived from this software without specific
prior written permission.
THIS SOFTWARE IS PROVIDED BY DAN HAIM "AS IS" AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL DAN HAIM OR HIS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This module provides a standard socket-like interface for Python
for tunneling connections through SOCKS proxies.
"""
"""
Minor modifications made by Christopher Gilbert (http://motomastyle.com/)
for use in PyLoris (http://pyloris.sourceforge.net/)
Minor modifications made by Mario Vilas (http://breakingcode.wordpress.com/)
mainly to merge bug fixes found in Sourceforge
"""
import base64
import socket
import struct
import sys
if getattr(socket, 'socket', None) is None:
raise ImportError('socket.socket missing, proxy support unusable')
PROXY_TYPE_SOCKS4 = 1
PROXY_TYPE_SOCKS5 = 2
PROXY_TYPE_HTTP = 3
PROXY_TYPE_HTTP_NO_TUNNEL = 4
_defaultproxy = None
_orgsocket = socket.socket
class ProxyError(Exception): pass
class GeneralProxyError(ProxyError): pass
class Socks5AuthError(ProxyError): pass
class Socks5Error(ProxyError): pass
class Socks4Error(ProxyError): pass
class HTTPError(ProxyError): pass
_generalerrors = ("success",
"invalid data",
"not connected",
"not available",
"bad proxy type",
"bad input")
_socks5errors = ("succeeded",
"general SOCKS server failure",
"connection not allowed by ruleset",
"Network unreachable",
"Host unreachable",
"Connection refused",
"TTL expired",
"Command not supported",
"Address type not supported",
"Unknown error")
_socks5autherrors = ("succeeded",
"authentication is required",
"all offered authentication methods were rejected",
"unknown username or invalid password",
"unknown error")
_socks4errors = ("request granted",
"request rejected or failed",
"request rejected because SOCKS server cannot connect to identd on the client",
"request rejected because the client program and identd report different user-ids",
"unknown error")
def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None):
"""setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
Sets a default proxy which all further socksocket objects will use,
unless explicitly changed.
"""
global _defaultproxy
_defaultproxy = (proxytype, addr, port, rdns, username, password)
def wrapmodule(module):
"""wrapmodule(module)
Attempts to replace a module's socket library with a SOCKS socket. Must set
a default proxy using setdefaultproxy(...) first.
This will only work on modules that import socket directly into the namespace;
most of the Python Standard Library falls into this category.
"""
if _defaultproxy != None:
module.socket.socket = socksocket
else:
raise GeneralProxyError((4, "no proxy specified"))
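# Usage sketch (illustrative, not part of the original module): route all of
# a module's traffic through a SOCKS5 proxy. The proxy address below is a
# placeholder.
#
#   import urllib2
#   setdefaultproxy(PROXY_TYPE_SOCKS5, "127.0.0.1", 1080)
#   wrapmodule(urllib2)
#   urllib2.urlopen("http://www.example.com/")  # tunneled through the proxy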
class socksocket(socket.socket):
"""socksocket([family[, type[, proto]]]) -> socket object
Open a SOCKS enabled socket. The parameters are the same as
those of the standard socket init. In order for SOCKS to work,
you must specify family=AF_INET, type=SOCK_STREAM and proto=0.
"""
def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None):
_orgsocket.__init__(self, family, type, proto, _sock)
if _defaultproxy != None:
self.__proxy = _defaultproxy
else:
self.__proxy = (None, None, None, None, None, None)
self.__proxysockname = None
self.__proxypeername = None
self.__httptunnel = True
def __recvall(self, count):
"""__recvall(count) -> data
Receive EXACTLY the number of bytes requested from the socket.
Blocks until the required number of bytes have been received.
"""
data = self.recv(count)
while len(data) < count:
d = self.recv(count-len(data))
if not d: raise GeneralProxyError((0, "connection closed unexpectedly"))
data = data + d
return data
def sendall(self, content, *args):
""" override socket.socket.sendall method to rewrite the header
for non-tunneling proxies if needed
"""
if not self.__httptunnel:
content = self.__rewriteproxy(content)
return super(socksocket, self).sendall(content, *args)
def __rewriteproxy(self, header):
""" rewrite HTTP request headers to support non-tunneling proxies
(i.e. those which do not support the CONNECT method).
This only works for HTTP (not HTTPS) since HTTPS requires tunneling.
"""
host, endpt = None, None
hdrs = header.split("\r\n")
for hdr in hdrs:
if hdr.lower().startswith("host:"):
host = hdr
elif hdr.lower().startswith("get") or hdr.lower().startswith("post"):
endpt = hdr
if host and endpt:
hdrs.remove(host)
hdrs.remove(endpt)
host = host.split(" ")[1]
endpt = endpt.split(" ")
if (self.__proxy[4] != None and self.__proxy[5] != None):
hdrs.insert(0, self.__getauthheader())
hdrs.insert(0, "Host: %s" % host)
hdrs.insert(0, "%s http://%s%s %s" % (endpt[0], host, endpt[1], endpt[2]))
return "\r\n".join(hdrs)
def __getauthheader(self):
auth = self.__proxy[4] + ":" + self.__proxy[5]
return "Proxy-Authorization: Basic " + base64.b64encode(auth)
def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None):
"""setproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
Sets the proxy to be used.
proxytype - The type of the proxy to be used. Three types
are supported: PROXY_TYPE_SOCKS4 (including socks4a),
PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP
addr - The address of the server (IP or DNS).
port - The port of the server. Defaults to 1080 for SOCKS
servers and 8080 for HTTP proxy servers.
rdns - Should DNS queries be performed on the remote side
(rather than the local side). The default is True.
Note: This has no effect with SOCKS4 servers.
username - Username to authenticate with to the server.
The default is no authentication.
password - Password to authenticate with to the server.
Only relevant when username is also provided.
"""
self.__proxy = (proxytype, addr, port, rdns, username, password)
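# Usage sketch for a single socket (illustrative; the proxy host below is a
# placeholder):
#
#   s = socksocket()
#   s.setproxy(PROXY_TYPE_SOCKS5, "proxy.example.com", 1080)
#   s.connect(("www.example.com", 80))
#   s.sendall("GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n")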
def __negotiatesocks5(self, destaddr, destport):
"""__negotiatesocks5(self,destaddr,destport)
Negotiates a connection through a SOCKS5 server.
"""
# First we'll send the list of authentication methods we support.
if (self.__proxy[4]!=None) and (self.__proxy[5]!=None):
# The username/password details were supplied to the
# setproxy method so we support the USERNAME/PASSWORD
# authentication (in addition to the standard none).
self.sendall(struct.pack('BBBB', 0x05, 0x02, 0x00, 0x02))
else:
# No username/password were entered, therefore we
# only support connections with no authentication.
self.sendall(struct.pack('BBB', 0x05, 0x01, 0x00))
# We'll receive the server's response to determine which
# method was selected
chosenauth = self.__recvall(2)
if chosenauth[0:1] != chr(0x05).encode():
self.close()
raise GeneralProxyError((1, _generalerrors[1]))
# Check the chosen authentication method
if chosenauth[1:2] == chr(0x00).encode():
# No authentication is required
pass
elif chosenauth[1:2] == chr(0x02).encode():
# Okay, we need to perform a basic username/password
# authentication.
self.sendall(chr(0x01).encode() + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5])
authstat = self.__recvall(2)
if authstat[0:1] != chr(0x01).encode():
# Bad response
self.close()
raise GeneralProxyError((1, _generalerrors[1]))
if authstat[1:2] != chr(0x00).encode():
# Authentication failed
self.close()
raise Socks5AuthError((3, _socks5autherrors[3]))
# Authentication succeeded
else:
# Reaching here is always bad
self.close()
if chosenauth[1] == chr(0xFF).encode():
raise Socks5AuthError((2, _socks5autherrors[2]))
else:
raise GeneralProxyError((1, _generalerrors[1]))
# Now we can request the actual connection
req = struct.pack('BBB', 0x05, 0x01, 0x00)
# If the given destination address is an IP address, we'll
# use the IPv4 address request even if remote resolving was specified.
try:
ipaddr = socket.inet_aton(destaddr)
req = req + chr(0x01).encode() + ipaddr
except socket.error:
# Well it's not an IP number, so it's probably a DNS name.
if self.__proxy[3]:
# Resolve remotely
ipaddr = None
req = req + chr(0x03).encode() + chr(len(destaddr)).encode() + destaddr
else:
# Resolve locally
ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
req = req + chr(0x01).encode() + ipaddr
req = req + struct.pack(">H", destport)
self.sendall(req)
# Get the response
resp = self.__recvall(4)
if resp[0:1] != chr(0x05).encode():
self.close()
raise GeneralProxyError((1, _generalerrors[1]))
elif resp[1:2] != chr(0x00).encode():
# Connection failed
self.close()
if ord(resp[1:2])<=8:
raise Socks5Error((ord(resp[1:2]), _socks5errors[ord(resp[1:2])]))
else:
raise Socks5Error((9, _socks5errors[9]))
# Get the bound address/port
elif resp[3:4] == chr(0x01).encode():
boundaddr = self.__recvall(4)
elif resp[3:4] == chr(0x03).encode():
resp = resp + self.recv(1)
boundaddr = self.__recvall(ord(resp[4:5]))
else:
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
boundport = struct.unpack(">H", self.__recvall(2))[0]
self.__proxysockname = (boundaddr, boundport)
if ipaddr != None:
self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)
else:
self.__proxypeername = (destaddr, destport)
def getproxysockname(self):
"""getsockname() -> address info
Returns the bound IP address and port number at the proxy.
"""
return self.__proxysockname
def getproxypeername(self):
"""getproxypeername() -> address info
Returns the IP and port number of the proxy.
"""
return _orgsocket.getpeername(self)
def getpeername(self):
"""getpeername() -> address info
Returns the IP address and port number of the destination
machine (note: getproxypeername returns the proxy)
"""
return self.__proxypeername
def __negotiatesocks4(self,destaddr,destport):
"""__negotiatesocks4(self,destaddr,destport)
Negotiates a connection through a SOCKS4 server.
"""
# Check if the destination address provided is an IP address
rmtrslv = False
try:
ipaddr = socket.inet_aton(destaddr)
except socket.error:
# It's a DNS name. Check where it should be resolved.
if self.__proxy[3]:
ipaddr = struct.pack("BBBB", 0x00, 0x00, 0x00, 0x01)
rmtrslv = True
else:
ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
# Construct the request packet
req = struct.pack(">BBH", 0x04, 0x01, destport) + ipaddr
# The username parameter is considered userid for SOCKS4
if self.__proxy[4] != None:
req = req + self.__proxy[4]
req = req + chr(0x00).encode()
# DNS name if remote resolving is required
# NOTE: This is actually an extension to the SOCKS4 protocol
# called SOCKS4A and may not be supported in all cases.
if rmtrslv:
req = req + destaddr + chr(0x00).encode()
self.sendall(req)
# Get the response from the server
resp = self.__recvall(8)
if resp[0:1] != chr(0x00).encode():
# Bad data
self.close()
raise GeneralProxyError((1,_generalerrors[1]))
if resp[1:2] != chr(0x5A).encode():
# Server returned an error
self.close()
if ord(resp[1:2]) in (91, 92, 93):
self.close()
raise Socks4Error((ord(resp[1:2]), _socks4errors[ord(resp[1:2]) - 90]))
else:
raise Socks4Error((94, _socks4errors[4]))
# Get the bound address/port
self.__proxysockname = (socket.inet_ntoa(resp[4:]), struct.unpack(">H", resp[2:4])[0])
# Report the numeric address only if it was resolved locally; with remote
# resolving, ipaddr is just the 0.0.0.1 placeholder.
if not rmtrslv:
self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)
else:
self.__proxypeername = (destaddr, destport)
def __negotiatehttp(self, destaddr, destport):
"""__negotiatehttp(self,destaddr,destport)
Negotiates a connection through an HTTP server.
"""
# If we need to resolve locally, we do this now
if not self.__proxy[3]:
addr = socket.gethostbyname(destaddr)
else:
addr = destaddr
headers = ["CONNECT ", addr, ":", str(destport), " HTTP/1.1\r\n"]
headers += ["Host: ", destaddr, "\r\n"]
if (self.__proxy[4] != None and self.__proxy[5] != None):
headers += [self.__getauthheader(), "\r\n"]
headers.append("\r\n")
self.sendall("".join(headers).encode())
# We read the response until we get the string "\r\n\r\n"
resp = self.recv(1)
while resp.find("\r\n\r\n".encode()) == -1:
resp = resp + self.recv(1)
# We just need the first line to check if the connection
# was successful
statusline = resp.splitlines()[0].split(" ".encode(), 2)
if statusline[0] not in ("HTTP/1.0".encode(), "HTTP/1.1".encode()):
self.close()
raise GeneralProxyError((1, _generalerrors[1]))
try:
statuscode = int(statusline[1])
except ValueError:
self.close()
raise GeneralProxyError((1, _generalerrors[1]))
if statuscode != 200:
self.close()
raise HTTPError((statuscode, statusline[2]))
self.__proxysockname = ("0.0.0.0", 0)
self.__proxypeername = (addr, destport)
def connect(self, destpair):
"""connect(self, despair)
Connects to the specified destination through a proxy.
destpar - A tuple of the IP/DNS address and the port number.
(identical to socket's connect).
To select the proxy server use setproxy().
"""
# Do a minimal input check first
if (not type(destpair) in (list,tuple)) or (len(destpair) < 2) or (not isinstance(destpair[0], basestring)) or (type(destpair[1]) != int):
raise GeneralProxyError((5, _generalerrors[5]))
if self.__proxy[0] == PROXY_TYPE_SOCKS5:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 1080
_orgsocket.connect(self, (self.__proxy[1], portnum))
self.__negotiatesocks5(destpair[0], destpair[1])
elif self.__proxy[0] == PROXY_TYPE_SOCKS4:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 1080
_orgsocket.connect(self,(self.__proxy[1], portnum))
self.__negotiatesocks4(destpair[0], destpair[1])
elif self.__proxy[0] == PROXY_TYPE_HTTP:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 8080
_orgsocket.connect(self,(self.__proxy[1], portnum))
self.__negotiatehttp(destpair[0], destpair[1])
elif self.__proxy[0] == PROXY_TYPE_HTTP_NO_TUNNEL:
if self.__proxy[2] != None:
portnum = self.__proxy[2]
else:
portnum = 8080
_orgsocket.connect(self,(self.__proxy[1],portnum))
if destpair[1] == 443:
self.__negotiatehttp(destpair[0],destpair[1])
else:
self.__httptunnel = False
elif self.__proxy[0] == None:
_orgsocket.connect(self, (destpair[0], destpair[1]))
else:
raise GeneralProxyError((4, _generalerrors[4]))

View file

@@ -1,860 +0,0 @@
"""
The MIT License
Copyright (c) 2007-2010 Leah Culver, Joe Stump, Mark Paschal, Vic Fryzel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import base64
import urllib
import time
import random
import urlparse
import hmac
import binascii
import httplib2
try:
from urlparse import parse_qs
parse_qs # placate pyflakes
except ImportError:
# fall back for Python 2.5
from cgi import parse_qs
try:
from hashlib import sha1
sha = sha1
except ImportError:
# hashlib was added in Python 2.5
import sha
import _version
__version__ = _version.__version__
OAUTH_VERSION = '1.0' # Hi Blaine!
HTTP_METHOD = 'GET'
SIGNATURE_METHOD = 'PLAINTEXT'
class Error(RuntimeError):
"""Generic exception class."""
def __init__(self, message='OAuth error occurred.'):
self._message = message
@property
def message(self):
"""A hack to get around the deprecation errors in 2.6."""
return self._message
def __str__(self):
return self._message
class MissingSignature(Error):
pass
def build_authenticate_header(realm=''):
"""Optional WWW-Authenticate header (401 error)"""
return {'WWW-Authenticate': 'OAuth realm="%s"' % realm}
def build_xoauth_string(url, consumer, token=None):
"""Build an XOAUTH string for use in SMTP/IMPA authentication."""
request = Request.from_consumer_and_token(consumer, token,
"GET", url)
signing_method = SignatureMethod_HMAC_SHA1()
request.sign_request(signing_method, consumer, token)
params = []
for k, v in sorted(request.iteritems()):
if v is not None:
params.append('%s="%s"' % (k, escape(v)))
return "%s %s %s" % ("GET", url, ','.join(params))
def to_unicode(s):
""" Convert to unicode, raise exception with instructive error
message if s is not unicode, ascii, or utf-8. """
if not isinstance(s, unicode):
if not isinstance(s, str):
raise TypeError('You are required to pass either unicode or string here, not: %r (%s)' % (type(s), s))
try:
s = s.decode('utf-8')
except UnicodeDecodeError, le:
raise TypeError('You are required to pass either a unicode object or a utf-8 string here. You passed a Python string object which contained non-utf-8: %r. The UnicodeDecodeError that resulted from attempting to interpret it as utf-8 was: %s' % (s, le,))
return s
def to_utf8(s):
return to_unicode(s).encode('utf-8')
def to_unicode_if_string(s):
if isinstance(s, basestring):
return to_unicode(s)
else:
return s
def to_utf8_if_string(s):
if isinstance(s, basestring):
return to_utf8(s)
else:
return s
def to_unicode_optional_iterator(x):
"""
Raise TypeError if x is a str containing non-utf8 bytes or if x is
an iterable which contains such a str.
"""
if isinstance(x, basestring):
return to_unicode(x)
try:
l = list(x)
except TypeError, e:
assert 'is not iterable' in str(e)
return x
else:
return [ to_unicode(e) for e in l ]
def to_utf8_optional_iterator(x):
"""
Raise TypeError if x is a str or if x is an iterable which
contains a str.
"""
if isinstance(x, basestring):
return to_utf8(x)
try:
l = list(x)
except TypeError, e:
assert 'is not iterable' in str(e)
return x
else:
return [ to_utf8_if_string(e) for e in l ]
def escape(s):
"""Escape a URL including any /."""
return urllib.quote(s.encode('utf-8'), safe='~')
def generate_timestamp():
"""Get seconds since epoch (UTC)."""
return int(time.time())
def generate_nonce(length=8):
"""Generate pseudorandom number."""
return ''.join([str(random.randint(0, 9)) for i in range(length)])
def generate_verifier(length=8):
"""Generate pseudorandom number."""
return ''.join([str(random.randint(0, 9)) for i in range(length)])
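
As a quick illustration of these helpers: escape() percent-encodes
everything except '~' (including '/' and spaces, per the OAuth spec),
while the nonce and verifier generators return strings of random digits.
The outputs below are examples, not fixed values.

print(escape(u'a b/c~'))   # -> 'a%20b%2Fc~'
print(generate_nonce())    # e.g. '73914602' (eight random digits)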
class Consumer(object):
"""A consumer of OAuth-protected services.
The OAuth consumer is a "third-party" service that wants to access
protected resources from an OAuth service provider on behalf of an end
user. It's kind of the OAuth client.
Usually a consumer must be registered with the service provider by the
developer of the consumer software. As part of that process, the service
provider gives the consumer a *key* and a *secret* with which the consumer
software can identify itself to the service. The consumer will include its
key in each request to identify itself, but will use its secret only when
signing requests, to prove that the request is from that particular
registered consumer.
Once registered, the consumer can then use its consumer credentials to ask
the service provider for a request token, kicking off the OAuth
authorization process.
"""
key = None
secret = None
def __init__(self, key, secret):
self.key = key
self.secret = secret
if self.key is None or self.secret is None:
raise ValueError("Key and secret must be set.")
def __str__(self):
data = {'oauth_consumer_key': self.key,
'oauth_consumer_secret': self.secret}
return urllib.urlencode(data)
class Token(object):
"""An OAuth credential used to request authorization or a protected
resource.
Tokens in OAuth comprise a *key* and a *secret*. The key is included in
requests to identify the token being used, but the secret is used only in
the signature, to prove that the requester is who the server gave the
token to.
When first negotiating the authorization, the consumer asks for a *request
token* that the live user authorizes with the service provider. The
consumer then exchanges the request token for an *access token* that can
be used to access protected resources.
"""
key = None
secret = None
callback = None
callback_confirmed = None
verifier = None
def __init__(self, key, secret):
self.key = key
self.secret = secret
if self.key is None or self.secret is None:
raise ValueError("Key and secret must be set.")
def set_callback(self, callback):
self.callback = callback
self.callback_confirmed = 'true'
def set_verifier(self, verifier=None):
if verifier is not None:
self.verifier = verifier
else:
self.verifier = generate_verifier()
def get_callback_url(self):
if self.callback and self.verifier:
# Append the oauth_verifier.
parts = urlparse.urlparse(self.callback)
scheme, netloc, path, params, query, fragment = parts[:6]
if query:
query = '%s&oauth_verifier=%s' % (query, self.verifier)
else:
query = 'oauth_verifier=%s' % self.verifier
return urlparse.urlunparse((scheme, netloc, path, params,
query, fragment))
return self.callback
def to_string(self):
"""Returns this token as a plain string, suitable for storage.
The resulting string includes the token's secret, so you should never
send or store this string where a third party can read it.
"""
data = {
'oauth_token': self.key,
'oauth_token_secret': self.secret,
}
if self.callback_confirmed is not None:
data['oauth_callback_confirmed'] = self.callback_confirmed
return urllib.urlencode(data)
@staticmethod
def from_string(s):
"""Deserializes a token from a string like one returned by
`to_string()`."""
if not len(s):
raise ValueError("Invalid parameter string.")
params = parse_qs(s, keep_blank_values=False)
if not len(params):
raise ValueError("Invalid parameter string.")
try:
key = params['oauth_token'][0]
except Exception:
raise ValueError("'oauth_token' not found in OAuth request.")
try:
secret = params['oauth_token_secret'][0]
except Exception:
raise ValueError("'oauth_token_secret' not found in "
"OAuth request.")
token = Token(key, secret)
try:
token.callback_confirmed = params['oauth_callback_confirmed'][0]
except KeyError:
pass # 1.0, no callback confirmed.
return token
def __str__(self):
return self.to_string()
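
A short sketch of these two credential classes in use (all key and secret
values are placeholders): a Token serializes to form-encoded data with
to_string() and can be rebuilt from that string with from_string().

consumer = Consumer(key='my-app-key', secret='my-app-secret')
token = Token(key='request-token', secret='request-secret')
serialized = token.to_string()   # 'oauth_token_secret=...&oauth_token=...'
restored = Token.from_string(serialized)
assert restored.key == token.key and restored.secret == token.secret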
def setter(attr):
name = attr.__name__
def getter(self):
try:
return self.__dict__[name]
except KeyError:
raise AttributeError(name)
def deleter(self):
del self.__dict__[name]
return property(getter, attr, deleter)
class Request(dict):
"""The parameters and information for an HTTP request, suitable for
authorizing with OAuth credentials.
When a consumer wants to access a service's protected resources, it does
so using a signed HTTP request identifying itself (the consumer) with its
key, and providing an access token authorized by the end user to access
those resources.
"""
version = OAUTH_VERSION
def __init__(self, method=HTTP_METHOD, url=None, parameters=None,
body='', is_form_encoded=False):
if url is not None:
self.url = to_unicode(url)
self.method = method
if parameters is not None:
for k, v in parameters.iteritems():
k = to_unicode(k)
v = to_unicode_optional_iterator(v)
self[k] = v
self.body = body
self.is_form_encoded = is_form_encoded
@setter
def url(self, value):
self.__dict__['url'] = value
if value is not None:
scheme, netloc, path, params, query, fragment = urlparse.urlparse(value)
# Exclude default port numbers.
if scheme == 'http' and netloc[-3:] == ':80':
netloc = netloc[:-3]
elif scheme == 'https' and netloc[-4:] == ':443':
netloc = netloc[:-4]
if scheme not in ('http', 'https'):
raise ValueError("Unsupported URL %s (%s)." % (value, scheme))
# Normalized URL excludes params, query, and fragment.
self.normalized_url = urlparse.urlunparse((scheme, netloc, path, None, None, None))
else:
self.normalized_url = None
self.__dict__['url'] = None
@setter
def method(self, value):
self.__dict__['method'] = value.upper()
def _get_timestamp_nonce(self):
return self['oauth_timestamp'], self['oauth_nonce']
def get_nonoauth_parameters(self):
"""Get any non-OAuth parameters."""
return dict([(k, v) for k, v in self.iteritems()
if not k.startswith('oauth_')])
def to_header(self, realm=''):
"""Serialize as a header for an HTTPAuth request."""
oauth_params = ((k, v) for k, v in self.items()
if k.startswith('oauth_'))
stringy_params = ((k, escape(str(v))) for k, v in oauth_params)
header_params = ('%s="%s"' % (k, v) for k, v in stringy_params)
params_header = ', '.join(header_params)
auth_header = 'OAuth realm="%s"' % realm
if params_header:
auth_header = "%s, %s" % (auth_header, params_header)
return {'Authorization': auth_header}
def to_postdata(self):
"""Serialize as post data for a POST request."""
d = {}
for k, v in self.iteritems():
d[k.encode('utf-8')] = to_utf8_optional_iterator(v)
# tell urlencode to deal with sequence values and map them correctly
# to resulting querystring. for example self["k"] = ["v1", "v2"] will
# result in 'k=v1&k=v2' and not k=%5B%27v1%27%2C+%27v2%27%5D
return urllib.urlencode(d, True).replace('+', '%20')
def to_url(self):
"""Serialize as a URL for a GET request."""
base_url = urlparse.urlparse(self.url)
try:
query = base_url.query
except AttributeError:
# must be python <2.5
query = base_url[4]
query = parse_qs(query)
for k, v in self.items():
query.setdefault(k, []).append(v)
try:
scheme = base_url.scheme
netloc = base_url.netloc
path = base_url.path
params = base_url.params
fragment = base_url.fragment
except AttributeError:
# must be python <2.5
scheme = base_url[0]
netloc = base_url[1]
path = base_url[2]
params = base_url[3]
fragment = base_url[5]
url = (scheme, netloc, path, params,
urllib.urlencode(query, True), fragment)
return urlparse.urlunparse(url)
def get_parameter(self, parameter):
ret = self.get(parameter)
if ret is None:
raise Error('Parameter not found: %s' % parameter)
return ret
def get_normalized_parameters(self):
"""Return a string that contains the parameters that must be signed."""
items = []
for key, value in self.iteritems():
if key == 'oauth_signature':
continue
# 1.0a/9.1.1 states that kvp must be sorted by key, then by value,
# so we unpack sequence values into multiple items for sorting.
if isinstance(value, basestring):
items.append((to_utf8_if_string(key), to_utf8(value)))
else:
try:
value = list(value)
except TypeError, e:
assert 'is not iterable' in str(e)
items.append((to_utf8_if_string(key), to_utf8_if_string(value)))
else:
items.extend((to_utf8_if_string(key), to_utf8_if_string(item)) for item in value)
# Include any query string parameters from the provided URL
query = urlparse.urlparse(self.url)[4]
url_items = self._split_url_string(query).items()
url_items = [(to_utf8(k), to_utf8(v)) for k, v in url_items if k != 'oauth_signature' ]
items.extend(url_items)
items.sort()
encoded_str = urllib.urlencode(items)
# Encode signature parameters per OAuth Core 1.0 protocol
# spec draft 7, section 3.6
# (http://tools.ietf.org/html/draft-hammer-oauth-07#section-3.6)
# Spaces must be encoded with "%20" instead of "+"
return encoded_str.replace('+', '%20').replace('%7E', '~')
def sign_request(self, signature_method, consumer, token):
"""Set the signature parameter to the result of sign."""
if not self.is_form_encoded:
# according to
# http://oauth.googlecode.com/svn/spec/ext/body_hash/1.0/oauth-bodyhash.html
# section 4.1.1 "OAuth Consumers MUST NOT include an
# oauth_body_hash parameter on requests with form-encoded
# request bodies."
self['oauth_body_hash'] = base64.b64encode(sha(self.body).digest())
if 'oauth_consumer_key' not in self:
self['oauth_consumer_key'] = consumer.key
if token and 'oauth_token' not in self:
self['oauth_token'] = token.key
self['oauth_signature_method'] = signature_method.name
self['oauth_signature'] = signature_method.sign(self, consumer, token)
@classmethod
def make_timestamp(cls):
"""Get seconds since epoch (UTC)."""
return str(int(time.time()))
@classmethod
def make_nonce(cls):
"""Generate pseudorandom number."""
return str(random.randint(0, 100000000))
@classmethod
def from_request(cls, http_method, http_url, headers=None, parameters=None,
query_string=None):
"""Combines multiple parameter sources."""
if parameters is None:
parameters = {}
# Headers
if headers and 'Authorization' in headers:
auth_header = headers['Authorization']
# Check that the authorization header is OAuth.
if auth_header[:6] == 'OAuth ':
auth_header = auth_header[6:]
try:
# Get the parameters from the header.
header_params = cls._split_header(auth_header)
parameters.update(header_params)
except:
raise Error('Unable to parse OAuth parameters from '
'Authorization header.')
# GET or POST query string.
if query_string:
query_params = cls._split_url_string(query_string)
parameters.update(query_params)
# URL parameters.
param_str = urlparse.urlparse(http_url)[4] # query
url_params = cls._split_url_string(param_str)
parameters.update(url_params)
if parameters:
return cls(http_method, http_url, parameters)
return None
@classmethod
def from_consumer_and_token(cls, consumer, token=None,
http_method=HTTP_METHOD, http_url=None, parameters=None,
body='', is_form_encoded=False):
if not parameters:
parameters = {}
defaults = {
'oauth_consumer_key': consumer.key,
'oauth_timestamp': cls.make_timestamp(),
'oauth_nonce': cls.make_nonce(),
'oauth_version': cls.version,
}
defaults.update(parameters)
parameters = defaults
if token:
parameters['oauth_token'] = token.key
if token.verifier:
parameters['oauth_verifier'] = token.verifier
return Request(http_method, http_url, parameters, body=body,
is_form_encoded=is_form_encoded)
@classmethod
def from_token_and_callback(cls, token, callback=None,
http_method=HTTP_METHOD, http_url=None, parameters=None):
if not parameters:
parameters = {}
parameters['oauth_token'] = token.key
if callback:
parameters['oauth_callback'] = callback
return cls(http_method, http_url, parameters)
@staticmethod
def _split_header(header):
"""Turn Authorization: header into parameters."""
params = {}
parts = header.split(',')
for param in parts:
# Ignore realm parameter.
if param.find('realm') > -1:
continue
# Remove whitespace.
param = param.strip()
# Split key-value.
param_parts = param.split('=', 1)
# Remove quotes and unescape the value.
params[param_parts[0]] = urllib.unquote(param_parts[1].strip('\"'))
return params
@staticmethod
def _split_url_string(param_str):
"""Turn URL string into parameters."""
parameters = parse_qs(param_str.encode('utf-8'), keep_blank_values=True)
for k, v in parameters.iteritems():
parameters[k] = urllib.unquote(v[0])
return parameters
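
Putting the Request machinery together, a sketch with placeholder
credentials and URL: build a request from a consumer and token, sign it
with HMAC-SHA1, then render it either as an Authorization header or as a
signed URL.

consumer = Consumer('my-app-key', 'my-app-secret')
token = Token('access-token', 'access-secret')
req = Request.from_consumer_and_token(consumer, token=token,
    http_method='GET', http_url='http://example.com/resource?page=2')
req.sign_request(SignatureMethod_HMAC_SHA1(), consumer, token)
print(req.to_header())   # {'Authorization': 'OAuth realm="", oauth_...'}
print(req.to_url())      # the same parameters folded into the query string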
class Client(httplib2.Http):
"""OAuthClient is a worker to attempt to execute a request."""
def __init__(self, consumer, token=None, cache=None, timeout=None,
proxy_info=None):
if consumer is not None and not isinstance(consumer, Consumer):
raise ValueError("Invalid consumer.")
if token is not None and not isinstance(token, Token):
raise ValueError("Invalid token.")
self.consumer = consumer
self.token = token
self.method = SignatureMethod_HMAC_SHA1()
httplib2.Http.__init__(self, cache=cache, timeout=timeout, proxy_info=proxy_info)
def set_signature_method(self, method):
if not isinstance(method, SignatureMethod):
raise ValueError("Invalid signature method.")
self.method = method
def request(self, uri, method="GET", body='', headers=None,
redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None):
DEFAULT_POST_CONTENT_TYPE = 'application/x-www-form-urlencoded'
if not isinstance(headers, dict):
headers = {}
if method == "POST":
headers['Content-Type'] = headers.get('Content-Type',
DEFAULT_POST_CONTENT_TYPE)
is_form_encoded = \
headers.get('Content-Type') == 'application/x-www-form-urlencoded'
if is_form_encoded and body:
parameters = parse_qs(body)
else:
parameters = None
req = Request.from_consumer_and_token(self.consumer,
token=self.token, http_method=method, http_url=uri,
parameters=parameters, body=body, is_form_encoded=is_form_encoded)
req.sign_request(self.method, self.consumer, self.token)
schema, rest = urllib.splittype(uri)
if rest.startswith('//'):
hierpart = '//'
else:
hierpart = ''
host, rest = urllib.splithost(rest)
realm = schema + ':' + hierpart + host
if is_form_encoded:
body = req.to_postdata()
elif method == "GET":
uri = req.to_url()
else:
headers.update(req.to_header(realm=realm))
return httplib2.Http.request(self, uri, method=method, body=body,
headers=headers, redirections=redirections,
connection_type=connection_type)
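
The typical consumer-side flow goes through Client, which signs each
request before delegating to httplib2 (the endpoint and credentials below
are placeholders; this sketch performs real network I/O):

consumer = Consumer('my-app-key', 'my-app-secret')
client = Client(consumer)   # two-legged OAuth: no token
resp, content = client.request('http://example.com/api/items', method='GET')
print(resp['status'])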
class Server(object):
"""A skeletal implementation of a service provider, providing protected
resources to requests from authorized consumers.
This class implements the logic to check requests for authorization. You
can use it with your web server or web framework to protect certain
resources with OAuth.
"""
timestamp_threshold = 300 # In seconds, five minutes.
version = OAUTH_VERSION
signature_methods = None
def __init__(self, signature_methods=None):
self.signature_methods = signature_methods or {}
def add_signature_method(self, signature_method):
self.signature_methods[signature_method.name] = signature_method
return self.signature_methods
def verify_request(self, request, consumer, token):
"""Verifies an api call and checks all the parameters."""
self._check_version(request)
self._check_signature(request, consumer, token)
parameters = request.get_nonoauth_parameters()
return parameters
def build_authenticate_header(self, realm=''):
"""Optional support for the authenticate header."""
return {'WWW-Authenticate': 'OAuth realm="%s"' % realm}
def _check_version(self, request):
"""Verify the correct version of the request for this server."""
version = self._get_version(request)
if version and version != self.version:
raise Error('OAuth version %s not supported.' % str(version))
def _get_version(self, request):
"""Return the version of the request for this server."""
try:
version = request.get_parameter('oauth_version')
except:
version = OAUTH_VERSION
return version
def _get_signature_method(self, request):
"""Figure out the signature with some defaults."""
try:
signature_method = request.get_parameter('oauth_signature_method')
except:
signature_method = SIGNATURE_METHOD
try:
# Get the signature method object.
signature_method = self.signature_methods[signature_method]
except:
signature_method_names = ', '.join(self.signature_methods.keys())
raise Error('Signature method %s not supported; try one of the following: %s' % (signature_method, signature_method_names))
return signature_method
def _get_verifier(self, request):
return request.get_parameter('oauth_verifier')
def _check_signature(self, request, consumer, token):
timestamp, nonce = request._get_timestamp_nonce()
self._check_timestamp(timestamp)
signature_method = self._get_signature_method(request)
try:
signature = request.get_parameter('oauth_signature')
except:
raise MissingSignature('Missing oauth_signature.')
# Validate the signature.
valid = signature_method.check(request, consumer, token, signature)
if not valid:
key, base = signature_method.signing_base(request, consumer, token)
raise Error('Invalid signature. Expected signature base '
'string: %s' % base)
def _check_timestamp(self, timestamp):
"""Verify that timestamp is recentish."""
timestamp = int(timestamp)
now = int(time.time())
lapsed = now - timestamp
if lapsed > self.timestamp_threshold:
raise Error('Expired timestamp: given %d and now %s has a '
'greater difference than threshold %d' % (timestamp, now,
self.timestamp_threshold))
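
On the provider side, a sketch of wiring up Server (placeholder
credentials; a real provider would build the incoming Request with
Request.from_request() from the raw HTTP data rather than signing one
locally, as done here for demonstration):

server = Server()
server.add_signature_method(SignatureMethod_HMAC_SHA1())
consumer = Consumer('my-app-key', 'my-app-secret')
token = Token('access-token', 'access-secret')
req = Request.from_consumer_and_token(consumer, token=token,
    http_method='GET', http_url='http://example.com/resource')
req.sign_request(SignatureMethod_HMAC_SHA1(), consumer, token)
print(server.verify_request(req, consumer, token))   # {} - no extra params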
class SignatureMethod(object):
"""A way of signing requests.
The OAuth protocol lets consumers and service providers pick a way to sign
requests. This interface shows the methods expected by the other `oauth`
modules for signing requests. Subclass it and implement its methods to
provide a new way to sign requests.
"""
def signing_base(self, request, consumer, token):
"""Calculates the string that needs to be signed.
This method returns a 2-tuple containing the starting key for the
signing and the message to be signed. The latter may be used in error
messages to help clients debug their software.
"""
raise NotImplementedError
def sign(self, request, consumer, token):
"""Returns the signature for the given request, based on the consumer
and token also provided.
You should use your implementation of `signing_base()` to build the
message to sign. Otherwise it may be less useful for debugging.
"""
raise NotImplementedError
def check(self, request, consumer, token, signature):
"""Returns whether the given signature is the correct signature for
the given consumer and token signing the given request."""
built = self.sign(request, consumer, token)
return built == signature
class SignatureMethod_HMAC_SHA1(SignatureMethod):
name = 'HMAC-SHA1'
def signing_base(self, request, consumer, token):
if not hasattr(request, 'normalized_url') or request.normalized_url is None:
raise ValueError("Base URL for request is not set.")
sig = (
escape(request.method),
escape(request.normalized_url),
escape(request.get_normalized_parameters()),
)
key = '%s&' % escape(consumer.secret)
if token:
key += escape(token.secret)
raw = '&'.join(sig)
return key, raw
def sign(self, request, consumer, token):
"""Builds the base signature string."""
key, raw = self.signing_base(request, consumer, token)
hashed = hmac.new(key, raw, sha)
# Calculate the digest base 64.
return binascii.b2a_base64(hashed.digest())[:-1]
class SignatureMethod_PLAINTEXT(SignatureMethod):
name = 'PLAINTEXT'
def signing_base(self, request, consumer, token):
"""Concatenates the consumer key and secret with the token's
secret."""
sig = '%s&' % escape(consumer.secret)
if token:
sig = sig + escape(token.secret)
return sig, sig
def sign(self, request, consumer, token):
key, raw = self.signing_base(request, consumer, token)
return raw

View file

@@ -1,18 +0,0 @@
# This is the version of this source code.
manual_verstr = "1.5"
auto_build_num = "211"
verstr = manual_verstr + "." + auto_build_num
try:
from pyutil.version_class import Version as pyutil_Version
__version__ = pyutil_Version(verstr)
except (ImportError, ValueError):
# Fall back if pyutil is not installed.
from distutils.version import LooseVersion as distutils_Version
__version__ = distutils_Version(verstr)

View file

@@ -1,40 +0,0 @@
"""
The MIT License
Copyright (c) 2007-2010 Leah Culver, Joe Stump, Mark Paschal, Vic Fryzel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import oauth2
import imaplib
class IMAP4_SSL(imaplib.IMAP4_SSL):
"""IMAP wrapper for imaplib.IMAP4_SSL that implements XOAUTH."""
def authenticate(self, url, consumer, token):
if consumer is not None and not isinstance(consumer, oauth2.Consumer):
raise ValueError("Invalid consumer.")
if token is not None and not isinstance(token, oauth2.Token):
raise ValueError("Invalid token.")
imaplib.IMAP4_SSL.authenticate(self, 'XOAUTH',
lambda x: oauth2.build_xoauth_string(url, consumer, token))
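
A hedged usage sketch (server name, URL, and credentials are placeholders
modeled on Google's original XOAUTH deployment, the main consumer of this
wrapper):

consumer = oauth2.Consumer('my-app-key', 'my-app-secret')
token = oauth2.Token('user-access-token', 'user-access-secret')
url = 'https://mail.google.com/mail/b/user@example.com/imap/'
conn = IMAP4_SSL('imap.googlemail.com')
conn.authenticate(url, consumer, token)
conn.select('INBOX')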

View file

@@ -1,41 +0,0 @@
"""
The MIT License
Copyright (c) 2007-2010 Leah Culver, Joe Stump, Mark Paschal, Vic Fryzel
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import oauth2
import smtplib
import base64
class SMTP(smtplib.SMTP):
"""SMTP wrapper for smtplib.SMTP that implements XOAUTH."""
def authenticate(self, url, consumer, token):
if consumer is not None and not isinstance(consumer, oauth2.Consumer):
raise ValueError("Invalid consumer.")
if token is not None and not isinstance(token, oauth2.Token):
raise ValueError("Invalid token.")
self.docmd('AUTH', 'XOAUTH %s' % \
base64.b64encode(oauth2.build_xoauth_string(url, consumer, token)))
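
And the SMTP counterpart, again with placeholder server and credentials;
STARTTLS is negotiated before the XOAUTH exchange:

consumer = oauth2.Consumer('my-app-key', 'my-app-secret')
token = oauth2.Token('user-access-token', 'user-access-secret')
url = 'https://mail.google.com/mail/b/user@example.com/smtp/'
conn = SMTP('smtp.googlemail.com', 587)
conn.ehlo()
conn.starttls()
conn.ehlo()
conn.authenticate(url, consumer, token)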

View file

@@ -1,165 +0,0 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View file

@@ -1,21 +0,0 @@
Bootstrap manual for developers of pygeoip
Dependencies: tox, nose, epydoc
For testing we use tox for virtualenv-based testing across Python
versions, with nose as the test framework.
Tox will create a virtualenv for each Python version pygeoip supports
and install the current working tree using the setup.py install script.
Running the tests requires a couple of sample databases, available at
the link below.
Maxmind sample databases for testing can be downloaded here:
http://www.defunct.cc/maxmind-geoip-samples.tar.gz (58 MB)
Extract the tarball in the tests directory and run tox from the root directory.
Please make sure your code passes all tests before opening pull requests.
All the best,
William Tisäter

View file

@@ -1,668 +0,0 @@
# -*- coding: utf-8 -*-
"""
Pure Python GeoIP API
The API is based on MaxMind's C-based Python API, but the code itself is
ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid.
@author: Jennifer Ennis <zaylea@gmail.com>
@license: Copyright(C) 2004 MaxMind LLC
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
"""
import os
import math
import socket
import mmap
import codecs
from threading import Lock
try:
from StringIO import StringIO
except ImportError:
from io import StringIO, BytesIO
from pygeoip import util, const
from pygeoip.const import PY2, PY3
from pygeoip.timezone import time_zone_by_country_and_region
STANDARD = const.STANDARD
MMAP_CACHE = const.MMAP_CACHE
MEMORY_CACHE = const.MEMORY_CACHE
ENCODING = const.ENCODING
class GeoIPError(Exception):
pass
class GeoIPMetaclass(type):
def __new__(cls, *args, **kwargs):
"""
Singleton method to get an instance without reparsing the db. Unique
instances are instantiated based on the filename of the db. Flags are
ignored for this, i.e. if you initialize one with STANDARD
flag (default) and then try later to initialize with MEMORY_CACHE, it
will still return the STANDARD one.
"""
if not hasattr(cls, '_instances'):
cls._instances = {}
if len(args) > 0:
filename = args[0]
elif 'filename' in kwargs:
filename = kwargs['filename']
if filename not in cls._instances:
cls._instances[filename] = type.__new__(cls, *args, **kwargs)
return cls._instances[filename]
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
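
The practical effect of this metaclass, sketched with a placeholder
database path (GeoIP is the class defined just below): constructing it
twice with the same filename returns the same object, and the flags of
the second call are ignored.

g1 = GeoIP('/usr/share/GeoIP/GeoIP.dat')                 # STANDARD
g2 = GeoIP('/usr/share/GeoIP/GeoIP.dat', MEMORY_CACHE)   # same instance
assert g1 is g2   # the MEMORY_CACHE flag of the second call has no effect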
class GeoIP(GeoIPBase):
def __init__(self, filename, flags=0):
"""
Initialize the class.
@param filename: Path to a geoip database.
@type filename: str
@param flags: Flags that affect how the database is processed.
Currently supported flags are STANDARD (the default),
MEMORY_CACHE (preload the whole file into memory) and
MMAP_CACHE (access the file via mmap).
@type flags: int
"""
self._filename = filename
self._flags = flags
if self._flags & const.MMAP_CACHE:
f = open(filename, 'rb')
access = mmap.ACCESS_READ
self._filehandle = mmap.mmap(f.fileno(), 0, access=access)
f.close()
elif self._flags & const.MEMORY_CACHE:
f = open(filename, 'rb')
self._memoryBuffer = f.read()
iohandle = BytesIO if PY3 else StringIO
self._filehandle = iohandle(self._memoryBuffer)
f.close()
else:
self._filehandle = codecs.open(filename, 'rb', ENCODING)
self._lock = Lock()
self._setup_segments()
def _setup_segments(self):
"""
Parses the database file to determine what kind of database is
being used and setup segment sizes and start points that will
be used by the seek*() methods later.
Supported databases:
* COUNTRY_EDITION
* COUNTRY_EDITION_V6
* REGION_EDITION_REV0
* REGION_EDITION_REV1
* CITY_EDITION_REV0
* CITY_EDITION_REV1
* CITY_EDITION_REV1_V6
* ORG_EDITION
* ISP_EDITION
* ASNUM_EDITION
* ASNUM_EDITION_V6
"""
self._databaseType = const.COUNTRY_EDITION
self._recordLength = const.STANDARD_RECORD_LENGTH
self._databaseSegments = const.COUNTRY_BEGIN
self._lock.acquire()
filepos = self._filehandle.tell()
self._filehandle.seek(-3, os.SEEK_END)
for i in range(const.STRUCTURE_INFO_MAX_SIZE):
chars = chr(255) * 3
delim = self._filehandle.read(3)
if PY3 and type(delim) is bytes:
delim = delim.decode(ENCODING)
if PY2:
chars = chars.decode(ENCODING)
if type(delim) is str:
delim = delim.decode(ENCODING)
if delim == chars:
byte = self._filehandle.read(1)
self._databaseType = ord(byte)
# Compatibility with databases from April 2003 and earlier
if (self._databaseType >= 106):
self._databaseType -= 105
if self._databaseType == const.REGION_EDITION_REV0:
self._databaseSegments = const.STATE_BEGIN_REV0
elif self._databaseType == const.REGION_EDITION_REV1:
self._databaseSegments = const.STATE_BEGIN_REV1
elif self._databaseType in (const.CITY_EDITION_REV0,
const.CITY_EDITION_REV1,
const.CITY_EDITION_REV1_V6,
const.ORG_EDITION,
const.ISP_EDITION,
const.ASNUM_EDITION,
const.ASNUM_EDITION_V6):
self._databaseSegments = 0
buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH)
if PY3 and type(buf) is bytes:
buf = buf.decode(ENCODING)
for j in range(const.SEGMENT_RECORD_LENGTH):
self._databaseSegments += (ord(buf[j]) << (j * 8))
LONG_RECORDS = (const.ORG_EDITION, const.ISP_EDITION)
if self._databaseType in LONG_RECORDS:
self._recordLength = const.ORG_RECORD_LENGTH
break
else:
self._filehandle.seek(-4, os.SEEK_CUR)
self._filehandle.seek(filepos, os.SEEK_SET)
self._lock.release()
def _seek_country(self, ipnum):
"""
Using the record length and appropriate start points, seek to the
country that corresponds to the converted IP address integer.
@param ipnum: result of ip2long conversion
@type ipnum: int
@return: offset of start of record
@rtype: int
"""
try:
offset = 0
seek_depth = 127 if len(str(ipnum)) > 10 else 31
for depth in range(seek_depth, -1, -1):
if self._flags & const.MEMORY_CACHE:
startIndex = 2 * self._recordLength * offset
endIndex = startIndex + (2 * self._recordLength)
buf = self._memoryBuffer[startIndex:endIndex]
else:
startIndex = 2 * self._recordLength * offset
readLength = 2 * self._recordLength
self._lock.acquire()
self._filehandle.seek(startIndex, os.SEEK_SET)
buf = self._filehandle.read(readLength)
self._lock.release()
if PY3 and type(buf) is bytes:
buf = buf.decode(ENCODING)
x = [0, 0]
for i in range(2):
for j in range(self._recordLength):
byte = buf[self._recordLength * i + j]
x[i] += ord(byte) << (j * 8)
if ipnum & (1 << depth):
if x[1] >= self._databaseSegments:
return x[1]
offset = x[1]
else:
if x[0] >= self._databaseSegments:
return x[0]
offset = x[0]
except:
pass
raise GeoIPError('Corrupt database')
def _get_org(self, ipnum):
"""
Seek and return organization or ISP name for ipnum.
@param ipnum: Converted IP address
@type ipnum: int
@return: org/isp name
@rtype: str
"""
seek_org = self._seek_country(ipnum)
if seek_org == self._databaseSegments:
return None
read_length = (2 * self._recordLength - 1) * self._databaseSegments
self._lock.acquire()
self._filehandle.seek(seek_org + read_length, os.SEEK_SET)
buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH)
self._lock.release()
if PY3 and type(buf) is bytes:
buf = buf.decode(ENCODING)
return buf[:buf.index(chr(0))]
def _get_region(self, ipnum):
"""
Seek and return the region info (dict containing country_code
and region_name).
@param ipnum: Converted IP address
@type ipnum: int
@return: dict containing country_code and region_name
@rtype: dict
"""
region = ''
country_code = ''
seek_country = self._seek_country(ipnum)
def get_region_name(offset):
region1 = chr(offset // 26 + 65)
region2 = chr(offset % 26 + 65)
return ''.join([region1, region2])
if self._databaseType == const.REGION_EDITION_REV0:
seek_region = seek_country - const.STATE_BEGIN_REV0
if seek_region >= 1000:
country_code = 'US'
region = get_region_name(seek_region - 1000)
else:
country_code = const.COUNTRY_CODES[seek_region]
elif self._databaseType == const.REGION_EDITION_REV1:
seek_region = seek_country - const.STATE_BEGIN_REV1
if seek_region < const.US_OFFSET:
pass
elif seek_region < const.CANADA_OFFSET:
country_code = 'US'
region = get_region_name(seek_region - const.US_OFFSET)
elif seek_region < const.WORLD_OFFSET:
country_code = 'CA'
region = get_region_name(seek_region - const.CANADA_OFFSET)
else:
index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
if index in const.COUNTRY_CODES:
country_code = const.COUNTRY_CODES[index]
elif self._databaseType in const.CITY_EDITIONS:
rec = self._get_record(ipnum)
region = rec.get('region_name', '')
country_code = rec.get('country_code', '')
return {'country_code': country_code, 'region_name': region}
def _get_record(self, ipnum):
"""
Populate location dict for converted IP.
@param ipnum: Converted IP address
@type ipnum: int
@return: dict with country_code, country_code3, country_name,
region, city, postal_code, latitude, longitude,
dma_code, metro_code, area_code, region_name, time_zone
@rtype: dict
"""
seek_country = self._seek_country(ipnum)
if seek_country == self._databaseSegments:
return {}
read_length = (2 * self._recordLength - 1) * self._databaseSegments
self._lock.acquire()
self._filehandle.seek(seek_country + read_length, os.SEEK_SET)
buf = self._filehandle.read(const.FULL_RECORD_LENGTH)
self._lock.release()
if PY3 and type(buf) is bytes:
buf = buf.decode(ENCODING)
record = {
'dma_code': 0,
'area_code': 0,
'metro_code': '',
'postal_code': ''
}
latitude = 0
longitude = 0
buf_pos = 0
# Get country
char = ord(buf[buf_pos])
record['country_code'] = const.COUNTRY_CODES[char]
record['country_code3'] = const.COUNTRY_CODES3[char]
record['country_name'] = const.COUNTRY_NAMES[char]
record['continent'] = const.CONTINENT_NAMES[char]
buf_pos += 1
def get_data(buf, buf_pos):
offset = buf_pos
char = ord(buf[offset])
while (char != 0):
offset += 1
char = ord(buf[offset])
if offset > buf_pos:
return (offset, buf[buf_pos:offset])
return (offset, '')
offset, record['region_name'] = get_data(buf, buf_pos)
offset, record['city'] = get_data(buf, offset + 1)
offset, record['postal_code'] = get_data(buf, offset + 1)
buf_pos = offset + 1
for j in range(3):
char = ord(buf[buf_pos])
buf_pos += 1
latitude += (char << (j * 8))
for j in range(3):
char = ord(buf[buf_pos])
buf_pos += 1
longitude += (char << (j * 8))
record['latitude'] = (latitude / 10000.0) - 180.0
record['longitude'] = (longitude / 10000.0) - 180.0
if self._databaseType in (const.CITY_EDITION_REV1, const.CITY_EDITION_REV1_V6):
dmaarea_combo = 0
if record['country_code'] == 'US':
for j in range(3):
char = ord(buf[buf_pos])
dmaarea_combo += (char << (j * 8))
buf_pos += 1
record['dma_code'] = int(math.floor(dmaarea_combo / 1000))
record['area_code'] = dmaarea_combo % 1000
record['metro_code'] = const.DMA_MAP.get(record['dma_code'])
params = (record['country_code'], record['region_name'])
record['time_zone'] = time_zone_by_country_and_region(*params)
return record
def _gethostbyname(self, hostname):
if self._databaseType in const.IPV6_EDITIONS:
try:
response = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
family, socktype, proto, canonname, sockaddr = response[0]
address, port, flow, scope = sockaddr
return address
except socket.gaierror:
return ''
else:
return socket.gethostbyname(hostname)
def id_by_addr(self, addr):
"""
Get the country index.
Looks up the index for the country, which is the key used for
both the code and the name.
@param addr: The IP address
@type addr: str
@return: network byte order 32-bit integer
@rtype: int
"""
ipnum = util.ip2long(addr)
if not ipnum:
raise ValueError("Invalid IP address: %s" % addr)
COUNTRY_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
if self._databaseType not in COUNTRY_EDITIONS:
message = 'Invalid database type, expected Country'
raise GeoIPError(message)
return self._seek_country(ipnum) - const.COUNTRY_BEGIN
def country_code_by_addr(self, addr):
"""
Returns 2-letter country code (e.g. 'US') for specified IP address.
Use this method if you have a Country, Region, or City database.
@param addr: IP address
@type addr: str
@return: 2-letter country code
@rtype: str
"""
try:
VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
if self._databaseType in VALID_EDITIONS:
ipv = 6 if addr.find(':') >= 0 else 4
if ipv == 4 and self._databaseType != const.COUNTRY_EDITION:
message = 'Invalid database type; expected IPv6 address'
raise ValueError(message)
if ipv == 6 and self._databaseType != const.COUNTRY_EDITION_V6:
message = 'Invalid database type; expected IPv4 address'
raise ValueError(message)
country_id = self.id_by_addr(addr)
return const.COUNTRY_CODES[country_id]
elif self._databaseType in const.REGION_CITY_EDITIONS:
return self.region_by_addr(addr).get('country_code')
message = 'Invalid database type, expected Country, City or Region'
raise GeoIPError(message)
except ValueError:
raise GeoIPError('Failed to lookup address %s' % addr)
def country_code_by_name(self, hostname):
"""
Returns 2-letter country code (e.g. 'US') for specified hostname.
Use this method if you have a Country, Region, or City database.
@param hostname: Hostname
@type hostname: str
@return: 2-letter country code
@rtype: str
"""
addr = self._gethostbyname(hostname)
return self.country_code_by_addr(addr)
def country_name_by_addr(self, addr):
"""
Returns full country name for specified IP address.
Use this method if you have a Country or City database.
@param addr: IP address
@type addr: str
@return: country name
@rtype: str
"""
try:
VALID_EDITIONS = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
if self._databaseType in VALID_EDITIONS:
country_id = self.id_by_addr(addr)
return const.COUNTRY_NAMES[country_id]
elif self._databaseType in const.CITY_EDITIONS:
return self.record_by_addr(addr).get('country_name')
else:
message = 'Invalid database type, expected Country or City'
raise GeoIPError(message)
except ValueError:
raise GeoIPError('Failed to lookup address %s' % addr)
def country_name_by_name(self, hostname):
"""
Returns full country name for specified hostname.
Use this method if you have a Country database.
@param hostname: Hostname
@type hostname: str
@return: country name
@rtype: str
"""
addr = self._gethostbyname(hostname)
return self.country_name_by_addr(addr)
def org_by_addr(self, addr):
"""
Lookup Organization, ISP or ASNum for given IP address.
Use this method if you have an Organization, ISP or ASNum database.
@param addr: IP address
@type addr: str
@return: organization or ISP name
@rtype: str
"""
try:
ipnum = util.ip2long(addr)
if not ipnum:
raise ValueError('Invalid IP address')
valid = (const.ORG_EDITION, const.ISP_EDITION, const.ASNUM_EDITION, const.ASNUM_EDITION_V6)
if self._databaseType not in valid:
message = 'Invalid database type, expected Org, ISP or ASNum'
raise GeoIPError(message)
return self._get_org(ipnum)
except ValueError:
raise GeoIPError('Failed to lookup address %s' % addr)
def org_by_name(self, hostname):
"""
Lookup the organization (or ISP) for hostname.
Use this method if you have an Organization/ISP database.
@param hostname: Hostname
@type hostname: str
@return: Organization or ISP name
@rtype: str
"""
addr = self._gethostbyname(hostname)
return self.org_by_addr(addr)
def record_by_addr(self, addr):
"""
Look up the record for a given IP address.
Use this method if you have a City database.
@param addr: IP address
@type addr: str
@return: Dictionary with country_code, country_code3, country_name,
region, city, postal_code, latitude, longitude, dma_code,
metro_code, area_code, region_name, time_zone
@rtype: dict
"""
try:
ipnum = util.ip2long(addr)
if not ipnum:
raise ValueError('Invalid IP address')
if self._databaseType not in const.CITY_EDITIONS:
message = 'Invalid database type, expected City'
raise GeoIPError(message)
rec = self._get_record(ipnum)
if not rec:
return None
return rec
except ValueError:
raise GeoIPError('Failed to lookup address %s' % addr)
def record_by_name(self, hostname):
"""
Look up the record for a given hostname.
Use this method if you have a City database.
@param hostname: Hostname
@type hostname: str
@return: Dictionary with country_code, country_code3, country_name,
region, city, postal_code, latitude, longitude, dma_code,
metro_code, area_code, region_name, time_zone
@rtype: dict
"""
addr = self._gethostbyname(hostname)
return self.record_by_addr(addr)
def region_by_addr(self, addr):
"""
Lookup the region for given IP address.
Use this method if you have a Region database.
@param addr: IP address
@type addr: str
@return: Dictionary containing country_code, region and region_name
@rtype: dict
"""
try:
ipnum = util.ip2long(addr)
if not ipnum:
raise ValueError('Invalid IP address')
if self._databaseType not in const.REGION_CITY_EDITIONS:
message = 'Invalid database type, expected Region or City'
raise GeoIPError(message)
return self._get_region(ipnum)
except ValueError:
raise GeoIPError('Failed to lookup address %s' % addr)
def region_by_name(self, hostname):
"""
Lookup the region for given hostname.
Use this method if you have a Region database.
@param hostname: Hostname
@type hostname: str
@return: Dictionary containing country_code, region, and region_name
@rtype: dict
"""
addr = self._gethostbyname(hostname)
return self.region_by_addr(addr)
def time_zone_by_addr(self, addr):
"""
Look up the time zone for a given IP address.
Use this method if you have a Region or City database.
@param addr: IP address
@type addr: str
@return: Time zone
@rtype: str
"""
try:
ipnum = util.ip2long(addr)
if not ipnum:
raise ValueError('Invalid IP address')
if self._databaseType not in const.CITY_EDITIONS:
message = 'Invalid database type, expected City'
raise GeoIPError(message)
return self._get_record(ipnum).get('time_zone')
except ValueError:
raise GeoIPError('Failed to lookup address %s' % addr)
def time_zone_by_name(self, hostname):
"""
Look up the time zone for a given hostname.
Use this method if you have a Region or City database.
@param hostname: Hostname
@type hostname: str
@return: Time zone
@rtype: str
"""
addr = self._gethostbyname(hostname)
return self.time_zone_by_addr(addr)
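
Tying the lookup methods together, a sketch with placeholder database
paths and a well-known public IP (results are illustrative):

gi = GeoIP('/usr/share/GeoIP/GeoIP.dat')
print(gi.country_code_by_addr('8.8.8.8'))        # e.g. 'US'
print(gi.country_name_by_name('example.com'))    # resolves, then looks up

gi_city = GeoIP('/usr/share/GeoIP/GeoLiteCity.dat', MEMORY_CACHE)
rec = gi_city.record_by_addr('8.8.8.8')
print('%s / %s' % (rec.get('city'), rec.get('time_zone')))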

View file

@@ -1,431 +0,0 @@
# -*- coding: utf-8 -*-
"""
Constants needed for the binary parser. Part of the pygeoip package.
@author: Jennifer Ennis <zaylea@gmail.com>
@license: Copyright(C) 2004 MaxMind LLC
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
"""
from platform import python_version_tuple
PY2 = python_version_tuple()[0] == '2'
PY3 = python_version_tuple()[0] == '3'
GEOIP_STANDARD = 0
GEOIP_MEMORY_CACHE = 1
DMA_MAP = {
500: 'Portland-Auburn, ME',
501: 'New York, NY',
502: 'Binghamton, NY',
503: 'Macon, GA',
504: 'Philadelphia, PA',
505: 'Detroit, MI',
506: 'Boston, MA',
507: 'Savannah, GA',
508: 'Pittsburgh, PA',
509: 'Ft Wayne, IN',
510: 'Cleveland, OH',
511: 'Washington, DC',
512: 'Baltimore, MD',
513: 'Flint, MI',
514: 'Buffalo, NY',
515: 'Cincinnati, OH',
516: 'Erie, PA',
517: 'Charlotte, NC',
518: 'Greensboro, NC',
519: 'Charleston, SC',
520: 'Augusta, GA',
521: 'Providence, RI',
522: 'Columbus, GA',
523: 'Burlington, VT',
524: 'Atlanta, GA',
525: 'Albany, GA',
526: 'Utica-Rome, NY',
527: 'Indianapolis, IN',
528: 'Miami, FL',
529: 'Louisville, KY',
530: 'Tallahassee, FL',
531: 'Tri-Cities, TN',
532: 'Albany-Schenectady-Troy, NY',
533: 'Hartford, CT',
534: 'Orlando, FL',
535: 'Columbus, OH',
536: 'Youngstown-Warren, OH',
537: 'Bangor, ME',
538: 'Rochester, NY',
539: 'Tampa, FL',
540: 'Traverse City-Cadillac, MI',
541: 'Lexington, KY',
542: 'Dayton, OH',
543: 'Springfield-Holyoke, MA',
544: 'Norfolk-Portsmouth, VA',
545: 'Greenville-New Bern-Washington, NC',
546: 'Columbia, SC',
547: 'Toledo, OH',
548: 'West Palm Beach, FL',
549: 'Watertown, NY',
550: 'Wilmington, NC',
551: 'Lansing, MI',
552: 'Presque Isle, ME',
553: 'Marquette, MI',
554: 'Wheeling, WV',
555: 'Syracuse, NY',
556: 'Richmond-Petersburg, VA',
557: 'Knoxville, TN',
558: 'Lima, OH',
559: 'Bluefield-Beckley-Oak Hill, WV',
560: 'Raleigh-Durham, NC',
561: 'Jacksonville, FL',
563: 'Grand Rapids, MI',
564: 'Charleston-Huntington, WV',
565: 'Elmira, NY',
566: 'Harrisburg-Lancaster-Lebanon-York, PA',
567: 'Greenville-Spartanburg, SC',
569: 'Harrisonburg, VA',
570: 'Florence-Myrtle Beach, SC',
571: 'Ft Myers, FL',
573: 'Roanoke-Lynchburg, VA',
574: 'Johnstown-Altoona, PA',
575: 'Chattanooga, TN',
576: 'Salisbury, MD',
577: 'Wilkes Barre-Scranton, PA',
581: 'Terre Haute, IN',
582: 'Lafayette, IN',
583: 'Alpena, MI',
584: 'Charlottesville, VA',
588: 'South Bend, IN',
592: 'Gainesville, FL',
596: 'Zanesville, OH',
597: 'Parkersburg, WV',
598: 'Clarksburg-Weston, WV',
600: 'Corpus Christi, TX',
602: 'Chicago, IL',
603: 'Joplin-Pittsburg, MO',
604: 'Columbia-Jefferson City, MO',
605: 'Topeka, KS',
606: 'Dothan, AL',
609: 'St Louis, MO',
610: 'Rockford, IL',
611: 'Rochester-Mason City-Austin, MN',
612: 'Shreveport, LA',
613: 'Minneapolis-St Paul, MN',
616: 'Kansas City, MO',
617: 'Milwaukee, WI',
618: 'Houston, TX',
619: 'Springfield, MO',
620: 'Tuscaloosa, AL',
622: 'New Orleans, LA',
623: 'Dallas-Fort Worth, TX',
624: 'Sioux City, IA',
625: 'Waco-Temple-Bryan, TX',
626: 'Victoria, TX',
627: 'Wichita Falls, TX',
628: 'Monroe, LA',
630: 'Birmingham, AL',
631: 'Ottumwa-Kirksville, IA',
632: 'Paducah, KY',
633: 'Odessa-Midland, TX',
634: 'Amarillo, TX',
635: 'Austin, TX',
636: 'Harlingen, TX',
637: 'Cedar Rapids-Waterloo, IA',
638: 'St Joseph, MO',
639: 'Jackson, TN',
640: 'Memphis, TN',
641: 'San Antonio, TX',
642: 'Lafayette, LA',
643: 'Lake Charles, LA',
644: 'Alexandria, LA',
646: 'Anniston, AL',
647: 'Greenwood-Greenville, MS',
648: 'Champaign-Springfield-Decatur, IL',
649: 'Evansville, IN',
650: 'Oklahoma City, OK',
651: 'Lubbock, TX',
652: 'Omaha, NE',
656: 'Panama City, FL',
657: 'Sherman, TX',
658: 'Green Bay-Appleton, WI',
659: 'Nashville, TN',
661: 'San Angelo, TX',
662: 'Abilene-Sweetwater, TX',
669: 'Madison, WI',
670: 'Ft Smith-Fay-Springfield, AR',
671: 'Tulsa, OK',
673: 'Columbus-Tupelo-West Point, MS',
675: 'Peoria-Bloomington, IL',
676: 'Duluth, MN',
678: 'Wichita, KS',
679: 'Des Moines, IA',
682: 'Davenport-Rock Island-Moline, IL',
686: 'Mobile, AL',
687: 'Minot-Bismarck-Dickinson, ND',
691: 'Huntsville, AL',
692: 'Beaumont-Port Arthur, TX',
693: 'Little Rock-Pine Bluff, AR',
698: 'Montgomery, AL',
702: 'La Crosse-Eau Claire, WI',
705: 'Wausau-Rhinelander, WI',
709: 'Tyler-Longview, TX',
710: 'Hattiesburg-Laurel, MS',
711: 'Meridian, MS',
716: 'Baton Rouge, LA',
717: 'Quincy, IL',
718: 'Jackson, MS',
722: 'Lincoln-Hastings, NE',
724: 'Fargo-Valley City, ND',
725: 'Sioux Falls, SD',
734: 'Jonesboro, AR',
736: 'Bowling Green, KY',
737: 'Mankato, MN',
740: 'North Platte, NE',
743: 'Anchorage, AK',
744: 'Honolulu, HI',
745: 'Fairbanks, AK',
746: 'Biloxi-Gulfport, MS',
747: 'Juneau, AK',
749: 'Laredo, TX',
751: 'Denver, CO',
752: 'Colorado Springs, CO',
753: 'Phoenix, AZ',
754: 'Butte-Bozeman, MT',
755: 'Great Falls, MT',
756: 'Billings, MT',
757: 'Boise, ID',
758: 'Idaho Falls-Pocatello, ID',
759: 'Cheyenne, WY',
760: 'Twin Falls, ID',
762: 'Missoula, MT',
764: 'Rapid City, SD',
765: 'El Paso, TX',
766: 'Helena, MT',
767: 'Casper-Riverton, WY',
770: 'Salt Lake City, UT',
771: 'Yuma, AZ',
773: 'Grand Junction, CO',
789: 'Tucson, AZ',
790: 'Albuquerque, NM',
798: 'Glendive, MT',
800: 'Bakersfield, CA',
801: 'Eugene, OR',
802: 'Eureka, CA',
803: 'Los Angeles, CA',
804: 'Palm Springs, CA',
807: 'San Francisco, CA',
810: 'Yakima-Pasco, WA',
811: 'Reno, NV',
813: 'Medford-Klamath Falls, OR',
819: 'Seattle-Tacoma, WA',
820: 'Portland, OR',
821: 'Bend, OR',
825: 'San Diego, CA',
828: 'Monterey-Salinas, CA',
839: 'Las Vegas, NV',
855: 'Santa Barbara, CA',
862: 'Sacramento, CA',
866: 'Fresno, CA',
868: 'Chico-Redding, CA',
881: 'Spokane, WA'
}
COUNTRY_CODES = (
'',
'AP', 'EU', 'AD', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ',
'AR', 'AS', 'AT', 'AU', 'AW', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG',
'BH', 'BI', 'BJ', 'BM', 'BN', 'BO', 'BR', 'BS', 'BT', 'BV', 'BW', 'BY',
'BZ', 'CA', 'CC', 'CD', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN',
'CO', 'CR', 'CU', 'CV', 'CX', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO',
'DZ', 'EC', 'EE', 'EG', 'EH', 'ER', 'ES', 'ET', 'FI', 'FJ', 'FK', 'FM',
'FO', 'FR', 'FX', 'GA', 'GB', 'GD', 'GE', 'GF', 'GH', 'GI', 'GL', 'GM',
'GN', 'GP', 'GQ', 'GR', 'GS', 'GT', 'GU', 'GW', 'GY', 'HK', 'HM', 'HN',
'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IO', 'IQ', 'IR', 'IS', 'IT',
'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KP', 'KR', 'KW',
'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV',
'LY', 'MA', 'MC', 'MD', 'MG', 'MH', 'MK', 'ML', 'MM', 'MN', 'MO', 'MP',
'MQ', 'MR', 'MS', 'MT', 'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC',
'NE', 'NF', 'NG', 'NI', 'NL', 'NO', 'NP', 'NR', 'NU', 'NZ', 'OM', 'PA',
'PE', 'PF', 'PG', 'PH', 'PK', 'PL', 'PM', 'PN', 'PR', 'PS', 'PT', 'PW',
'PY', 'QA', 'RE', 'RO', 'RU', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SG',
'SH', 'SI', 'SJ', 'SK', 'SL', 'SM', 'SN', 'SO', 'SR', 'ST', 'SV', 'SY',
'SZ', 'TC', 'TD', 'TF', 'TG', 'TH', 'TJ', 'TK', 'TM', 'TN', 'TO', 'TL',
'TR', 'TT', 'TV', 'TW', 'TZ', 'UA', 'UG', 'UM', 'US', 'UY', 'UZ', 'VA',
'VC', 'VE', 'VG', 'VI', 'VN', 'VU', 'WF', 'WS', 'YE', 'YT', 'RS', 'ZA',
'ZM', 'ME', 'ZW', 'A1', 'A2', 'O1', 'AX', 'GG', 'IM', 'JE', 'BL', 'MF',
'BQ', 'SS'
)
COUNTRY_CODES3 = (
'', 'AP', 'EU', 'AND', 'ARE', 'AFG', 'ATG', 'AIA', 'ALB', 'ARM', 'ANT',
'AGO', 'AQ', 'ARG', 'ASM', 'AUT', 'AUS', 'ABW', 'AZE', 'BIH', 'BRB', 'BGD',
'BEL', 'BFA', 'BGR', 'BHR', 'BDI', 'BEN', 'BMU', 'BRN', 'BOL', 'BRA',
'BHS', 'BTN', 'BV', 'BWA', 'BLR', 'BLZ', 'CAN', 'CC', 'COD', 'CAF', 'COG',
'CHE', 'CIV', 'COK', 'CHL', 'CMR', 'CHN', 'COL', 'CRI', 'CUB', 'CPV', 'CX',
'CYP', 'CZE', 'DEU', 'DJI', 'DNK', 'DMA', 'DOM', 'DZA', 'ECU', 'EST',
'EGY', 'ESH', 'ERI', 'ESP', 'ETH', 'FIN', 'FJI', 'FLK', 'FSM', 'FRO',
'FRA', 'FX', 'GAB', 'GBR', 'GRD', 'GEO', 'GUF', 'GHA', 'GIB', 'GRL', 'GMB',
'GIN', 'GLP', 'GNQ', 'GRC', 'GS', 'GTM', 'GUM', 'GNB', 'GUY', 'HKG', 'HM',
'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'IRL', 'ISR', 'IND', 'IO', 'IRQ', 'IRN',
'ISL', 'ITA', 'JAM', 'JOR', 'JPN', 'KEN', 'KGZ', 'KHM', 'KIR', 'COM',
'KNA', 'PRK', 'KOR', 'KWT', 'CYM', 'KAZ', 'LAO', 'LBN', 'LCA', 'LIE',
'LKA', 'LBR', 'LSO', 'LTU', 'LUX', 'LVA', 'LBY', 'MAR', 'MCO', 'MDA',
'MDG', 'MHL', 'MKD', 'MLI', 'MMR', 'MNG', 'MAC', 'MNP', 'MTQ', 'MRT',
'MSR', 'MLT', 'MUS', 'MDV', 'MWI', 'MEX', 'MYS', 'MOZ', 'NAM', 'NCL',
'NER', 'NFK', 'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NRU', 'NIU', 'NZL',
'OMN', 'PAN', 'PER', 'PYF', 'PNG', 'PHL', 'PAK', 'POL', 'SPM', 'PCN',
'PRI', 'PSE', 'PRT', 'PLW', 'PRY', 'QAT', 'REU', 'ROU', 'RUS', 'RWA',
'SAU', 'SLB', 'SYC', 'SDN', 'SWE', 'SGP', 'SHN', 'SVN', 'SJM', 'SVK',
'SLE', 'SMR', 'SEN', 'SOM', 'SUR', 'STP', 'SLV', 'SYR', 'SWZ', 'TCA',
'TCD', 'TF', 'TGO', 'THA', 'TJK', 'TKL', 'TLS', 'TKM', 'TUN', 'TON', 'TUR',
'TTO', 'TUV', 'TWN', 'TZA', 'UKR', 'UGA', 'UM', 'USA', 'URY', 'UZB', 'VAT',
'VCT', 'VEN', 'VGB', 'VIR', 'VNM', 'VUT', 'WLF', 'WSM', 'YEM', 'YT', 'SRB',
'ZAF', 'ZMB', 'MNE', 'ZWE', 'A1', 'A2', 'O1', 'ALA', 'GGY', 'IMN', 'JEY',
'BLM', 'MAF', 'BES', 'SSD'
)
COUNTRY_NAMES = (
'', 'Asia/Pacific Region', 'Europe', 'Andorra', 'United Arab Emirates',
'Afghanistan', 'Antigua and Barbuda', 'Anguilla', 'Albania', 'Armenia',
'Netherlands Antilles', 'Angola', 'Antarctica', 'Argentina',
'American Samoa', 'Austria', 'Australia', 'Aruba', 'Azerbaijan',
'Bosnia and Herzegovina', 'Barbados', 'Bangladesh', 'Belgium',
'Burkina Faso', 'Bulgaria', 'Bahrain', 'Burundi', 'Benin', 'Bermuda',
'Brunei Darussalam', 'Bolivia', 'Brazil', 'Bahamas', 'Bhutan',
'Bouvet Island', 'Botswana', 'Belarus', 'Belize', 'Canada',
'Cocos (Keeling) Islands', 'Congo, The Democratic Republic of the',
'Central African Republic', 'Congo', 'Switzerland', 'Cote D\'Ivoire',
'Cook Islands', 'Chile', 'Cameroon', 'China', 'Colombia', 'Costa Rica',
'Cuba', 'Cape Verde', 'Christmas Island', 'Cyprus', 'Czech Republic',
'Germany', 'Djibouti', 'Denmark', 'Dominica', 'Dominican Republic',
'Algeria', 'Ecuador', 'Estonia', 'Egypt', 'Western Sahara', 'Eritrea',
'Spain', 'Ethiopia', 'Finland', 'Fiji', 'Falkland Islands (Malvinas)',
'Micronesia, Federated States of', 'Faroe Islands', 'France',
'France, Metropolitan', 'Gabon', 'United Kingdom', 'Grenada', 'Georgia',
'French Guiana', 'Ghana', 'Gibraltar', 'Greenland', 'Gambia', 'Guinea',
'Guadeloupe', 'Equatorial Guinea', 'Greece',
'South Georgia and the South Sandwich Islands', 'Guatemala', 'Guam',
'Guinea-Bissau', 'Guyana', 'Hong Kong',
'Heard Island and McDonald Islands', 'Honduras', 'Croatia', 'Haiti',
'Hungary', 'Indonesia', 'Ireland', 'Israel', 'India',
'British Indian Ocean Territory', 'Iraq', 'Iran, Islamic Republic of',
'Iceland', 'Italy', 'Jamaica', 'Jordan', 'Japan', 'Kenya', 'Kyrgyzstan',
'Cambodia', 'Kiribati', 'Comoros', 'Saint Kitts and Nevis',
'Korea, Democratic People\'s Republic of', 'Korea, Republic of', 'Kuwait',
'Cayman Islands', 'Kazakhstan', 'Lao People\'s Democratic Republic',
'Lebanon', 'Saint Lucia', 'Liechtenstein', 'Sri Lanka', 'Liberia',
'Lesotho', 'Lithuania', 'Luxembourg', 'Latvia', 'Libya', 'Morocco',
'Monaco', 'Moldova, Republic of', 'Madagascar', 'Marshall Islands',
'Macedonia', 'Mali', 'Myanmar', 'Mongolia', 'Macau',
'Northern Mariana Islands', 'Martinique', 'Mauritania', 'Montserrat',
'Malta', 'Mauritius', 'Maldives', 'Malawi', 'Mexico', 'Malaysia',
'Mozambique', 'Namibia', 'New Caledonia', 'Niger', 'Norfolk Island',
'Nigeria', 'Nicaragua', 'Netherlands', 'Norway', 'Nepal', 'Nauru', 'Niue',
'New Zealand', 'Oman', 'Panama', 'Peru', 'French Polynesia',
'Papua New Guinea', 'Philippines', 'Pakistan', 'Poland',
'Saint Pierre and Miquelon', 'Pitcairn Islands', 'Puerto Rico',
'Palestinian Territory', 'Portugal', 'Palau', 'Paraguay', 'Qatar',
'Reunion', 'Romania', 'Russian Federation', 'Rwanda', 'Saudi Arabia',
'Solomon Islands', 'Seychelles', 'Sudan', 'Sweden', 'Singapore',
'Saint Helena', 'Slovenia', 'Svalbard and Jan Mayen', 'Slovakia',
'Sierra Leone', 'San Marino', 'Senegal', 'Somalia', 'Suriname',
'Sao Tome and Principe', 'El Salvador', 'Syrian Arab Republic',
'Swaziland', 'Turks and Caicos Islands', 'Chad',
'French Southern Territories', 'Togo', 'Thailand', 'Tajikistan', 'Tokelau',
'Turkmenistan', 'Tunisia', 'Tonga', 'Timor-Leste', 'Turkey',
'Trinidad and Tobago', 'Tuvalu', 'Taiwan', 'Tanzania, United Republic of',
'Ukraine', 'Uganda', 'United States Minor Outlying Islands',
'United States', 'Uruguay', 'Uzbekistan', 'Holy See (Vatican City State)',
'Saint Vincent and the Grenadines', 'Venezuela', 'Virgin Islands, British',
'Virgin Islands, U.S.', 'Vietnam', 'Vanuatu', 'Wallis and Futuna', 'Samoa',
'Yemen', 'Mayotte', 'Serbia', 'South Africa', 'Zambia', 'Montenegro',
'Zimbabwe', 'Anonymous Proxy', 'Satellite Provider', 'Other',
'Aland Islands', 'Guernsey', 'Isle of Man', 'Jersey', 'Saint Barthelemy',
'Saint Martin', 'Bonaire, Sint Eustatius and Saba', 'South Sudan'
)
CONTINENT_NAMES = (
'--', 'AS', 'EU', 'EU', 'AS', 'AS', 'NA', 'NA', 'EU', 'AS', 'NA', 'AF',
'AN', 'SA', 'OC', 'EU', 'OC', 'NA', 'AS', 'EU', 'NA', 'AS', 'EU', 'AF',
'EU', 'AS', 'AF', 'AF', 'NA', 'AS', 'SA', 'SA', 'NA', 'AS', 'AN', 'AF',
'EU', 'NA', 'NA', 'AS', 'AF', 'AF', 'AF', 'EU', 'AF', 'OC', 'SA', 'AF',
'AS', 'SA', 'NA', 'NA', 'AF', 'AS', 'AS', 'EU', 'EU', 'AF', 'EU', 'NA',
'NA', 'AF', 'SA', 'EU', 'AF', 'AF', 'AF', 'EU', 'AF', 'EU', 'OC', 'SA',
'OC', 'EU', 'EU', 'NA', 'AF', 'EU', 'NA', 'AS', 'SA', 'AF', 'EU', 'NA',
'AF', 'AF', 'NA', 'AF', 'EU', 'AN', 'NA', 'OC', 'AF', 'SA', 'AS', 'AN',
'NA', 'EU', 'NA', 'EU', 'AS', 'EU', 'AS', 'AS', 'AS', 'AS', 'AS', 'EU',
'EU', 'NA', 'AS', 'AS', 'AF', 'AS', 'AS', 'OC', 'AF', 'NA', 'AS', 'AS',
'AS', 'NA', 'AS', 'AS', 'AS', 'NA', 'EU', 'AS', 'AF', 'AF', 'EU', 'EU',
'EU', 'AF', 'AF', 'EU', 'EU', 'AF', 'OC', 'EU', 'AF', 'AS', 'AS', 'AS',
'OC', 'NA', 'AF', 'NA', 'EU', 'AF', 'AS', 'AF', 'NA', 'AS', 'AF', 'AF',
'OC', 'AF', 'OC', 'AF', 'NA', 'EU', 'EU', 'AS', 'OC', 'OC', 'OC', 'AS',
'NA', 'SA', 'OC', 'OC', 'AS', 'AS', 'EU', 'NA', 'OC', 'NA', 'AS', 'EU',
'OC', 'SA', 'AS', 'AF', 'EU', 'EU', 'AF', 'AS', 'OC', 'AF', 'AF', 'EU',
'AS', 'AF', 'EU', 'EU', 'EU', 'AF', 'EU', 'AF', 'AF', 'SA', 'AF', 'NA',
'AS', 'AF', 'NA', 'AF', 'AN', 'AF', 'AS', 'AS', 'OC', 'AS', 'AF', 'OC',
'AS', 'EU', 'NA', 'OC', 'AS', 'AF', 'EU', 'AF', 'OC', 'NA', 'SA', 'AS',
'EU', 'NA', 'SA', 'NA', 'NA', 'AS', 'OC', 'OC', 'OC', 'AS', 'AF', 'EU',
'AF', 'AF', 'EU', 'AF', '--', '--', '--', 'EU', 'EU', 'EU', 'EU', 'NA',
'NA', 'NA', 'AF'
)
# storage / caching flags
STANDARD = 0
MEMORY_CACHE = 1
MMAP_CACHE = 8
# Database structure constants
COUNTRY_BEGIN = 16776960
STATE_BEGIN_REV0 = 16700000
STATE_BEGIN_REV1 = 16000000
STRUCTURE_INFO_MAX_SIZE = 20
DATABASE_INFO_MAX_SIZE = 100
# Database editions
COUNTRY_EDITION = 1
COUNTRY_EDITION_V6 = 12
REGION_EDITION_REV0 = 7
REGION_EDITION_REV1 = 3
CITY_EDITION_REV0 = 6
CITY_EDITION_REV1 = 2
CITY_EDITION_REV1_V6 = 30
ORG_EDITION = 5
ISP_EDITION = 4
ASNUM_EDITION = 9
ASNUM_EDITION_V6 = 21
# Not yet supported databases
PROXY_EDITION = 8
NETSPEED_EDITION = 11
# Collection of databases
IPV6_EDITIONS = (COUNTRY_EDITION_V6, ASNUM_EDITION_V6, CITY_EDITION_REV1_V6)
CITY_EDITIONS = (CITY_EDITION_REV0, CITY_EDITION_REV1, CITY_EDITION_REV1_V6)
REGION_EDITIONS = (REGION_EDITION_REV0, REGION_EDITION_REV1)
REGION_CITY_EDITIONS = REGION_EDITIONS + CITY_EDITIONS
SEGMENT_RECORD_LENGTH = 3
STANDARD_RECORD_LENGTH = 3
ORG_RECORD_LENGTH = 4
MAX_RECORD_LENGTH = 4
MAX_ORG_RECORD_LENGTH = 300
FULL_RECORD_LENGTH = 50
US_OFFSET = 1
CANADA_OFFSET = 677
WORLD_OFFSET = 1353
FIPS_RANGE = 360
ENCODING = 'iso-8859-1'
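# Illustrative usage sketch, not part of the original module: the four
# tuples above are parallel arrays indexed by the country id a GeoIP
# lookup returns, so one index yields the ISO-2 code, ISO-3 code, name
# and continent together (assuming that parallel ordering).
_us = COUNTRY_CODES.index('US')
assert COUNTRY_CODES3[_us] == 'USA'
assert COUNTRY_NAMES[_us] == 'United States'
assert CONTINENT_NAMES[_us] == 'NA'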


@@ -1,760 +0,0 @@
# -*- coding: utf-8 -*-
"""
Time zone functions. Part of the pygeoip package.
@author: Jennifer Ennis <zaylea@gmail.com>
@license: Copyright(C) 2004 MaxMind LLC
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
"""
__all__ = ['time_zone_by_country_and_region']
_country = {
'AD': 'Europe/Andorra',
'AE': 'Asia/Dubai',
'AF': 'Asia/Kabul',
'AG': 'America/Antigua',
'AI': 'America/Anguilla',
'AL': 'Europe/Tirane',
'AM': 'Asia/Yerevan',
'AN': 'America/Curacao',
'AO': 'Africa/Luanda',
'AR': {
'01': 'America/Argentina/Buenos_Aires',
'02': 'America/Argentina/Catamarca',
'03': 'America/Argentina/Tucuman',
'04': 'America/Argentina/Rio_Gallegos',
'05': 'America/Argentina/Cordoba',
'06': 'America/Argentina/Tucuman',
'07': 'America/Argentina/Buenos_Aires',
'08': 'America/Argentina/Buenos_Aires',
'09': 'America/Argentina/Tucuman',
'10': 'America/Argentina/Jujuy',
'11': 'America/Argentina/San_Luis',
'12': 'America/Argentina/La_Rioja',
'13': 'America/Argentina/Mendoza',
'14': 'America/Argentina/Buenos_Aires',
'15': 'America/Argentina/San_Luis',
'16': 'America/Argentina/Buenos_Aires',
'17': 'America/Argentina/Salta',
'18': 'America/Argentina/San_Juan',
'19': 'America/Argentina/San_Luis',
'20': 'America/Argentina/Rio_Gallegos',
'21': 'America/Argentina/Buenos_Aires',
'22': 'America/Argentina/Catamarca',
'23': 'America/Argentina/Ushuaia',
'24': 'America/Argentina/Tucuman'
},
'AS': 'US/Samoa',
'AT': 'Europe/Vienna',
'AU': {
'01': 'Australia/Canberra',
'02': 'Australia/NSW',
'03': 'Australia/North',
'04': 'Australia/Queensland',
'05': 'Australia/South',
'06': 'Australia/Tasmania',
'07': 'Australia/Victoria',
'08': 'Australia/West'
},
'AW': 'America/Aruba',
'AX': 'Europe/Mariehamn',
'AZ': 'Asia/Baku',
'BA': 'Europe/Sarajevo',
'BB': 'America/Barbados',
'BD': 'Asia/Dhaka',
'BE': 'Europe/Brussels',
'BF': 'Africa/Ouagadougou',
'BG': 'Europe/Sofia',
'BH': 'Asia/Bahrain',
'BI': 'Africa/Bujumbura',
'BJ': 'Africa/Porto-Novo',
'BL': 'America/St_Barthelemy',
'BM': 'Atlantic/Bermuda',
'BN': 'Asia/Brunei',
'BO': 'America/La_Paz',
'BQ': 'America/Curacao',
'BR': {
'01': 'America/Rio_Branco',
'02': 'America/Maceio',
'03': 'America/Sao_Paulo',
'04': 'America/Manaus',
'05': 'America/Bahia',
'06': 'America/Fortaleza',
'07': 'America/Sao_Paulo',
'08': 'America/Sao_Paulo',
'11': 'America/Campo_Grande',
'13': 'America/Belem',
'14': 'America/Cuiaba',
'15': 'America/Sao_Paulo',
'16': 'America/Belem',
'17': 'America/Recife',
'18': 'America/Sao_Paulo',
'20': 'America/Fortaleza',
'21': 'America/Sao_Paulo',
'22': 'America/Recife',
'23': 'America/Sao_Paulo',
'24': 'America/Porto_Velho',
'25': 'America/Boa_Vista',
'26': 'America/Sao_Paulo',
'27': 'America/Sao_Paulo',
'28': 'America/Maceio',
'29': 'America/Sao_Paulo',
'30': 'America/Recife',
'31': 'America/Araguaina'
},
'BS': 'America/Nassau',
'BT': 'Asia/Thimphu',
'BW': 'Africa/Gaborone',
'BY': 'Europe/Minsk',
'BZ': 'America/Belize',
'CA': {
'AB': 'America/Edmonton',
'BC': 'America/Vancouver',
'MB': 'America/Winnipeg',
'NB': 'America/Halifax',
'NL': 'America/St_Johns',
'NS': 'America/Halifax',
'NT': 'America/Yellowknife',
'NU': 'America/Rankin_Inlet',
'ON': 'America/Toronto',
'PE': 'America/Halifax',
'QC': 'America/Montreal',
'SK': 'America/Regina',
'YT': 'America/Whitehorse'
},
'CC': 'Indian/Cocos',
'CD': {
'02': 'Africa/Kinshasa',
'05': 'Africa/Lubumbashi',
'06': 'Africa/Kinshasa',
'08': 'Africa/Kinshasa',
'10': 'Africa/Lubumbashi',
'11': 'Africa/Lubumbashi',
'12': 'Africa/Lubumbashi'
},
'CF': 'Africa/Bangui',
'CG': 'Africa/Brazzaville',
'CH': 'Europe/Zurich',
'CI': 'Africa/Abidjan',
'CK': 'Pacific/Rarotonga',
'CL': 'Chile/Continental',
'CM': 'Africa/Lagos',
'CN': {
'01': 'Asia/Shanghai',
'02': 'Asia/Shanghai',
'03': 'Asia/Shanghai',
'04': 'Asia/Shanghai',
'05': 'Asia/Harbin',
'06': 'Asia/Chongqing',
'07': 'Asia/Shanghai',
'08': 'Asia/Harbin',
'09': 'Asia/Shanghai',
'10': 'Asia/Shanghai',
'11': 'Asia/Chongqing',
'12': 'Asia/Shanghai',
'13': 'Asia/Urumqi',
'14': 'Asia/Chongqing',
'15': 'Asia/Chongqing',
'16': 'Asia/Chongqing',
'18': 'Asia/Chongqing',
'19': 'Asia/Harbin',
'20': 'Asia/Harbin',
'21': 'Asia/Chongqing',
'22': 'Asia/Harbin',
'23': 'Asia/Shanghai',
'24': 'Asia/Chongqing',
'25': 'Asia/Shanghai',
'26': 'Asia/Chongqing',
'28': 'Asia/Shanghai',
'29': 'Asia/Chongqing',
'30': 'Asia/Chongqing',
'31': 'Asia/Chongqing',
'32': 'Asia/Chongqing',
'33': 'Asia/Chongqing'
},
'CO': 'America/Bogota',
'CR': 'America/Costa_Rica',
'CU': 'America/Havana',
'CV': 'Atlantic/Cape_Verde',
'CW': 'America/Curacao',
'CX': 'Indian/Christmas',
'CY': 'Asia/Nicosia',
'CZ': 'Europe/Prague',
'DE': 'Europe/Berlin',
'DJ': 'Africa/Djibouti',
'DK': 'Europe/Copenhagen',
'DM': 'America/Dominica',
'DO': 'America/Santo_Domingo',
'DZ': 'Africa/Algiers',
'EC': {
'01': 'Pacific/Galapagos',
'02': 'America/Guayaquil',
'03': 'America/Guayaquil',
'04': 'America/Guayaquil',
'05': 'America/Guayaquil',
'06': 'America/Guayaquil',
'07': 'America/Guayaquil',
'08': 'America/Guayaquil',
'09': 'America/Guayaquil',
'10': 'America/Guayaquil',
'11': 'America/Guayaquil',
'12': 'America/Guayaquil',
'13': 'America/Guayaquil',
'14': 'America/Guayaquil',
'15': 'America/Guayaquil',
'17': 'America/Guayaquil',
'18': 'America/Guayaquil',
'19': 'America/Guayaquil',
'20': 'America/Guayaquil',
'22': 'America/Guayaquil'
},
'EE': 'Europe/Tallinn',
'EG': 'Africa/Cairo',
'EH': 'Africa/El_Aaiun',
'ER': 'Africa/Asmera',
'ES': {
'07': 'Europe/Madrid',
'27': 'Europe/Madrid',
'29': 'Europe/Madrid',
'31': 'Europe/Madrid',
'32': 'Europe/Madrid',
'34': 'Europe/Madrid',
'39': 'Europe/Madrid',
'51': 'Africa/Ceuta',
'52': 'Europe/Madrid',
'53': 'Atlantic/Canary',
'54': 'Europe/Madrid',
'55': 'Europe/Madrid',
'56': 'Europe/Madrid',
'57': 'Europe/Madrid',
'58': 'Europe/Madrid',
'59': 'Europe/Madrid',
'60': 'Europe/Madrid'
},
'ET': 'Africa/Addis_Ababa',
'FI': 'Europe/Helsinki',
'FJ': 'Pacific/Fiji',
'FK': 'Atlantic/Stanley',
'FO': 'Atlantic/Faeroe',
'FR': 'Europe/Paris',
'FX': 'Europe/Paris',
'GA': 'Africa/Libreville',
'GB': 'Europe/London',
'GD': 'America/Grenada',
'GE': 'Asia/Tbilisi',
'GF': 'America/Cayenne',
'GG': 'Europe/Guernsey',
'GH': 'Africa/Accra',
'GI': 'Europe/Gibraltar',
'GL': {
'01': 'America/Thule',
'02': 'America/Godthab',
'03': 'America/Godthab'
},
'GM': 'Africa/Banjul',
'GN': 'Africa/Conakry',
'GP': 'America/Guadeloupe',
'GQ': 'Africa/Malabo',
'GR': 'Europe/Athens',
'GS': 'Atlantic/South_Georgia',
'GT': 'America/Guatemala',
'GU': 'Pacific/Guam',
'GW': 'Africa/Bissau',
'GY': 'America/Guyana',
'HK': 'Asia/Hong_Kong',
'HN': 'America/Tegucigalpa',
'HR': 'Europe/Zagreb',
'HT': 'America/Port-au-Prince',
'HU': 'Europe/Budapest',
'ID': {
'01': 'Asia/Pontianak',
'02': 'Asia/Makassar',
'03': 'Asia/Jakarta',
'04': 'Asia/Jakarta',
'05': 'Asia/Jakarta',
'06': 'Asia/Jakarta',
'07': 'Asia/Jakarta',
'08': 'Asia/Jakarta',
'09': 'Asia/Jayapura',
'10': 'Asia/Jakarta',
'11': 'Asia/Pontianak',
'12': 'Asia/Makassar',
'13': 'Asia/Makassar',
'14': 'Asia/Makassar',
'15': 'Asia/Jakarta',
'16': 'Asia/Makassar',
'17': 'Asia/Makassar',
'18': 'Asia/Makassar',
'19': 'Asia/Pontianak',
'20': 'Asia/Makassar',
'21': 'Asia/Makassar',
'22': 'Asia/Makassar',
'23': 'Asia/Makassar',
'24': 'Asia/Jakarta',
'25': 'Asia/Pontianak',
'26': 'Asia/Pontianak',
'30': 'Asia/Jakarta',
'31': 'Asia/Makassar',
'33': 'Asia/Jakarta'
},
'IE': 'Europe/Dublin',
'IL': 'Asia/Jerusalem',
'IM': 'Europe/Isle_of_Man',
'IN': 'Asia/Calcutta',
'IO': 'Indian/Chagos',
'IQ': 'Asia/Baghdad',
'IR': 'Asia/Tehran',
'IS': 'Atlantic/Reykjavik',
'IT': 'Europe/Rome',
'JE': 'Europe/Jersey',
'JM': 'America/Jamaica',
'JO': 'Asia/Amman',
'JP': 'Asia/Tokyo',
'KE': 'Africa/Nairobi',
'KG': 'Asia/Bishkek',
'KH': 'Asia/Phnom_Penh',
'KI': 'Pacific/Tarawa',
'KM': 'Indian/Comoro',
'KN': 'America/St_Kitts',
'KP': 'Asia/Pyongyang',
'KR': 'Asia/Seoul',
'KW': 'Asia/Kuwait',
'KY': 'America/Cayman',
'KZ': {
'01': 'Asia/Almaty',
'02': 'Asia/Almaty',
'03': 'Asia/Qyzylorda',
'04': 'Asia/Aqtobe',
'05': 'Asia/Qyzylorda',
'06': 'Asia/Aqtau',
'07': 'Asia/Oral',
'08': 'Asia/Qyzylorda',
'09': 'Asia/Aqtau',
'10': 'Asia/Qyzylorda',
'11': 'Asia/Almaty',
'12': 'Asia/Qyzylorda',
'13': 'Asia/Aqtobe',
'14': 'Asia/Qyzylorda',
'15': 'Asia/Almaty',
'16': 'Asia/Aqtobe',
'17': 'Asia/Almaty'
},
'LA': 'Asia/Vientiane',
'LB': 'Asia/Beirut',
'LC': 'America/St_Lucia',
'LI': 'Europe/Vaduz',
'LK': 'Asia/Colombo',
'LR': 'Africa/Monrovia',
'LS': 'Africa/Maseru',
'LT': 'Europe/Vilnius',
'LU': 'Europe/Luxembourg',
'LV': 'Europe/Riga',
'LY': 'Africa/Tripoli',
'MA': 'Africa/Casablanca',
'MC': 'Europe/Monaco',
'MD': 'Europe/Chisinau',
'ME': 'Europe/Podgorica',
'MF': 'America/Marigot',
'MG': 'Indian/Antananarivo',
'MK': 'Europe/Skopje',
'ML': 'Africa/Bamako',
'MM': 'Asia/Rangoon',
'MN': 'Asia/Choibalsan',
'MO': 'Asia/Macao',
'MP': 'Pacific/Saipan',
'MQ': 'America/Martinique',
'MR': 'Africa/Nouakchott',
'MS': 'America/Montserrat',
'MT': 'Europe/Malta',
'MU': 'Indian/Mauritius',
'MV': 'Indian/Maldives',
'MW': 'Africa/Blantyre',
'MX': {
'01': 'America/Mexico_City',
'02': 'America/Tijuana',
'03': 'America/Hermosillo',
'04': 'America/Merida',
'05': 'America/Mexico_City',
'06': 'America/Chihuahua',
'07': 'America/Monterrey',
'08': 'America/Mexico_City',
'09': 'America/Mexico_City',
'10': 'America/Mazatlan',
'11': 'America/Mexico_City',
'12': 'America/Mexico_City',
'13': 'America/Mexico_City',
'14': 'America/Mazatlan',
'15': 'America/Chihuahua',
'16': 'America/Mexico_City',
'17': 'America/Mexico_City',
'18': 'America/Mazatlan',
'19': 'America/Monterrey',
'20': 'America/Mexico_City',
'21': 'America/Mexico_City',
'22': 'America/Mexico_City',
'23': 'America/Cancun',
'24': 'America/Mexico_City',
'25': 'America/Mazatlan',
'26': 'America/Hermosillo',
'27': 'America/Merida',
'28': 'America/Monterrey',
'29': 'America/Mexico_City',
'30': 'America/Mexico_City',
'31': 'America/Merida',
'32': 'America/Monterrey'
},
'MY': {
'01': 'Asia/Kuala_Lumpur',
'02': 'Asia/Kuala_Lumpur',
'03': 'Asia/Kuala_Lumpur',
'04': 'Asia/Kuala_Lumpur',
'05': 'Asia/Kuala_Lumpur',
'06': 'Asia/Kuala_Lumpur',
'07': 'Asia/Kuala_Lumpur',
'08': 'Asia/Kuala_Lumpur',
'09': 'Asia/Kuala_Lumpur',
'11': 'Asia/Kuching',
'12': 'Asia/Kuala_Lumpur',
'13': 'Asia/Kuala_Lumpur',
'14': 'Asia/Kuala_Lumpur',
'15': 'Asia/Kuching',
'16': 'Asia/Kuching'
},
'MZ': 'Africa/Maputo',
'NA': 'Africa/Windhoek',
'NC': 'Pacific/Noumea',
'NE': 'Africa/Niamey',
'NF': 'Pacific/Norfolk',
'NG': 'Africa/Lagos',
'NI': 'America/Managua',
'NL': 'Europe/Amsterdam',
'NO': 'Europe/Oslo',
'NP': 'Asia/Katmandu',
'NR': 'Pacific/Nauru',
'NU': 'Pacific/Niue',
'NZ': {
'85': 'Pacific/Auckland',
'E7': 'Pacific/Auckland',
'E8': 'Pacific/Auckland',
'E9': 'Pacific/Auckland',
'F1': 'Pacific/Auckland',
'F2': 'Pacific/Auckland',
'F3': 'Pacific/Auckland',
'F4': 'Pacific/Auckland',
'F5': 'Pacific/Auckland',
'F7': 'Pacific/Chatham',
'F8': 'Pacific/Auckland',
'F9': 'Pacific/Auckland',
'G1': 'Pacific/Auckland',
'G2': 'Pacific/Auckland',
'G3': 'Pacific/Auckland'
},
'OM': 'Asia/Muscat',
'PA': 'America/Panama',
'PE': 'America/Lima',
'PF': 'Pacific/Marquesas',
'PG': 'Pacific/Port_Moresby',
'PH': 'Asia/Manila',
'PK': 'Asia/Karachi',
'PL': 'Europe/Warsaw',
'PM': 'America/Miquelon',
'PN': 'Pacific/Pitcairn',
'PR': 'America/Puerto_Rico',
'PS': 'Asia/Gaza',
'PT': {
'02': 'Europe/Lisbon',
'03': 'Europe/Lisbon',
'04': 'Europe/Lisbon',
'05': 'Europe/Lisbon',
'06': 'Europe/Lisbon',
'07': 'Europe/Lisbon',
'08': 'Europe/Lisbon',
'09': 'Europe/Lisbon',
'10': 'Atlantic/Madeira',
'11': 'Europe/Lisbon',
'13': 'Europe/Lisbon',
'14': 'Europe/Lisbon',
'16': 'Europe/Lisbon',
'17': 'Europe/Lisbon',
'18': 'Europe/Lisbon',
'19': 'Europe/Lisbon',
'20': 'Europe/Lisbon',
'21': 'Europe/Lisbon',
'22': 'Europe/Lisbon'
},
'PW': 'Pacific/Palau',
'PY': 'America/Asuncion',
'QA': 'Asia/Qatar',
'RE': 'Indian/Reunion',
'RO': 'Europe/Bucharest',
'RS': 'Europe/Belgrade',
'RU': {
'01': 'Europe/Volgograd',
'02': 'Asia/Irkutsk',
'03': 'Asia/Novokuznetsk',
'04': 'Asia/Novosibirsk',
'05': 'Asia/Vladivostok',
'06': 'Europe/Moscow',
'07': 'Europe/Volgograd',
'08': 'Europe/Samara',
'09': 'Europe/Moscow',
'10': 'Europe/Moscow',
'11': 'Asia/Irkutsk',
'13': 'Asia/Yekaterinburg',
'14': 'Asia/Irkutsk',
'15': 'Asia/Anadyr',
'16': 'Europe/Samara',
'17': 'Europe/Volgograd',
'18': 'Asia/Krasnoyarsk',
'20': 'Asia/Irkutsk',
'21': 'Europe/Moscow',
'22': 'Europe/Volgograd',
'23': 'Europe/Kaliningrad',
'24': 'Europe/Volgograd',
'25': 'Europe/Moscow',
'26': 'Asia/Kamchatka',
'27': 'Europe/Volgograd',
'28': 'Europe/Moscow',
'29': 'Asia/Novokuznetsk',
'30': 'Asia/Vladivostok',
'31': 'Asia/Krasnoyarsk',
'32': 'Asia/Omsk',
'33': 'Asia/Yekaterinburg',
'34': 'Asia/Yekaterinburg',
'35': 'Asia/Yekaterinburg',
'36': 'Asia/Anadyr',
'37': 'Europe/Moscow',
'38': 'Europe/Volgograd',
'39': 'Asia/Krasnoyarsk',
'40': 'Asia/Yekaterinburg',
'41': 'Europe/Moscow',
'42': 'Europe/Moscow',
'43': 'Europe/Moscow',
'44': 'Asia/Magadan',
'45': 'Europe/Samara',
'46': 'Europe/Samara',
'47': 'Europe/Moscow',
'48': 'Europe/Moscow',
'49': 'Europe/Moscow',
'50': 'Asia/Yekaterinburg',
'51': 'Europe/Moscow',
'52': 'Europe/Moscow',
'53': 'Asia/Novosibirsk',
'54': 'Asia/Omsk',
'55': 'Europe/Samara',
'56': 'Europe/Moscow',
'57': 'Europe/Samara',
'58': 'Asia/Yekaterinburg',
'59': 'Asia/Vladivostok',
'60': 'Europe/Kaliningrad',
'61': 'Europe/Volgograd',
'62': 'Europe/Moscow',
'63': 'Asia/Yakutsk',
'64': 'Asia/Sakhalin',
'65': 'Europe/Samara',
'66': 'Europe/Moscow',
'67': 'Europe/Samara',
'68': 'Europe/Volgograd',
'69': 'Europe/Moscow',
'70': 'Europe/Volgograd',
'71': 'Asia/Yekaterinburg',
'72': 'Europe/Moscow',
'73': 'Europe/Samara',
'74': 'Asia/Krasnoyarsk',
'75': 'Asia/Novosibirsk',
'76': 'Europe/Moscow',
'77': 'Europe/Moscow',
'78': 'Asia/Yekaterinburg',
'79': 'Asia/Irkutsk',
'80': 'Asia/Yekaterinburg',
'81': 'Europe/Samara',
'82': 'Asia/Irkutsk',
'83': 'Europe/Moscow',
'84': 'Europe/Volgograd',
'85': 'Europe/Moscow',
'86': 'Europe/Moscow',
'87': 'Asia/Novosibirsk',
'88': 'Europe/Moscow',
'89': 'Asia/Vladivostok'
},
'RW': 'Africa/Kigali',
'SA': 'Asia/Riyadh',
'SB': 'Pacific/Guadalcanal',
'SC': 'Indian/Mahe',
'SD': 'Africa/Khartoum',
'SE': 'Europe/Stockholm',
'SG': 'Asia/Singapore',
'SH': 'Atlantic/St_Helena',
'SI': 'Europe/Ljubljana',
'SJ': 'Arctic/Longyearbyen',
'SK': 'Europe/Bratislava',
'SL': 'Africa/Freetown',
'SM': 'Europe/San_Marino',
'SN': 'Africa/Dakar',
'SO': 'Africa/Mogadishu',
'SR': 'America/Paramaribo',
'SS': 'Africa/Juba',
'ST': 'Africa/Sao_Tome',
'SV': 'America/El_Salvador',
'SX': 'America/Curacao',
'SY': 'Asia/Damascus',
'SZ': 'Africa/Mbabane',
'TC': 'America/Grand_Turk',
'TD': 'Africa/Ndjamena',
'TF': 'Indian/Kerguelen',
'TG': 'Africa/Lome',
'TH': 'Asia/Bangkok',
'TJ': 'Asia/Dushanbe',
'TK': 'Pacific/Fakaofo',
'TL': 'Asia/Dili',
'TM': 'Asia/Ashgabat',
'TN': 'Africa/Tunis',
'TO': 'Pacific/Tongatapu',
'TR': 'Asia/Istanbul',
'TT': 'America/Port_of_Spain',
'TV': 'Pacific/Funafuti',
'TW': 'Asia/Taipei',
'TZ': 'Africa/Dar_es_Salaam',
'UA': {
'01': 'Europe/Kiev',
'02': 'Europe/Kiev',
'03': 'Europe/Uzhgorod',
'04': 'Europe/Zaporozhye',
'05': 'Europe/Zaporozhye',
'06': 'Europe/Uzhgorod',
'07': 'Europe/Zaporozhye',
'08': 'Europe/Simferopol',
'09': 'Europe/Kiev',
'10': 'Europe/Zaporozhye',
'11': 'Europe/Simferopol',
'13': 'Europe/Kiev',
'14': 'Europe/Zaporozhye',
'15': 'Europe/Uzhgorod',
'16': 'Europe/Zaporozhye',
'17': 'Europe/Simferopol',
'18': 'Europe/Zaporozhye',
'19': 'Europe/Kiev',
'20': 'Europe/Simferopol',
'21': 'Europe/Kiev',
'22': 'Europe/Uzhgorod',
'23': 'Europe/Kiev',
'24': 'Europe/Uzhgorod',
'25': 'Europe/Uzhgorod',
'26': 'Europe/Zaporozhye',
'27': 'Europe/Kiev'
},
'UG': 'Africa/Kampala',
'US': {
'AK': 'America/Anchorage',
'AL': 'America/Chicago',
'AR': 'America/Chicago',
'AZ': 'America/Phoenix',
'CA': 'America/Los_Angeles',
'CO': 'America/Denver',
'CT': 'America/New_York',
'DC': 'America/New_York',
'DE': 'America/New_York',
'FL': 'America/New_York',
'GA': 'America/New_York',
'HI': 'Pacific/Honolulu',
'IA': 'America/Chicago',
'ID': 'America/Denver',
'IL': 'America/Chicago',
'IN': 'America/Indianapolis',
'KS': 'America/Chicago',
'KY': 'America/New_York',
'LA': 'America/Chicago',
'MA': 'America/New_York',
'MD': 'America/New_York',
'ME': 'America/New_York',
'MI': 'America/New_York',
'MN': 'America/Chicago',
'MO': 'America/Chicago',
'MS': 'America/Chicago',
'MT': 'America/Denver',
'NC': 'America/New_York',
'ND': 'America/Chicago',
'NE': 'America/Chicago',
'NH': 'America/New_York',
'NJ': 'America/New_York',
'NM': 'America/Denver',
'NV': 'America/Los_Angeles',
'NY': 'America/New_York',
'OH': 'America/New_York',
'OK': 'America/Chicago',
'OR': 'America/Los_Angeles',
'PA': 'America/New_York',
'RI': 'America/New_York',
'SC': 'America/New_York',
'SD': 'America/Chicago',
'TN': 'America/Chicago',
'TX': 'America/Chicago',
'UT': 'America/Denver',
'VA': 'America/New_York',
'VT': 'America/New_York',
'WA': 'America/Los_Angeles',
'WI': 'America/Chicago',
'WV': 'America/New_York',
'WY': 'America/Denver'
},
'UY': 'America/Montevideo',
'UZ': {
'01': 'Asia/Tashkent',
'02': 'Asia/Samarkand',
'03': 'Asia/Tashkent',
'06': 'Asia/Tashkent',
'07': 'Asia/Samarkand',
'08': 'Asia/Samarkand',
'09': 'Asia/Samarkand',
'10': 'Asia/Samarkand',
'12': 'Asia/Samarkand',
'13': 'Asia/Tashkent',
'14': 'Asia/Tashkent'
},
'VA': 'Europe/Vatican',
'VC': 'America/St_Vincent',
'VE': 'America/Caracas',
'VG': 'America/Tortola',
'VI': 'America/St_Thomas',
'VN': 'Asia/Phnom_Penh',
'VU': 'Pacific/Efate',
'WF': 'Pacific/Wallis',
'WS': 'Pacific/Samoa',
'YE': 'Asia/Aden',
'YT': 'Indian/Mayotte',
'YU': 'Europe/Belgrade',
'ZA': 'Africa/Johannesburg',
'ZM': 'Africa/Lusaka',
'ZW': 'Africa/Harare'
}
def time_zone_by_country_and_region(country_code, region_name=None):
if country_code not in _country:
return ''
if not region_name or region_name == '00':
region_name = None
timezones = _country[country_code]
if isinstance(timezones, str):
return timezones
if not region_name:
return ''
    return timezones.get(region_name, '')
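# Illustrative usage sketch, not part of the original module: scalar
# entries resolve directly, nested dicts need a region code, and unknown
# inputs fall back to the empty string.
assert time_zone_by_country_and_region('DE') == 'Europe/Berlin'
assert time_zone_by_country_and_region('US', 'CA') == 'America/Los_Angeles'
assert time_zone_by_country_and_region('US') == ''  # dict entry, region needed
assert time_zone_by_country_and_region('XX') == ''  # unknown country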


@@ -1,36 +0,0 @@
# -*- coding: utf-8 -*-
"""
Utility functions. Part of the pygeoip package.
@author: Jennifer Ennis <zaylea@gmail.com>
@license: Copyright(C) 2004 MaxMind LLC
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
"""
import socket
import binascii
def ip2long(ip):
"""
Wrapper function for IPv4 and IPv6 converters
@param ip: IPv4 or IPv6 address
@type ip: str
"""
try:
return int(binascii.hexlify(socket.inet_aton(ip)), 16)
except socket.error:
return int(binascii.hexlify(socket.inet_pton(socket.AF_INET6, ip)), 16)
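# Illustrative usage sketch, not part of the original module: IPv4
# addresses become 32-bit integers, IPv6 addresses 128-bit integers.
assert ip2long('8.8.8.8') == 0x08080808  # 134744072
assert ip2long('::1') == 1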


@@ -1,366 +0,0 @@
"""Utilities for writing code that runs on Python 2 and 3"""
import operator
import sys
import types
__author__ = "Benjamin Peterson <benjamin@python.org>"
__version__ = "1.2.0"
# True if we are running on Python 3.
PY3 = sys.version_info[0] == 3
if PY3:
string_types = str,
integer_types = int,
class_types = type,
text_type = str
binary_type = bytes
MAXSIZE = sys.maxsize
else:
string_types = basestring,
integer_types = (int, long)
class_types = (type, types.ClassType)
text_type = unicode
binary_type = str
if sys.platform == "java":
# Jython always uses 32 bits.
MAXSIZE = int((1 << 31) - 1)
else:
# It's possible to have sizeof(long) != sizeof(Py_ssize_t).
class X(object):
def __len__(self):
return 1 << 31
try:
len(X())
except OverflowError:
# 32-bit
MAXSIZE = int((1 << 31) - 1)
else:
# 64-bit
MAXSIZE = int((1 << 63) - 1)
del X
def _add_doc(func, doc):
"""Add documentation to a function."""
func.__doc__ = doc
def _import_module(name):
"""Import module, returning the module after the last dot."""
__import__(name)
return sys.modules[name]
class _LazyDescr(object):
def __init__(self, name):
self.name = name
def __get__(self, obj, tp):
result = self._resolve()
setattr(obj, self.name, result)
# This is a bit ugly, but it avoids running this again.
delattr(tp, self.name)
return result
class MovedModule(_LazyDescr):
def __init__(self, name, old, new=None):
super(MovedModule, self).__init__(name)
if PY3:
if new is None:
new = name
self.mod = new
else:
self.mod = old
def _resolve(self):
return _import_module(self.mod)
class MovedAttribute(_LazyDescr):
def __init__(self, name, old_mod, new_mod, old_attr=None, new_attr=None):
super(MovedAttribute, self).__init__(name)
if PY3:
if new_mod is None:
new_mod = name
self.mod = new_mod
if new_attr is None:
if old_attr is None:
new_attr = name
else:
new_attr = old_attr
self.attr = new_attr
else:
self.mod = old_mod
if old_attr is None:
old_attr = name
self.attr = old_attr
def _resolve(self):
module = _import_module(self.mod)
return getattr(module, self.attr)
class _MovedItems(types.ModuleType):
"""Lazy loading of moved objects"""
_moved_attributes = [
MovedAttribute("cStringIO", "cStringIO", "io", "StringIO"),
MovedAttribute("filter", "itertools", "builtins", "ifilter", "filter"),
MovedAttribute("input", "__builtin__", "builtins", "raw_input", "input"),
MovedAttribute("map", "itertools", "builtins", "imap", "map"),
MovedAttribute("reload_module", "__builtin__", "imp", "reload"),
MovedAttribute("reduce", "__builtin__", "functools"),
MovedAttribute("StringIO", "StringIO", "io"),
MovedAttribute("xrange", "__builtin__", "builtins", "xrange", "range"),
MovedAttribute("zip", "itertools", "builtins", "izip", "zip"),
MovedModule("builtins", "__builtin__"),
MovedModule("configparser", "ConfigParser"),
MovedModule("copyreg", "copy_reg"),
MovedModule("http_cookiejar", "cookielib", "http.cookiejar"),
MovedModule("http_cookies", "Cookie", "http.cookies"),
MovedModule("html_entities", "htmlentitydefs", "html.entities"),
MovedModule("html_parser", "HTMLParser", "html.parser"),
MovedModule("http_client", "httplib", "http.client"),
MovedModule("BaseHTTPServer", "BaseHTTPServer", "http.server"),
MovedModule("CGIHTTPServer", "CGIHTTPServer", "http.server"),
MovedModule("SimpleHTTPServer", "SimpleHTTPServer", "http.server"),
MovedModule("cPickle", "cPickle", "pickle"),
MovedModule("queue", "Queue"),
MovedModule("reprlib", "repr"),
MovedModule("socketserver", "SocketServer"),
MovedModule("tkinter", "Tkinter"),
MovedModule("tkinter_dialog", "Dialog", "tkinter.dialog"),
MovedModule("tkinter_filedialog", "FileDialog", "tkinter.filedialog"),
MovedModule("tkinter_scrolledtext", "ScrolledText", "tkinter.scrolledtext"),
MovedModule("tkinter_simpledialog", "SimpleDialog", "tkinter.simpledialog"),
MovedModule("tkinter_tix", "Tix", "tkinter.tix"),
MovedModule("tkinter_constants", "Tkconstants", "tkinter.constants"),
MovedModule("tkinter_dnd", "Tkdnd", "tkinter.dnd"),
MovedModule("tkinter_colorchooser", "tkColorChooser",
"tkinter.colorchooser"),
MovedModule("tkinter_commondialog", "tkCommonDialog",
"tkinter.commondialog"),
MovedModule("tkinter_tkfiledialog", "tkFileDialog", "tkinter.filedialog"),
MovedModule("tkinter_font", "tkFont", "tkinter.font"),
MovedModule("tkinter_messagebox", "tkMessageBox", "tkinter.messagebox"),
MovedModule("tkinter_tksimpledialog", "tkSimpleDialog",
"tkinter.simpledialog"),
MovedModule("urllib_robotparser", "robotparser", "urllib.robotparser"),
MovedModule("winreg", "_winreg"),
]
for attr in _moved_attributes:
setattr(_MovedItems, attr.name, attr)
del attr
moves = sys.modules["six.moves"] = _MovedItems("moves")
def add_move(move):
"""Add an item to six.moves."""
setattr(_MovedItems, move.name, move)
def remove_move(name):
"""Remove item from six.moves."""
try:
delattr(_MovedItems, name)
except AttributeError:
try:
del moves.__dict__[name]
except KeyError:
raise AttributeError("no such move, %r" % (name,))
if PY3:
_meth_func = "__func__"
_meth_self = "__self__"
_func_code = "__code__"
_func_defaults = "__defaults__"
_iterkeys = "keys"
_itervalues = "values"
_iteritems = "items"
else:
_meth_func = "im_func"
_meth_self = "im_self"
_func_code = "func_code"
_func_defaults = "func_defaults"
_iterkeys = "iterkeys"
_itervalues = "itervalues"
_iteritems = "iteritems"
try:
advance_iterator = next
except NameError:
def advance_iterator(it):
return it.next()
next = advance_iterator
if PY3:
def get_unbound_function(unbound):
return unbound
Iterator = object
def callable(obj):
return any("__call__" in klass.__dict__ for klass in type(obj).__mro__)
else:
def get_unbound_function(unbound):
return unbound.im_func
class Iterator(object):
def next(self):
return type(self).__next__(self)
callable = callable
_add_doc(get_unbound_function,
"""Get the function out of a possibly unbound function""")
get_method_function = operator.attrgetter(_meth_func)
get_method_self = operator.attrgetter(_meth_self)
get_function_code = operator.attrgetter(_func_code)
get_function_defaults = operator.attrgetter(_func_defaults)
def iterkeys(d):
"""Return an iterator over the keys of a dictionary."""
return iter(getattr(d, _iterkeys)())
def itervalues(d):
"""Return an iterator over the values of a dictionary."""
return iter(getattr(d, _itervalues)())
def iteritems(d):
"""Return an iterator over the (key, value) pairs of a dictionary."""
return iter(getattr(d, _iteritems)())
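# Illustrative usage, not in the original source: these helpers hide the
# Py2 iterkeys/itervalues/iteritems vs. Py3 keys/values/items split, e.g.
#     for key, value in iteritems({'a': 1}):
#         ...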
if PY3:
def b(s):
return s.encode("latin-1")
def u(s):
return s
if sys.version_info[1] <= 1:
def int2byte(i):
return bytes((i,))
else:
# This is about 2x faster than the implementation above on 3.2+
int2byte = operator.methodcaller("to_bytes", 1, "big")
import io
StringIO = io.StringIO
BytesIO = io.BytesIO
else:
def b(s):
return s
def u(s):
return unicode(s, "unicode_escape")
int2byte = chr
import StringIO
StringIO = BytesIO = StringIO.StringIO
_add_doc(b, """Byte literal""")
_add_doc(u, """Text literal""")
if PY3:
import builtins
exec_ = getattr(builtins, "exec")
def reraise(tp, value, tb=None):
if value.__traceback__ is not tb:
raise value.with_traceback(tb)
raise value
print_ = getattr(builtins, "print")
del builtins
else:
def exec_(code, globs=None, locs=None):
"""Execute code in a namespace."""
if globs is None:
frame = sys._getframe(1)
globs = frame.f_globals
if locs is None:
locs = frame.f_locals
del frame
elif locs is None:
locs = globs
exec("""exec code in globs, locs""")
exec_("""def reraise(tp, value, tb=None):
raise tp, value, tb
""")
def print_(*args, **kwargs):
"""The new-style print function."""
fp = kwargs.pop("file", sys.stdout)
if fp is None:
return
def write(data):
if not isinstance(data, basestring):
data = str(data)
fp.write(data)
want_unicode = False
sep = kwargs.pop("sep", None)
if sep is not None:
if isinstance(sep, unicode):
want_unicode = True
elif not isinstance(sep, str):
raise TypeError("sep must be None or a string")
end = kwargs.pop("end", None)
if end is not None:
if isinstance(end, unicode):
want_unicode = True
elif not isinstance(end, str):
raise TypeError("end must be None or a string")
if kwargs:
raise TypeError("invalid keyword arguments to print()")
if not want_unicode:
for arg in args:
if isinstance(arg, unicode):
want_unicode = True
break
if want_unicode:
newline = unicode("\n")
space = unicode(" ")
else:
newline = "\n"
space = " "
if sep is None:
sep = space
if end is None:
end = newline
for i, arg in enumerate(args):
if i:
write(sep)
write(arg)
write(end)
_add_doc(reraise, """Reraise an exception.""")
def with_metaclass(meta, base=object):
"""Create a base class with a metaclass."""
return meta("NewBase", (base,), {})
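# Illustrative usage sketch, not part of the original module: the
# runtime-built base class gives one spelling for metaclasses that works
# on both Python 2 and Python 3.
class _Meta(type):
    pass
class _WithMeta(with_metaclass(_Meta)):
    pass
assert type(_WithMeta) is _Meta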


@@ -1,31 +0,0 @@
Thank you to all who have contributed to this project!
If you contributed and are not listed below, please let me know.
Aaron Swartz
Adam Miskiewicz
AlanBell
Arthur Debert
Bas Westerbaan
Chris Kelly
Clay McClure
Ferenc Szalai
Gergely Imreh
Guan Yang
Ivo Wetzel
James Rowe
Jenny Loomis
Johannes Faigle
Kumar Appaiah
Michael (Doc) Norton
Pascal Jürgens
Robin Houston
Sam Kaufman
Thomas Bohmbach, Jr
Wayne Moore
Will McCutchen
gilles
Can Duruk
Jan Schaumann (@jschauma)
Stuart Powers
Jeff Hull (@jsh2134)
Mike (mikeandmore)


@@ -1,20 +0,0 @@
MIT License
Copyright (c) 2009-2010 Joshua Roesslein
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@@ -1,27 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
"""
Tweepy Twitter API library
"""
__version__ = '2.1'
__author__ = 'Joshua Roesslein'
__license__ = 'MIT'
from tweepy.models import Status, User, DirectMessage, Friendship, SavedSearch, SearchResults, ModelFactory, Category
from tweepy.error import TweepError
from tweepy.api import API
from tweepy.cache import Cache, MemoryCache, FileCache
from tweepy.auth import BasicAuthHandler, OAuthHandler
from tweepy.streaming import Stream, StreamListener
from tweepy.cursor import Cursor
# Global, unauthenticated instance of API
api = API()
def debug(enable=True, level=1):
import httplib
httplib.HTTPConnection.debuglevel = level
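# Illustrative usage sketch, not part of the original module; the
# credential strings below are hypothetical placeholders:
#
#     auth = OAuthHandler('consumer_key', 'consumer_secret')
#     auth.set_access_token('access_token', 'access_token_secret')
#     authed_api = API(auth)
#     for status in authed_api.home_timeline(count=10):
#         print status.text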


@@ -1,718 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import os
import mimetypes
from tweepy.binder import bind_api
from tweepy.error import TweepError
from tweepy.parsers import ModelParser
from tweepy.utils import list_to_csv
class API(object):
"""Twitter API"""
def __init__(self, auth_handler=None,
host='api.twitter.com', search_host='search.twitter.com',
cache=None, secure=True, api_root='/1.1', search_root='',
retry_count=0, retry_delay=0, retry_errors=None, timeout=60,
parser=None, compression=False):
self.auth = auth_handler
self.host = host
self.search_host = search_host
self.api_root = api_root
self.search_root = search_root
self.cache = cache
self.secure = secure
self.compression = compression
self.retry_count = retry_count
self.retry_delay = retry_delay
self.retry_errors = retry_errors
self.timeout = timeout
self.parser = parser or ModelParser()
""" statuses/home_timeline """
home_timeline = bind_api(
path = '/statuses/home_timeline.json',
payload_type = 'status', payload_list = True,
allowed_param = ['since_id', 'max_id', 'count'],
require_auth = True
)
""" statuses/user_timeline """
user_timeline = bind_api(
path = '/statuses/user_timeline.json',
payload_type = 'status', payload_list = True,
allowed_param = ['id', 'user_id', 'screen_name', 'since_id',
'max_id', 'count', 'include_rts']
)
""" statuses/mentions """
mentions_timeline = bind_api(
path = '/statuses/mentions_timeline.json',
payload_type = 'status', payload_list = True,
allowed_param = ['since_id', 'max_id', 'count'],
require_auth = True
)
"""/statuses/:id/retweeted_by.format"""
retweeted_by = bind_api(
path = '/statuses/{id}/retweeted_by.json',
payload_type = 'status', payload_list = True,
allowed_param = ['id', 'count', 'page'],
require_auth = True
)
"""/related_results/show/:id.format"""
related_results = bind_api(
path = '/related_results/show/{id}.json',
payload_type = 'relation', payload_list = True,
allowed_param = ['id'],
require_auth = False
)
"""/statuses/:id/retweeted_by/ids.format"""
retweeted_by_ids = bind_api(
path = '/statuses/{id}/retweeted_by/ids.json',
payload_type = 'ids',
allowed_param = ['id', 'count', 'page'],
require_auth = True
)
""" statuses/retweets_of_me """
retweets_of_me = bind_api(
path = '/statuses/retweets_of_me.json',
payload_type = 'status', payload_list = True,
allowed_param = ['since_id', 'max_id', 'count'],
require_auth = True
)
""" statuses/show """
get_status = bind_api(
path = '/statuses/show.json',
payload_type = 'status',
allowed_param = ['id']
)
""" statuses/update """
update_status = bind_api(
path = '/statuses/update.json',
method = 'POST',
payload_type = 'status',
allowed_param = ['status', 'in_reply_to_status_id', 'lat', 'long', 'source', 'place_id'],
require_auth = True
)
""" statuses/destroy """
destroy_status = bind_api(
path = '/statuses/destroy/{id}.json',
method = 'POST',
payload_type = 'status',
allowed_param = ['id'],
require_auth = True
)
""" statuses/retweet """
retweet = bind_api(
path = '/statuses/retweet/{id}.json',
method = 'POST',
payload_type = 'status',
allowed_param = ['id'],
require_auth = True
)
""" statuses/retweets """
retweets = bind_api(
path = '/statuses/retweets/{id}.json',
payload_type = 'status', payload_list = True,
allowed_param = ['id', 'count'],
require_auth = True
)
""" users/show """
get_user = bind_api(
path = '/users/show.json',
payload_type = 'user',
allowed_param = ['id', 'user_id', 'screen_name']
)
    """ statuses/oembed """
get_oembed = bind_api(
path = '/statuses/oembed.json',
payload_type = 'json',
allowed_param = ['id', 'url', 'maxwidth', 'hide_media', 'omit_script', 'align', 'related', 'lang']
)
""" Perform bulk look up of users from user ID or screenname """
def lookup_users(self, user_ids=None, screen_names=None):
return self._lookup_users(list_to_csv(user_ids), list_to_csv(screen_names))
_lookup_users = bind_api(
path = '/users/lookup.json',
payload_type = 'user', payload_list = True,
allowed_param = ['user_id', 'screen_name'],
)
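    # Illustrative usage, not in the original source:
    #     api.lookup_users(screen_names=['twitter', 'github'])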
""" Get the authenticated user """
def me(self):
return self.get_user(screen_name=self.auth.get_username())
""" users/search """
search_users = bind_api(
path = '/users/search.json',
payload_type = 'user', payload_list = True,
require_auth = True,
allowed_param = ['q', 'per_page', 'page']
)
""" users/suggestions/:slug """
suggested_users = bind_api(
path = '/users/suggestions/{slug}.json',
payload_type = 'user', payload_list = True,
require_auth = True,
allowed_param = ['slug', 'lang']
)
""" users/suggestions """
suggested_categories = bind_api(
path = '/users/suggestions.json',
payload_type = 'category', payload_list = True,
allowed_param = ['lang'],
require_auth = True
)
""" users/suggestions/:slug/members """
suggested_users_tweets = bind_api(
path = '/users/suggestions/{slug}/members.json',
payload_type = 'status', payload_list = True,
allowed_param = ['slug'],
require_auth = True
)
""" direct_messages """
direct_messages = bind_api(
path = '/direct_messages.json',
payload_type = 'direct_message', payload_list = True,
allowed_param = ['since_id', 'max_id', 'count'],
require_auth = True
)
""" direct_messages/show """
get_direct_message = bind_api(
path = '/direct_messages/show/{id}.json',
payload_type = 'direct_message',
allowed_param = ['id'],
require_auth = True
)
""" direct_messages/sent """
sent_direct_messages = bind_api(
path = '/direct_messages/sent.json',
payload_type = 'direct_message', payload_list = True,
allowed_param = ['since_id', 'max_id', 'count', 'page'],
require_auth = True
)
""" direct_messages/new """
send_direct_message = bind_api(
path = '/direct_messages/new.json',
method = 'POST',
payload_type = 'direct_message',
allowed_param = ['user', 'screen_name', 'user_id', 'text'],
require_auth = True
)
""" direct_messages/destroy """
destroy_direct_message = bind_api(
path = '/direct_messages/destroy.json',
method = 'DELETE',
payload_type = 'direct_message',
allowed_param = ['id'],
require_auth = True
)
""" friendships/create """
create_friendship = bind_api(
path = '/friendships/create.json',
method = 'POST',
payload_type = 'user',
allowed_param = ['id', 'user_id', 'screen_name', 'follow'],
require_auth = True
)
""" friendships/destroy """
destroy_friendship = bind_api(
path = '/friendships/destroy.json',
method = 'DELETE',
payload_type = 'user',
allowed_param = ['id', 'user_id', 'screen_name'],
require_auth = True
)
""" friendships/show """
show_friendship = bind_api(
path = '/friendships/show.json',
payload_type = 'friendship',
allowed_param = ['source_id', 'source_screen_name',
'target_id', 'target_screen_name']
)
""" Perform bulk look up of friendships from user ID or screenname """
def lookup_friendships(self, user_ids=None, screen_names=None):
return self._lookup_friendships(list_to_csv(user_ids), list_to_csv(screen_names))
_lookup_friendships = bind_api(
path = '/friendships/lookup.json',
payload_type = 'relationship', payload_list = True,
allowed_param = ['user_id', 'screen_name'],
require_auth = True
)
""" friends/ids """
friends_ids = bind_api(
path = '/friends/ids.json',
payload_type = 'ids',
allowed_param = ['id', 'user_id', 'screen_name', 'cursor']
)
""" friends/list """
friends = bind_api(
path = '/friends/list.json',
payload_type = 'user', payload_list = True,
allowed_param = ['id', 'user_id', 'screen_name', 'cursor']
)
""" friendships/incoming """
friendships_incoming = bind_api(
path = '/friendships/incoming.json',
payload_type = 'ids',
allowed_param = ['cursor']
)
""" friendships/outgoing"""
friendships_outgoing = bind_api(
path = '/friendships/outgoing.json',
payload_type = 'ids',
allowed_param = ['cursor']
)
""" followers/ids """
followers_ids = bind_api(
path = '/followers/ids.json',
payload_type = 'ids',
allowed_param = ['id', 'user_id', 'screen_name', 'cursor']
)
""" followers/list """
followers = bind_api(
path = '/followers/list.json',
payload_type = 'user', payload_list = True,
allowed_param = ['id', 'user_id', 'screen_name', 'cursor']
)
""" account/verify_credentials """
def verify_credentials(self, **kargs):
try:
return bind_api(
path = '/account/verify_credentials.json',
payload_type = 'user',
require_auth = True,
allowed_param = ['include_entities', 'skip_status'],
)(self, **kargs)
        except TweepError as e:
if e.response and e.response.status == 401:
return False
raise
""" account/rate_limit_status """
rate_limit_status = bind_api(
path = '/application/rate_limit_status.json',
payload_type = 'json',
allowed_param = ['resources'],
use_cache = False
)
""" account/update_delivery_device """
set_delivery_device = bind_api(
path = '/account/update_delivery_device.json',
method = 'POST',
allowed_param = ['device'],
payload_type = 'user',
require_auth = True
)
""" account/update_profile_colors """
update_profile_colors = bind_api(
path = '/account/update_profile_colors.json',
method = 'POST',
payload_type = 'user',
allowed_param = ['profile_background_color', 'profile_text_color',
'profile_link_color', 'profile_sidebar_fill_color',
'profile_sidebar_border_color'],
require_auth = True
)
""" account/update_profile_image """
def update_profile_image(self, filename):
headers, post_data = API._pack_image(filename, 700)
return bind_api(
path = '/account/update_profile_image.json',
method = 'POST',
payload_type = 'user',
require_auth = True
)(self, post_data=post_data, headers=headers)
""" account/update_profile_background_image """
def update_profile_background_image(self, filename, *args, **kargs):
headers, post_data = API._pack_image(filename, 800)
        return bind_api(
path = '/account/update_profile_background_image.json',
method = 'POST',
payload_type = 'user',
allowed_param = ['tile'],
require_auth = True
)(self, post_data=post_data, headers=headers)
""" account/update_profile """
update_profile = bind_api(
path = '/account/update_profile.json',
method = 'POST',
payload_type = 'user',
allowed_param = ['name', 'url', 'location', 'description'],
require_auth = True
)
""" favorites """
favorites = bind_api(
path = '/favorites/list.json',
payload_type = 'status', payload_list = True,
        allowed_param = ['screen_name', 'user_id', 'max_id', 'count', 'since_id']
)
""" favorites/create """
create_favorite = bind_api(
path = '/favorites/create.json',
method = 'POST',
payload_type = 'status',
allowed_param = ['id'],
require_auth = True
)
""" favorites/destroy """
destroy_favorite = bind_api(
path = '/favorites/destroy.json',
method = 'POST',
payload_type = 'status',
allowed_param = ['id'],
require_auth = True
)
""" blocks/create """
create_block = bind_api(
path = '/blocks/create.json',
method = 'POST',
payload_type = 'user',
allowed_param = ['id', 'user_id', 'screen_name'],
require_auth = True
)
""" blocks/destroy """
destroy_block = bind_api(
path = '/blocks/destroy.json',
method = 'DELETE',
payload_type = 'user',
allowed_param = ['id', 'user_id', 'screen_name'],
require_auth = True
)
""" blocks/blocking """
blocks = bind_api(
path = '/blocks/list.json',
payload_type = 'user', payload_list = True,
allowed_param = ['cursor'],
require_auth = True
)
""" blocks/blocking/ids """
blocks_ids = bind_api(
path = '/blocks/ids.json',
payload_type = 'json',
require_auth = True
)
""" report_spam """
report_spam = bind_api(
path = '/users/report_spam.json',
method = 'POST',
payload_type = 'user',
allowed_param = ['user_id', 'screen_name'],
require_auth = True
)
""" saved_searches """
saved_searches = bind_api(
path = '/saved_searches/list.json',
payload_type = 'saved_search', payload_list = True,
require_auth = True
)
""" saved_searches/show """
get_saved_search = bind_api(
path = '/saved_searches/show/{id}.json',
payload_type = 'saved_search',
allowed_param = ['id'],
require_auth = True
)
""" saved_searches/create """
create_saved_search = bind_api(
path = '/saved_searches/create.json',
method = 'POST',
payload_type = 'saved_search',
allowed_param = ['query'],
require_auth = True
)
""" saved_searches/destroy """
destroy_saved_search = bind_api(
path = '/saved_searches/destroy/{id}.json',
method = 'POST',
payload_type = 'saved_search',
allowed_param = ['id'],
require_auth = True
)
""" help/test """
def test(self):
try:
bind_api(
path = '/help/test.json',
)(self)
except TweepError:
return False
return True
create_list = bind_api(
path = '/lists/create.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['name', 'mode', 'description'],
require_auth = True
)
destroy_list = bind_api(
path = '/lists/destroy.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['owner_screen_name', 'owner_id', 'list_id', 'slug'],
require_auth = True
)
update_list = bind_api(
path = '/lists/update.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['list_id', 'slug', 'name', 'mode', 'description', 'owner_screen_name', 'owner_id'],
require_auth = True
)
lists_all = bind_api(
path = '/lists/list.json',
payload_type = 'list', payload_list = True,
allowed_param = ['screen_name', 'user_id'],
require_auth = True
)
lists_memberships = bind_api(
path = '/lists/memberships.json',
payload_type = 'list', payload_list = True,
allowed_param = ['screen_name', 'user_id', 'filter_to_owned_lists', 'cursor'],
require_auth = True
)
lists_subscriptions = bind_api(
path = '/lists/subscriptions.json',
payload_type = 'list', payload_list = True,
allowed_param = ['screen_name', 'user_id', 'cursor'],
require_auth = True
)
list_timeline = bind_api(
path = '/lists/statuses.json',
payload_type = 'status', payload_list = True,
allowed_param = ['owner_screen_name', 'slug', 'owner_id', 'list_id', 'since_id', 'max_id', 'count']
)
get_list = bind_api(
path = '/lists/show.json',
payload_type = 'list',
allowed_param = ['owner_screen_name', 'owner_id', 'slug', 'list_id']
)
add_list_member = bind_api(
path = '/lists/members/create.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['screen_name', 'user_id', 'owner_screen_name', 'owner_id', 'slug', 'list_id'],
require_auth = True
)
remove_list_member = bind_api(
path = '/lists/members/destroy.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['screen_name', 'user_id', 'owner_screen_name', 'owner_id', 'slug', 'list_id'],
require_auth = True
)
list_members = bind_api(
path = '/lists/members.json',
payload_type = 'user', payload_list = True,
allowed_param = ['owner_screen_name', 'slug', 'list_id', 'owner_id', 'cursor']
)
show_list_member = bind_api(
path = '/lists/members/show.json',
payload_type = 'user',
allowed_param = ['list_id', 'slug', 'user_id', 'screen_name', 'owner_screen_name', 'owner_id']
)
subscribe_list = bind_api(
path = '/lists/subscribers/create.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['owner_screen_name', 'slug', 'owner_id', 'list_id'],
require_auth = True
)
unsubscribe_list = bind_api(
path = '/lists/subscribers/destroy.json',
method = 'POST',
payload_type = 'list',
allowed_param = ['owner_screen_name', 'slug', 'owner_id', 'list_id'],
require_auth = True
)
list_subscribers = bind_api(
path = '/lists/subscribers.json',
payload_type = 'user', payload_list = True,
allowed_param = ['owner_screen_name', 'slug', 'owner_id', 'list_id', 'cursor']
)
show_list_subscriber = bind_api(
path = '/lists/subscribers/show.json',
payload_type = 'user',
allowed_param = ['owner_screen_name', 'slug', 'screen_name', 'owner_id', 'list_id', 'user_id']
)
""" trends/available """
trends_available = bind_api(
path = '/trends/available.json',
payload_type = 'json'
)
trends_place = bind_api(
path = '/trends/place.json',
payload_type = 'json',
allowed_param = ['id', 'exclude']
)
trends_closest = bind_api(
path = '/trends/closest.json',
payload_type = 'json',
allowed_param = ['lat', 'long']
)
""" search """
search = bind_api(
path = '/search/tweets.json',
payload_type = 'search_results',
allowed_param = ['q', 'lang', 'locale', 'since_id', 'geocode', 'show_user', 'max_id', 'since', 'until', 'result_type']
)
""" trends/daily """
trends_daily = bind_api(
path = '/trends/daily.json',
payload_type = 'json',
allowed_param = ['date', 'exclude']
)
""" trends/weekly """
trends_weekly = bind_api(
path = '/trends/weekly.json',
payload_type = 'json',
allowed_param = ['date', 'exclude']
)
""" geo/reverse_geocode """
reverse_geocode = bind_api(
path = '/geo/reverse_geocode.json',
payload_type = 'place', payload_list = True,
allowed_param = ['lat', 'long', 'accuracy', 'granularity', 'max_results']
)
""" geo/id """
geo_id = bind_api(
path = '/geo/id/{id}.json',
payload_type = 'place',
allowed_param = ['id']
)
""" geo/search """
geo_search = bind_api(
path = '/geo/search.json',
payload_type = 'place', payload_list = True,
allowed_param = ['lat', 'long', 'query', 'ip', 'granularity', 'accuracy', 'max_results', 'contained_within']
)
""" geo/similar_places """
geo_similar_places = bind_api(
path = '/geo/similar_places.json',
payload_type = 'place', payload_list = True,
allowed_param = ['lat', 'long', 'name', 'contained_within']
)
""" Internal use only """
@staticmethod
def _pack_image(filename, max_size):
"""Pack image from file into multipart-formdata post body"""
# image must be smaller than max_size kilobytes
try:
if os.path.getsize(filename) > (max_size * 1024):
raise TweepError('File is too big, must be less than %skb.' % max_size)
except os.error:
raise TweepError('Unable to access file')
# image must be gif, jpeg, or png
# mimetypes.guess_type returns a (type, encoding) tuple; take the type
file_type = mimetypes.guess_type(filename)[0]
if file_type is None:
raise TweepError('Could not determine file type')
if file_type not in ['image/gif', 'image/jpeg', 'image/png']:
raise TweepError('Invalid file type for image: %s' % file_type)
# build the multipart-formdata body
fp = open(filename, 'rb')
BOUNDARY = 'Tw3ePy'
body = []
body.append('--' + BOUNDARY)
body.append('Content-Disposition: form-data; name="image"; filename="%s"' % filename)
body.append('Content-Type: %s' % file_type)
body.append('')
body.append(fp.read())
body.append('--' + BOUNDARY + '--')
body.append('')
fp.close()
body = '\r\n'.join(body)
# build headers
headers = {
'Content-Type': 'multipart/form-data; boundary=Tw3ePy',
'Content-Length': str(len(body))
}
return headers, body
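
Each of the attributes above is produced by the bind_api factory (defined later in this commit); calling one sends the HTTP request and returns parsed models. A minimal usage sketch, assuming an API instance built with an auth handler (as auth.py's get_username does below) and an illustrative place id:

    api = API(auth)  # `auth` is a configured auth handler

    results = api.search(q='tweepy', result_type='recent')  # GET search/tweets.json
    place = api.geo_id(id='df51dec6f4ee2b2c')               # GET geo/id/{id}.json
    saved = api.create_saved_search(query='python')         # POST saved_searches/create.json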


@ -1,163 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from urllib2 import Request, urlopen
import base64
from tweepy import oauth
from tweepy.error import TweepError
from tweepy.api import API
class AuthHandler(object):
def apply_auth(self, url, method, headers, parameters):
"""Apply authentication headers to request"""
raise NotImplementedError
def get_username(self):
"""Return the username of the authenticated user"""
raise NotImplementedError
class BasicAuthHandler(AuthHandler):
def __init__(self, username, password):
self.username = username
self._b64up = base64.b64encode('%s:%s' % (username, password))
def apply_auth(self, url, method, headers, parameters):
headers['Authorization'] = 'Basic %s' % self._b64up
def get_username(self):
return self.username
class OAuthHandler(AuthHandler):
"""OAuth authentication handler"""
OAUTH_HOST = 'api.twitter.com'
OAUTH_ROOT = '/oauth/'
def __init__(self, consumer_key, consumer_secret, callback=None, secure=False):
self._consumer = oauth.OAuthConsumer(consumer_key, consumer_secret)
self._sigmethod = oauth.OAuthSignatureMethod_HMAC_SHA1()
self.request_token = None
self.access_token = None
self.callback = callback
self.username = None
self.secure = secure
def _get_oauth_url(self, endpoint, secure=False):
if self.secure or secure:
prefix = 'https://'
else:
prefix = 'http://'
return prefix + self.OAUTH_HOST + self.OAUTH_ROOT + endpoint
def apply_auth(self, url, method, headers, parameters):
request = oauth.OAuthRequest.from_consumer_and_token(
self._consumer, http_url=url, http_method=method,
token=self.access_token, parameters=parameters
)
request.sign_request(self._sigmethod, self._consumer, self.access_token)
headers.update(request.to_header())
def _get_request_token(self):
try:
url = self._get_oauth_url('request_token')
request = oauth.OAuthRequest.from_consumer_and_token(
self._consumer, http_url=url, callback=self.callback
)
request.sign_request(self._sigmethod, self._consumer, None)
resp = urlopen(Request(url, headers=request.to_header()))
return oauth.OAuthToken.from_string(resp.read())
except Exception, e:
raise TweepError(e)
def set_request_token(self, key, secret):
self.request_token = oauth.OAuthToken(key, secret)
def set_access_token(self, key, secret):
self.access_token = oauth.OAuthToken(key, secret)
def get_authorization_url(self, signin_with_twitter=False):
"""Get the authorization URL to redirect the user"""
try:
# get the request token
self.request_token = self._get_request_token()
# build auth request and return as url
if signin_with_twitter:
url = self._get_oauth_url('authenticate')
else:
url = self._get_oauth_url('authorize')
request = oauth.OAuthRequest.from_token_and_callback(
token=self.request_token, http_url=url
)
return request.to_url()
except Exception, e:
raise TweepError(e)
def get_access_token(self, verifier=None):
"""
After user has authorized the request token, get access token
with user supplied verifier.
"""
try:
url = self._get_oauth_url('access_token')
# build request
request = oauth.OAuthRequest.from_consumer_and_token(
self._consumer,
token=self.request_token, http_url=url,
verifier=str(verifier)
)
request.sign_request(self._sigmethod, self._consumer, self.request_token)
# send request
resp = urlopen(Request(url, headers=request.to_header()))
self.access_token = oauth.OAuthToken.from_string(resp.read())
return self.access_token
except Exception, e:
raise TweepError(e)
def get_xauth_access_token(self, username, password):
"""
Get an access token from a username and password combination.
In order to get this working you need to create an app at
http://twitter.com/apps, after that send a mail to api@twitter.com
and request activation of xAuth for it.
"""
try:
url = self._get_oauth_url('access_token', secure=True) # must use HTTPS
request = oauth.OAuthRequest.from_consumer_and_token(
oauth_consumer=self._consumer,
http_method='POST', http_url=url,
parameters = {
'x_auth_mode': 'client_auth',
'x_auth_username': username,
'x_auth_password': password
}
)
request.sign_request(self._sigmethod, self._consumer, None)
resp = urlopen(Request(url, data=request.to_postdata()))
self.access_token = oauth.OAuthToken.from_string(resp.read())
return self.access_token
except Exception, e:
raise TweepError(e)
def get_username(self):
if self.username is None:
api = API(self)
user = api.verify_credentials()
if user:
self.username = user.screen_name
else:
raise TweepError("Unable to get username, invalid oauth token!")
return self.username
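
Putting the handler together, the three-legged flow looks like the following sketch (placeholder credentials; 'oob' requests a PIN-based callback):

    auth = OAuthHandler('consumer-key', 'consumer-secret', callback='oob')
    print 'Authorize here:', auth.get_authorization_url()
    verifier = raw_input('PIN from the browser: ')
    auth.get_access_token(verifier)
    print 'Authenticated as', auth.get_username()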


@ -1,210 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import httplib
import urllib
import time
import re
from StringIO import StringIO
import gzip
from tweepy.error import TweepError
from tweepy.utils import convert_to_utf8_str
from tweepy.models import Model
re_path_template = re.compile(r'{\w+}')
def bind_api(**config):
class APIMethod(object):
path = config['path']
payload_type = config.get('payload_type', None)
payload_list = config.get('payload_list', False)
allowed_param = config.get('allowed_param', [])
method = config.get('method', 'GET')
require_auth = config.get('require_auth', False)
search_api = config.get('search_api', False)
use_cache = config.get('use_cache', True)
def __init__(self, api, args, kargs):
# If authentication is required and no credentials
# are provided, throw an error.
if self.require_auth and not api.auth:
raise TweepError('Authentication required!')
self.api = api
self.post_data = kargs.pop('post_data', None)
self.retry_count = kargs.pop('retry_count', api.retry_count)
self.retry_delay = kargs.pop('retry_delay', api.retry_delay)
self.retry_errors = kargs.pop('retry_errors', api.retry_errors)
self.headers = kargs.pop('headers', {})
self.build_parameters(args, kargs)
# Pick correct URL root to use
if self.search_api:
self.api_root = api.search_root
else:
self.api_root = api.api_root
# Perform any path variable substitution
self.build_path()
if api.secure:
self.scheme = 'https://'
else:
self.scheme = 'http://'
if self.search_api:
self.host = api.search_host
else:
self.host = api.host
# Manually set Host header to fix an issue in python 2.5
# or older where Host is set including the 443 port.
# This causes Twitter to issue 301 redirect.
# See Issue https://github.com/tweepy/tweepy/issues/12
self.headers['Host'] = self.host
def build_parameters(self, args, kargs):
self.parameters = {}
for idx, arg in enumerate(args):
if arg is None:
continue
try:
self.parameters[self.allowed_param[idx]] = convert_to_utf8_str(arg)
except IndexError:
raise TweepError('Too many parameters supplied!')
for k, arg in kargs.items():
if arg is None:
continue
if k in self.parameters:
raise TweepError('Multiple values for parameter %s supplied!' % k)
self.parameters[k] = convert_to_utf8_str(arg)
def build_path(self):
for variable in re_path_template.findall(self.path):
name = variable.strip('{}')
if name == 'user' and 'user' not in self.parameters and self.api.auth:
# No 'user' parameter provided, fetch it from Auth instead.
value = self.api.auth.get_username()
else:
try:
value = urllib.quote(self.parameters[name])
except KeyError:
raise TweepError('No parameter value found for path variable: %s' % name)
del self.parameters[name]
self.path = self.path.replace(variable, value)
def execute(self):
# Build the request URL
url = self.api_root + self.path
if len(self.parameters):
url = '%s?%s' % (url, urllib.urlencode(self.parameters))
# Query the cache if one is available
# and this request uses a GET method.
if self.use_cache and self.api.cache and self.method == 'GET':
cache_result = self.api.cache.get(url)
# if cache result found and not expired, return it
if cache_result:
# must restore api reference
if isinstance(cache_result, list):
for result in cache_result:
if isinstance(result, Model):
result._api = self.api
else:
if isinstance(cache_result, Model):
cache_result._api = self.api
return cache_result
# Continue attempting request until successful
# or maximum number of retries is reached.
retries_performed = 0
while retries_performed < self.retry_count + 1:
# Open connection
if self.api.secure:
conn = httplib.HTTPSConnection(self.host, timeout=self.api.timeout)
else:
conn = httplib.HTTPConnection(self.host, timeout=self.api.timeout)
# Apply authentication
if self.api.auth:
self.api.auth.apply_auth(
self.scheme + self.host + url,
self.method, self.headers, self.parameters
)
# Request compression if configured
if self.api.compression:
self.headers['Accept-Encoding'] = 'gzip'
# Execute request
try:
conn.request(self.method, url, headers=self.headers, body=self.post_data)
resp = conn.getresponse()
except Exception, e:
raise TweepError('Failed to send request: %s' % e)
# Exit request loop if non-retry error code
if self.retry_errors:
if resp.status not in self.retry_errors: break
else:
if resp.status == 200: break
# Sleep before retrying request again
time.sleep(self.retry_delay)
retries_performed += 1
# If an error was returned, throw an exception
self.api.last_response = resp
if resp.status != 200:
try:
error_msg = self.api.parser.parse_error(resp.read())
except Exception:
error_msg = "Twitter error response: status code = %s" % resp.status
raise TweepError(error_msg, resp)
# Parse the response payload
body = resp.read()
if resp.getheader('Content-Encoding', '') == 'gzip':
try:
zipper = gzip.GzipFile(fileobj=StringIO(body))
body = zipper.read()
except Exception, e:
raise TweepError('Failed to decompress data: %s' % e)
result = self.api.parser.parse(self, body)
conn.close()
# Store result into cache if one is available.
if self.use_cache and self.api.cache and self.method == 'GET' and result:
self.api.cache.store(url, result)
return result
def _call(api, *args, **kargs):
method = APIMethod(api, args, kargs)
return method.execute()
# Set pagination mode
if 'cursor' in APIMethod.allowed_param:
_call.pagination_mode = 'cursor'
elif 'max_id' in APIMethod.allowed_param and \
'since_id' in APIMethod.allowed_param:
_call.pagination_mode = 'id'
elif 'page' in APIMethod.allowed_param:
_call.pagination_mode = 'page'
return _call
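
The api.py module above consumes this factory for every endpoint. As a self-contained illustration (the route below is hypothetical, not a real Twitter endpoint), note how allowed_param also selects the pagination mode:

    show_widget = bind_api(
        path = '/widgets/show/{id}.json',
        payload_type = 'json',
        allowed_param = ['id', 'page'],
        require_auth = True
    )
    # 'page' is in allowed_param, so the returned _call is tagged with
    # pagination_mode = 'page' and can be wrapped by Cursor.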


@ -1,424 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import time
import datetime
import threading
import os
try:
import cPickle as pickle
except ImportError:
import pickle
try:
import hashlib
except ImportError:
# python 2.4
import md5 as hashlib
try:
import fcntl
except ImportError:
# Probably on a windows system
# TODO: use win32file
pass
class Cache(object):
"""Cache interface"""
def __init__(self, timeout=60):
"""Initialize the cache
timeout: number of seconds to keep a cached entry
"""
self.timeout = timeout
def store(self, key, value):
"""Add new record to cache
key: entry key
value: data of entry
"""
raise NotImplementedError
def get(self, key, timeout=None):
"""Get cached entry if exists and not expired
key: which entry to get
timeout: override timeout with this value [optional]
"""
raise NotImplementedError
def count(self):
"""Get count of entries currently stored in cache"""
raise NotImplementedError
def cleanup(self):
"""Delete any expired entries in cache."""
raise NotImplementedError
def flush(self):
"""Delete all cached entries"""
raise NotImplementedError
class MemoryCache(Cache):
"""In-memory cache"""
def __init__(self, timeout=60):
Cache.__init__(self, timeout)
self._entries = {}
self.lock = threading.Lock()
def __getstate__(self):
# pickle
return {'entries': self._entries, 'timeout': self.timeout}
def __setstate__(self, state):
# unpickle
self.lock = threading.Lock()
self._entries = state['entries']
self.timeout = state['timeout']
def _is_expired(self, entry, timeout):
return timeout > 0 and (time.time() - entry[0]) >= timeout
def store(self, key, value):
self.lock.acquire()
self._entries[key] = (time.time(), value)
self.lock.release()
def get(self, key, timeout=None):
self.lock.acquire()
try:
# check to see if we have this key
entry = self._entries.get(key)
if not entry:
# no hit, return nothing
return None
# use provided timeout in arguments if provided
# otherwise use the one provided during init.
if timeout is None:
timeout = self.timeout
# make sure entry is not expired
if self._is_expired(entry, timeout):
# entry expired, delete and return nothing
del self._entries[key]
return None
# entry found and not expired, return it
return entry[1]
finally:
self.lock.release()
def count(self):
return len(self._entries)
def cleanup(self):
self.lock.acquire()
try:
for k, v in self._entries.items():
if self._is_expired(v, self.timeout):
del self._entries[k]
finally:
self.lock.release()
def flush(self):
self.lock.acquire()
self._entries.clear()
self.lock.release()
class FileCache(Cache):
"""File-based cache"""
# locks used to make cache thread-safe
cache_locks = {}
def __init__(self, cache_dir, timeout=60):
Cache.__init__(self, timeout)
if os.path.exists(cache_dir) is False:
os.mkdir(cache_dir)
self.cache_dir = cache_dir
if cache_dir in FileCache.cache_locks:
self.lock = FileCache.cache_locks[cache_dir]
else:
self.lock = threading.Lock()
FileCache.cache_locks[cache_dir] = self.lock
if os.name == 'posix':
self._lock_file = self._lock_file_posix
self._unlock_file = self._unlock_file_posix
elif os.name == 'nt':
self._lock_file = self._lock_file_win32
self._unlock_file = self._unlock_file_win32
else:
print 'Warning! FileCache locking not supported on this system!'
self._lock_file = self._lock_file_dummy
self._unlock_file = self._unlock_file_dummy
def _get_path(self, key):
md5 = hashlib.md5()
md5.update(key)
return os.path.join(self.cache_dir, md5.hexdigest())
def _lock_file_dummy(self, path, exclusive=True):
return None
def _unlock_file_dummy(self, lock):
return
def _lock_file_posix(self, path, exclusive=True):
lock_path = path + '.lock'
if exclusive is True:
f_lock = open(lock_path, 'w')
fcntl.lockf(f_lock, fcntl.LOCK_EX)
else:
f_lock = open(lock_path, 'r')
fcntl.lockf(f_lock, fcntl.LOCK_SH)
if os.path.exists(lock_path) is False:
f_lock.close()
return None
return f_lock
def _unlock_file_posix(self, lock):
lock.close()
def _lock_file_win32(self, path, exclusive=True):
# TODO: implement
return None
def _unlock_file_win32(self, lock):
# TODO: implement
return
def _delete_file(self, path):
os.remove(path)
if os.path.exists(path + '.lock'):
os.remove(path + '.lock')
def store(self, key, value):
path = self._get_path(key)
self.lock.acquire()
try:
# acquire lock and open file
f_lock = self._lock_file(path)
datafile = open(path, 'wb')
# write data
pickle.dump((time.time(), value), datafile)
# close and unlock file
datafile.close()
self._unlock_file(f_lock)
finally:
self.lock.release()
def get(self, key, timeout=None):
return self._get(self._get_path(key), timeout)
def _get(self, path, timeout):
if os.path.exists(path) is False:
# no record
return None
self.lock.acquire()
try:
# acquire lock and open
f_lock = self._lock_file(path, False)
datafile = open(path, 'rb')
# read pickled object
created_time, value = pickle.load(datafile)
datafile.close()
# check if value is expired
if timeout is None:
timeout = self.timeout
if timeout > 0 and (time.time() - created_time) >= timeout:
# expired! delete from cache
value = None
self._delete_file(path)
# unlock and return result
self._unlock_file(f_lock)
return value
finally:
self.lock.release()
def count(self):
c = 0
for entry in os.listdir(self.cache_dir):
if entry.endswith('.lock'):
continue
c += 1
return c
def cleanup(self):
for entry in os.listdir(self.cache_dir):
if entry.endswith('.lock'):
continue
self._get(os.path.join(self.cache_dir, entry), None)
def flush(self):
for entry in os.listdir(self.cache_dir):
if entry.endswith('.lock'):
continue
self._delete_file(os.path.join(self.cache_dir, entry))
class MemCacheCache(Cache):
"""Cache interface"""
def __init__(self, client, timeout=60):
"""Initialize the cache
client: The memcache client
timeout: number of seconds to keep a cached entry
"""
self.client = client
self.timeout = timeout
def store(self, key, value):
"""Add new record to cache
key: entry key
value: data of entry
"""
self.client.set(key, value, time=self.timeout)
def get(self, key, timeout=None):
"""Get cached entry if exists and not expired
key: which entry to get
timeout: override timeout with this value [optional]. DOES NOT WORK HERE
"""
return self.client.get(key)
def count(self):
"""Get count of entries currently stored in cache. RETURN 0"""
raise NotImplementedError
def cleanup(self):
"""Delete any expired entries in cache. NO-OP"""
raise NotImplementedError
def flush(self):
"""Delete all cached entries. NO-OP"""
raise NotImplementedError
class RedisCache(Cache):
'''Cache running in a redis server'''
def __init__(self, client, timeout=60, keys_container = 'tweepy:keys', pre_identifier = 'tweepy:'):
Cache.__init__(self, timeout)
self.client = client
self.keys_container = keys_container
self.pre_identifier = pre_identifier
def _is_expired(self, entry, timeout):
# Returns true if the entry has expired
return timeout > 0 and (time.time() - entry[0]) >= timeout
def store(self, key, value):
'''Store the key, value pair in our redis server'''
# Prepend tweepy to our key, this makes it easier to identify tweepy keys in our redis server
key = self.pre_identifier + key
# Get a pipe (to execute several redis commands in one step)
pipe = self.client.pipeline()
# Set our values in a redis hash (similar to python dict)
pipe.set(key, pickle.dumps((time.time(), value)))
# Set the expiration
pipe.expire(key, self.timeout)
# Add the key to a set containing all the keys
pipe.sadd(self.keys_container, key)
# Execute the instructions in the redis server
pipe.execute()
def get(self, key, timeout=None):
'''Given a key, returns an element from the redis table'''
key = self.pre_identifier + key
# Check to see if we have this key
unpickled_entry = self.client.get(key)
if not unpickled_entry:
# No hit, return nothing
return None
entry = pickle.loads(unpickled_entry)
# Use provided timeout in arguments if provided
# otherwise use the one provided during init.
if timeout is None:
timeout = self.timeout
# Make sure entry is not expired
if self._is_expired(entry, timeout):
# entry expired, delete and return nothing
self.delete_entry(key)
return None
# entry found and not expired, return it
return entry[1]
def count(self):
'''Note: This is not very efficient, since it retrieves all the keys from the redis
server to know how many keys we have'''
return len(self.client.smembers(self.keys_container))
def delete_entry(self, key):
'''Delete an object from the redis table'''
pipe = self.client.pipeline()
pipe.srem(self.keys_container, key)
pipe.delete(key)
pipe.execute()
def cleanup(self):
'''Cleanup all the expired keys'''
keys = self.client.smembers(self.keys_container)
for key in keys:
entry = self.client.get(key)
if entry:
entry = pickle.loads(entry)
if self._is_expired(entry, self.timeout):
self.delete_entry(key)
def flush(self):
'''Delete all entries from the cache'''
keys = self.client.smembers(self.keys_container)
for key in keys:
self.delete_entry(key)
class MongodbCache(Cache):
"""A simple pickle-based MongoDB cache sytem."""
def __init__(self, db, timeout=3600, collection='tweepy_cache'):
"""Should receive a "database" cursor from pymongo."""
Cache.__init__(self, timeout)
self.timeout = timeout
self.col = db[collection]
self.col.create_index('created', expireAfterSeconds=timeout)
def store(self, key, value):
from bson.binary import Binary
now = datetime.datetime.utcnow()
blob = Binary(pickle.dumps(value))
self.col.insert({'created': now, '_id': key, 'value': blob})
def get(self, key, timeout=None):
if timeout:
raise NotImplementedError
obj = self.col.find_one({'_id': key})
if obj:
return pickle.loads(obj['value'])
def count(self):
return self.col.find({}).count()
def delete_entry(self, key):
return self.col.remove({'_id': key})
def cleanup(self):
"""MongoDB will automatically clear expired keys."""
pass
def flush(self):
self.col.drop()
self.col.create_index('created', expireAfterSeconds=self.timeout)
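
Any of these backends plugs into the binder through api.cache. A sketch assuming the API constructor exposes a cache argument, as its use in binder.py suggests:

    api = API(auth, cache=FileCache('/tmp/tweepy-cache', timeout=120))
    timeline = api.home_timeline()  # GET result is stored for 120 seconds
    timeline = api.home_timeline()  # answered from the cache while still fresh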


@ -1,170 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from tweepy.error import TweepError
class Cursor(object):
"""Pagination helper class"""
def __init__(self, method, *args, **kargs):
if hasattr(method, 'pagination_mode'):
if method.pagination_mode == 'cursor':
self.iterator = CursorIterator(method, args, kargs)
elif method.pagination_mode == 'id':
self.iterator = IdIterator(method, args, kargs)
elif method.pagination_mode == 'page':
self.iterator = PageIterator(method, args, kargs)
else:
raise TweepError('Invalid pagination mode.')
else:
raise TweepError('This method does not perform pagination')
def pages(self, limit=0):
"""Return iterator for pages"""
if limit > 0:
self.iterator.limit = limit
return self.iterator
def items(self, limit=0):
"""Return iterator for items in each page"""
i = ItemIterator(self.iterator)
i.limit = limit
return i
class BaseIterator(object):
def __init__(self, method, args, kargs):
self.method = method
self.args = args
self.kargs = kargs
self.limit = 0
def next(self):
raise NotImplementedError
def prev(self):
raise NotImplementedError
def __iter__(self):
return self
class CursorIterator(BaseIterator):
def __init__(self, method, args, kargs):
BaseIterator.__init__(self, method, args, kargs)
self.next_cursor = -1
self.prev_cursor = 0
self.count = 0
def next(self):
if self.next_cursor == 0 or (self.limit and self.count == self.limit):
raise StopIteration
data, cursors = self.method(
cursor=self.next_cursor, *self.args, **self.kargs
)
self.prev_cursor, self.next_cursor = cursors
if len(data) == 0:
raise StopIteration
self.count += 1
return data
def prev(self):
if self.prev_cursor == 0:
raise TweepError('Can not page back more, at first page')
data, cursors = self.method(
cursor=self.prev_cursor, *self.args, **self.kargs
)
self.prev_cursor, self.next_cursor = cursors
self.count -= 1
return data
class IdIterator(BaseIterator):
def __init__(self, method, args, kargs):
BaseIterator.__init__(self, method, args, kargs)
self.max_id = kargs.get('max_id')
self.since_id = kargs.get('since_id')
self.count = 0
def next(self):
"""Fetch a set of items with IDs less than current set."""
if self.limit and self.limit == self.count:
raise StopIteration
# max_id is inclusive so decrement by one
# to avoid requesting duplicate items.
max_id = self.max_id - 1 if self.max_id else None
data = self.method(max_id = max_id, *self.args, **self.kargs)
if len(data) == 0:
raise StopIteration
self.max_id = data.max_id
self.since_id = data.since_id
self.count += 1
return data
def prev(self):
"""Fetch a set of items with IDs greater than current set."""
if self.limit and self.limit == self.count:
raise StopIteration
since_id = self.max_id
data = self.method(since_id = since_id, *self.args, **self.kargs)
if len(data) == 0:
raise StopIteration
self.max_id = data.max_id
self.since_id = data.since_id
self.count += 1
return data
class PageIterator(BaseIterator):
def __init__(self, method, args, kargs):
BaseIterator.__init__(self, method, args, kargs)
self.current_page = 0
def next(self):
self.current_page += 1
items = self.method(page=self.current_page, *self.args, **self.kargs)
if len(items) == 0 or (self.limit > 0 and self.current_page > self.limit):
raise StopIteration
return items
def prev(self):
if (self.current_page == 1):
raise TweepError('Can not page back more, at first page')
self.current_page -= 1
return self.method(page=self.current_page, *self.args, **self.kargs)
class ItemIterator(BaseIterator):
def __init__(self, page_iterator):
self.page_iterator = page_iterator
self.limit = 0
self.current_page = None
self.page_index = -1
self.count = 0
def next(self):
if self.limit > 0 and self.count == self.limit:
raise StopIteration
if self.current_page is None or self.page_index == len(self.current_page) - 1:
# Reached end of current page, get the next page...
self.current_page = self.page_iterator.next()
self.page_index = -1
self.page_index += 1
self.count += 1
return self.current_page[self.page_index]
def prev(self):
if self.current_page is None:
raise TweepError('Can not go back more, at first page')
if self.page_index == 0:
# At the beginning of the current page, move to next...
self.current_page = self.page_iterator.prev()
self.page_index = len(self.current_page)
if self.page_index == 0:
raise TweepError('No more items')
self.page_index -= 1
self.count -= 1
return self.current_page[self.page_index]
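
Because bind_api tags each bound method with a pagination_mode, Cursor works unchanged across cursor-, id-, and page-based endpoints. A short sketch, assuming an authenticated api object:

    # Page-wise iteration; user_timeline paginates by since_id/max_id:
    for page in Cursor(api.user_timeline, screen_name='twitter').pages(3):
        for status in page:
            print status.text

    # Item-wise iteration over a cursored endpoint:
    for follower_id in Cursor(api.followers_ids, screen_name='twitter').items(50):
        print follower_id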


@ -1,15 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
class TweepError(Exception):
"""Tweepy exception"""
def __init__(self, reason, response=None):
self.reason = unicode(reason)
self.response = response
Exception.__init__(self, reason)
def __str__(self):
return self.reason
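
Callers catch this single exception type and inspect reason, plus the attached HTTP response when one exists, e.g.:

    try:
        api.create_favorite(id=123456789)
    except TweepError, e:
        print 'Call failed:', e.reason
        if e.response is not None:
            print 'HTTP status:', e.response.status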


@ -1,431 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from tweepy.error import TweepError
from tweepy.utils import parse_datetime, parse_html_value, parse_a_href, \
parse_search_datetime, unescape_html
class ResultSet(list):
"""A list like object that holds results from a Twitter API query."""
def __init__(self, max_id=None, since_id=None):
super(ResultSet, self).__init__()
self._max_id = max_id
self._since_id = since_id
@property
def max_id(self):
if self._max_id:
return self._max_id
ids = self.ids()
return max(ids) if ids else None
@property
def since_id(self):
if self._since_id:
return self._since_id
ids = self.ids()
return min(ids) if ids else None
def ids(self):
return [item.id for item in self if hasattr(item, 'id')]
class Model(object):
def __init__(self, api=None):
self._api = api
def __getstate__(self):
# pickle
pickle = dict(self.__dict__)
try:
del pickle['_api'] # do not pickle the API reference
except KeyError:
pass
return pickle
@classmethod
def parse(cls, api, json):
"""Parse a JSON object into a model instance."""
raise NotImplementedError
@classmethod
def parse_list(cls, api, json_list):
"""Parse a list of JSON objects into a result set of model instances."""
results = ResultSet()
for obj in json_list:
if obj:
results.append(cls.parse(api, obj))
return results
class Status(Model):
@classmethod
def parse(cls, api, json):
status = cls(api)
for k, v in json.items():
if k == 'user':
user_model = getattr(api.parser.model_factory, 'user')
user = user_model.parse(api, v)
setattr(status, 'author', user)
setattr(status, 'user', user)  # DEPRECATED
elif k == 'created_at':
setattr(status, k, parse_datetime(v))
elif k == 'source':
if '<' in v:
setattr(status, k, parse_html_value(v))
setattr(status, 'source_url', parse_a_href(v))
else:
setattr(status, k, v)
setattr(status, 'source_url', None)
elif k == 'retweeted_status':
setattr(status, k, Status.parse(api, v))
elif k == 'place':
if v is not None:
setattr(status, k, Place.parse(api, v))
else:
setattr(status, k, None)
else:
setattr(status, k, v)
return status
def destroy(self):
return self._api.destroy_status(self.id)
def retweet(self):
return self._api.retweet(self.id)
def retweets(self):
return self._api.retweets(self.id)
def favorite(self):
return self._api.create_favorite(self.id)
class User(Model):
@classmethod
def parse(cls, api, json):
user = cls(api)
for k, v in json.items():
if k == 'created_at':
setattr(user, k, parse_datetime(v))
elif k == 'status':
setattr(user, k, Status.parse(api, v))
elif k == 'following':
# twitter sets this to null if it is false
if v is True:
setattr(user, k, True)
else:
setattr(user, k, False)
else:
setattr(user, k, v)
return user
@classmethod
def parse_list(cls, api, json_list):
if isinstance(json_list, list):
item_list = json_list
else:
item_list = json_list['users']
results = ResultSet()
for obj in item_list:
results.append(cls.parse(api, obj))
return results
def timeline(self, **kargs):
return self._api.user_timeline(user_id=self.id, **kargs)
def friends(self, **kargs):
return self._api.friends(user_id=self.id, **kargs)
def followers(self, **kargs):
return self._api.followers(user_id=self.id, **kargs)
def follow(self):
self._api.create_friendship(user_id=self.id)
self.following = True
def unfollow(self):
self._api.destroy_friendship(user_id=self.id)
self.following = False
def lists_memberships(self, *args, **kargs):
return self._api.lists_memberships(user=self.screen_name, *args, **kargs)
def lists_subscriptions(self, *args, **kargs):
return self._api.lists_subscriptions(user=self.screen_name, *args, **kargs)
def lists(self, *args, **kargs):
return self._api.lists(user=self.screen_name, *args, **kargs)
def followers_ids(self, *args, **kargs):
return self._api.followers_ids(user_id=self.id, *args, **kargs)
class DirectMessage(Model):
@classmethod
def parse(cls, api, json):
dm = cls(api)
for k, v in json.items():
if k == 'sender' or k == 'recipient':
setattr(dm, k, User.parse(api, v))
elif k == 'created_at':
setattr(dm, k, parse_datetime(v))
else:
setattr(dm, k, v)
return dm
def destroy(self):
return self._api.destroy_direct_message(self.id)
class Friendship(Model):
@classmethod
def parse(cls, api, json):
relationship = json['relationship']
# parse source
source = cls(api)
for k, v in relationship['source'].items():
setattr(source, k, v)
# parse target
target = cls(api)
for k, v in relationship['target'].items():
setattr(target, k, v)
return source, target
class Category(Model):
@classmethod
def parse(cls, api, json):
category = cls(api)
for k, v in json.items():
setattr(category, k, v)
return category
class SavedSearch(Model):
@classmethod
def parse(cls, api, json):
ss = cls(api)
for k, v in json.items():
if k == 'created_at':
setattr(ss, k, parse_datetime(v))
else:
setattr(ss, k, v)
return ss
def destroy(self):
return self._api.destroy_saved_search(self.id)
class SearchResults(ResultSet):
@classmethod
def parse(cls, api, json):
metadata = json['search_metadata']
results = SearchResults(metadata.get('max_id'), metadata.get('since_id'))
results.refresh_url = metadata.get('refresh_url')
results.completed_in = metadata.get('completed_in')
results.query = metadata.get('query')
for status in json['statuses']:
results.append(Status.parse(api, status))
return results
class List(Model):
@classmethod
def parse(cls, api, json):
lst = List(api)
for k,v in json.items():
if k == 'user':
setattr(lst, k, User.parse(api, v))
elif k == 'created_at':
setattr(lst, k, parse_datetime(v))
else:
setattr(lst, k, v)
return lst
@classmethod
def parse_list(cls, api, json_list, result_set=None):
results = ResultSet()
if isinstance(json_list, dict):
json_list = json_list['lists']
for obj in json_list:
results.append(cls.parse(api, obj))
return results
def update(self, **kargs):
return self._api.update_list(self.slug, **kargs)
def destroy(self):
return self._api.destroy_list(self.slug)
def timeline(self, **kargs):
return self._api.list_timeline(self.user.screen_name, self.slug, **kargs)
def add_member(self, id):
return self._api.add_list_member(self.slug, id)
def remove_member(self, id):
return self._api.remove_list_member(self.slug, id)
def members(self, **kargs):
return self._api.list_members(self.user.screen_name, self.slug, **kargs)
def is_member(self, id):
return self._api.is_list_member(self.user.screen_name, self.slug, id)
def subscribe(self):
return self._api.subscribe_list(self.user.screen_name, self.slug)
def unsubscribe(self):
return self._api.unsubscribe_list(self.user.screen_name, self.slug)
def subscribers(self, **kargs):
return self._api.list_subscribers(self.user.screen_name, self.slug, **kargs)
def is_subscribed(self, id):
return self._api.is_subscribed_list(self.user.screen_name, self.slug, id)
class Relation(Model):
@classmethod
def parse(cls, api, json):
result = cls(api)
for k,v in json.items():
if k == 'value' and json['kind'] in ['Tweet', 'LookedupStatus']:
setattr(result, k, Status.parse(api, v))
elif k == 'results':
setattr(result, k, Relation.parse_list(api, v))
else:
setattr(result, k, v)
return result
class Relationship(Model):
@classmethod
def parse(cls, api, json):
result = cls(api)
for k,v in json.items():
if k == 'connections':
setattr(result, 'is_following', 'following' in v)
setattr(result, 'is_followed_by', 'followed_by' in v)
else:
setattr(result, k, v)
return result
class JSONModel(Model):
@classmethod
def parse(cls, api, json):
return json
class IDModel(Model):
@classmethod
def parse(cls, api, json):
if isinstance(json, list):
return json
else:
return json['ids']
class BoundingBox(Model):
@classmethod
def parse(cls, api, json):
result = cls(api)
if json is not None:
for k, v in json.items():
setattr(result, k, v)
return result
def origin(self):
"""
Return longitude, latitude of southwest (bottom, left) corner of
bounding box, as a tuple.
This assumes that the bounding box is always a rectangle, which
appears to be the case at present.
"""
return tuple(self.coordinates[0][0])
def corner(self):
"""
Return longitude, latitude of northeast (top, right) corner of
bounding box, as a tuple.
This assumes that the bounding box is always a rectangle, which
appears to be the case at present.
"""
return tuple(self.coordinates[0][2])
class Place(Model):
@classmethod
def parse(cls, api, json):
place = cls(api)
for k, v in json.items():
if k == 'bounding_box':
# bounding_box value may be null (None.)
# Example: "United States" (id=96683cc9126741d1)
if v is not None:
t = BoundingBox.parse(api, v)
else:
t = v
setattr(place, k, t)
elif k == 'contained_within':
# contained_within is a list of Places.
setattr(place, k, Place.parse_list(api, v))
else:
setattr(place, k, v)
return place
@classmethod
def parse_list(cls, api, json_list):
if isinstance(json_list, list):
item_list = json_list
else:
item_list = json_list['result']['places']
results = ResultSet()
for obj in item_list:
results.append(cls.parse(api, obj))
return results
class ModelFactory(object):
"""
Used by parsers for creating instances
of models. You may subclass this factory
to add your own extended models.
"""
status = Status
user = User
direct_message = DirectMessage
friendship = Friendship
saved_search = SavedSearch
search_results = SearchResults
category = Category
list = List
relation = Relation
relationship = Relationship
json = JSONModel
ids = IDModel
place = Place
bounding_box = BoundingBox
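
As the docstring notes, subclassing the factory is the extension point. A sketch of a custom Status wired in this way (ModelParser, which consumes the factory, appears later in this commit); the word_count attribute added here is purely illustrative:

    class CountingStatus(Status):
        @classmethod
        def parse(cls, api, json):
            status = super(CountingStatus, cls).parse(api, json)
            status.word_count = len(getattr(status, 'text', '').split())
            return status

    class MyModelFactory(ModelFactory):
        status = CountingStatus

    # e.g. API(auth, parser=ModelParser(model_factory=MyModelFactory))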


@ -1,655 +0,0 @@
"""
The MIT License
Copyright (c) 2007 Leah Culver
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import cgi
import urllib
import time
import random
import urlparse
import hmac
import binascii
VERSION = '1.0' # Hi Blaine!
HTTP_METHOD = 'GET'
SIGNATURE_METHOD = 'PLAINTEXT'
class OAuthError(RuntimeError):
"""Generic exception class."""
def __init__(self, message='OAuth error occurred.'):
self.message = message
def build_authenticate_header(realm=''):
"""Optional WWW-Authenticate header (401 error)"""
return {'WWW-Authenticate': 'OAuth realm="%s"' % realm}
def escape(s):
"""Escape a URL including any /."""
return urllib.quote(s, safe='~')
def _utf8_str(s):
"""Convert unicode to utf-8."""
if isinstance(s, unicode):
return s.encode("utf-8")
else:
return str(s)
def generate_timestamp():
"""Get seconds since epoch (UTC)."""
return int(time.time())
def generate_nonce(length=8):
"""Generate pseudorandom number."""
return ''.join([str(random.randint(0, 9)) for i in range(length)])
def generate_verifier(length=8):
"""Generate pseudorandom number."""
return ''.join([str(random.randint(0, 9)) for i in range(length)])
class OAuthConsumer(object):
"""Consumer of OAuth authentication.
OAuthConsumer is a data type that represents the identity of the Consumer
via its shared secret with the Service Provider.
"""
key = None
secret = None
def __init__(self, key, secret):
self.key = key
self.secret = secret
class OAuthToken(object):
"""OAuthToken is a data type that represents an End User via either an access
or request token.
key -- the token
secret -- the token secret
"""
key = None
secret = None
callback = None
callback_confirmed = None
verifier = None
def __init__(self, key, secret):
self.key = key
self.secret = secret
def set_callback(self, callback):
self.callback = callback
self.callback_confirmed = 'true'
def set_verifier(self, verifier=None):
if verifier is not None:
self.verifier = verifier
else:
self.verifier = generate_verifier()
def get_callback_url(self):
if self.callback and self.verifier:
# Append the oauth_verifier.
parts = urlparse.urlparse(self.callback)
scheme, netloc, path, params, query, fragment = parts[:6]
if query:
query = '%s&oauth_verifier=%s' % (query, self.verifier)
else:
query = 'oauth_verifier=%s' % self.verifier
return urlparse.urlunparse((scheme, netloc, path, params,
query, fragment))
return self.callback
def to_string(self):
data = {
'oauth_token': self.key,
'oauth_token_secret': self.secret,
}
if self.callback_confirmed is not None:
data['oauth_callback_confirmed'] = self.callback_confirmed
return urllib.urlencode(data)
def from_string(s):
""" Returns a token from something like:
oauth_token_secret=xxx&oauth_token=xxx
"""
params = cgi.parse_qs(s, keep_blank_values=False)
key = params['oauth_token'][0]
secret = params['oauth_token_secret'][0]
token = OAuthToken(key, secret)
try:
token.callback_confirmed = params['oauth_callback_confirmed'][0]
except KeyError:
pass # 1.0, no callback confirmed.
return token
from_string = staticmethod(from_string)
def __str__(self):
return self.to_string()
class OAuthRequest(object):
"""OAuthRequest represents the request and can be serialized.
OAuth parameters:
- oauth_consumer_key
- oauth_token
- oauth_signature_method
- oauth_signature
- oauth_timestamp
- oauth_nonce
- oauth_version
- oauth_verifier
... any additional parameters, as defined by the Service Provider.
"""
parameters = None # OAuth parameters.
http_method = HTTP_METHOD
http_url = None
version = VERSION
def __init__(self, http_method=HTTP_METHOD, http_url=None, parameters=None):
self.http_method = http_method
self.http_url = http_url
self.parameters = parameters or {}
def set_parameter(self, parameter, value):
self.parameters[parameter] = value
def get_parameter(self, parameter):
try:
return self.parameters[parameter]
except:
raise OAuthError('Parameter not found: %s' % parameter)
def _get_timestamp_nonce(self):
return self.get_parameter('oauth_timestamp'), self.get_parameter(
'oauth_nonce')
def get_nonoauth_parameters(self):
"""Get any non-OAuth parameters."""
parameters = {}
for k, v in self.parameters.iteritems():
# Ignore oauth parameters.
if k.find('oauth_') < 0:
parameters[k] = v
return parameters
def to_header(self, realm=''):
"""Serialize as a header for an HTTPAuth request."""
auth_header = 'OAuth realm="%s"' % realm
# Add the oauth parameters.
if self.parameters:
for k, v in self.parameters.iteritems():
if k[:6] == 'oauth_':
auth_header += ', %s="%s"' % (k, escape(str(v)))
return {'Authorization': auth_header}
def to_postdata(self):
"""Serialize as post data for a POST request."""
return '&'.join(['%s=%s' % (escape(str(k)), escape(str(v))) \
for k, v in self.parameters.iteritems()])
def to_url(self):
"""Serialize as a URL for a GET request."""
return '%s?%s' % (self.get_normalized_http_url(), self.to_postdata())
def get_normalized_parameters(self):
"""Return a string that contains the parameters that must be signed."""
params = self.parameters
try:
# Exclude the signature if it exists.
del params['oauth_signature']
except:
pass
# Escape key values before sorting.
key_values = [(escape(_utf8_str(k)), escape(_utf8_str(v))) \
for k,v in params.items()]
# Sort lexicographically, first by key, then by value.
key_values.sort()
# Combine key value pairs into a string.
return '&'.join(['%s=%s' % (k, v) for k, v in key_values])
def get_normalized_http_method(self):
"""Uppercases the http method."""
return self.http_method.upper()
def get_normalized_http_url(self):
"""Parses the URL and rebuilds it to be scheme://host/path."""
parts = urlparse.urlparse(self.http_url)
scheme, netloc, path = parts[:3]
# Exclude default port numbers.
if scheme == 'http' and netloc[-3:] == ':80':
netloc = netloc[:-3]
elif scheme == 'https' and netloc[-4:] == ':443':
netloc = netloc[:-4]
return '%s://%s%s' % (scheme, netloc, path)
def sign_request(self, signature_method, consumer, token):
"""Set the signature parameter to the result of build_signature."""
# Set the signature method.
self.set_parameter('oauth_signature_method',
signature_method.get_name())
# Set the signature.
self.set_parameter('oauth_signature',
self.build_signature(signature_method, consumer, token))
def build_signature(self, signature_method, consumer, token):
"""Calls the build signature method within the signature method."""
return signature_method.build_signature(self, consumer, token)
def from_request(http_method, http_url, headers=None, parameters=None,
query_string=None):
"""Combines multiple parameter sources."""
if parameters is None:
parameters = {}
# Headers
if headers and 'Authorization' in headers:
auth_header = headers['Authorization']
# Check that the authorization header is OAuth.
if auth_header[:6] == 'OAuth ':
auth_header = auth_header[6:]
try:
# Get the parameters from the header.
header_params = OAuthRequest._split_header(auth_header)
parameters.update(header_params)
except:
raise OAuthError('Unable to parse OAuth parameters from '
'Authorization header.')
# GET or POST query string.
if query_string:
query_params = OAuthRequest._split_url_string(query_string)
parameters.update(query_params)
# URL parameters.
param_str = urlparse.urlparse(http_url)[4] # query
url_params = OAuthRequest._split_url_string(param_str)
parameters.update(url_params)
if parameters:
return OAuthRequest(http_method, http_url, parameters)
return None
from_request = staticmethod(from_request)
def from_consumer_and_token(oauth_consumer, token=None,
callback=None, verifier=None, http_method=HTTP_METHOD,
http_url=None, parameters=None):
if not parameters:
parameters = {}
defaults = {
'oauth_consumer_key': oauth_consumer.key,
'oauth_timestamp': generate_timestamp(),
'oauth_nonce': generate_nonce(),
'oauth_version': OAuthRequest.version,
}
defaults.update(parameters)
parameters = defaults
if token:
parameters['oauth_token'] = token.key
if token.callback:
parameters['oauth_callback'] = token.callback
# 1.0a support for verifier.
if verifier:
parameters['oauth_verifier'] = verifier
elif callback:
# 1.0a support for callback in the request token request.
parameters['oauth_callback'] = callback
return OAuthRequest(http_method, http_url, parameters)
from_consumer_and_token = staticmethod(from_consumer_and_token)
def from_token_and_callback(token, callback=None, http_method=HTTP_METHOD,
http_url=None, parameters=None):
if not parameters:
parameters = {}
parameters['oauth_token'] = token.key
if callback:
parameters['oauth_callback'] = callback
return OAuthRequest(http_method, http_url, parameters)
from_token_and_callback = staticmethod(from_token_and_callback)
def _split_header(header):
"""Turn Authorization: header into parameters."""
params = {}
parts = header.split(',')
for param in parts:
# Ignore realm parameter.
if param.find('realm') > -1:
continue
# Remove whitespace.
param = param.strip()
# Split key-value.
param_parts = param.split('=', 1)
# Remove quotes and unescape the value.
params[param_parts[0]] = urllib.unquote(param_parts[1].strip('\"'))
return params
_split_header = staticmethod(_split_header)
def _split_url_string(param_str):
"""Turn URL string into parameters."""
parameters = cgi.parse_qs(param_str, keep_blank_values=False)
for k, v in parameters.iteritems():
parameters[k] = urllib.unquote(v[0])
return parameters
_split_url_string = staticmethod(_split_url_string)
class OAuthServer(object):
"""A worker to check the validity of a request against a data store."""
timestamp_threshold = 300 # In seconds, five minutes.
version = VERSION
signature_methods = None
data_store = None
def __init__(self, data_store=None, signature_methods=None):
self.data_store = data_store
self.signature_methods = signature_methods or {}
def set_data_store(self, data_store):
self.data_store = data_store
def get_data_store(self):
return self.data_store
def add_signature_method(self, signature_method):
self.signature_methods[signature_method.get_name()] = signature_method
return self.signature_methods
def fetch_request_token(self, oauth_request):
"""Processes a request_token request and returns the
request token on success.
"""
try:
# Get the request token for authorization.
token = self._get_token(oauth_request, 'request')
except OAuthError:
# No token required for the initial token request.
version = self._get_version(oauth_request)
consumer = self._get_consumer(oauth_request)
try:
callback = self.get_callback(oauth_request)
except OAuthError:
callback = None # 1.0, no callback specified.
self._check_signature(oauth_request, consumer, None)
# Fetch a new token.
token = self.data_store.fetch_request_token(consumer, callback)
return token
def fetch_access_token(self, oauth_request):
"""Processes an access_token request and returns the
access token on success.
"""
version = self._get_version(oauth_request)
consumer = self._get_consumer(oauth_request)
try:
verifier = self._get_verifier(oauth_request)
except OAuthError:
verifier = None
# Get the request token.
token = self._get_token(oauth_request, 'request')
self._check_signature(oauth_request, consumer, token)
new_token = self.data_store.fetch_access_token(consumer, token, verifier)
return new_token
def verify_request(self, oauth_request):
"""Verifies an api call and checks all the parameters."""
# -> consumer and token
version = self._get_version(oauth_request)
consumer = self._get_consumer(oauth_request)
# Get the access token.
token = self._get_token(oauth_request, 'access')
self._check_signature(oauth_request, consumer, token)
parameters = oauth_request.get_nonoauth_parameters()
return consumer, token, parameters
def authorize_token(self, token, user):
"""Authorize a request token."""
return self.data_store.authorize_request_token(token, user)
def get_callback(self, oauth_request):
"""Get the callback URL."""
return oauth_request.get_parameter('oauth_callback')
def build_authenticate_header(self, realm=''):
"""Optional support for the authenticate header."""
return {'WWW-Authenticate': 'OAuth realm="%s"' % realm}
def _get_version(self, oauth_request):
"""Verify the correct version request for this server."""
try:
version = oauth_request.get_parameter('oauth_version')
except:
version = VERSION
if version and version != self.version:
raise OAuthError('OAuth version %s not supported.' % str(version))
return version
def _get_signature_method(self, oauth_request):
"""Figure out the signature with some defaults."""
try:
signature_method = oauth_request.get_parameter(
'oauth_signature_method')
except:
signature_method = SIGNATURE_METHOD
try:
# Get the signature method object.
signature_method = self.signature_methods[signature_method]
except:
signature_method_names = ', '.join(self.signature_methods.keys())
raise OAuthError('Signature method %s not supported; try one of the '
'following: %s' % (signature_method, signature_method_names))
return signature_method
def _get_consumer(self, oauth_request):
consumer_key = oauth_request.get_parameter('oauth_consumer_key')
consumer = self.data_store.lookup_consumer(consumer_key)
if not consumer:
raise OAuthError('Invalid consumer.')
return consumer
def _get_token(self, oauth_request, token_type='access'):
"""Try to find the token for the provided request token key."""
token_field = oauth_request.get_parameter('oauth_token')
token = self.data_store.lookup_token(token_type, token_field)
if not token:
raise OAuthError('Invalid %s token: %s' % (token_type, token_field))
return token
def _get_verifier(self, oauth_request):
return oauth_request.get_parameter('oauth_verifier')
def _check_signature(self, oauth_request, consumer, token):
timestamp, nonce = oauth_request._get_timestamp_nonce()
self._check_timestamp(timestamp)
self._check_nonce(consumer, token, nonce)
signature_method = self._get_signature_method(oauth_request)
try:
signature = oauth_request.get_parameter('oauth_signature')
except:
raise OAuthError('Missing signature.')
# Validate the signature.
valid_sig = signature_method.check_signature(oauth_request, consumer,
token, signature)
if not valid_sig:
key, base = signature_method.build_signature_base_string(
oauth_request, consumer, token)
raise OAuthError('Invalid signature. Expected signature base '
'string: %s' % base)
built = signature_method.build_signature(oauth_request, consumer, token)
def _check_timestamp(self, timestamp):
"""Verify that timestamp is recentish."""
timestamp = int(timestamp)
now = int(time.time())
lapsed = abs(now - timestamp)
if lapsed > self.timestamp_threshold:
raise OAuthError('Expired timestamp: given %d and now %s has a '
'greater difference than threshold %d' %
(timestamp, now, self.timestamp_threshold))
def _check_nonce(self, consumer, token, nonce):
"""Verify that the nonce is uniqueish."""
nonce = self.data_store.lookup_nonce(consumer, token, nonce)
if nonce:
raise OAuthError('Nonce already used: %s' % str(nonce))
class OAuthClient(object):
"""OAuthClient is a worker to attempt to execute a request."""
consumer = None
token = None
def __init__(self, oauth_consumer, oauth_token):
self.consumer = oauth_consumer
self.token = oauth_token
def get_consumer(self):
return self.consumer
def get_token(self):
return self.token
def fetch_request_token(self, oauth_request):
"""-> OAuthToken."""
raise NotImplementedError
def fetch_access_token(self, oauth_request):
"""-> OAuthToken."""
raise NotImplementedError
def access_resource(self, oauth_request):
"""-> Some protected resource."""
raise NotImplementedError
class OAuthDataStore(object):
"""A database abstraction used to lookup consumers and tokens."""
def lookup_consumer(self, key):
"""-> OAuthConsumer."""
raise NotImplementedError
def lookup_token(self, oauth_consumer, token_type, token_token):
"""-> OAuthToken."""
raise NotImplementedError
def lookup_nonce(self, oauth_consumer, oauth_token, nonce):
"""-> OAuthToken."""
raise NotImplementedError
def fetch_request_token(self, oauth_consumer, oauth_callback):
"""-> OAuthToken."""
raise NotImplementedError
def fetch_access_token(self, oauth_consumer, oauth_token, oauth_verifier):
"""-> OAuthToken."""
raise NotImplementedError
def authorize_request_token(self, oauth_token, user):
"""-> OAuthToken."""
raise NotImplementedError
class OAuthSignatureMethod(object):
"""A strategy class that implements a signature method."""
def get_name(self):
"""-> str."""
raise NotImplementedError
def build_signature_base_string(self, oauth_request, oauth_consumer, oauth_token):
"""-> str key, str raw."""
raise NotImplementedError
def build_signature(self, oauth_request, oauth_consumer, oauth_token):
"""-> str."""
raise NotImplementedError
def check_signature(self, oauth_request, consumer, token, signature):
built = self.build_signature(oauth_request, consumer, token)
return built == signature
class OAuthSignatureMethod_HMAC_SHA1(OAuthSignatureMethod):
def get_name(self):
return 'HMAC-SHA1'
def build_signature_base_string(self, oauth_request, consumer, token):
sig = (
escape(oauth_request.get_normalized_http_method()),
escape(oauth_request.get_normalized_http_url()),
escape(oauth_request.get_normalized_parameters()),
)
key = '%s&' % escape(consumer.secret)
if token:
key += escape(token.secret)
raw = '&'.join(sig)
return key, raw
def build_signature(self, oauth_request, consumer, token):
"""Builds the base signature string."""
key, raw = self.build_signature_base_string(oauth_request, consumer,
token)
# HMAC object.
try:
import hashlib # 2.5
hashed = hmac.new(key, raw, hashlib.sha1)
except:
import sha # Deprecated
hashed = hmac.new(key, raw, sha)
# Calculate the digest base 64.
return binascii.b2a_base64(hashed.digest())[:-1]
class OAuthSignatureMethod_PLAINTEXT(OAuthSignatureMethod):
def get_name(self):
return 'PLAINTEXT'
def build_signature_base_string(self, oauth_request, consumer, token):
"""Concatenates the consumer key and secret."""
sig = '%s&' % escape(consumer.secret)
if token:
sig = sig + escape(token.secret)
return sig, sig
def build_signature(self, oauth_request, consumer, token):
key, raw = self.build_signature_base_string(oauth_request, consumer,
token)
return key
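
End to end, signing a request with this module takes four steps: build the consumer and token, construct the request, sign it, then serialize. A sketch with placeholder credentials:

    consumer = OAuthConsumer('consumer-key', 'consumer-secret')
    token = OAuthToken('access-key', 'access-secret')
    request = OAuthRequest.from_consumer_and_token(
        consumer, token=token, http_method='GET',
        http_url='http://api.twitter.com/1/statuses/home_timeline.json'
    )
    request.sign_request(OAuthSignatureMethod_HMAC_SHA1(), consumer, token)
    print request.to_header()  # {'Authorization': 'OAuth realm="", oauth_...'}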


@ -1,97 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
from tweepy.models import ModelFactory
from tweepy.utils import import_simplejson
from tweepy.error import TweepError
class Parser(object):
def parse(self, method, payload):
"""
Parse the response payload and return the result.
Returns a tuple that contains the result data and the cursors
(or None if not present).
"""
raise NotImplementedError
def parse_error(self, payload):
"""
Parse the error message from payload.
If unable to parse the message, throw an exception
and default error message will be used.
"""
raise NotImplementedError
class RawParser(Parser):
def __init__(self):
pass
def parse(self, method, payload):
return payload
def parse_error(self, payload):
return payload
class JSONParser(Parser):
payload_format = 'json'
def __init__(self):
self.json_lib = import_simplejson()
def parse(self, method, payload):
try:
json = self.json_lib.loads(payload)
except Exception, e:
raise TweepError('Failed to parse JSON payload: %s' % e)
needsCursors = 'cursor' in method.parameters
if needsCursors and isinstance(json, dict) and 'previous_cursor' in json and 'next_cursor' in json:
cursors = json['previous_cursor'], json['next_cursor']
return json, cursors
else:
return json
def parse_error(self, payload):
error = self.json_lib.loads(payload)
if 'error' in error:
return error['error']
else:
return error['errors']
class ModelParser(JSONParser):
def __init__(self, model_factory=None):
JSONParser.__init__(self)
self.model_factory = model_factory or ModelFactory
def parse(self, method, payload):
try:
if method.payload_type is None: return
model = getattr(self.model_factory, method.payload_type)
except AttributeError:
raise TweepError('No model for this payload type: %s' % method.payload_type)
json = JSONParser.parse(self, method, payload)
if isinstance(json, tuple):
json, cursors = json
else:
cursors = None
if method.payload_list:
result = model.parse_list(method.api, json)
else:
result = model.parse(method.api, json)
if cursors:
return result, cursors
else:
return result
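
A small sketch of the cursor handling in JSONParser above, using a hypothetical stand-in for the bound API method object and an illustrative payload:

from tweepy.parsers import JSONParser

class FakeMethod(object):
    # Stand-in for the API method object the parser inspects.
    parameters = {'cursor': -1}

payload = '{"previous_cursor": 0, "next_cursor": 1347585098476, "ids": []}'
json_data, cursors = JSONParser().parse(FakeMethod(), payload)
print cursors  # (0, 1347585098476)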


@ -1,248 +0,0 @@
# Tweepy
# Copyright 2009-2010 Joshua Roesslein
# See LICENSE for details.
import httplib
from socket import timeout
from threading import Thread
from time import sleep
from tweepy.models import Status
from tweepy.api import API
from tweepy.error import TweepError
from tweepy.utils import import_simplejson, urlencode_noplus
json = import_simplejson()
STREAM_VERSION = '1.1'
class StreamListener(object):
def __init__(self, api=None):
self.api = api or API()
def on_connect(self):
"""Called once connected to streaming server.
This will be invoked once a successful response
is received from the server. Allows the listener
to perform some work prior to entering the read loop.
"""
pass
def on_data(self, data):
"""Called when raw data is received from connection.
Override this method if you wish to manually handle
the stream data. Return False to stop stream and close connection.
"""
if 'in_reply_to_status_id' in data:
status = Status.parse(self.api, json.loads(data))
if self.on_status(status) is False:
return False
elif 'delete' in data:
delete = json.loads(data)['delete']['status']
if self.on_delete(delete['id'], delete['user_id']) is False:
return False
elif 'limit' in data:
if self.on_limit(json.loads(data)['limit']['track']) is False:
return False
def on_status(self, status):
"""Called when a new status arrives"""
return
def on_delete(self, status_id, user_id):
"""Called when a delete notice arrives for a status"""
return
def on_limit(self, track):
"""Called when a limitation notice arrvies"""
return
def on_error(self, status_code):
"""Called when a non-200 status code is returned"""
return False
def on_timeout(self):
"""Called when stream connection times out"""
return
class Stream(object):
host = 'stream.twitter.com'
def __init__(self, auth, listener, **options):
self.auth = auth
self.listener = listener
self.running = False
self.timeout = options.get("timeout", 300.0)
self.retry_count = options.get("retry_count")
self.retry_time = options.get("retry_time", 10.0)
self.snooze_time = options.get("snooze_time", 5.0)
self.buffer_size = options.get("buffer_size", 1500)
if options.get("secure", True):
self.scheme = "https"
else:
self.scheme = "http"
self.api = API()
self.headers = options.get("headers") or {}
self.parameters = None
self.body = None
def _run(self):
# Authenticate
url = "%s://%s%s" % (self.scheme, self.host, self.url)
# Connect and process the stream
error_counter = 0
conn = None
exception = None
while self.running:
if self.retry_count is not None and error_counter > self.retry_count:
# quit if error count greater than retry count
break
try:
if self.scheme == "http":
conn = httplib.HTTPConnection(self.host)
else:
conn = httplib.HTTPSConnection(self.host)
self.auth.apply_auth(url, 'POST', self.headers, self.parameters)
conn.connect()
conn.sock.settimeout(self.timeout)
conn.request('POST', self.url, self.body, headers=self.headers)
resp = conn.getresponse()
if resp.status != 200:
if self.listener.on_error(resp.status) is False:
break
error_counter += 1
sleep(self.retry_time)
else:
error_counter = 0
self.listener.on_connect()
self._read_loop(resp)
except timeout:
if self.listener.on_timeout() is False:
break
if self.running is False:
break
conn.close()
sleep(self.snooze_time)
except Exception, exception:
# any other exception is fatal, so kill loop
break
# cleanup
self.running = False
if conn:
conn.close()
if exception:
raise
def _data(self, data):
if self.listener.on_data(data) is False:
self.running = False
def _read_loop(self, resp):
while self.running and not resp.isclosed():
# Note: keep-alive newlines might be inserted before each length value.
# read until we get a digit...
c = '\n'
while c == '\n' and self.running and not resp.isclosed():
c = resp.read(1)
delimited_string = c
# read rest of delimiter length..
d = ''
while d != '\n' and self.running and not resp.isclosed():
d = resp.read(1)
delimited_string += d
# read the next twitter status object
if delimited_string.strip().isdigit():
next_status_obj = resp.read(int(delimited_string))
self._data(next_status_obj)
if resp.isclosed():
self.on_closed(resp)
def _start(self, async):
self.running = True
if async:
Thread(target=self._run).start()
else:
self._run()
def on_closed(self, resp):
""" Called when the response has been closed by Twitter """
pass
def userstream(self, count=None, async=False, secure=True):
self.parameters = {'delimited': 'length'}
if self.running:
raise TweepError('Stream object already connected!')
self.url = '/2/user.json?delimited=length'
self.host = 'userstream.twitter.com'
self._start(async)
def firehose(self, count=None, async=False):
self.parameters = {'delimited': 'length'}
if self.running:
raise TweepError('Stream object already connected!')
self.url = '/%s/statuses/firehose.json?delimited=length' % STREAM_VERSION
if count:
self.url += '&count=%s' % count
self._start(async)
def retweet(self, async=False):
self.parameters = {'delimited': 'length'}
if self.running:
raise TweepError('Stream object already connected!')
self.url = '/%s/statuses/retweet.json?delimited=length' % STREAM_VERSION
self._start(async)
def sample(self, count=None, async=False):
self.parameters = {'delimited': 'length'}
if self.running:
raise TweepError('Stream object already connected!')
self.url = '/%s/statuses/sample.json?delimited=length' % STREAM_VERSION
if count:
self.url += '&count=%s' % count
self._start(async)
def filter(self, follow=None, track=None, async=False, locations=None,
count=None, stall_warnings=False, languages=None):
self.parameters = {}
self.headers['Content-type'] = "application/x-www-form-urlencoded"
if self.running:
raise TweepError('Stream object already connected!')
self.url = '/%s/statuses/filter.json?delimited=length' % STREAM_VERSION
if follow:
self.parameters['follow'] = ','.join(map(str, follow))
if track:
self.parameters['track'] = ','.join(map(str, track))
if locations and len(locations) > 0:
assert len(locations) % 4 == 0
self.parameters['locations'] = ','.join(['%.2f' % l for l in locations])
if count:
self.parameters['count'] = count
if stall_warnings:
self.parameters['stall_warnings'] = stall_warnings
if languages:
self.parameters['language'] = ','.join(map(str, languages))
self.body = urlencode_noplus(self.parameters)
self.parameters['delimited'] = 'length'
self._start(async)
def disconnect(self):
if self.running is False:
return
self.running = False
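
A minimal listener sketch against the hooks above; the credentials and track keywords are hypothetical, and OAuthHandler is assumed from the wider tweepy package:

from tweepy import OAuthHandler
from tweepy.streaming import Stream, StreamListener

class PrintListener(StreamListener):
    def on_status(self, status):
        # on_data parses each status and hands it to us here.
        print status.text

    def on_error(self, status_code):
        # Returning False stops the stream on the first non-200 response.
        return False

auth = OAuthHandler('consumer-key', 'consumer-secret')
auth.set_access_token('access-token', 'access-secret')
stream = Stream(auth, PrintListener(), timeout=60.0)
stream.filter(track=['python'])  # blocks; pass async=True to run in a thread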


@ -1,103 +0,0 @@
# Tweepy
# Copyright 2010 Joshua Roesslein
# See LICENSE for details.
from datetime import datetime
import time
import htmlentitydefs
import re
import locale
from urllib import quote
def parse_datetime(string):
# Set locale for date parsing
locale.setlocale(locale.LC_TIME, 'C')
# We must parse datetime this way to work in python 2.4
date = datetime(*(time.strptime(string, '%a %b %d %H:%M:%S +0000 %Y')[0:6]))
# Reset locale back to the default setting
locale.setlocale(locale.LC_TIME, '')
return date
def parse_html_value(html):
return html[html.find('>')+1:html.rfind('<')]
def parse_a_href(atag):
start = atag.find('"') + 1
end = atag.find('"', start)
return atag[start:end]
def parse_search_datetime(string):
# Set locale for date parsing
locale.setlocale(locale.LC_TIME, 'C')
# We must parse datetime this way to work in python 2.4
date = datetime(*(time.strptime(string, '%a, %d %b %Y %H:%M:%S +0000')[0:6]))
# Reset locale back to the default setting
locale.setlocale(locale.LC_TIME, '')
return date
def unescape_html(text):
"""Created by Fredrik Lundh (http://effbot.org/zone/re-sub.htm#unescape-html)"""
def fixup(m):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return unichr(int(text[3:-1], 16))
else:
return unichr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
except KeyError:
pass
return text # leave as is
return re.sub(r"&#?\w+;", fixup, text)
def convert_to_utf8_str(arg):
# written by Michael Norton (http://docondev.blogspot.com/)
if isinstance(arg, unicode):
arg = arg.encode('utf-8')
elif not isinstance(arg, str):
arg = str(arg)
return arg
def import_simplejson():
try:
import simplejson as json
except ImportError:
try:
import json # Python 2.6+
except ImportError:
try:
from django.utils import simplejson as json # Google App Engine
except ImportError:
raise ImportError, "Can't load a json library"
return json
def list_to_csv(item_list):
if item_list:
return ','.join([str(i) for i in item_list])
def urlencode_noplus(query):
return '&'.join(['%s=%s' % (quote(str(k)), quote(str(v))) \
for k, v in query.iteritems()])
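
A quick sketch of the helpers above under Python 2, with illustrative values:

from tweepy.utils import list_to_csv, parse_datetime, urlencode_noplus

# Twitter's created_at format, parsed in the C locale as above.
print parse_datetime('Wed Aug 27 13:08:45 +0000 2008')
print list_to_csv([1, 2, 3])              # '1,2,3'
print urlencode_noplus({'track': 'a b'})  # spaces become %20, never '+'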


@ -1,6 +0,0 @@
* Stuart Colville <pypi@muffinresearch.co.uk>
* Cyril Doussin
* Diogo Baeder
* Jannis Leidel
* *YOUR NAME HERE*


@ -1,27 +0,0 @@
::
Copyright (c) 2009, Stuart Colville, Muffin Research Labs
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of "Muffin Research Labs" nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY "Muffin Research Labs" ''AS IS'' AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL "Muffin Research Labs" BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -1,640 +0,0 @@
"""
Python YQL
==========
YQL client for Python
Author: Stuart Colville http://muffinresearch.co.uk/
Docs at: http://python-yql.org/
TODO: More granular error handling
"""
import json
import re
import time
import pprint
from urlparse import urlparse
from urllib import urlencode
from httplib2 import Http
from yql.utils import get_http_method, clean_url, clean_query
from yql.logger import get_logger
import oauth2 as oauth
try:
from urlparse import parse_qs, parse_qsl
except ImportError: # pragma: no cover
from cgi import parse_qs, parse_qsl
__author__ = 'Stuart Colville'
__version__ = '0.7.5'
__all__ = ['Public', 'TwoLegged', 'ThreeLegged']
QUERY_PLACEHOLDER = re.compile(r"[ =]@(?P<param>[a-z].*?\b)", re.IGNORECASE)
REQUEST_TOKEN_URL = 'https://api.login.yahoo.com/oauth/v2/get_request_token'
ACCESS_TOKEN_URL = 'https://api.login.yahoo.com/oauth/v2/get_token'
AUTHORIZATION_URL = 'https://api.login.yahoo.com/oauth/v2/request_auth'
PUBLIC_ENDPOINT = "query.yahooapis.com/v1/public/yql"
PRIVATE_ENDPOINT = "query.yahooapis.com/v1/yql"
HTTP_SCHEME = "http:"
HTTPS_SCHEME = "https:"
yql_logger = get_logger()
class YQLObj(object):
"""A YQLObject is the object created as the result of a YQL query"""
def __init__(self, result_dict):
"""Init query object"""
self._raw = result_dict and result_dict.get('query') or {}
@property
def raw(self):
"""The raw data response"""
return self._raw
@property
def uri(self):
"""The uri used to query the YQL API"""
return self._raw.get('uri')
@property
def query_params(self):
"""The query parameters of the uri used to call the YQL API"""
if self.uri:
q_string = urlparse(self.uri)[4]
return dict(parse_qsl(q_string))
else:
return {}
@property
def results(self):
"""The query results dict."""
return self._raw.get('results')
def one(self):
"""Return just one result directly."""
rows = self.rows
if len(rows) > 1:
raise NotOneError, "More than one result"
else:
return rows[0]
@property
def rows(self):
"""Get a list of rows returned by the query.
Results is a dict with one key but that key changes depending on the results
This provides a way of getting at the rows list in an arbitrary way.
Added in version: 0.6 fixes results with 1 item so that they are still
returned within a list.
"""
result = []
if self.results:
vals = self.results.values()
if len(vals) == 1:
result = self.results.values()[0]
if self.count == 1 and result:
result = [result]
return result
@property
def query(self):
"""The YQL query"""
return self.query_params.get('q')
@property
def lang(self):
"""The language"""
return self._raw.get('lang')
@property
def count(self):
"""The results count"""
count = self._raw.get('count')
if count:
return int(count)
@property
def diagnostics(self):
"""The query diagnostics"""
return self._raw.get('diagnostics')
def pprint_raw(self, indent=4): # pragma: no cover
"""Pretty print the raw data"""
pprint.pprint(self._raw, indent=indent)
def pformat_raw(self, indent=4): # pragma: no cover
"""Pretty format the raw data"""
return pprint.pformat(self._raw, indent=indent)
class YQLError(Exception):
"""Default Error"""
def __init__(self, resp, content, url=None, query=None):
yql_logger.error("%s", content)
yql_logger.error("Error Response: %s", resp)
yql_logger.error("Error url: %s", url)
self.response = resp
self.content = content
self.url = url
self.query = query
def __str__(self):
"""Return the error message.
Attempt to parse the json if it fails
simply return the content attribute instead.
"""
try:
    content = json.loads(self.content)
except Exception:
    content = {}
if content and content.get("error") and content["error"].get(
"description"):
return content['error']['description']
else:
if isinstance(self.content, basestring):
return self.content
else:
return repr(self.content)
class NotOneError(Exception):
"""Not One Error."""
def __init__(self, message):
self.message = message
def __str__(self):
"""Return the error message"""
return self.message
class Public(object):
"""Class for making public YQL queries"""
def __init__(self, api_key=None, shared_secret=None, httplib2_inst=None):
"""Init the base class.
Optionally you can pass in an httplib2 instance which allows you
to set up the instance in a different way for your own uses.
Also it's very helpful in a testing scenario.
"""
self.api_key = api_key
self.secret = shared_secret
self.http = httplib2_inst or Http()
self.scheme = HTTPS_SCHEME
self.__endpoint = PUBLIC_ENDPOINT
self.uri = self.get_endpoint_uri()
def get_endpoint_uri(self):
"""Get endpoint"""
return "http://%s" % self.endpoint
def get_endpoint(self):
"""Gets the endpoint for requests"""
return self.__endpoint
def set_endpoint(self, value):
"""Sets the endpoint and updates the uri"""
if value in (PRIVATE_ENDPOINT, PUBLIC_ENDPOINT):
self.__endpoint = value
self.uri = self.get_endpoint_uri()
else:
raise ValueError, "Invalid endpoint: %s" % value
def get_query_params(self, query, params, **kwargs):
"""Get the query params and validate placeholders"""
query_params = {}
keys_from_query = self.get_placeholder_keys(query)
if keys_from_query and not params or (
params and not hasattr(params, 'get')):
raise ValueError, "If you are using placeholders a dictionary "\
"of substitutions is required"
elif not keys_from_query and params and hasattr(params, 'get'):
raise ValueError, "You supplied a dictionary of substitutions "\
"but the query doesn't have any placeholders"
elif keys_from_query and params:
keys_from_params = params.keys()
if set(keys_from_query) != set(keys_from_params):
raise ValueError, "Parameter keys don't match the query "\
"placeholders"
else:
query_params.update(params)
query_params['q'] = query
query_params['format'] = 'json'
env = kwargs.get('env')
if env:
query_params['env'] = env
return query_params
@staticmethod
def get_placeholder_keys(query):
"""Gets the @var placeholders
http://developer.yahoo.com/yql/guide/var_substitution.html
"""
result = []
for match in QUERY_PLACEHOLDER.finditer(query):
result.append(match.group('param'))
if result:
yql_logger.debug("placeholder_keys: %s", result)
return result
def get_uri(self, query, params=None, **kwargs):
"""Get the the request url"""
params = self.get_query_params(query, params, **kwargs)
query_string = urlencode(params)
uri = '%s?%s' % (self.uri, query_string)
uri = clean_url(uri)
return uri
def execute(self, query, params=None, **kwargs):
"""Execute YQL query"""
query = clean_query(query)
url = self.get_uri(query, params, **kwargs)
# Just in time change to https avoids
# invalid oauth sigs
if self.scheme == HTTPS_SCHEME:
url = url.replace(HTTP_SCHEME, HTTPS_SCHEME)
yql_logger.debug("executed url: %s", url)
http_method = get_http_method(query)
if http_method in ["DELETE", "PUT", "POST"]:
data = {"q": query}
# Encode as json and set Content-Type header
# to reflect we are sending JSON
# Fixes LP: 629064
data = json.dumps(data)
headers = {"Content-Type": "application/json"}
resp, content = self.http.request(
url, http_method, headers=headers, body=data)
yql_logger.debug("body: %s", data)
else:
resp, content = self.http.request(url, http_method)
yql_logger.debug("http_method: %s", http_method)
if resp.get('status') == '200':
return YQLObj(json.loads(content))
else:
raise YQLError, (resp, content)
endpoint = property(get_endpoint, set_endpoint)
class TwoLegged(Public):
"""Two legged Auth is simple request which is signed prior to sending"""
def __init__(self, api_key, shared_secret, httplib2_inst=None):
"""Override init to ensure required args"""
super(TwoLegged, self).__init__(api_key, shared_secret, httplib2_inst)
self.endpoint = PRIVATE_ENDPOINT
self.scheme = HTTPS_SCHEME
self.hmac_sha1_signature = oauth.SignatureMethod_HMAC_SHA1()
self.plaintext_signature = oauth.SignatureMethod_PLAINTEXT()
@staticmethod
def get_base_params():
"""Set-up the basic parameters needed for a request"""
params = {}
params['oauth_version'] = "1.0"
params['oauth_nonce'] = oauth.generate_nonce()
params['oauth_timestamp'] = int(time.time())
return params
def __two_legged_request(self, resource_url, parameters=None, method=None):
"""Sign a request for two-legged authentication"""
params = self.get_base_params()
if parameters:
params.update(parameters)
yql_logger.debug("params: %s", params)
yql_logger.debug("resource_url: %s", resource_url)
if not method:
method = "GET"
consumer = oauth.Consumer(self.api_key, self.secret)
request = oauth.Request(method=method, url=resource_url,
parameters=params)
request.sign_request(self.hmac_sha1_signature, consumer, None)
return request
def get_uri(self, query, params=None, **kwargs):
"""Get the the request url"""
query_params = self.get_query_params(query, params, **kwargs)
http_method = get_http_method(query)
request = self.__two_legged_request(self.uri,
parameters=query_params, method=http_method)
uri = "%s?%s" % (self.uri, request.to_postdata())
uri = clean_url(uri)
return uri
class ThreeLegged(TwoLegged):
"""
Three-legged Auth is used when it involves private data such as a
user's contacts.
Three-legged auth is most likely to be used in a web-site or
web-accessible application. Three-legged auth requires the user
to authenticate the request through the Yahoo login.
Three-legged auth requires the implementation to:
* Request a token
* Get an authentication url
* User uses the auth url to login, which will redirect to a callback
or show a verifier string on screen
* Verifier is read at the callback url or manually provided to get
the access token
* Resource is accessed
For an implementation this will require calling the following methods
in order the first time the user needs to authenticate
* :meth:`get_token_and_auth_url` (returns a token and the auth url)
* get verifier through callback or from screen
* :meth:`get_access_token` (returns the access token)
* :meth:`execute` - makes the request to the protected resource.
Once the access token has been provided subsequent requests can re-use it.
Access tokens expire after 1 hour, however they can be refreshed with
the :meth:`refresh_token` method. (A usage sketch follows this module.)
"""
def __init__(self, api_key, shared_secret, httplib2_inst=None):
"""Override init to add consumer"""
super(ThreeLegged, self).__init__(
api_key, shared_secret, httplib2_inst)
self.scheme = HTTP_SCHEME
self.endpoint = PRIVATE_ENDPOINT
self.consumer = oauth.Consumer(self.api_key, self.secret)
def get_token_and_auth_url(self, callback_url=None):
"""First step is to get the token and then send the request that
provides the auth URL
Returns a tuple of token and the authorisation URL.
"""
client = oauth.Client(self.consumer)
params = {}
params['oauth_callback'] = callback_url or 'oob'
request = oauth.Request(parameters=params)
url = REQUEST_TOKEN_URL
resp, content = client.request(url, "POST", request.to_postdata())
if resp.get('status') == '200':
token = oauth.Token.from_string(content)
yql_logger.debug("token: %s", token)
data = dict(parse_qsl(content))
yql_logger.debug("data: %s", data)
return token, data['xoauth_request_auth_url']
else:
raise YQLError, (resp, content, url)
def get_access_token(self, token, verifier):
"""Get the access token
The verifier (required) should have been provided to the
user following login at the url returned
by the :meth:`get_token_and_auth_url` method.
If not, you will need to extract the auth_verifier
parameter from your callback url on the site where you
are implementing 3-legged auth in order to pass it to this
function.
The access token can be stored and re-used for subsequent
calls.
The stored token will also need to be refreshed periodically
with :meth:`refresh_token`
"""
params = {}
params['oauth_verifier'] = verifier
oauth_request = oauth.Request.from_consumer_and_token(
self.consumer, token=token,
http_url=ACCESS_TOKEN_URL,
http_method="POST",
parameters=params)
yql_logger.debug("oauth_request: %s", oauth_request)
oauth_request.sign_request(
self.hmac_sha1_signature, self.consumer, token)
url = oauth_request.to_url()
yql_logger.debug("oauth_url: %s", url)
postdata = oauth_request.to_postdata()
yql_logger.debug("oauth_postdata: %s", postdata)
resp, content = self.http.request(url, "POST", postdata)
if resp.get('status') == '200':
access_token = YahooToken.from_string(content)
access_token.timestamp = oauth_request['oauth_timestamp']
return access_token
else:
raise YQLError, (resp, content, url)
def check_token(self, token):
"""Check to see if a token has expired"""
if not hasattr(token, 'timestamp'):
raise AttributeError, 'token doesn\'t have a timestamp attribute'
if (int(token.timestamp) + 3600) < time.time():
token = self.refresh_token(token)
return token
def refresh_token(self, token):
"""Access Tokens only last for one hour from the point of being issued.
When a token has expired it needs to be refreshed this method takes an
expired token and refreshes it.
token parameter can be either a token object or a token string.
"""
if not hasattr(token, "key"):
token = YahooToken.from_string(token)
params = self.get_base_params()
params['oauth_token'] = token.key
params['oauth_token_secret'] = token.secret
params['oauth_session_handle'] = token.session_handle
oauth_request = oauth.Request.from_consumer_and_token(
self.consumer, token=token,
http_url=ACCESS_TOKEN_URL,
http_method="POST",
parameters=params)
yql_logger.debug("oauth_request: %s", oauth_request)
oauth_request.sign_request(
self.hmac_sha1_signature, self.consumer, token)
url = oauth_request.to_url()
yql_logger.debug("oauth_url: %s", url)
postdata = oauth_request.to_postdata()
yql_logger.debug("oauth_postdata: %s", postdata)
resp, content = self.http.request(url, "POST", postdata)
if resp.get('status') == '200':
access_token = YahooToken.from_string(content)
yql_logger.debug("oauth_access_token: %s", access_token)
access_token.timestamp = oauth_request['oauth_timestamp']
return access_token
else:
raise YQLError, (resp, content, url)
def get_uri(self, query, params=None, **kwargs):
"""Get the the request url"""
query_params = self.get_query_params(query, params, **kwargs)
token = kwargs.get("token")
if hasattr(token, "yahoo_guid"):
query_params["oauth_yahoo_guid"] = getattr(token, "yahoo_guid")
if not token:
raise ValueError, "Without a token three-legged-auth cannot be"\
" carried out"
yql_logger.debug("query_params: %s", query_params)
http_method = get_http_method(query)
oauth_request = oauth.Request.from_consumer_and_token(
self.consumer, http_url=self.uri,
token=token, parameters=query_params,
http_method=http_method)
yql_logger.debug("oauth_request: %s", oauth_request)
# Sign request
oauth_request.sign_request(
self.hmac_sha1_signature, self.consumer, token)
yql_logger.debug("oauth_signed_request: %s", oauth_request)
uri = "%s?%s" % (self.uri, oauth_request.to_postdata())
return uri.replace('+', '%20').replace('%7E', '~')
class YahooToken(oauth.Token):
"""A subclass of oauth.Token with the addition of a place to
stash the session_handler which is required for token refreshing
"""
@staticmethod
def from_string(data_string):
"""Deserializes a token from a string like one returned by
`to_string()`."""
if not len(data_string):
raise ValueError("Invalid parameter string.")
params = parse_qs(data_string, keep_blank_values=False)
if not len(params):
raise ValueError("Invalid parameter string.")
try:
key = params['oauth_token'][0]
except Exception:
raise ValueError("'oauth_token' not found in OAuth request.")
try:
secret = params['oauth_token_secret'][0]
except Exception:
raise ValueError("'oauth_token_secret' not found in "
"OAuth request.")
token = YahooToken(key, secret)
session_handle = params.get('oauth_session_handle')
if session_handle:
setattr(token, 'session_handle', session_handle[0])
timestamp = params.get('token_creation_timestamp')
if timestamp:
setattr(token, 'timestamp', timestamp[0])
try:
token.callback_confirmed = params['oauth_callback_confirmed'][0]
except KeyError:
pass # 1.0, no callback confirmed.
return token
def to_string(self):
"""Returns this token as a plain string, suitable for storage.
The resulting string includes the token's secret, so you should never
send or store this string where a third party can read it.
"""
data = {
'oauth_token': self.key,
'oauth_token_secret': self.secret,
}
if hasattr(self, 'session_handle'):
data['oauth_session_handle'] = self.session_handle
if hasattr(self, 'timestamp'):
data['token_creation_timestamp'] = self.timestamp
if self.callback_confirmed is not None:
data['oauth_callback_confirmed'] = self.callback_confirmed
return urlencode(data)
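
Tying the ThreeLegged flow described in the class docstring together, a minimal sketch; the keys, store secret and cache directory are hypothetical (the directory must already exist), and the token dance mirrors the live tests below:

import yql
from yql.storage import FileTokenStore

y = yql.ThreeLegged('api-key', 'shared-secret')
store = FileTokenStore('/tmp/yql-tokens', secret='some-secret')

token = store.get('me')
if not token:
    # First run: do the dance.
    request_token, auth_url = y.get_token_and_auth_url()
    print "Visit %s and get a verifier string" % auth_url
    verifier = raw_input("Enter the code: ")
    token = y.get_access_token(request_token, verifier)
else:
    # Refresh the stored token if it is more than an hour old.
    token = y.check_token(token)
store.set('me', token)

res = y.execute("select * from social.profile where guid=me", token=token)
print res.rows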


@ -1,44 +0,0 @@
"""Logging for Python YQL."""
import os
import logging
import logging.handlers
LOG_DIRECTORY_DEFAULT = os.path.join(os.path.dirname(__file__), "../logs")
LOG_DIRECTORY = os.environ.get("YQL_LOG_DIR", LOG_DIRECTORY_DEFAULT)
LOG_LEVELS = {'debug': logging.DEBUG,
'info': logging.INFO,
'warning': logging.WARNING,
'error': logging.ERROR,
'critical': logging.CRITICAL}
LOG_LEVEL = os.environ.get("YQL_LOGGING_LEVEL", 'debug')
LOG_FILENAME = os.path.join(LOG_DIRECTORY, "python-yql.log")
MAX_BYTES = 1024 * 1024
log_level = LOG_LEVELS.get(LOG_LEVEL)
yql_logger = logging.getLogger("python-yql")
yql_logger.setLevel(log_level)
class NullHandler(logging.Handler):
def emit(self, record):
pass
def get_logger():
"""Set-upt the logger if enabled or fallback to NullHandler."""
if os.environ.get("YQL_LOGGING", False):
if not os.path.exists(LOG_DIRECTORY):
os.mkdir(LOG_DIRECTORY)
log_handler = logging.handlers.RotatingFileHandler(
LOG_FILENAME, maxBytes=MAX_BYTES,
backupCount=5)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s")
log_handler.setFormatter(formatter)
else:
log_handler = NullHandler()
yql_logger.addHandler(log_handler)
return yql_logger
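
Logging is configured through environment variables; the log directory and level are read when the module is imported, so set them before importing yql.logger. The values here are illustrative:

import os
os.environ['YQL_LOGGING'] = '1'              # enable file logging
os.environ['YQL_LOG_DIR'] = '/tmp/yql-logs'  # hypothetical directory
os.environ['YQL_LOGGING_LEVEL'] = 'info'

from yql.logger import get_logger
get_logger().info("python-yql logging configured")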


@ -1,72 +0,0 @@
import os
from hashlib import md5
from yql import YahooToken
SECRET = "FDHSJLUREIRPpieruweruwoeirhfsdjf"
class TokenStoreError(Exception):
"""Generic token storage"""
pass
class BaseTokenStore(object):
"""Base class for storage"""
def set(self, name, token):
raise NotImplementedError
def get(self, name):
raise NotImplementedError
class FileTokenStore(BaseTokenStore):
"""A simple filesystem based token store
Note: this is more intended as an example rather than
something for heavy duty production usage.
"""
def __init__(self, dir_path, secret=None):
"""Initialize token storage"""
if not os.path.exists(dir_path):
raise TokenStoreError("Path is not valid")
self.base_dir = dir_path
self.secret = secret or SECRET
def get_filepath(self, name):
"""Build filepath"""
filename = md5("%s%s" % (name, self.secret)).hexdigest()
filepath = os.path.join(self.base_dir, filename)
return filepath
def set(self, name, token):
"""Write a token to file"""
if hasattr(token, 'key'):
token = YahooToken.to_string(token)
if token:
filepath = self.get_filepath(name)
f_handle = open(filepath, 'w')
f_handle.write(token)
f_handle.close()
def get(self, name):
"""Get a token from the filesystem"""
filepath = self.get_filepath(name)
if os.path.exists(filepath):
f_handle = open(filepath, 'r')
token = f_handle.read()
f_handle.close()
token = YahooToken.from_string(token)
return token
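
A short round trip with FileTokenStore above; the directory comes from tempfile and the token string is illustrative:

import tempfile
from yql.storage import FileTokenStore

store = FileTokenStore(tempfile.mkdtemp(), secret='some-secret')
store.set('foo', 'oauth_token=some-token&oauth_token_secret=some-secret')
token = store.get('foo')  # parsed back into a YahooToken
print token.key           # 'some-token'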


@ -1,29 +0,0 @@
import json
from unittest import TestCase
from yql import NotOneError, YQLError
class YQLErrorTest(TestCase):
def test_error_passed_error_string(self):
error = YQLError(resp='some response', content='some content')
self.assertEqual("some content", str(error))
def test_error_passed_object(self):
error = YQLError(resp='some response', content={"foo": 1})
self.assertEqual(repr({"foo": 1}), str(error))
def test_error_passed_json(self):
content = {
'error': {
'description': 'some description',
}
}
error = YQLError(resp='some response', content=json.dumps(content))
self.assertEqual("some description", str(error))
class NotOneErrorTest(TestCase):
def test_is_represented_by_message_as_json(self):
error = NotOneError('some message')
self.assertEqual("some message", str(error))


@ -1,155 +0,0 @@
"""Tests against live services.
*** SKIPPED BY DEFAULT ***
These tests won't normally be run, as part of the main test suite but are run by
our hudson instance to tell us should Yahoo's API change in some way that will
break python-yql.
Note to end-users: These tests are dependent on defining a secrets file with API
keys and other secrets which are required to carry out these tests.
If the secrets file isn't present the tests are skipped
"""
import os
import sys
from time import time
from unittest import TestCase
from nose.plugins.skip import SkipTest
import yql
from yql.storage import FileTokenStore
SECRETS_DIR = os.path.join(os.path.dirname(__file__), "../../../secrets")
CACHE_DIR = os.path.abspath(os.path.join(SECRETS_DIR, "cache"))
try:
if SECRETS_DIR not in sys.path:
sys.path.append(SECRETS_DIR)
from secrets import *
except ImportError:
raise SkipTest("Unable to find secrets directory")
class LiveTestCase(TestCase):
"""A test case containing live tests"""
def test_write_bitly_url(self):
"""Test writing bit.ly url"""
query = """USE 'http://www.datatables.org/bitly/bit.ly.shorten.xml';
SELECT * from bit.ly.shorten where login='%s' and apiKey='%s' and
longUrl='http://yahoo.com'""" % (BITLY_USER, BITLY_API_KEY)
y = yql.TwoLegged(YQL_API_KEY, YQL_SHARED_SECRET)
res = y.execute(query)
assert res.one()["data"]["url"] == "http://yhoo.it/9PPTOr"
def test_public_request(self):
"""Test public two-legged request to flickr"""
query = """select * from flickr.photos.search where
text="panda" and api_key='%s' LIMIT 3""" % FLICKR_API_KEY
y = yql.TwoLegged(YQL_API_KEY, YQL_SHARED_SECRET)
res = y.execute(query)
assert len(res.rows) == 3
def test_two_legged_weather_select(self):
"""Tests the weather tables using two-legged"""
query = """select * from weather.forecast where location in
(select id from xml where
url='http://xoap.weather.com/search/search?where=london'
and itemPath='search.loc')"""
y = yql.TwoLegged(YQL_API_KEY, YQL_SHARED_SECRET)
res = y.execute(query)
assert len(res.rows) > 1
def test_update_social_status(self):
"""Updates status"""
y = yql.ThreeLegged(YQL_API_KEY, YQL_SHARED_SECRET)
timestamp = time()
query = """UPDATE social.profile.status
SET status='Using YQL. %s Update'
WHERE guid=me""" % timestamp
token_store = FileTokenStore(CACHE_DIR, secret='gsfdsfdsfdsfs')
stored_token = token_store.get('foo')
if not stored_token:
# Do the dance
request_token, auth_url = y.get_token_and_auth_url()
print "Visit url %s and get a verifier string" % auth_url
verifier = raw_input("Enter the code: ")
token = y.get_access_token(request_token, verifier)
token_store.set('foo', token)
else:
# Check the access_token is less than an hour old and, if not,
# refresh and stash it
token = y.check_token(stored_token)
if token != stored_token:
token_store.set('foo', token)
res = y.execute(query, token=token)
assert res.rows[0] == "ok"
new_query = """select message from social.profile.status where guid=me"""
res = y.execute(new_query, token=token)
assert res.rows[0].get("message") == "Using YQL. %s Update" % timestamp
def test_update_meme_status(self):
"""Updates status"""
y = yql.ThreeLegged(YQL_API_KEY, YQL_SHARED_SECRET)
query = 'INSERT INTO meme.user.posts (type, content) VALUES("text", "test with pythonyql")'
token_store = FileTokenStore(CACHE_DIR, secret='fjdsfjllds')
store_name = "meme"
stored_token = token_store.get(store_name)
if not stored_token:
# Do the dance
request_token, auth_url = y.get_token_and_auth_url()
print "Visit url %s and get a verifier string" % auth_url
verifier = raw_input("Enter the code: ")
token = y.get_access_token(request_token, verifier)
token_store.set(store_name, token)
else:
# Check the access_token is less than an hour old and, if not,
# refresh and stash it
token = y.check_token(stored_token)
if token != stored_token:
token_store.set(store_name, token)
# post a meme
res = y.execute(query, token=token)
assert y.uri == "http://query.yahooapis.com/v1/yql"
assert res.rows[0].get("message") == "ok"
pubid = None
if res.rows[0].get("post") and res.rows[0]["post"].get("pubid"):
pubid = res.rows[0]["post"]["pubid"]
# Delete the post we've just created
query = 'DELETE FROM meme.user.posts WHERE pubid=@pubid'
res2 = y.execute(query, token=token, params={"pubid": pubid})
assert res2.rows[0].get("message") == "ok"
def test_check_env_var(self):
"""Testing env variable"""
y = yql.Public()
env = "http://datatables.org/alltables.env"
query = "SHOW tables;"
res = y.execute(query, env=env)
assert res.count >= 800
def test_xpath_works(self):
y = yql.Public()
query = """SELECT * FROM html
WHERE url='http://google.co.uk'
AND xpath="//input[contains(@name, 'q')]"
LIMIT 10"""
res = y.execute(query)
assert res.rows[0].get("title") == "Search"


@ -1,23 +0,0 @@
import os
import shutil
from unittest import TestCase
import yql.logger
class LoggerTest(TestCase):
def setUp(self):
self._logging = os.environ.get('YQL_LOGGING', '')
def tearDown(self):
os.environ['YQL_LOGGING'] = self._logging
def test_is_instantiated_even_if_log_dir_doesnt_exist(self):
os.environ['YQL_LOGGING'] = '1'
if os.path.exists(yql.logger.LOG_DIRECTORY):
shutil.rmtree(yql.logger.LOG_DIRECTORY)
yql.logger.get_logger()
def test_logs_message_to_file(self):
os.environ['YQL_LOGGING'] = '1'
yql.logger.get_logger()


@ -1,77 +0,0 @@
"""Set of tests for the placeholder checking"""
from unittest import TestCase
from nose.tools import raises
import yql
class PublicTest(TestCase):
@raises(ValueError)
def test_empty_args_raises_valueerror(self):
y = yql.Public()
query = "SELECT * from foo where dog=@dog"
params = {}
y.execute(query, params)
@raises(ValueError)
def test_incorrect_args_raises_valueerror(self):
y = yql.Public()
query = "SELECT * from foo where dog=@dog"
params = {'test': 'fail'}
y.execute(query, params)
@raises(ValueError)
def test_params_raises_when_not_dict(self):
y = yql.Public()
query = "SELECT * from foo where dog=@dog"
params = ['test']
y.execute(query, params)
@raises(ValueError)
def test_unecessary_args_raises_valueerror(self):
y = yql.Public()
query = "SELECT * from foo where dog='test'"
params = {'test': 'fail'}
y.execute(query, params)
@raises(ValueError)
def test_incorrect_type_raises_valueerror(self):
y = yql.Public()
query = "SELECT * from foo where dog=@test"
params = ('fail')
y.execute(query, params)
def test_placeholder_regex_one(self):
y = yql.Public()
query = "SELECT * from foo where email='foo@foo.com'"
placeholders = y.get_placeholder_keys(query)
self.assertEqual(placeholders, [])
def test_placeholder_regex_two(self):
y = yql.Public()
query = "SELECT * from foo where email=@foo'"
placeholders = y.get_placeholder_keys(query)
self.assertEqual(placeholders, ['foo'])
def test_placeholder_regex_three(self):
y = yql.Public()
query = "SELECT * from foo where email=@foo and test=@bar'"
placeholders = y.get_placeholder_keys(query)
self.assertEqual(placeholders, ['foo', 'bar'])
def test_placeholder_regex_four(self):
y = yql.Public()
query = "SELECT * from foo where foo='bar' LIMIT @foo"
placeholders = y.get_placeholder_keys(query)
self.assertEqual(placeholders, ['foo'])
def test_placeholder_regex_five(self):
y = yql.Public()
query = """SELECT * from foo
where foo='bar' LIMIT
@foo"""
placeholders = y.get_placeholder_keys(query)
self.assertEqual(placeholders, ['foo'])


@ -1,248 +0,0 @@
from email import message_from_file
import os
from unittest import TestCase
import urlparse
from urllib import urlencode
try:
from urlparse import parse_qsl
except ImportError:
from cgi import parse_qsl
from nose.tools import raises
from nose import with_setup
import oauth2 as oauth
import httplib2
import yql
HTTP_SRC_DIR = os.path.join(os.path.dirname(__file__), "http_src/")
class FileDataHttpReplacement(object):
"""Build a stand-in for httplib2.Http that takes its
response headers and bodies from files on disk
http://bitworking.org/news/172/Test-stubbing-httplib2
"""
def __init__(self, cache=None, timeout=None):
self.hit_counter = {}
def request(self, uri, method="GET", body=None, headers=None, redirections=5):
path = urlparse.urlparse(uri)[2]
fname = os.path.join(HTTP_SRC_DIR, path[1:])
if not os.path.exists(fname):
index = self.hit_counter.get(fname, 1)
if os.path.exists(fname + "." + str(index)):
self.hit_counter[fname] = index + 1
fname = fname + "." + str(index)
if os.path.exists(fname):
f = file(fname, "r")
response = message_from_file(f)
f.close()
body = response.get_payload()
response_headers = httplib2.Response(response)
return (response_headers, body)
else:
return (httplib2.Response({"status": "404"}), "")
def add_credentials(self, name, password):
pass
class RequestDataHttpReplacement:
"""Create an httplib stub that returns request data"""
def __init__(self):
pass
def request(self, uri, *args, **kwargs):
"""return the request data"""
return uri, args, kwargs
class TestPublic(yql.Public):
"""Subclass of YQL to allow returning of the request data"""
execute = yql.Public.get_uri
class TestTwoLegged(yql.TwoLegged):
"""Subclass of YQLTwoLegged to allow returning of the request data"""
execute = yql.TwoLegged.get_uri
class TestThreeLegged(yql.ThreeLegged):
"""Subclass of YQLTwoLegged to allow returning of the request data"""
execute = yql.ThreeLegged.get_uri
class StubbedHttpTestCase(TestCase):
stub = None
def setUp(self):
self._http = httplib2.Http
httplib2.Http = self.stub
def tearDown(self):
httplib2.Http = self._http
class PublicStubbedRequestTest(StubbedHttpTestCase):
stub = RequestDataHttpReplacement
def test_urlencoding_for_public_yql(self):
query = 'SELECT * from foo'
y = TestPublic(httplib2_inst=httplib2.Http())
uri = y.execute(query)
self.assertEqual(uri, "http://query.yahooapis.com/v1/public/yql?q=SELECT+%2A+from+foo&format=json")
def test_env_for_public_yql(self):
query = 'SELECT * from foo'
y = TestPublic(httplib2_inst=httplib2.Http())
uri = y.execute(query, env="http://foo.com")
self.assertTrue(uri.find(urlencode({"env":"http://foo.com"})) > -1)
def test_name_param_inserted_for_public_yql(self):
query = 'SELECT * from foo WHERE dog=@dog'
y = TestPublic(httplib2_inst=httplib2.Http())
uri = y.execute(query, {"dog": "fifi"})
self.assertTrue(uri.find('dog=fifi') > -1)
class PublicStubbedFromFileTest(StubbedHttpTestCase):
stub = FileDataHttpReplacement
def test_json_response_from_file(self):
query = 'SELECT * from foo WHERE dog=@dog'
y = yql.Public(httplib2_inst=httplib2.Http())
content = y.execute(query, {"dog": "fifi"})
self.assertEqual(content.count, 3)
class TwoLeggedTest(TestCase):
@raises(TypeError)
def test_yql_with_2leg_auth_raises_typerror(self):
TestTwoLegged()
def test_api_key_and_secret_attrs(self):
y = yql.TwoLegged('test-api-key', 'test-secret')
self.assertEqual(y.api_key, 'test-api-key')
self.assertEqual(y.secret, 'test-secret')
def test_get_two_legged_request_keys(self):
y = yql.TwoLegged('test-api-key', 'test-secret')
# Accessed this way because it's private
request = y._TwoLegged__two_legged_request('http://google.com')
self.assertEqual(set(['oauth_nonce', 'oauth_version', 'oauth_timestamp',
'oauth_consumer_key', 'oauth_signature_method', 'oauth_body_hash',
'oauth_signature']), set(request.keys()))
def test_get_two_legged_request_values(self):
y = yql.TwoLegged('test-api-key', 'test-secret')
# Accessed this way because it's private
request = y._TwoLegged__two_legged_request('http://google.com')
self.assertEqual(request['oauth_consumer_key'], 'test-api-key')
self.assertEqual(request['oauth_signature_method'], 'HMAC-SHA1')
self.assertEqual(request['oauth_version'], '1.0')
def test_get_two_legged_request_param(self):
y = yql.TwoLegged('test-api-key', 'test-secret')
# Accessed this way because it's private
request = y._TwoLegged__two_legged_request('http://google.com',
{"test-param": "test"})
self.assertEqual(request.get('test-param'), 'test')
class TwoLeggedStubbedRequestTest(StubbedHttpTestCase):
stub = RequestDataHttpReplacement
def test_request_for_two_legged(self):
query = 'SELECT * from foo'
y = TestTwoLegged('test-api-key', 'test-secret', httplib2_inst=httplib2.Http())
signed_url = y.execute(query)
qs = dict(parse_qsl(signed_url.split('?')[1]))
self.assertEqual(qs['q'], query)
self.assertEqual(qs['format'], 'json')
class TwoLeggedStubbedFromFileTest(StubbedHttpTestCase):
stub = FileDataHttpReplacement
def test_get_two_legged_from_file(self):
query = 'SELECT * from foo'
y = yql.TwoLegged('test-api-key', 'test-secret', httplib2_inst=httplib2.Http())
# Accessed this way because it's private
self.assertTrue(y.execute(query) is not None)
class ThreeLeggedTest(TestCase):
@raises(TypeError)
def test_yql_with_3leg_auth_raises_typerror(self):
TestThreeLegged()
def test_api_key_and_secret_attrs2(self):
y = yql.ThreeLegged('test-api-key', 'test-secret')
self.assertEqual(y.api_key, 'test-api-key')
self.assertEqual(y.secret, 'test-secret')
def test_get_base_params(self):
y = yql.ThreeLegged('test-api-key', 'test-secret')
result = y.get_base_params()
self.assertEqual(set(['oauth_nonce', 'oauth_version', 'oauth_timestamp']),
set(result.keys()))
@raises(ValueError)
def test_raises_for_three_legged_with_no_token(self):
query = 'SELECT * from foo'
y = TestThreeLegged('test-api-key', 'test-secret', httplib2_inst=httplib2.Http())
y.execute(query)
class ThreeLeggedStubbedRequestTest(StubbedHttpTestCase):
stub = RequestDataHttpReplacement
def test_request_for_three_legged(self):
query = 'SELECT * from foo'
y = TestThreeLegged('test-api-key', 'test-secret',
httplib2_inst=httplib2.Http())
token = oauth.Token.from_string(
'oauth_token=foo&oauth_token_secret=bar')
signed_url = y.execute(query, token=token)
qs = dict(parse_qsl(signed_url.split('?')[1]))
self.assertEqual(qs['q'], query)
self.assertEqual(qs['format'], 'json')
class ThreeLeggedStubbedFromFileTest(StubbedHttpTestCase):
stub = FileDataHttpReplacement
def test_three_legged_execution(self):
query = 'SELECT * from foo WHERE dog=@dog'
y = yql.ThreeLegged('test','test2', httplib2_inst=httplib2.Http())
token = yql.YahooToken('test', 'test2')
content = y.execute(query, {"dog": "fifi"}, token=token)
self.assertEqual(content.count, 3)
@raises(ValueError)
def test_three_legged_execution_raises_value_error_with_invalid_uri(self):
y = yql.ThreeLegged('test','test2', httplib2_inst=httplib2.Http())
y.uri = "fail"
token = yql.YahooToken('tes1t', 'test2')
y.execute("SELECT foo meh meh ", token=token)
def test_get_access_token_request3(self):
y = yql.ThreeLegged('test', 'test-does-not-exist',
httplib2_inst=httplib2.Http())
new_token = yql.YahooToken('test', 'test2')
new_token.session_handle = 'sess_handle_test'
token = y.refresh_token(token=new_token)
self.assertTrue(hasattr(token, 'key'))
self.assertTrue(hasattr(token, 'secret'))


@ -1,12 +0,0 @@
from unittest import TestCase
from nose.tools import raises
import yql
class PublicTest(TestCase):
@raises(ValueError)
def test_cannot_use_unrecognizable_endpoint(self):
y = yql.Public()
y.endpoint = 'some-strange-endpoint'


@ -1,61 +0,0 @@
import os
import tempfile
from unittest import TestCase
from nose.tools import raises
from yql import YahooToken
from yql.storage import BaseTokenStore, FileTokenStore, TokenStoreError
class BaseTokenStoreTest(TestCase):
@raises(NotImplementedError)
def test_must_implement_set(self):
class FooStore(BaseTokenStore):
pass
store = FooStore()
store.set('some name', 'some token')
@raises(NotImplementedError)
def test_must_implement_get(self):
class FooStore(BaseTokenStore):
pass
store = FooStore()
store.get('some name')
class FileTokenStoreTest(TestCase):
@raises(TokenStoreError)
def test_must_be_instanced_with_an_existant_path(self):
FileTokenStore('/some/inexistant/path')
def test_saves_token_string_to_filesystem(self):
directory = tempfile.mkdtemp()
store = FileTokenStore(directory)
store.set('foo', '?key=some-token')
with open(store.get_filepath('foo')) as stored_file:
self.assertTrue('some-token' in stored_file.read())
def test_retrieves_token_from_filesystem(self):
directory = tempfile.mkdtemp()
store = FileTokenStore(directory)
store.set('foo', '?key=%s&oauth_token=some-oauth-token&'\
'oauth_token_secret=some-token-secret' % 'some-token')
token = store.get('foo')
self.assertTrue('some-token' in token.to_string())
def test_cannot_retrieve_token_if_path_doesnt_exist(self):
directory = tempfile.mkdtemp()
store = FileTokenStore(directory)
store.set('foo', '?key=%s&oauth_token=some-oauth-token&'\
'oauth_token_secret=some-token-secret' % 'some-token')
os.remove(store.get_filepath('foo'))
self.assertTrue(store.get('foo') is None)
def test_saves_token_to_filesystem(self):
directory = tempfile.mkdtemp()
store = FileTokenStore(directory)
token = YahooToken('some-token', 'some-secret')
store.set('foo', token)
with open(store.get_filepath('foo')) as stored_file:
self.assertTrue('some-token' in stored_file.read())


@ -1,40 +0,0 @@
from unittest import TestCase
from yql.utils import get_http_method
class UtilitiesTest(TestCase):
def test_finds_get_method_for_select_query(self):
self.assertEqual(get_http_method("SELECT foo"), "GET")
def test_finds_get_method_for_select_query_with_leading_space(self):
self.assertEqual(get_http_method(" SELECT foo"), "GET")
def test_finds_get_method_for_lowercase_select_query(self):
self.assertEqual(get_http_method("select foo"), "GET")
def test_finds_post_method_for_insert_query(self):
self.assertEqual(get_http_method("INSERT into"), "POST")
def test_finds_post_method_for_multiline_insert_query(self):
query = """
INSERT INTO yql.queries.query (name, query)
VALUES ("weather", "SELECT * FROM weather.forecast
WHERE location=90210")
"""
self.assertEqual(get_http_method(query), "POST")
def test_finds_put_method_for_update_query(self):
self.assertEqual(get_http_method("update foo"), "PUT")
def test_finds_post_method_for_delete_query(self):
self.assertEqual(get_http_method("DELETE from"), "POST")
def test_finds_post_method_for_lowercase_delete_query(self):
self.assertEqual(get_http_method("delete from"), "POST")
def test_finds_get_method_for_show_query(self):
self.assertEqual(get_http_method("SHOW tables"), "GET")
def test_finds_get_method_for_describe_query(self):
self.assertEqual(get_http_method("DESC tablename"), "GET")


@ -1,81 +0,0 @@
from unittest import TestCase
from nose.tools import raises
try:
from urlparse import parse_qs, parse_qsl
except ImportError:
from cgi import parse_qs, parse_qsl
import yql
class YahooTokenTest(TestCase):
def test_create_yahoo_token(self):
token = yql.YahooToken('test-key', 'test-secret')
self.assertEqual(token.key, 'test-key')
self.assertEqual(token.secret, 'test-secret')
def test_y_token_to_string(self):
token = yql.YahooToken('test-key', 'test-secret')
token_to_string = token.to_string()
string_data = dict(parse_qsl(token_to_string))
self.assertEqual(string_data.get('oauth_token'), 'test-key')
self.assertEqual(string_data.get('oauth_token_secret'), 'test-secret')
def test_y_token_to_string2(self):
token = yql.YahooToken('test-key', 'test-secret')
token.timestamp = '1111'
token.session_handle = 'poop'
token.callback_confirmed = 'basilfawlty'
token_to_string = token.to_string()
string_data = dict(parse_qsl(token_to_string))
self.assertEqual(string_data.get('oauth_token'), 'test-key')
self.assertEqual(string_data.get('oauth_token_secret'), 'test-secret')
self.assertEqual(string_data.get('token_creation_timestamp'), '1111')
self.assertEqual(string_data.get('oauth_callback_confirmed'), 'basilfawlty')
self.assertEqual(string_data.get('oauth_session_handle'), 'poop')
def test_y_token_from_string(self):
token_string = "oauth_token=foo&oauth_token_secret=bar&"\
"oauth_session_handle=baz&token_creation_timestamp=1111"
token_from_string = yql.YahooToken.from_string(token_string)
self.assertEqual(token_from_string.key, 'foo')
self.assertEqual(token_from_string.secret, 'bar')
self.assertEqual(token_from_string.session_handle, 'baz')
self.assertEqual(token_from_string.timestamp, '1111')
@raises(ValueError)
def test_y_token_raises_value_error(self):
yql.YahooToken.from_string('')
@raises(ValueError)
def test_y_token_raises_value_error2(self):
yql.YahooToken.from_string('foo')
@raises(ValueError)
def test_y_token_raises_value_error3(self):
yql.YahooToken.from_string('oauth_token=bar')
@raises(ValueError)
def test_y_token_raises_value_error4(self):
yql.YahooToken.from_string('oauth_token_secret=bar')
@raises(AttributeError)
def test_y_token_without_timestamp_raises(self):
token = yql.YahooToken('test', 'test2')
y = yql.ThreeLegged('test', 'test2')
y.check_token(token)
def test_y_token_without_timestamp_raises2(self):
def refresh_token_replacement(token):
return 'replaced'
y = yql.ThreeLegged('test', 'test2')
y.refresh_token = refresh_token_replacement
token = yql.YahooToken('test', 'test2')
token.timestamp = 11111
self.assertEqual(y.check_token(token), 'replaced')


@ -1,105 +0,0 @@
"""Tests for the YQL object"""
import json
from unittest import TestCase
from nose.tools import raises
from yql import YQLObj, NotOneError
data_dict = json.loads("""{"query":{"count":"3","created":"2009-11-20T12:11:56Z","lang":"en-US","updated":"2009-11-20T12:11:56Z","uri":"http://query.yahooapis.com/v1/yql?q=select+*+from+flickr.photos.search+where+text%3D%22panda%22+limit+3","diagnostics":{"publiclyCallable":"true","url":{"execution-time":"742","content":"http://api.flickr.com/services/rest/?method=flickr.photos.search&text=panda&page=1&per_page=10"},"user-time":"745","service-time":"742","build-version":"3805"},"results":{"photo":[{"farm":"3","id":"4117944207","isfamily":"0","isfriend":"0","ispublic":"1","owner":"12346075@N00","secret":"ce1f6092de","server":"2510","title":"Pandas"},{"farm":"3","id":"4118710292","isfamily":"0","isfriend":"0","ispublic":"1","owner":"12346075@N00","secret":"649632a3e2","server":"2754","title":"Pandas"},{"farm":"3","id":"4118698318","isfamily":"0","isfriend":"0","ispublic":"1","owner":"28451051@N02","secret":"ec0b508684","server":"2586","title":"fuzzy flowers (Kalanchoe tomentosa)"}]}}}""")
data_dict2 = json.loads("""{"query":{"count":"1","created":"2009-11-20T12:11:56Z","lang":"en-US","updated":"2009-11-20T12:11:56Z","uri":"http://query.yahooapis.com/v1/yql?q=select+*+from+flickr.photos.search+where+text%3D%22panda%22+limit+3","diagnostics":{"publiclyCallable":"true","url":{"execution-time":"742","content":"http://api.flickr.com/services/rest/?method=flickr.photos.search&text=panda&page=1&per_page=10"},"user-time":"745","service-time":"742","build-version":"3805"},"results":{"photo":{"farm":"3","id":"4117944207","isfamily":"0","isfriend":"0","ispublic":"1","owner":"12346075@N00","secret":"ce1f6092de","server":"2510","title":"Pandas"}}}}""")
yqlobj = YQLObj(data_dict)
yqlobj2 = YQLObj({})
yqlobj3 = YQLObj(data_dict2)
class YQLObjTest(TestCase):
@raises(AttributeError)
def test_yql_object_one(self):
"""Test that invalid query raises AttributeError"""
yqlobj.query = 1
def test_yqlobj_uri(self):
"""Test that the query uri is as expected."""
self.assertEqual(yqlobj.uri, u"http://query.yahooapis.com/v1/yql?q=select+*+"\
"from+flickr.photos.search+where+text%3D%22panda%22+limit+3")
def test_yqlobj_query(self):
"""Test retrieval of the actual query"""
self.assertEqual(yqlobj.query, u'select * from flickr.photos.search '\
'where text="panda" limit 3')
def test_yqlobj_count(self):
"""Check we have 3 records"""
self.assertEqual(yqlobj.count, 3)
def test_yqlobj_lang(self):
"""Check the lang attr."""
self.assertEqual(yqlobj.lang, u"en-US")
def test_yqlobj_results(self):
"""Check the results."""
expected_results = {u'photo': [
{u'isfamily': u'0',
u'title': u'Pandas',
u'farm': u'3',
u'ispublic': u'1',
u'server': u'2510',
u'isfriend': u'0',
u'secret': u'ce1f6092de',
u'owner': u'12346075@N00',
u'id': u'4117944207'},
{u'isfamily': u'0',
u'title': u'Pandas',
u'farm': u'3',
u'ispublic': u'1',
u'server': u'2754',
u'isfriend': u'0',
u'secret': u'649632a3e2',
u'owner': u'12346075@N00',
u'id': u'4118710292'},
{u'isfamily': u'0',
u'title': u'fuzzy flowers (Kalanchoe tomentosa)',
u'farm': u'3',
u'ispublic': u'1',
u'server': u'2586',
u'isfriend': u'0',
u'secret': u'ec0b508684',
u'owner': u'28451051@N02',
u'id': u'4118698318'}
]}
self.assertEqual(yqlobj.results, expected_results)
def test_yqlobj_raw(self):
"""Check the raw attr."""
self.assertEqual(yqlobj.raw, data_dict.get('query'))
def test_yqlobj_diagnostics(self):
"""Check the diagnostics"""
self.assertEqual(yqlobj.diagnostics, data_dict.get('query').get('diagnostics'))
def test_query_is_none(self):
"""Check query is None with no data."""
self.assertTrue(yqlobj2.query is None)
def test_rows(self):
"""Test we can iterate over the rows."""
stuff = []
for row in yqlobj.rows:
stuff.append(row.get('server'))
self.assertEqual(stuff, [u'2510', u'2754', u'2586'])
@raises(NotOneError)
def test_one(self):
"""Test that accessing one result raises exception"""
yqlobj.one()
def test_one_with_one_result(self):
"""Test accessing data with one result."""
res = yqlobj3.one()
self.assertEqual(res.get("title"), "Pandas")


@ -1,38 +0,0 @@
""""Utility functions"""
import re
METHOD_MAP = (
("insert", "POST"),
("update", "PUT"),
("delete", "POST"),
)
MULTI_PLUS = re.compile(r"\+{2,}")
MULTI_SPACE = re.compile(r" {2,}")
def get_http_method(query):
"""Work out if this should be GET, POST, PUT or DELETE"""
lower_query = query.strip().lower()
http_method = "GET"
for method in METHOD_MAP:
if method[0] in lower_query:
http_method = method[1]
break
return http_method
def clean_url(url):
"""Cleans up a uri/url"""
url = url.replace("\n", "")
url = MULTI_PLUS.sub("+", url)
return url
def clean_query(query):
"""Cleans up a query"""
query = query.replace("\n", "")
query = MULTI_SPACE.sub(" ", query)
return query
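
A sketch of the helpers above; note that per METHOD_MAP a YQL DELETE is sent as an HTTP POST. The query value is illustrative:

from yql.utils import clean_query, get_http_method

query = """INSERT INTO yql.queries.query (name, query)
           VALUES ("weather", "SELECT 1")"""
print get_http_method(query)  # 'POST'
print clean_query(query)      # newlines removed, runs of spaces collapsed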


@ -3,5 +3,7 @@ lxml==3.1beta1
pyenchant==1.6.5
pydns>=2.3.6
watchdog
yql
tweepy
pygeoip
BeautifulSoup4