This repository has been archived on 2023-04-13. You can view files and clone it, but cannot push or open issues or pull requests.
CloudBot/lib/spotimeta/parser.py
Steven Smith c357bfed82 spotify
2013-02-01 06:47:25 +08:00

196 lines
5.8 KiB
Python

from xml.dom import minidom
# extremely boring dom parsing ahead. Consider yourself warned.
# The reason for the uri arg is that the XML returned from lookups does not
# contain the href uri of the thing that was looked up. However, when an
# element is encountered that is NOT the root of a query, it DOES contain
# the href. We pass the uri in so the returned data always has the same
# format.
def parse_lookup_doc(src, uri=None):
    """Parse the XML document returned by a lookup request.

    Args:
        src: Whatever ``minidom.parse`` accepts as its source argument.
        uri: Optional URI of the looked-up resource. The root element of a
            lookup response does not carry its own ``href``, so the URI is
            forwarded to the element parser to be stored as ``href``.

    Returns:
        A dict ``{"type": <root tag>, "result": <parsed element dict>}``
        where the type is ``"artist"``, ``"album"`` or ``"track"``.

    Raises:
        ValueError: If the root element is none of the known tags.
    """
    root = minidom.parse(src).documentElement
    kind = root.nodeName

    if kind == "artist":
        result = parse_artist(root, uri)
    elif kind == "album":
        result = parse_album(root, uri)
    elif kind == "track":
        result = parse_track(root, uri)
    else:
        # ValueError is still an Exception, so existing broad handlers keep
        # working while new callers can catch something specific.
        raise ValueError("unknown node type! " + kind)

    return {"type": kind, "result": result}
def parse_search_doc(src):
    """Parse the XML document returned by a search request.

    Args:
        src: Whatever ``minidom.parse`` accepts as its source argument.

    Returns:
        The dict produced by the search parser matching the root tag
        (``artists``, ``albums`` or ``tracks``).

    Raises:
        ValueError: If the root element is none of the known tags.
    """
    root = minidom.parse(src).documentElement
    kind = root.nodeName

    if kind == "artists":
        return parse_artist_search(root)
    if kind == "albums":
        return parse_album_search(root)
    if kind == "tracks":
        return parse_track_search(root)

    # ValueError is still an Exception, so existing broad handlers keep
    # working while new callers can catch something specific.
    raise ValueError("unknown node type! " + kind)
def parse_artist(root, uri=None):
    """Convert an artist element into a plain dict.

    The dict may contain ``href`` (the given ``uri`` if truthy, otherwise
    the element's own ``href`` attribute when present), ``name`` and
    ``albums``, depending on what the element provides.
    """
    artist = {}

    # An explicitly supplied uri wins over the element's own attribute.
    if uri:
        artist["href"] = uri
    elif root.hasAttribute("href"):
        artist["href"] = root.getAttribute("href")

    for tag, child in _nodes(root):
        if tag == "albums":
            artist["albums"] = parse_albumlist(child)
        elif tag == "name":
            artist["name"] = _text(child)

    return artist
def parse_artistlist(root):
    """Return a list of parsed dicts, one per ``artist`` child of ``root``.

    A real list is built (rather than returning ``map(...)``) because callers
    in this module take ``len()`` of the result and index into it, which a
    lazy Python 3 ``map`` object does not support.
    """
    return [parse_artist(node) for node in _filter(root, "artist")]
def parse_albumlist(root):
    """Return a list of parsed dicts, one per ``album`` child of ``root``.

    A real list is built (rather than returning ``map(...)``) so the result
    can be measured, indexed and iterated repeatedly on Python 3 as well,
    where ``map`` yields a one-shot lazy iterator.
    """
    return [parse_album(node) for node in _filter(root, "album")]
def parse_tracklist(root):
    """Return a list of parsed dicts, one per ``track`` child of ``root``.

    A real list is built (rather than returning ``map(...)``) so the result
    can be measured, indexed and iterated repeatedly on Python 3 as well,
    where ``map`` yields a one-shot lazy iterator.
    """
    return [parse_track(node) for node in _filter(root, "track")]
def parse_album(root, uri=None):
    """Convert an album element into a plain dict.

    Args:
        root: The album DOM element.
        uri: Optional URI stored as ``href``; when falsy, the element's own
            ``href`` attribute is used if it has one.

    Returns:
        A dict that may contain ``href``, ``name``, ``released`` (int, only
        when the element has non-empty text), ``ids`` (list of id dicts) and
        ``tracks``. It always contains ``artists`` (list) and ``artist``,
        which is the sole artist when there is exactly one and ``None``
        otherwise.

    Raises:
        ValueError: If the ``released`` text is non-empty but not an integer.
    """
    ret = {}
    if uri or root.hasAttribute("href"):
        ret["href"] = uri or root.getAttribute("href")

    for name, elem in _nodes(root):
        if name == "name":
            ret["name"] = _text(elem)
        elif name == "released":
            released = _text(elem)
            if released:
                ret["released"] = int(released)
        elif name == "id":
            ret.setdefault("ids", []).append(parse_id(elem))
        elif name == "tracks":
            ret["tracks"] = list(parse_tracklist(elem))

    # Materialize the sequence: len() and indexing below do not work on a
    # lazy iterator such as the one Python 3's map() returns.
    artists = list(parse_artistlist(root))
    ret["artists"] = artists
    ret["artist"] = artists[0] if len(artists) == 1 else None

    # todo: availability stuff. RFH
    return ret
def parse_id(elem):
    """Convert an id element into a dict.

    The result holds the element's ``type`` attribute, its text content
    under ``id`` and, when the attribute exists, its ``href``.
    """
    id_type = elem.getAttribute("type")
    id_value = _text(elem)
    parsed = dict(type=id_type, id=id_value)

    if elem.hasAttribute("href"):
        parsed.update(href=elem.getAttribute("href"))

    return parsed
def parse_track(root, uri=None):
    """Convert a track element into a plain dict.

    Args:
        root: The track DOM element.
        uri: Optional URI stored as ``href``; when falsy, the element's own
            ``href`` attribute is used if it has one.

    Returns:
        A dict that may contain ``href``, ``name``, ``disc-number`` (int),
        ``track-number`` (int), ``length`` (float), ``popularity`` (float),
        ``album`` (parsed album dict) and ``ids`` (list of id dicts). It
        always contains ``artists`` (list); ``artist`` is set to the first
        artist whenever at least one is present.

    Raises:
        ValueError: If a numeric child element holds text that cannot be
            converted to the expected number type.
    """
    ret = {}
    if uri or root.hasAttribute("href"):
        ret["href"] = uri or root.getAttribute("href")

    for name, elem in _nodes(root):
        if name == "name":
            ret["name"] = _text(elem)
        elif name == "disc-number":
            ret["disc-number"] = int(_text(elem))
        elif name == "track-number":
            ret["track-number"] = int(_text(elem))
        elif name == "length":
            ret["length"] = float(_text(elem))
        elif name == "popularity":
            ret["popularity"] = float(_text(elem))
        elif name == "album":
            ret["album"] = parse_album(elem)
        elif name == "id":
            ret.setdefault("ids", []).append(parse_id(elem))

    # Materialize the sequence: a lazy iterator (Python 3's map()) is always
    # truthy and cannot be indexed, which would break the check below.
    artists = list(parse_artistlist(root))
    ret["artists"] = artists

    # Following prop is there for backwards compat. It may be dropped in a
    # future version
    if artists:
        ret["artist"] = artists[0]

    return ret
def parse_opensearch(root):
    """Collect the OpenSearch paging metadata found below ``root``.

    Every element in the OpenSearch 1.1 namespace is inspected. A ``Query``
    element contributes ``term`` and ``start_page``; ``totalResults``,
    ``startIndex`` and ``itemsPerPage`` contribute their integer text as
    ``total_results``, ``start_index`` and ``items_per_page``.
    """
    # Tag name -> result key, for the elements whose text is a plain integer.
    int_text_fields = {
        "totalResults": "total_results",
        "startIndex": "start_index",
        "itemsPerPage": "items_per_page",
    }

    summary = {}
    namespace = "http://a9.com/-/spec/opensearch/1.1/"
    for node in root.getElementsByTagNameNS(namespace, "*"):
        tag = node.localName
        if tag == "Query":
            summary["term"] = node.getAttribute("searchTerms")
            summary["start_page"] = int(node.getAttribute("startPage"))
        elif tag in int_text_fields:
            summary[int_text_fields[tag]] = int(_text(node))

    return summary
def parse_album_search(root):
    """Parse an album search response into paging metadata plus results."""
    # Search responses have no dedicated <search> wrapper: the root is an
    # ordinary <artists|albums|tracks> element that additionally carries
    # opensearch-namespace children. Hence the album list alone is not
    # enough and the opensearch metadata is merged in.
    summary = parse_opensearch(root)
    summary.update(result=parse_albumlist(root))
    return summary
def parse_artist_search(root):
    """Parse an artist search response into paging metadata plus results."""
    summary = parse_opensearch(root)
    artists = parse_artistlist(root)
    summary["result"] = artists
    return summary
def parse_track_search(root):
    """Parse a track search response into paging metadata plus results."""
    summary = parse_opensearch(root)
    tracks = parse_tracklist(root)
    summary["result"] = tracks
    return summary
def _nodes(elem):
    """Return a generator of ``(nodeName, node)`` pairs for every direct
    child of ``elem`` that is an element node; text, comments and other
    node kinds are skipped."""
    children = elem.childNodes
    return (
        (child.nodeName, child)
        for child in children
        if child.nodeType == child.ELEMENT_NODE
    )
def _text(elem):
    """Join the values of all text nodes that are direct children of
    ``elem`` into one string (text inside nested elements is not
    included)."""
    pieces = [
        child.nodeValue
        for child in elem.childNodes
        if child.nodeType == child.TEXT_NODE
    ]
    return "".join(pieces)
def _filter(elem, filtername):
    """Return a generator over the direct child elements of ``elem`` whose
    nodeName equals ``filtername``."""
    pairs = _nodes(elem)
    return (node for tag, node in pairs if tag == filtername)