Merge branch 'develop' into refresh

Luke Rogers 2014-02-13 12:06:11 +13:00
commit b35c977c35
3 changed files with 75 additions and 19 deletions

View file

@@ -46,3 +46,5 @@ def help(inp, notice=None, input=None, conn=None, bot=None):
     else:
         if inp in commands:
             notice(conn.conf["command_prefix"] + commands[inp].__doc__)
+        else:
+            notice("Command {}{} not found".format(conn.conf["command_prefix"], inp))

View file

@@ -1,8 +1,8 @@
 from util import hook, http, text
 import re
-api_url = "http://minecraftwiki.net/api.php?action=opensearch"
-mc_url = "http://minecraftwiki.net/wiki/"
+api_url = "http://minecraft.gamepedia.com/api.php?action=opensearch"
+mc_url = "http://minecraft.gamepedia.com/"
 @hook.command
@@ -10,20 +10,40 @@ def mcwiki(inp):
     """mcwiki <phrase> -- Gets the first paragraph of
     the Minecraft Wiki article on <phrase>."""
-    j = http.get_json(api_url, search=inp)
+    try:
+        j = http.get_json(api_url, search=inp)
+    except (http.HTTPError, http.URLError) as e:
+        return "Error fetching search results: {}".format(e)
+    except ValueError as e:
+        return "Error reading search results: {}".format(e)
     if not j[1]:
         return "No results found."
-    article_name = j[1][0].replace(' ', '_').encode('utf8')
+    # we remove items with a '/' in the name, because
+    # gamepedia uses subpages for different languages
+    # for some stupid reason
+    items = [item for item in j[1] if not "/" in item]
+    if items:
+        article_name = items[0].replace(' ', '_').encode('utf8')
+    else:
+        # there are no items without /, just return a / one
+        article_name = j[1][0].replace(' ', '_').encode('utf8')
     url = mc_url + http.quote(article_name, '')
-    page = http.get_html(url)
+    try:
+        page = http.get_html(url)
+    except (http.HTTPError, http.URLError) as e:
+        return "Error fetching wiki page: {}".format(e)
     for p in page.xpath('//div[@class="mw-content-ltr"]/p'):
         if p.text_content():
             summary = " ".join(p.text_content().splitlines())
             summary = re.sub("\[\d+\]", "", summary)
             summary = text.truncate_str(summary, 200)
-            return "{} :: {}".format(summary, url)
+            return u"{} :: {}".format(summary, url)
     # this shouldn't happen
     return "Unknown Error."

View file

@@ -1,28 +1,62 @@
 import re
-from util import hook, http, web, text
-from bs4 import BeautifulSoup
+from util import hook, http, web
+from util.text import truncate_str
+from bs4 import BeautifulSoup, NavigableString, Tag
 steam_re = (r'(.*:)//(store.steampowered.com)(:[0-9]+)?(.*)', re.I)
 def get_steam_info(url):
     # we get the soup manually because the steam pages have some odd encoding troubles
     page = http.get(url)
     soup = BeautifulSoup(page, 'lxml', from_encoding="utf-8")
-    name = soup.find('div', {'class': 'apphub_AppName'}).text
-    desc = ": " + text.truncate_str(soup.find('div', {'class': 'game_description_snippet'}).text.strip())
+    data = {}
-    # the page has a ton of returns and tabs
-    details = soup.find('div', {'class': 'glance_details'}).text.strip().split(u"\n\n\r\n\t\t\t\t\t\t\t\t\t")
-    genre = " - Genre: " + details[0].replace(u"Genre: ", u"")
-    date = " - Release date: " + details[1].replace(u"Release Date: ", u"")
-    price = ""
-    if not "Free to Play" in genre:
-        price = " - Price: " + soup.find('div', {'class': 'game_purchase_price price'}).text.strip()
+    data["name"] = soup.find('div', {'class': 'apphub_AppName'}).text
+    data["desc"] = truncate_str(soup.find('meta', {'name': 'description'})['content'].strip(), 80)
-    return name + desc + genre + date + price
+    # get the element details_block
+    details = soup.find('div', {'class': 'details_block'})
+    # loop over every <b></b> tag in details_block
+    for b in details.findAll('b'):
+        # get the contents of the <b></b> tag, which is our title
+        title = b.text.lower().replace(":", "")
+        if title == "languages":
+            # we have all we need!
+            break
+        # find the next element directly after the <b></b> tag
+        next = b.nextSibling
+        if next:
+            # if the element is some text
+            if isinstance(next, NavigableString):
+                text = next.string.strip()
+                if text:
+                    # we found valid text, save it and continue the loop
+                    data[title] = text
+                    continue
+                else:
+                    # the text is blank - sometimes this means there are
+                    # useless spaces or tabs between the <b> and <a> tags.
+                    # so we find the next <a> tag and carry on to the next
+                    # bit of code below
+                    next = next.find_next('a', href=True)
+            # if the element is an <a></a> tag
+            if isinstance(next, Tag) and next.name == 'a':
+                text = next.string.strip()
+                if text:
+                    # we found valid text (in the <a></a> tag),
+                    # save it and continue the loop
+                    data[title] = text
+                    continue
+    data["price"] = soup.find('div', {'class': 'game_purchase_price price'}).text.strip()
+    return u"\x02{name}\x02: {desc}, \x02Genre\x02: {genre}, \x02Release Date\x02: {release date}, \x02Price\x02: {price}".format(**data)
 @hook.regex(*steam_re)
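
Note: the details_block scraping added above follows a general BeautifulSoup pattern: each <b>Label:</b> heading is followed either by a bare text node or by a tag (usually an <a> link) holding the value. A minimal self-contained sketch of that pattern is below; the sample markup and the parse_details name are invented for illustration and are not the real Steam page layout.

# Minimal sketch of the <b>-label scraping pattern used above, run against
# invented sample markup rather than a live Steam store page.
from bs4 import BeautifulSoup, NavigableString, Tag

sample = """
<div class="details_block">
    <b>Title:</b> Example Game<br>
    <b>Genre:</b> <a href="/genre/Action">Action</a><br>
    <b>Release Date:</b> 1 Jan 2014<br>
</div>
"""


def parse_details(html):
    data = {}
    details = BeautifulSoup(html, 'html.parser').find('div', {'class': 'details_block'})
    for b in details.find_all('b'):
        label = b.text.lower().replace(":", "").strip()
        sibling = b.next_sibling
        if isinstance(sibling, NavigableString) and sibling.strip():
            # the value is plain text directly after the <b> tag
            data[label] = sibling.strip()
        else:
            # the value is wrapped in a tag, e.g. an <a> link
            link = b.find_next('a')
            if isinstance(link, Tag) and link.string:
                data[label] = link.string.strip()
    return data


print(parse_details(sample))
# e.g. {'title': 'Example Game', 'genre': 'Action', 'release date': '1 Jan 2014'}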