This repository has been archived on 2023-04-13. You can view files and clone it, but cannot push or open issues or pull requests.
CloudBot/plugins/metacritic.py
2013-09-05 10:11:18 +08:00

137 lines
4.5 KiB
Python
Executable file

# metacritic.com scraper
import re
from urllib2 import HTTPError
from util import hook, http
@hook.command('mc')
@hook.command
def metacritic(inp):
"""mc [all|movie|tv|album|x360|ps3|wii|pc|ds|3ds|vita] <title>
Gets rating for <title> from metacritic on the specified medium."""
# if the results suck, it's metacritic's fault
args = inp.strip()
game_platforms = ('x360', 'ps3', 'pc', 'ds', 'wii', '3ds', 'gba',
'psp', 'vita')
all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')
try:
plat, title = args.split(' ', 1)
if plat not in all_platforms:
# raise the ValueError so that the except block catches it
# in this case, or in the case of the .split above raising the
# ValueError, we want the same thing to happen
raise ValueError
except ValueError:
plat = 'all'
title = args
cat = 'game' if plat in game_platforms else plat
title_safe = http.quote_plus(title)
url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)
try:
doc = http.get_html(url)
except HTTPError:
return 'error fetching results'
''' result format:
-- game result, with score
-- subsequent results are the same structure, without first_result class
<li class="result first_result">
<div class="result_type">
<strong>Game</strong>
<span class="platform">WII</span>
</div>
<div class="result_wrap">
<div class="basic_stats has_score">
<div class="main_stats">
<h3 class="product_title basic_stat">...</h3>
<div class="std_score">
<div class="score_wrap">
<span class="label">Metascore: </span>
<span class="data metascore score_favorable">87</span>
</div>
</div>
</div>
<div class="more_stats extended_stats">...</div>
</div>
</div>
</li>
-- other platforms are the same basic layout
-- if it doesn't have a score, there is no div.basic_score
-- the <div class="result_type"> changes content for non-games:
<div class="result_type"><strong>Movie</strong></div>
'''
# get the proper result element we want to pull data from
result = None
if not doc.find_class('query_results'):
return 'No results found.'
# if they specified an invalid search term, the input box will be empty
if doc.get_element_by_id('search_term').value == '':
return 'Invalid search term.'
if plat not in game_platforms:
# for [all] results, or non-game platforms, get the first result
result = doc.find_class('result first_result')[0]
# find the platform, if it exists
result_type = result.find_class('result_type')
if result_type:
# if the result_type div has a platform div, get that one
platform_div = result_type[0].find_class('platform')
if platform_div:
plat = platform_div[0].text_content().strip()
else:
# otherwise, use the result_type text_content
plat = result_type[0].text_content().strip()
else:
# for games, we want to pull the first result with the correct
# platform
results = doc.find_class('result')
for res in results:
result_plat = res.find_class('platform')[0].text_content().strip()
if result_plat == plat.upper():
result = res
break
if not result:
return 'No results found.'
# get the name, release date, and score from the result
product_title = result.find_class('product_title')[0]
name = product_title.text_content()
link = 'http://metacritic.com' + product_title.find('a').attrib['href']
try:
release = result.find_class('release_date')[0]. \
find_class('data')[0].text_content()
# strip extra spaces out of the release date
release = re.sub(r'\s{2,}', ' ', release)
except IndexError:
release = None
try:
score = result.find_class('metascore')[0].text_content()
except IndexError:
score = None
return '[{}] {} - \x02{}/100\x02, {} - {}'.format(plat.upper(), name, score or 'no score',
'release: \x02%s\x02' % release if release else 'unreleased',
link)