# metacritic.com scraper
import re
from urllib2 import HTTPError
from util import hook , http
@hook.command('mc')
@hook.command
def metacritic(inp):
    """mc [all|movie|tv|album|x360|ps3|pc|gba|ds|3ds|wii|vita|wiiu|xone|ps4] <title>
    Gets rating for <title> from metacritic on the specified medium."""
    # NOTE(review): the docstring above is kept verbatim in case the hook
    # framework surfaces it as user-facing help text -- confirm before editing.
    #
    # inp:     raw command argument string, "[medium] <title>".
    # returns: a one-line summary (platform, name, score, release date, link)
    #          or a short error/"not found" message.

    args = inp.strip()

    game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii',
                      'vita', 'wiiu', 'xone', 'ps4')
    all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')

    # The first word is the medium only when it is a known one and is
    # followed by a title; otherwise the whole input is the title and we
    # search across every medium.
    plat, _, title = args.partition(' ')
    if not title or plat not in all_platforms:
        plat = 'all'
        title = args

    # every game platform shares the single "game" search category
    cat = 'game' if plat in game_platforms else plat

    title_safe = http.quote_plus(title)
    url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)

    try:
        doc = http.get_html(url)
    except HTTPError:
        return 'error fetching results'

    if not doc.find_class('query_results'):
        return 'No results found.'

    # if they specified an invalid search term, the input box will be empty
    if doc.get_element_by_id('search_term').value == '':
        return 'Invalid search term.'

    # get the proper result element we want to pull data from
    result = None

    if plat not in game_platforms:
        # for [all] results, or non-game platforms, get the first result;
        # a page without one is treated as "no results" instead of raising
        first_results = doc.find_class('result first_result')
        if first_results:
            result = first_results[0]

            # find the platform, if it exists: prefer the nested platform
            # element, otherwise use the result_type text itself
            result_type = result.find_class('result_type')
            if result_type:
                platform_div = result_type[0].find_class('platform')
                label = platform_div[0] if platform_div else result_type[0]
                plat = label.text_content().strip()
    else:
        # for games, we want to pull the first result with the correct
        # platform; results lacking a platform element are skipped
        wanted = plat.upper()
        for res in doc.find_class('result'):
            platform_div = res.find_class('platform')
            if platform_div and platform_div[0].text_content().strip() == wanted:
                result = res
                break

    # Compare against None explicitly: element objects may evaluate falsy
    # when they merely have no children.
    # NOTE(review): assumes http.get_html returns an lxml.html tree -- confirm.
    if result is None:
        return 'No results found.'

    # get the name, release date, and score from the result
    product_titles = result.find_class('product_title')
    if not product_titles:
        # a result without a title carries nothing worth reporting
        return 'No results found.'
    product_title = product_titles[0]
    name = product_title.text_content()

    # fall back to the search page when the title has no usable link
    anchor = product_title.find('a')
    href = anchor.get('href') if anchor is not None else None
    link = 'http://metacritic.com' + href if href else url

    try:
        release = result.find_class('release_date')[0].\
            find_class('data')[0].text_content()

        # strip extra spaces out of the release date
        release = re.sub(r'\s{2,}', ' ', release)
    except IndexError:
        release = None

    try:
        score = result.find_class('metascore_w')[0].text_content()
    except IndexError:
        score = None

    release_text = 'release: \x02%s\x02' % release if release else 'unreleased'

    return '[{}] {} - \x02{}/100\x02, {} - {}'.format(
        plat.upper(), name, score or 'no score', release_text, link)