CloudBot/disabled_stuff/metacritic.py

# metacritic.com scraper

import re
from urllib2 import HTTPError

from util import hook, http


@hook.command('mc')
@hook.command
def metacritic(inp):
    """mc [all|movie|tv|album|x360|ps3|pc|gba|ds|3ds|wii|vita|wiiu|xone|ps4] <title>
    Gets rating for <title> from metacritic on the specified medium."""
    # NOTE(review): the docstring above is presumably surfaced as the command's
    # help text by the hook framework, so it is kept verbatim -- confirm before
    # rewording it.
    #
    # Flow: parse "[platform] title", fetch the metacritic search page, pick
    # one result element, then format its name, score, release date and link
    # into a single reply string. Every failure path returns a short message
    # instead of raising.

    args = inp.strip()

    game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii',
                      'vita', 'wiiu', 'xone', 'ps4')

    all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')

    try:
        plat, title = args.split(' ', 1)
        if plat not in all_platforms:
            # An unknown first word is treated exactly like a missing one
            # (where the unpacking above raises ValueError): the whole input
            # is the title and the search covers everything.
            raise ValueError
    except ValueError:
        plat = 'all'
        title = args

    # Every game platform shares the single 'game' search category.
    cat = 'game' if plat in game_platforms else plat

    title_safe = http.quote_plus(title)

    url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)

    try:
        doc = http.get_html(url)
    except HTTPError:
        return 'error fetching results'

    # the result element we want to pull data from
    result = None

    if not doc.find_class('query_results'):
        return 'No results found.'

    # If they specified an invalid search term, the input box will be empty.
    # Passing a default keeps a page without that input from raising KeyError;
    # in that case the check is simply skipped.
    search_box = doc.get_element_by_id('search_term', None)
    if search_box is not None and search_box.value == '':
        return 'Invalid search term.'

    if plat not in game_platforms:
        # for [all] results, or non-game platforms, take the first result;
        # a results container with nothing in it means there is no match
        first_results = doc.find_class('result first_result')
        if not first_results:
            return 'No results found.'
        result = first_results[0]

        # find the platform, if it exists
        result_type = result.find_class('result_type')
        if result_type:

            # if the result_type div has a platform div, get that one
            platform_div = result_type[0].find_class('platform')
            if platform_div:
                plat = platform_div[0].text_content().strip()
            else:
                # otherwise, use the result_type text_content
                plat = result_type[0].text_content().strip()

    else:
        # for games, we want to pull the first result with the correct
        # platform
        wanted = plat.upper()
        for res in doc.find_class('result'):
            platform_divs = res.find_class('platform')
            if not platform_divs:
                # a result without a platform label cannot match; skip it
                # rather than letting an IndexError abort the whole lookup
                continue
            if platform_divs[0].text_content().strip() == wanted:
                result = res
                break

    # Compare against None explicitly: the truth value of an lxml element
    # reflects whether it has children, not whether one was found.
    if result is None:
        return 'No results found.'

    # get the name, release date, and score from the result
    product_title = result.find_class('product_title')[0]
    name = product_title.text_content()
    link = 'http://metacritic.com' + product_title.find('a').attrib['href']

    try:
        release = result.find_class('release_date')[0]. \
            find_class('data')[0].text_content()

        # strip extra spaces out of the release date
        release = re.sub(r'\s{2,}', ' ', release)
    except IndexError:
        # no release date element on this result
        release = None

    try:
        score = result.find_class('metascore_w')[0].text_content()
    except IndexError:
        # no metascore element on this result
        score = None

    release_text = 'release: \x02%s\x02' % release if release else 'unreleased'

    return '[{}] {} - \x02{}/100\x02, {} - {}'.format(plat.upper(), name, score or 'no score',
                                                      release_text,
                                                      link)
First :D 2011-11-20 10:23:31 +01:00			`# metacritic.com scraper`

			`import re`
			`from urllib2 import HTTPError`

			`from util import hook, http`


			`@hook.command('mc')`
pep8 2012-02-29 07:09:19 +01:00			`@hook.command`
First :D 2011-11-20 10:23:31 +01:00			`def metacritic(inp):`
uhh 2013-11-20 23:20:54 +01:00			`"""mc [all\|movie\|tv\|album\|x360\|ps3\|pc\|gba\|ds\|3ds\|wii\|vita\|wiiu\|xone\|ps4] <title>`
Fixed formatting. 2013-09-04 12:30:04 +02:00			`Gets rating for <title> from metacritic on the specified medium."""`
First :D 2011-11-20 10:23:31 +01:00
			`args = inp.strip()`

uhh 2013-11-20 23:20:54 +01:00			`game_platforms = ('x360', 'ps3', 'pc', 'gba', 'ds', '3ds', 'wii',`
			`'vita', 'wiiu', 'xone', 'ps4')`
Added support for more game consoles to MetaCritic, and made the output look nicer 2012-05-19 12:09:43 +02:00
First :D 2011-11-20 10:23:31 +01:00			`all_platforms = game_platforms + ('all', 'movie', 'tv', 'album')`

			`try:`
			`plat, title = args.split(' ', 1)`
			`if plat not in all_platforms:`
			`# raise the ValueError so that the except block catches it`
			`# in this case, or in the case of the .split above raising the`
			`# ValueError, we want the same thing to happen`
			`raise ValueError`
			`except ValueError:`
			`plat = 'all'`
			`title = args`

			`cat = 'game' if plat in game_platforms else plat`

			`title_safe = http.quote_plus(title)`

more .format. 2013-09-05 04:11:18 +02:00			`url = 'http://www.metacritic.com/search/{}/{}/results'.format(cat, title_safe)`
First :D 2011-11-20 10:23:31 +01:00
			`try:`
			`doc = http.get_html(url)`
			`except HTTPError:`
			`return 'error fetching results'`

			`# get the proper result element we want to pull data from`
			`result = None`

			`if not doc.find_class('query_results'):`
Added support for more game consoles to MetaCritic, and made the output look nicer 2012-05-19 12:09:43 +02:00			`return 'No results found.'`
First :D 2011-11-20 10:23:31 +01:00
			`# if they specified an invalid search term, the input box will be empty`
			`if doc.get_element_by_id('search_term').value == '':`
Added support for more game consoles to MetaCritic, and made the output look nicer 2012-05-19 12:09:43 +02:00			`return 'Invalid search term.'`
First :D 2011-11-20 10:23:31 +01:00
			`if plat not in game_platforms:`
			`# for [all] results, or non-game platforms, get the first result`
			`result = doc.find_class('result first_result')[0]`

			`# find the platform, if it exists`
			`result_type = result.find_class('result_type')`
			`if result_type:`

			`# if the result_type div has a platform div, get that one`
			`platform_div = result_type[0].find_class('platform')`
			`if platform_div:`
			`plat = platform_div[0].text_content().strip()`
			`else:`
			`# otherwise, use the result_type text_content`
			`plat = result_type[0].text_content().strip()`

			`else:`
			`# for games, we want to pull the first result with the correct`
			`# platform`
			`results = doc.find_class('result')`
			`for res in results:`
			`result_plat = res.find_class('platform')[0].text_content().strip()`
			`if result_plat == plat.upper():`
			`result = res`
			`break`

			`if not result:`
Added support for more game consoles to MetaCritic, and made the output look nicer 2012-05-19 12:09:43 +02:00			`return 'No results found.'`
First :D 2011-11-20 10:23:31 +01:00
			`# get the name, release date, and score from the result`
			`product_title = result.find_class('product_title')[0]`
			`name = product_title.text_content()`
			`link = 'http://metacritic.com' + product_title.find('a').attrib['href']`

			`try:`
Fixed formatting. 2013-09-04 12:30:04 +02:00			`release = result.find_class('release_date')[0]. \`
First :D 2011-11-20 10:23:31 +01:00			`find_class('data')[0].text_content()`

			`# strip extra spaces out of the release date`
			`release = re.sub(r'\s{2,}', ' ', release)`
			`except IndexError:`
			`release = None`

			`try:`
uhh 2013-11-20 23:20:54 +01:00			`score = result.find_class('metascore_w')[0].text_content()`
First :D 2011-11-20 10:23:31 +01:00			`except IndexError:`
			`score = None`

more .format. 2013-09-05 04:11:18 +02:00			`return '[{}] {} - \x02{}/100\x02, {} - {}'.format(plat.upper(), name, score or 'no score',`
			`'release: \x02%s\x02' % release if release else 'unreleased',`
			`link)`