From 6de14f33e360d7375bc0460108c380591347cdb5 Mon Sep 17 00:00:00 2001
From: Luke Rogers
Date: Wed, 4 Sep 2013 19:42:08 +1200
Subject: [PATCH] Google translate is back, but you need a paid API key

---
Review note: the line structure of this patch was rebuilt from a
whitespace-collapsed copy. The indentation of the config.default context
lines (two spaces per level) is an assumption; if the hunk is rejected,
retry with `git apply --ignore-whitespace`. The post-image blob id on the
google_translate.py index line still describes the original content.

 config.default              |   3 +-
 plugins/google_translate.py | 201 ++++++++++++++++++++++++++++++++++++
 2 files changed, 203 insertions(+), 1 deletion(-)
 create mode 100644 plugins/google_translate.py

diff --git a/config.default b/config.default
index c9cd333..43fb526 100644
--- a/config.default
+++ b/config.default
@@ -28,7 +28,8 @@
     "twitter_consumer_secret": "",
     "twitter_access_token": "",
     "twitter_access_secret": "",
-    "wunderground": ""
+    "wunderground": "",
+    "googletranslate": ""
   },
   "permissions": {
     "admins": {
diff --git a/plugins/google_translate.py b/plugins/google_translate.py
new file mode 100644
index 0000000..f481781
--- /dev/null
+++ b/plugins/google_translate.py
@@ -0,0 +1,201 @@
+'''
+A Google API key is required and retrieved from the bot config file.
+Since December 1, 2011, the Google Translate API is a paid service only.
+'''
+
+import htmlentitydefs
+import re
+
+from util import hook, http
+
+# Longest input, in characters, that goog_trans() will accept.
+max_length = 100
+
+
+########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
+
+
+def unescape(text):
+    '''Return text with HTML character references and entities decoded.
+
+    Numeric references (&#NNN; and &#xHH;) and named entities known to
+    htmlentitydefs are replaced by the corresponding character; anything
+    that cannot be decoded is left unchanged.
+    '''
+    def fixup(m):
+        text = m.group(0)
+        if text[:2] == "&#":
+            # character reference
+            try:
+                if text[:3] == "&#x":
+                    return unichr(int(text[3:-1], 16))
+                else:
+                    return unichr(int(text[2:-1]))
+            except ValueError:
+                pass
+        else:
+            # named entity
+            try:
+                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+            except KeyError:
+                pass
+        return text  # leave as is
+
+    # raw string so the backslash in \w reaches the regex engine as written
+    return re.sub(r"&#?\w+;", fixup, text)
+
+##############################################################################
+
+
+def goog_trans(api_key, text, slang, tlang):
+    '''Translate text through the Google Translate v2 API.
+
+    api_key -- API key passed to the service as "key"
+    text    -- text to translate (at most max_length characters)
+    slang   -- source language code; when empty, no source is sent and the
+               language reported by the service is prefixed to the result
+    tlang   -- target language code
+
+    Returns the unescaped translation, or an error message if text is
+    too long.
+    '''
+    url = 'https://www.googleapis.com/language/translate/v2'
+
+    if len(text) > max_length:
+        return ("This command only supports input of at most %d characters."
+                % max_length)
+
+    params = dict(key=api_key, q=text, target=tlang, format="text")
+    if slang:
+        # only name a source language when the caller supplied one
+        params['source'] = slang
+    parsed = http.get_json(url, **params)
+
+    translation = parsed['data']['translations'][0]
+    if not slang:
+        return unescape('(%(detectedSourceLanguage)s) %(translatedText)s' %
+                        translation)
+    return unescape('%(translatedText)s' % translation)
+
+
+def match_language(fragment):
+    '''Return the primary code of the language matching fragment, or None.
+
+    An exact, case-insensitive match against any code in lang_pairs wins;
+    otherwise the first language whose full name contains fragment is used.
+    '''
+    fragment = fragment.lower()
+    if not fragment:
+        # '' is a substring of every name and would select the first entry
+        return None
+
+    for short, _ in lang_pairs:
+        if fragment in short.lower().split():
+            return short.split()[0]
+
+    for short, full in lang_pairs:
+        if fragment in full.lower():
+            return short.split()[0]
+
+    return None
+
+
+# The docstring below is left exactly as written (NOTE(review): it looks like
+# the user-facing help text for the command -- confirm before rewording).
+@hook.command
+def translate(inp, bot=None, say=None):
+    'translate [source language [target language]] -- translates' \
+    ' from source language (default autodetect) to target' \
+    ' language (default English) using Google Translate'
+
+    api_key = bot.config.get("api_keys", {}).get("googletranslate", None)
+    if not api_key:
+        return "This command requires a paid API key."
+
+    # at most three parts: [source language] [target language] text
+    args = inp.split(u' ', 2)
+
+    try:
+        if len(args) >= 2:
+            sl = match_language(args[0])
+            if not sl:
+                # first word is not a language: translate the whole input
+                return goog_trans(api_key, inp, '', 'en')
+            if len(args) == 2:
+                return goog_trans(api_key, args[1], sl, 'en')
+
+            # three parts: the second word may name the target language
+            tl = match_language(args[1])
+            if not tl:
+                if sl == 'en':
+                    return 'unable to determine desired target language'
+                return goog_trans(api_key, args[1] + ' ' + args[2], sl, 'en')
+            return goog_trans(api_key, args[2], sl, tl)
+        return goog_trans(api_key, inp, '', 'en')
+    except IOError as e:
+        # NOTE(review): the exception object itself is returned, as before;
+        # presumably the bot stringifies command results -- confirm
+        return e
+
+
+# Each entry is (space-separated language codes, full name); match_language()
+# returns the first code of the first matching entry, so order matters.
+lang_pairs = [
+    ("no", "Norwegian"),
+    ("it", "Italian"),
+    ("ht", "Haitian Creole"),
+    ("af", "Afrikaans"),
+    ("sq", "Albanian"),
+    ("ar", "Arabic"),
+    ("hy", "Armenian"),
+    ("az", "Azerbaijani"),
+    ("eu", "Basque"),
+    ("be", "Belarusian"),
+    ("bg", "Bulgarian"),
+    ("ca", "Catalan"),
+    ("zh-CN zh", "Chinese"),
+    ("hr", "Croatian"),
+    ("cs", "Czech"),
+    ("da", "Danish"),
+    ("nl", "Dutch"),
+    ("en", "English"),
+    ("et", "Estonian"),
+    ("tl", "Filipino"),
+    ("fi", "Finnish"),
+    ("fr", "French"),
+    ("gl", "Galician"),
+    ("ka", "Georgian"),
+    ("de", "German"),
+    ("el", "Greek"),
+    ("iw", "Hebrew"),
+    ("hi", "Hindi"),
+    ("hu", "Hungarian"),
+    ("is", "Icelandic"),
+    ("id", "Indonesian"),
+    ("ga", "Irish"),
+    ("ja jp jpn", "Japanese"),
+    ("ko", "Korean"),
+    ("lv", "Latvian"),
+    ("lt", "Lithuanian"),
+    ("mk", "Macedonian"),
+    ("ms", "Malay"),
+    ("mt", "Maltese"),
+    ("fa", "Persian"),
+    ("pl", "Polish"),
+    ("pt", "Portuguese"),
+    ("ro", "Romanian"),
+    ("ru", "Russian"),
+    ("sr", "Serbian"),
+    ("sk", "Slovak"),
+    ("sl", "Slovenian"),
+    ("es", "Spanish"),
+    ("sw", "Swahili"),
+    ("sv", "Swedish"),
+    ("th", "Thai"),
+    ("tr", "Turkish"),
+    ("uk", "Ukrainian"),
+    ("ur", "Urdu"),
+    ("vi", "Vietnamese"),
+    ("cy", "Welsh"),
+    ("yi", "Yiddish")
+]