Merge branch 'feature/mygengo' into develop

Conflicts:
	README.md
This commit is contained in:
neersighted 2012-02-28 12:35:00 -08:00
commit 60adcd5c93
3 changed files with 121 additions and 197 deletions

View file

@ -3,7 +3,7 @@
### About ### About
CloudBot is very heavily based on [Skybot](http://git.io/skybot) by rmmh. CloudBot is very heavily based on [Skybot](http://git.io/skybot) by rmmh.
CloudBot is meant to be a simple, extendable python IRC bot, with minimal magic. To write your own plugins, visit the [Plugin Wiki Page](http://git.io/cloudbotplugins). CloudBot is meant to be a simple, extendable Python IRC bot, with minimal magic. To write your own plugins, visit the [Plugin Wiki Page](http://git.io/cloudbotplugins).
More at the [wiki](http://git.io/cloudbotwiki). More at the [wiki](http://git.io/cloudbotwiki).
@ -12,12 +12,16 @@ Get Cloudbot at [git.io/getcloudbot](http://git.io/getcloudbot "CloudBot").
Unzip, and follow the install directions. Unzip, and follow the install directions.
### Install ### Install
Before you can run the bot, you need to install a few Python modules. These are `lXML`, and `BeautifulSoup`. These can be installed with PIP (The python packager): Before you can run the bot, you need to install a few Python modules. These are `lXML`, `BeautifulSoup`, `MyGengo`, and `HTTPlib2`. These can be installed with PIP (The python packager):
`sudo pip install lxml` `sudo pip install lxml`
`sudo pip install beautifulsoup` `sudo pip install beautifulsoup`
`sudo pip install mygengo`
`sudo pip install httplib2`
On Debian based systems, you can get pip with On Debian based systems, you can get pip with
`sudo apt-get install pip` `sudo apt-get install pip`
@ -26,8 +30,6 @@ For .spell to work, we also need a library called `Enchant`. On Debian based sy
`sudo apt-get install python-enchant` `sudo apt-get install python-enchant`
Otherwise, use Google to find source packages.
In addition, for .whois to work optimally, you must have `whois` installed. Again, on Debian based systems, install it with In addition, for .whois to work optimally, you must have `whois` installed. Again, on Debian based systems, install it with
`sudo apt-get install whois` `sudo apt-get install whois`
@ -75,4 +77,4 @@ CloudBot is licenced under the GPL v3 license. The terms are as follows.
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with CloudBot. If not, see <http://www.gnu.org/licenses/>. along with CloudBot. If not, see <http://www.gnu.org/licenses/>.

View file

@ -1,192 +0,0 @@
import htmlentitydefs
import re
from util import hook, http
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
def unescape(text):
    """Replace HTML character references and named entities in *text*.

    Numeric references (``&#65;``, ``&#x41;``) and named entities
    (``&amp;``) are replaced by the corresponding unicode character.
    References that cannot be resolved are left exactly as they were.

    Adapted from http://effbot.org/zone/re-sub.htm#unescape-html
    """
    def fixup(match):
        entity = match.group(0)
        if entity[:2] == "&#":
            # Numeric character reference, hexadecimal or decimal.
            try:
                if entity[:3] == "&#x":
                    return unichr(int(entity[3:-1], 16))
                return unichr(int(entity[2:-1]))
            except (ValueError, OverflowError):
                # ValueError: not a number, or outside the unicode range.
                # OverflowError: number too large to even be range-checked.
                pass
        else:
            # Named entity; the name sits between the "&" and the ";".
            try:
                return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
            except KeyError:
                pass
        return entity  # leave as is

    return re.sub(r"&#?\w+;", fixup, text)
##############################################################################
def goog_trans(text, slang, tlang):
    """Translate *text* from *slang* to *tlang* with the Google AJAX API.

    When *slang* is empty the result is prefixed with the source language
    code reported by the service, in parentheses.  HTML entities in the
    result are unescaped.

    Raises IOError when the reply's ``responseStatus`` is not in the
    200-299 range.
    """
    # NOTE(review): the API key is embedded in the URL below.
    endpoint = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&key=ABQIAAAAGjLiqTxkFw7F24ITXc4bNRS04yDz5pgaUTdxja2Sk3UoWlae7xTXom3fBzER6Upo8jfzcTtvz-8ebQ'
    reply = http.get_json(endpoint, q=text, langpair=slang + '|' + tlang)

    status = reply['responseStatus']
    if status < 200 or status >= 300:
        raise IOError('error with the translation server: %d: %s' % (
            status, reply['responseDetails']))

    data = reply['responseData']
    if slang:
        return unescape(data['translatedText'])
    return unescape('(%(detectedSourceLanguage)s) %(translatedText)s' % data)
def match_language(fragment):
    """Resolve a user-supplied language *fragment* to a language code.

    An exact, case-insensitive match against any of the space-separated
    short codes in ``lang_pairs`` wins.  Failing that, the first entry
    whose full name contains the fragment is used.

    Returns the first short code of the matching entry, or None when
    nothing matches or the fragment is blank.
    """
    needle = fragment.lower()
    if not needle.strip():
        # A blank string is a substring of every language name, so without
        # this guard it would "match" the first entry of the table.
        return None

    for short, _ in lang_pairs:
        if needle in short.lower().split():
            return short.split()[0]

    for short, full in lang_pairs:
        if needle in full.lower():
            return short.split()[0]

    return None
@hook.command
def translate(inp):
    '.translate [source language [target language]] <sentence> -- translates' \
    ' <sentence> from source language (default autodetect) to target' \
    ' language (default English) using Google Translate'
    return "Due to Google deprecating the translation API, this command is no longer available :("

    # Everything below is unreachable because of the return above; it is the
    # former Google Translate implementation.
    parts = inp.split(' ', 2)
    try:
        if len(parts) < 2:
            return goog_trans(inp, '', 'en')

        source = match_language(parts[0])
        if not source or len(parts) < 3:
            # No recognisable source language, or nothing after the second
            # word: translate the whole input with autodetection.
            return goog_trans(inp, '', 'en')

        target = match_language(parts[1])
        if target:
            return goog_trans(parts[2], source, target)
        if source == 'en':
            return 'unable to determine desired target language'
        # The second word was not a language, so it belongs to the sentence.
        return goog_trans(parts[1] + ' ' + parts[2], source, 'en')
    except IOError as err:
        return err
# Language codes babel_gen() passes the text through, in this order.
languages = ['ja', 'fr', 'de', 'ko', 'ru', 'zh']
# Each code paired with the one that follows it (zip stops at the shorter input).
language_pairs = zip(languages, languages[1:])
def babel_gen(inp):
    """Translate *inp* to each entry of ``languages`` and back to English.

    The English result of one step is the input of the next.  Yields one
    ``(language, translated, back_in_english)`` tuple per language, where
    *translated* is UTF-8 encoded.
    """
    current = inp
    for code in languages:
        encoded = current.encode('utf8')
        foreign = goog_trans(encoded, 'en', code).encode('utf8')
        current = goog_trans(foreign, code, 'en')
        yield code, foreign, current
@hook.command
def babel(inp):
    ".babel <sentence> -- translates <sentence> through multiple languages"
    return "Due to Google deprecating the translation API, this command is no longer available :("

    # Unreachable because of the return above; former implementation.
    try:
        steps = list(babel_gen(inp))
    except IOError as err:
        return err
    # The last step's English text is the final result.
    return steps[-1][2]
@hook.command
def babelext(inp):
    ".babelext <sentence> -- like .babel, but with more detailed output"
    return "Due to Google deprecating the translation API, this command is no longer available :("

    # Unreachable because of the return above; former implementation.
    try:
        steps = list(babel_gen(inp))
    except IOError as err:
        return err

    # One 'code:"text", ' piece per step, then the final English result.
    pieces = ['%s:"%s", ' % (code, back.decode('utf8'))
              for code, _, back in steps]
    pieces.append('en:"' + steps[-1][2].decode('utf8') + '"')
    summary = u''.join(pieces)

    # Keep long output readable: first and last 150 characters only.
    if len(summary) > 300:
        summary = summary[:150] + ' ... ' + summary[-150:]
    return summary
# (short codes, full name) table searched in order by match_language().
# The first field may hold several space-separated aliases; the first alias
# is the code that match_language() returns.
# The three leading entries appear again in the alphabetical part below.
lang_pairs = [
    ("no", "Norwegian"),
    ("it", "Italian"),
    ("ht", "Haitian Creole"),
    ("af", "Afrikaans"),
    ("sq", "Albanian"),
    ("ar", "Arabic"),
    ("hy", "Armenian"),
    ("az", "Azerbaijani"),
    ("eu", "Basque"),
    ("be", "Belarusian"),
    ("bg", "Bulgarian"),
    ("ca", "Catalan"),
    ("zh-CN zh", "Chinese"),
    ("hr", "Croatian"),
    ("cs", "Czech"),
    ("da", "Danish"),
    ("nl", "Dutch"),
    ("en", "English"),
    ("et", "Estonian"),
    ("tl", "Filipino"),
    ("fi", "Finnish"),
    ("fr", "French"),
    ("gl", "Galician"),
    ("ka", "Georgian"),
    ("de", "German"),
    ("el", "Greek"),
    ("ht", "Haitian Creole"),
    ("iw", "Hebrew"),
    ("hi", "Hindi"),
    ("hu", "Hungarian"),
    ("is", "Icelandic"),
    ("id", "Indonesian"),
    ("ga", "Irish"),
    ("it", "Italian"),
    ("ja jp jpn", "Japanese"),
    ("ko", "Korean"),
    ("lv", "Latvian"),
    ("lt", "Lithuanian"),
    ("mk", "Macedonian"),
    ("ms", "Malay"),
    ("mt", "Maltese"),
    ("no", "Norwegian"),
    ("fa", "Persian"),
    ("pl", "Polish"),
    ("pt", "Portuguese"),
    ("ro", "Romanian"),
    ("ru", "Russian"),
    ("sr", "Serbian"),
    ("sk", "Slovak"),
    ("sl", "Slovenian"),
    ("es", "Spanish"),
    ("sw", "Swahili"),
    ("sv", "Swedish"),
    ("th", "Thai"),
    ("tr", "Turkish"),
    ("uk", "Ukrainian"),
    ("ur", "Urdu"),
    ("vi", "Vietnamese"),
    ("cy", "Welsh"),
    ("yi", "Yiddish"),
]

114
plugins/translate.py Executable file
View file

@ -0,0 +1,114 @@
# MyGengo translation plugin by lukeroge and neersighted
from util import hook, http
import re, htmlentitydefs
import mygengo
# Shared myGengo API client used by gengo_translate().
# NOTE(review): both API keys are hard-coded in source control; consider
# loading them from the bot's configuration and rotating these values.
gengo = mygengo.MyGengo(
    public_key='PlwtF1CZ2tu27IdX_SXNxTFmfN0j|_-pJ^Rf({O-oLl--r^QM4FygRdt^jusSSDE',
    private_key='wlXpL=SU[#JpPu[dQaf$v{S3@rg[=95$$TA(k$sb3_6~B_zDKkTbd4#hXxaorIae',
    sandbox=False,
)
def gengo_translate(text, source, target):
    """Machine-translate *text* from *source* to *target* through myGengo.

    *source* and *target* are language codes; translate() in this file
    passes the codes returned by match_language().

    Returns ``u"(<source> > <target>) <translation>"`` on success, or the
    string ``"error: could not translate"`` when the API call fails or the
    reply does not contain a translated body.
    """
    job = {
        'type': 'text',
        'slug': 'Translating ' + source + ' to ' + target + ' with the myGengo API',
        'body_src': text,
        'lc_src': source,
        'lc_tgt': target,
        'tier': 'machine',
    }
    try:
        translation = gengo.postTranslationJob(job=job)
        translated = translation['response']['job']['body_tgt']
    except mygengo.MyGengoError:
        return "error: could not translate"
    except (KeyError, TypeError):
        # The API answered, but without the response -> job -> body_tgt
        # structure read above; report it instead of crashing the command.
        return "error: could not translate"
    return u"(%s > %s) %s" % (source, target, translated)
def match_language(fragment):
    """Resolve a user-supplied language *fragment* to a language code.

    An exact, case-insensitive match against any of the space-separated
    short codes in ``lang_pairs`` wins.  Failing that, the first entry
    whose full name contains the fragment is used.

    Returns the first short code of the matching entry, or None when
    nothing matches or the fragment is blank.
    """
    wanted = fragment.lower()
    if not wanted.strip():
        # A blank string is a substring of every language name, so without
        # this guard it would "match" the first entry of the table.
        return None

    # Pass 1: exact match on a short code or alias.
    for short, _ in lang_pairs:
        if wanted in short.lower().split():
            return short.split()[0]

    # Pass 2: substring match on the full language name.
    for short, full in lang_pairs:
        if wanted in full.lower():
            return short.split()[0]

    return None
@hook.command
def translate(inp):
    ".translate <source language> <target language> <sentence> -- Translates <sentence> from <source language> to <target language> using MyGengo."
    # Split on runs of whitespace, at most twice: two language words plus
    # the untouched remainder.  Repeated or leading spaces therefore no
    # longer produce empty language fragments.
    args = inp.split(None, 2)
    if len(args) < 3:
        # Previously this raised IndexError (fewer than two words) or sent
        # an empty sentence to the API (exactly two words).
        return "error: translate needs a source language, a target language and a sentence"

    sl = match_language(args[0])
    tl = match_language(args[1])
    if not (sl and tl):
        return "error: translate could not reliably determine one or both languages"

    txt = unicode(args[2])
    return unicode(gengo_translate(txt, sl, tl))
# Fixed list of language codes; no function visible in this file reads it.
languages = ['ja', 'fr', 'de', 'ko', 'ru', 'zh']
# Each code paired with the one that follows it (zip stops at the shorter input).
language_pairs = zip(languages, languages[1:])
# (short codes, full name) table searched in order by match_language().
# The first field may hold several space-separated aliases; the first alias
# is the code that match_language() returns.
# The three leading entries appear again in the alphabetical part below.
lang_pairs = [
    ("no", "Norwegian"),
    ("it", "Italian"),
    ("ht", "Haitian Creole"),
    ("af", "Afrikaans"),
    ("sq", "Albanian"),
    ("ar", "Arabic"),
    ("hy", "Armenian"),
    ("az", "Azerbaijani"),
    ("eu", "Basque"),
    ("be", "Belarusian"),
    ("bg", "Bulgarian"),
    ("ca", "Catalan"),
    ("zh-CN zh", "Chinese"),
    ("hr", "Croatian"),
    ("cs cz", "Czech"),
    ("da dk", "Danish"),
    ("nl", "Dutch"),
    ("en", "English"),
    ("et", "Estonian"),
    ("tl", "Filipino"),
    ("fi", "Finnish"),
    ("fr", "French"),
    ("gl", "Galician"),
    ("ka", "Georgian"),
    ("de", "German"),
    ("el", "Greek"),
    ("ht", "Haitian Creole"),
    ("iw", "Hebrew"),
    ("hi", "Hindi"),
    ("hu", "Hungarian"),
    ("is", "Icelandic"),
    ("id", "Indonesian"),
    ("ga", "Irish"),
    ("it", "Italian"),
    ("ja jp jpn", "Japanese"),
    ("ko", "Korean"),
    ("lv", "Latvian"),
    ("lt", "Lithuanian"),
    ("mk", "Macedonian"),
    ("ms", "Malay"),
    ("mt", "Maltese"),
    ("no", "Norwegian"),
    ("fa", "Persian"),
    ("pl", "Polish"),
    ("pt", "Portuguese"),
    ("ro", "Romanian"),
    ("ru", "Russian"),
    ("sr", "Serbian"),
    ("sk", "Slovak"),
    ("sl", "Slovenian"),
    ("es", "Spanish"),
    ("sw", "Swahili"),
    ("sv", "Swedish"),
    ("th", "Thai"),
    ("tr", "Turkish"),
    ("uk", "Ukrainian"),
    ("ur", "Urdu"),
    ("vi", "Vietnamese"),
    ("cy", "Welsh"),
    ("yi", "Yiddish"),
]