Merge branch 'feature/mygengo' into develop
Conflicts: README.md
This commit is contained in:
commit
60adcd5c93
3 changed files with 121 additions and 197 deletions
12
README.md
12
README.md
|
@ -3,7 +3,7 @@
|
||||||
### About
|
### About
|
||||||
CloudBot is very heavily based on [Skybot](http://git.io/skybot) by rmmh.
|
CloudBot is very heavily based on [Skybot](http://git.io/skybot) by rmmh.
|
||||||
|
|
||||||
CloudBot is meant to be a simple, extendable python IRC bot, with minimal magic. To write your own plugins, visit the [Plugin Wiki Page](http://git.io/cloudbotplugins).
|
CloudBot is meant to be a simple, extendable Python IRC bot, with minimal magic. To write your own plugins, visit the [Plugin Wiki Page](http://git.io/cloudbotplugins).
|
||||||
|
|
||||||
More at the [wiki](http://git.io/cloudbotwiki).
|
More at the [wiki](http://git.io/cloudbotwiki).
|
||||||
|
|
||||||
|
@ -12,12 +12,16 @@ Get Cloudbot at [git.io/getcloudbot](http://git.io/getcloudbot "CloudBot").
|
||||||
Unzip, and follow the install directions.
|
Unzip, and follow the install directions.
|
||||||
|
|
||||||
### Install
|
### Install
|
||||||
Before you can run the bot, you need to install a few Python modules. These are `lXML`, and `BeautifulSoup`. These can be installed with PIP (The python packager):
|
Before you can run the bot, you need to install a few Python modules: `lxml`, `BeautifulSoup`, `mygengo`, and `httplib2`. They can be installed with pip (the Python package installer):
|
||||||
|
|
||||||
`sudo pip install lxml`
|
`sudo pip install lxml`
|
||||||
|
|
||||||
`sudo pip install beautifulsoup`
|
`sudo pip install beautifulsoup`
|
||||||
|
|
||||||
|
`sudo pip install mygengo`
|
||||||
|
|
||||||
|
`sudo pip install httplib2`
|
||||||
|
|
||||||
On Debian based systems, you can get pip with
|
On Debian based systems, you can get pip with
|
||||||
|
|
||||||
`sudo apt-get install pip`
|
`sudo apt-get install pip`
|
||||||
|
@ -26,8 +30,6 @@ For .spell to work, we also need a library called `Enchant`. On Debian based sy
|
||||||
|
|
||||||
`sudo apt-get install python-enchant`
|
`sudo apt-get install python-enchant`
|
||||||
|
|
||||||
Otherwise, use Google to find source packages.
|
|
||||||
|
|
||||||
In addition, for .whois to work optimally, you must have `whois` installed. Again, on Debian based systems, install it with
|
In addition, for .whois to work optimally, you must have `whois` installed. Again, on Debian based systems, install it with
|
||||||
|
|
||||||
`sudo apt-get install whois`
|
`sudo apt-get install whois`
|
||||||
|
@ -75,4 +77,4 @@ CloudBot is licensed under the GPL v3 license. The terms are as follows.
|
||||||
GNU General Public License for more details.
|
GNU General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
You should have received a copy of the GNU General Public License
|
||||||
along with CloudBot. If not, see <http://www.gnu.org/licenses/>.
|
along with CloudBot. If not, see <http://www.gnu.org/licenses/>.
|
|
@ -1,192 +0,0 @@
|
||||||
import htmlentitydefs
|
|
||||||
import re
|
|
||||||
|
|
||||||
from util import hook, http
|
|
||||||
|
|
||||||
########### from http://effbot.org/zone/re-sub.htm#unescape-html #############
|
|
||||||
|
|
||||||
|
|
||||||
def unescape(text):
    """Replace HTML/XML character references and named entities in *text*.

    Numeric references (``&#65;``, ``&#x41;``) and named entities found in
    ``htmlentitydefs.name2codepoint`` (``&amp;``) are replaced by the
    character they denote. Any reference that cannot be resolved is left in
    the output exactly as it appeared in the input.

    Adapted from http://effbot.org/zone/re-sub.htm#unescape-html
    """
    def fixup(m):
        ref = m.group(0)
        if ref[:2] == "&#":
            # Numeric character reference, hexadecimal or decimal.
            try:
                if ref[:3] == "&#x":
                    return unichr(int(ref[3:-1], 16))
                return unichr(int(ref[2:-1]))
            except (ValueError, OverflowError):
                # Not a valid number, or a code point unichr() cannot
                # represent (very large values raise OverflowError rather
                # than ValueError): leave the reference as is.
                return ref
        # Named entity.
        try:
            return unichr(htmlentitydefs.name2codepoint[ref[1:-1]])
        except KeyError:
            return ref  # unknown entity name: leave as is

    return re.sub(r"&#?\w+;", fixup, text)
|
|
||||||
|
|
||||||
##############################################################################
|
|
||||||
|
|
||||||
|
|
||||||
def goog_trans(text, slang, tlang):
    """Translate *text* from *slang* to *tlang* via the Google AJAX endpoint.

    An empty *slang* is sent as-is in the language pair; in that case the
    result is prefixed with the detected source language in parentheses.

    Raises IOError when the reported response status is outside 200-299.
    """
    endpoint = 'http://ajax.googleapis.com/ajax/services/language/translate?v=1.0&key=ABQIAAAAGjLiqTxkFw7F24ITXc4bNRS04yDz5pgaUTdxja2Sk3UoWlae7xTXom3fBzER6Upo8jfzcTtvz-8ebQ'
    reply = http.get_json(endpoint, q=text, langpair=slang + '|' + tlang)

    status = reply['responseStatus']
    if status < 200 or status >= 300:
        raise IOError('error with the translation server: %d: %s' % (
            status, reply['responseDetails']))

    data = reply['responseData']
    if slang:
        return unescape(data['translatedText'])
    # No source language given: include the one the server detected.
    return unescape('(%(detectedSourceLanguage)s) %(translatedText)s' % data)
|
|
||||||
|
|
||||||
|
|
||||||
def match_language(fragment):
    """Resolve a user-supplied language code or name to a language code.

    Args:
        fragment: a language code/alias (e.g. "jp") or part of an English
            language name (e.g. "japan"); matching is case-insensitive.

    Returns:
        The first code of the first matching ``lang_pairs`` entry, or None
        when nothing matches or *fragment* is empty.
    """
    fragment = fragment.strip().lower()
    if not fragment:
        # "" is a substring of every string, so without this guard an empty
        # fragment would "match" the first entry of lang_pairs.
        return None

    # Pass 1: exact match against any of the space-separated codes/aliases.
    for short, _ in lang_pairs:
        if fragment in short.lower().split():
            return short.split()[0]

    # Pass 2: substring match against the full English name.
    for short, full in lang_pairs:
        if fragment in full.lower():
            return short.split()[0]

    return None
|
|
||||||
|
|
||||||
|
|
||||||
@hook.command
def translate(inp):
    '.translate [source language [target language]] <sentence> -- translates' \
    ' <sentence> from source language (default autodetect) to target' \
    ' language (default English) using Google Translate'
    # The command is disabled: this return always fires, so every statement
    # below it is unreachable.
    return "Due to Google deprecating the translation API, this command is no longer available :("

    # --- unreachable from here on ---
    # Up to two leading words may name languages; the rest is the sentence.
    args = inp.split(' ', 2)

    try:
        if len(args) >= 2:
            sl = match_language(args[0])
            if not sl:
                # First word is not a language: translate the whole input
                # with an empty source language, into English.
                return goog_trans(inp, '', 'en')
            if len(args) >= 3:
                tl = match_language(args[1])
                if not tl:
                    if sl == 'en':
                        return 'unable to determine desired target language'
                    # Second word is part of the sentence; target is English.
                    return goog_trans(args[1] + ' ' + args[2], sl, 'en')
                return goog_trans(args[2], sl, tl)
        return goog_trans(inp, '', 'en')
    except IOError, e:
        # NOTE(review): this returns the exception object itself, not a string.
        return e
|
|
||||||
|
|
||||||
|
|
||||||
# Language codes that babel_gen translates the text through, in this order.
languages = 'ja fr de ko ru zh'.split()
# Adjacent pairs of the codes above; not referenced by any code visible here.
language_pairs = zip(languages[:-1], languages[1:])
|
|
||||||
|
|
||||||
|
|
||||||
def babel_gen(inp):
    """Round-trip *inp* through each entry of ``languages`` in turn.

    For every language the current text is translated from English into that
    language and back again; the back-translation feeds the next hop.

    Yields (language, translated, back_translated) once per language.
    """
    current = inp
    for code in languages:
        current = current.encode('utf8')
        foreign = goog_trans(current, 'en', code).encode('utf8')
        current = goog_trans(foreign, code, 'en')
        yield code, foreign, current
|
|
||||||
|
|
||||||
|
|
||||||
@hook.command
def babel(inp):
    ".babel <sentence> -- translates <sentence> through multiple languages"
    # The command is disabled: this return always fires, so the code below
    # it is unreachable.
    return "Due to Google deprecating the translation API, this command is no longer available :("

    # --- unreachable from here on ---
    try:
        # Third item of the last tuple from babel_gen: the final
        # back-translation into English.
        return list(babel_gen(inp))[-1][2]
    except IOError, e:
        # NOTE(review): this returns the exception object itself, not a string.
        return e
|
|
||||||
|
|
||||||
|
|
||||||
@hook.command
def babelext(inp):
    ".babelext <sentence> -- like .babel, but with more detailed output"

    # The command is disabled: this return always fires, so the code below
    # it is unreachable.
    return "Due to Google deprecating the translation API, this command is no longer available :("

    # --- unreachable from here on ---
    try:
        babels = list(babel_gen(inp))
    except IOError, e:
        # NOTE(review): this returns the exception object itself, not a string.
        return e

    # One 'lang:"text", ' fragment per hop, using the back-translated text.
    # NOTE(review): babel_gen yields goog_trans's return value unencoded as
    # the third item, yet it is .decode()d here -- confirm the intended type.
    out = u''
    for lang, trans, text in babels:
        out += '%s:"%s", ' % (lang, text.decode('utf8'))

    # The last hop's back-translation is repeated as the final English result.
    out += 'en:"' + babels[-1][2].decode('utf8') + '"'

    # Keep only the first and last 150 characters of long output.
    if len(out) > 300:
        out = out[:150] + ' ... ' + out[-150:]

    return out
|
|
||||||
|
|
||||||
|
|
||||||
# Lookup table used by match_language. Each entry is
# (space-separated codes/aliases, English name); the first code of an entry
# is the one match_language returns. match_language scans the list in order
# and returns on the first hit, so the order of entries matters.
lang_pairs = [
    # These three entries also appear again further down; because the scan
    # stops at the first hit, the later copies can never be the match.
    ("no", "Norwegian"),
    ("it", "Italian"),
    ("ht", "Haitian Creole"),
    ("af", "Afrikaans"),
    ("sq", "Albanian"),
    ("ar", "Arabic"),
    ("hy", "Armenian"),
    ("az", "Azerbaijani"),
    ("eu", "Basque"),
    ("be", "Belarusian"),
    ("bg", "Bulgarian"),
    ("ca", "Catalan"),
    ("zh-CN zh", "Chinese"),
    ("hr", "Croatian"),
    ("cs", "Czech"),
    ("da", "Danish"),
    ("nl", "Dutch"),
    ("en", "English"),
    ("et", "Estonian"),
    ("tl", "Filipino"),
    ("fi", "Finnish"),
    ("fr", "French"),
    ("gl", "Galician"),
    ("ka", "Georgian"),
    ("de", "German"),
    ("el", "Greek"),
    ("ht", "Haitian Creole"),  # duplicate of the entry near the top
    ("iw", "Hebrew"),
    ("hi", "Hindi"),
    ("hu", "Hungarian"),
    ("is", "Icelandic"),
    ("id", "Indonesian"),
    ("ga", "Irish"),
    ("it", "Italian"),  # duplicate of the entry near the top
    ("ja jp jpn", "Japanese"),
    ("ko", "Korean"),
    ("lv", "Latvian"),
    ("lt", "Lithuanian"),
    ("mk", "Macedonian"),
    ("ms", "Malay"),
    ("mt", "Maltese"),
    ("no", "Norwegian"),  # duplicate of the entry near the top
    ("fa", "Persian"),
    ("pl", "Polish"),
    ("pt", "Portuguese"),
    ("ro", "Romanian"),
    ("ru", "Russian"),
    ("sr", "Serbian"),
    ("sk", "Slovak"),
    ("sl", "Slovenian"),
    ("es", "Spanish"),
    ("sw", "Swahili"),
    ("sv", "Swedish"),
    ("th", "Thai"),
    ("tr", "Turkish"),
    ("uk", "Ukrainian"),
    ("ur", "Urdu"),
    ("vi", "Vietnamese"),
    ("cy", "Welsh"),
    ("yi", "Yiddish")
]
|
|
114
plugins/translate.py
Executable file
114
plugins/translate.py
Executable file
|
@ -0,0 +1,114 @@
|
||||||
|
# MyGengo translation plugin by lukeroge and neersighted
|
||||||
|
from util import hook, http
|
||||||
|
import re, htmlentitydefs
|
||||||
|
import mygengo
|
||||||
|
|
||||||
|
# Module-level MyGengo API client shared by gengo_translate.
# NOTE(review): the API key pair below is hard-coded in source control.
# Treat it as leaked: rotate it and load the replacement from the bot's
# configuration or the environment instead of committing it.
gengo = mygengo.MyGengo(
    public_key = 'PlwtF1CZ2tu27IdX_SXNxTFmfN0j|_-pJ^Rf({O-oLl--r^QM4FygRdt^jusSSDE',
    private_key = 'wlXpL=SU[#JpPu[dQaf$v{S3@rg[=95$$TA(k$sb3_6~B_zDKkTbd4#hXxaorIae',
    # presumably selects the live service rather than the sandbox -- confirm
    # against the mygengo client documentation
    sandbox = False,
)
|
||||||
|
|
||||||
|
def gengo_translate(text, source, target):
    """Translate *text* from *source* to *target* with a MyGengo machine job.

    Returns "(<source> > <target>) <translation>" on success, or a fixed
    error string when the client raises mygengo.MyGengoError.
    """
    job = {
        'type': 'text',
        'slug': 'Translating '+source+' to '+target+' with the myGengo API',
        'body_src': text,
        'lc_src': source,
        'lc_tgt': target,
        'tier': 'machine',
    }
    try:
        reply = gengo.postTranslationJob(job = job)
        translated = reply['response']['job']['body_tgt']
    except mygengo.MyGengoError:
        return "error: could not translate"
    return u"(%s > %s) %s" % (source, target, translated)
|
||||||
|
|
||||||
|
def match_language(fragment):
    """Resolve a user-supplied language code or name to a language code.

    Args:
        fragment: a language code/alias (e.g. "cz") or part of an English
            language name (e.g. "czech"); matching is case-insensitive.

    Returns:
        The first code of the first matching ``lang_pairs`` entry, or None
        when nothing matches or *fragment* is empty.
    """
    fragment = fragment.strip().lower()
    if not fragment:
        # "" is a substring of every string, so without this guard an empty
        # fragment would "match" the first entry of lang_pairs.
        return None

    # Pass 1: exact match against any of the space-separated codes/aliases.
    for short, _ in lang_pairs:
        if fragment in short.lower().split():
            return short.split()[0]

    # Pass 2: substring match against the full English name.
    for short, full in lang_pairs:
        if fragment in full.lower():
            return short.split()[0]

    return None
|
||||||
|
|
||||||
|
@hook.command
def translate(inp):
    ".translate <source language> <target language> <sentence> -- Translates <sentence> from <source language> to <target language> using MyGengo."
    # Split on runs of whitespace so doubled spaces do not produce empty
    # fields; the third field is the untouched remainder (the sentence).
    args = inp.split(None, 2)
    if len(args) < 3:
        # With fewer than two fields the old code raised IndexError on
        # args[1]; with exactly two it submitted an empty sentence.
        return "error: translate needs a source language, a target language and a sentence"

    sl = match_language(args[0])
    tl = match_language(args[1])
    if not (sl and tl):
        return "error: translate could not reliably determine one or both languages"

    txt = unicode(args[2])
    return unicode(gengo_translate(txt, sl, tl))
|
||||||
|
|
||||||
|
# NOTE(review): `languages` and `language_pairs` are not referenced by any
# code visible in this file -- confirm nothing else uses them before removing.
languages = 'ja fr de ko ru zh'.split()
language_pairs = zip(languages[:-1], languages[1:])
# Lookup table used by match_language. Each entry is
# (space-separated codes/aliases, English name); the first code of an entry
# is the one match_language returns. match_language scans the list in order
# and returns on the first hit, so the order of entries matters.
lang_pairs = [
    # These three entries also appear again further down; because the scan
    # stops at the first hit, the later copies can never be the match.
    ("no", "Norwegian"),
    ("it", "Italian"),
    ("ht", "Haitian Creole"),
    ("af", "Afrikaans"),
    ("sq", "Albanian"),
    ("ar", "Arabic"),
    ("hy", "Armenian"),
    ("az", "Azerbaijani"),
    ("eu", "Basque"),
    ("be", "Belarusian"),
    ("bg", "Bulgarian"),
    ("ca", "Catalan"),
    ("zh-CN zh", "Chinese"),
    ("hr", "Croatian"),
    ("cs cz", "Czech"),
    ("da dk", "Danish"),
    ("nl", "Dutch"),
    ("en", "English"),
    ("et", "Estonian"),
    ("tl", "Filipino"),
    ("fi", "Finnish"),
    ("fr", "French"),
    ("gl", "Galician"),
    ("ka", "Georgian"),
    ("de", "German"),
    ("el", "Greek"),
    ("ht", "Haitian Creole"),  # duplicate of the entry near the top
    ("iw", "Hebrew"),
    ("hi", "Hindi"),
    ("hu", "Hungarian"),
    ("is", "Icelandic"),
    ("id", "Indonesian"),
    ("ga", "Irish"),
    ("it", "Italian"),  # duplicate of the entry near the top
    ("ja jp jpn", "Japanese"),
    ("ko", "Korean"),
    ("lv", "Latvian"),
    ("lt", "Lithuanian"),
    ("mk", "Macedonian"),
    ("ms", "Malay"),
    ("mt", "Maltese"),
    ("no", "Norwegian"),  # duplicate of the entry near the top
    ("fa", "Persian"),
    ("pl", "Polish"),
    ("pt", "Portuguese"),
    ("ro", "Romanian"),
    ("ru", "Russian"),
    ("sr", "Serbian"),
    ("sk", "Slovak"),
    ("sl", "Slovenian"),
    ("es", "Spanish"),
    ("sw", "Swahili"),
    ("sv", "Swedish"),
    ("th", "Thai"),
    ("tr", "Turkish"),
    ("uk", "Ukrainian"),
    ("ur", "Urdu"),
    ("vi", "Vietnamese"),
    ("cy", "Welsh"),
    ("yi", "Yiddish")
]
|
||||||
|
|
Reference in a new issue