This repository has been archived on 2023-04-13. You can view files and clone it, but cannot push or open issues or pull requests.
CloudBot/disabled_stuff/repaste.py
2013-11-29 19:47:40 -08:00

180 lines
4.4 KiB
Python

from util import hook, http
import urllib
import random
import urllib2
import htmlentitydefs
import re
re_htmlent = re.compile("&(" + "|".join(htmlentitydefs.name2codepoint.keys()) + ");")
re_numeric = re.compile(r'&#(x?)([a-fA-F0-9]+);')
def db_init(db):
db.execute("create table if not exists repaste(chan, manual, primary key(chan))")
db.commit()
def decode_html(text):
text = re.sub(re_htmlent,
lambda m: unichr(htmlentitydefs.name2codepoint[m.group(1)]),
text)
text = re.sub(re_numeric,
lambda m: unichr(int(m.group(2), 16 if m.group(1) else 10)),
text)
return text
def scrape_mibpaste(url):
if not url.startswith("http"):
url = "http://" + url
pagesource = http.get(url)
rawpaste = re.search(r'(?s)(?<=<body>\n).+(?=<hr>)', pagesource).group(0)
filterbr = rawpaste.replace("<br />", "")
unescaped = decode_html(filterbr)
stripped = unescaped.strip()
return stripped
def scrape_pastebin(url):
id = re.search(r'(?:www\.)?pastebin.com/([a-zA-Z0-9]+)$', url).group(1)
rawurl = "http://pastebin.com/raw.php?i=" + id
text = http.get(rawurl)
return text
autorepastes = {}
#@hook.regex('(pastebin\.com)(/[^ ]+)')
@hook.regex('(mibpaste\.com)(/[^ ]+)')
def autorepaste(inp, input=None, notice=None, db=None, chan=None, nick=None):
db_init(db)
manual = db.execute("select manual from repaste where chan=?", (chan, )).fetchone()
if manual and len(manual) and manual[0]:
return
url = inp.group(1) + inp.group(2)
urllib.unquote(url)
if url in autorepastes:
out = autorepastes[url]
notice("In the future, please use a less awful pastebin (e.g. pastebin.com)")
else:
out = repaste("http://" + url, input, db, False)
autorepastes[url] = out
notice("In the future, please use a less awful pastebin (e.g. pastebin.com) instead of %s." % inp.group(1))
input.say("%s (repasted for %s)" % (out, nick))
scrapers = {
r'mibpaste\.com': scrape_mibpaste,
r'pastebin\.com': scrape_pastebin
}
def scrape(url):
for pat, scraper in scrapers.iteritems():
print "matching " + repr(pat) + " " + url
if re.search(pat, url):
break
else:
return None
return scraper(url)
def paste_sprunge(text, syntax=None, user=None):
data = urllib.urlencode({"sprunge": text})
url = urllib2.urlopen("http://sprunge.us/", data).read().strip()
if syntax:
url += "?" + syntax
return url
def paste_ubuntu(text, user=None, syntax='text'):
data = urllib.urlencode({"poster": user,
"syntax": syntax,
"content": text})
return urllib2.urlopen("http://paste.ubuntu.com/", data).url
def paste_gist(text, user=None, syntax=None, description=None):
data = {
'file_contents[gistfile1]': text,
'action_button': "private"
}
if description:
data['description'] = description
if syntax:
data['file_ext[gistfile1]'] = "." + syntax
req = urllib2.urlopen('https://gist.github.com/gists', urllib.urlencode(data).encode('utf8'))
return req.url
def paste_strictfp(text, user=None, syntax="plain"):
data = urllib.urlencode(dict(
language=syntax,
paste=text,
private="private",
submit="Paste"))
req = urllib2.urlopen("http://paste.strictfp.com/", data)
return req.url
pasters = dict(
ubuntu=paste_ubuntu,
sprunge=paste_sprunge,
gist=paste_gist,
strictfp=paste_strictfp
)
@hook.command
def repaste(inp, input=None, db=None, isManual=True):
".repaste mode|list|[provider] [syntax] <pastebinurl> -- Reuploads mibpaste to [provider]."
parts = inp.split()
db_init(db)
if parts[0] == 'list':
return " ".join(pasters.keys())
paster = paste_gist
args = {}
if not parts[0].startswith("http"):
p = parts[0].lower()
if p in pasters:
paster = pasters[p]
parts = parts[1:]
if not parts[0].startswith("http"):
p = parts[0].lower()
parts = parts[1:]
args["syntax"] = p
if len(parts) > 1:
return "PEBKAC"
args["user"] = input.user
url = parts[0]
scraped = scrape(url)
if not scraped:
return "No scraper for given url"
args["text"] = scraped
pasted = paster(**args)
return pasted