This repository has been archived on 2023-04-13. You can view files and clone it, but cannot push or open issues or pull requests.
CloudBot/lib/pygeoip/__init__.py

669 lines
23 KiB
Python
Raw Normal View History

2013-07-14 12:12:31 +02:00
# -*- coding: utf-8 -*-
"""
2013-07-14 12:12:31 +02:00
Pure Python GeoIP API
2013-07-14 12:12:31 +02:00
The API is based on MaxMind's C-based Python API, but the code itself is
ported from the Pure PHP GeoIP API by Jim Winstead and Hans Lellelid.
2013-07-14 12:12:31 +02:00
@author: Jennifer Ennis <zaylea@gmail.com>
2013-07-14 12:12:31 +02:00
@license: Copyright(C) 2004 MaxMind LLC
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/lgpl.txt>.
"""
import os
import math
import socket
import mmap
import codecs
2013-07-14 12:12:31 +02:00
from threading import Lock
try:
from StringIO import StringIO
except ImportError:
from io import StringIO, BytesIO
2013-07-14 12:12:31 +02:00
from pygeoip import util, const
from pygeoip.const import PY2, PY3
from pygeoip.timezone import time_zone_by_country_and_region
2013-07-14 12:12:31 +02:00
# Module-level aliases for the cache-mode flag values defined in
# pygeoip.const.
STANDARD, MMAP_CACHE, MEMORY_CACHE = (
    const.STANDARD,
    const.MMAP_CACHE,
    const.MEMORY_CACHE,
)

# Codec name passed to .decode() / codecs.open() when reading the database.
ENCODING = const.ENCODING
class GeoIPError(Exception):
    """
    Error raised by this module for corrupt databases, unsupported
    database types and failed address lookups.
    """
2013-07-14 12:12:31 +02:00
class GeoIPMetaclass(type):
    """
    Metaclass that hands out one shared instance per database filename.

    The cache lookup lives in __call__ because that is the hook that runs
    when a class using this metaclass is instantiated; a metaclass __new__
    only runs when the class itself is being created.
    """

    # Cache of constructed instances, keyed by (class, filename).
    _instances = {}

    def __call__(cls, *args, **kwargs):
        """
        Singleton method to gets an instance without reparsing the db. Unique
        instances are instantiated based on the filename of the db. Flags are
        ignored for this, i.e. if you initialize one with STANDARD
        flag (default) and then try later to initialize with MEMORY_CACHE, it
        will still return the STANDARD one.

        @return: the cached instance for the filename, created on first use
        """
        if args:
            filename = args[0]
        elif 'filename' in kwargs:
            filename = kwargs['filename']
        else:
            # No filename supplied: run the normal constructor uncached so
            # it reports the missing argument itself.
            return super(GeoIPMetaclass, cls).__call__(*args, **kwargs)

        key = (cls, filename)
        instances = GeoIPMetaclass._instances
        if key not in instances:
            instances[key] = super(GeoIPMetaclass, cls).__call__(
                *args, **kwargs)
        return instances[key]
2013-07-14 12:12:31 +02:00
# Create the base class by calling the metaclass directly; this spelling is
# valid on both Python 2 and Python 3.
GeoIPBase = GeoIPMetaclass('GeoIPBase', (object,), {})
2013-07-14 12:12:31 +02:00
class GeoIP(GeoIPBase):
def __init__(self, filename, flags=0):
    """
    Initialize the class.

    Opens the database according to flags, creates the lock that guards
    the shared file handle and parses the database structure info.

    @param filename: Path to a geoip database.
    @type filename: str
    @param flags: Flags that affect how the database is processed.
        Currently supported flags are STANDARD (the default),
        MEMORY_CACHE (preload the whole file into memory) and
        MMAP_CACHE (access the file via mmap).
    @type flags: int
    """
    self._filename = filename
    self._flags = flags

    if self._flags & const.MMAP_CACHE:
        # The file object is only needed while the mapping is created;
        # the with-block also closes it if mmap() itself fails.
        with open(filename, 'rb') as f:
            self._filehandle = mmap.mmap(
                f.fileno(), 0, access=mmap.ACCESS_READ)
    elif self._flags & const.MEMORY_CACHE:
        with open(filename, 'rb') as f:
            self._memoryBuffer = f.read()
        # Wrap the raw buffer in a file-like object so the seek/read code
        # paths work unchanged.
        iohandle = BytesIO if PY3 else StringIO
        self._filehandle = iohandle(self._memoryBuffer)
    else:
        self._filehandle = codecs.open(filename, 'rb', ENCODING)

    self._lock = Lock()
    self._setup_segments()
def _setup_segments(self):
    """
    Parses the database file to determine what kind of database is
    being used and setup segment sizes and start points that will
    be used by the seek*() methods later.

    The structure-info marker (three chr(255) characters followed by the
    database type byte) is searched for backwards from the end of the
    file, trying at most const.STRUCTURE_INFO_MAX_SIZE positions. When no
    marker is found the COUNTRY_EDITION defaults are kept. The file
    position is restored before returning.

    Supported databases:

    * COUNTRY_EDITION
    * COUNTRY_EDITION_V6
    * REGION_EDITION_REV0
    * REGION_EDITION_REV1
    * CITY_EDITION_REV0
    * CITY_EDITION_REV1
    * CITY_EDITION_REV1_V6
    * ORG_EDITION
    * ISP_EDITION
    * ASNUM_EDITION
    * ASNUM_EDITION_V6
    """
    self._databaseType = const.COUNTRY_EDITION
    self._recordLength = const.STANDARD_RECORD_LENGTH
    self._databaseSegments = const.COUNTRY_BEGIN

    # The marker never changes, so build (and, on Python 2, decode) it once.
    marker = chr(255) * 3
    if PY2:
        marker = marker.decode(ENCODING)

    # "with" guarantees the lock is released even if seek/read raises.
    with self._lock:
        filepos = self._filehandle.tell()
        self._filehandle.seek(-3, os.SEEK_END)

        for _ in range(const.STRUCTURE_INFO_MAX_SIZE):
            delim = self._filehandle.read(3)
            if PY3 and isinstance(delim, bytes):
                delim = delim.decode(ENCODING)
            if PY2 and isinstance(delim, str):
                delim = delim.decode(ENCODING)

            if delim != marker:
                # Undo the 3-character read and move one further back.
                self._filehandle.seek(-4, os.SEEK_CUR)
                continue

            byte = self._filehandle.read(1)
            self._databaseType = ord(byte)

            # Compatibility with databases from April 2003 and earlier
            if self._databaseType >= 106:
                self._databaseType -= 105

            if self._databaseType == const.REGION_EDITION_REV0:
                self._databaseSegments = const.STATE_BEGIN_REV0
            elif self._databaseType == const.REGION_EDITION_REV1:
                self._databaseSegments = const.STATE_BEGIN_REV1
            elif self._databaseType in (const.CITY_EDITION_REV0,
                                        const.CITY_EDITION_REV1,
                                        const.CITY_EDITION_REV1_V6,
                                        const.ORG_EDITION,
                                        const.ISP_EDITION,
                                        const.ASNUM_EDITION,
                                        const.ASNUM_EDITION_V6):
                # These editions store the segment count right after the
                # type byte, least-significant byte first.
                self._databaseSegments = 0
                buf = self._filehandle.read(const.SEGMENT_RECORD_LENGTH)
                if PY3 and isinstance(buf, bytes):
                    buf = buf.decode(ENCODING)
                for j in range(const.SEGMENT_RECORD_LENGTH):
                    self._databaseSegments += (ord(buf[j]) << (j * 8))

                LONG_RECORDS = (const.ORG_EDITION, const.ISP_EDITION)
                if self._databaseType in LONG_RECORDS:
                    self._recordLength = const.ORG_RECORD_LENGTH
            break

        self._filehandle.seek(filepos, os.SEEK_SET)
def _seek_country(self, ipnum):
    """
    Using the record length and appropriate start points, seek to the
    country that corresponds to the converted IP address integer.

    Walks the binary tree one bit of ipnum at a time, most significant
    bit first. Each node holds two little-endian pointers of
    self._recordLength characters; the walk stops at the first pointer
    that is >= self._databaseSegments.

    @param ipnum: result of ip2long conversion
    @type ipnum: int
    @return: offset of start of record
    @rtype: int
    @raise GeoIPError: if the walk fails or ends without reaching a record
    """
    try:
        offset = 0
        # 127 levels when ipnum has more than 10 decimal digits, else 31.
        # NOTE(review): presumably this tells IPv6 from IPv4 numbers; a
        # numerically small IPv6 address would get the 32-bit depth.
        seek_depth = 127 if len(str(ipnum)) > 10 else 31
        record_length = self._recordLength
        node_size = 2 * record_length

        for depth in range(seek_depth, -1, -1):
            start = node_size * offset
            if self._flags & const.MEMORY_CACHE:
                buf = self._memoryBuffer[start:start + node_size]
            else:
                # "with" releases the lock even when seek/read raises;
                # otherwise the swallowed error would leave it held and
                # the next lookup would block forever.
                with self._lock:
                    self._filehandle.seek(start, os.SEEK_SET)
                    buf = self._filehandle.read(node_size)

            if PY3 and isinstance(buf, bytes):
                buf = buf.decode(ENCODING)

            x = [0, 0]
            for i in range(2):
                for j in range(record_length):
                    byte = buf[record_length * i + j]
                    x[i] += ord(byte) << (j * 8)

            # Bit set -> right pointer, bit clear -> left pointer.
            child = x[1] if ipnum & (1 << depth) else x[0]
            if child >= self._databaseSegments:
                return child
            offset = child
    except Exception:
        # Any failure while walking the tree is reported as a corrupt
        # database below; KeyboardInterrupt/SystemExit still propagate.
        pass

    raise GeoIPError('Corrupt database')
def _get_org(self, ipnum):
    """
    Seek and return organization or ISP name for ipnum.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: org/isp name, or None when the lookup ends exactly at
        self._databaseSegments (no record)
    @rtype: str
    """
    seek_org = self._seek_country(ipnum)
    if seek_org == self._databaseSegments:
        return None

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # "with" guarantees the lock is released even if seek/read raises.
    with self._lock:
        self._filehandle.seek(seek_org + read_length, os.SEEK_SET)
        buf = self._filehandle.read(const.MAX_ORG_RECORD_LENGTH)

    if PY3 and isinstance(buf, bytes):
        buf = buf.decode(ENCODING)

    # The name is NUL-terminated; index() raises ValueError when no
    # terminator is present in the bytes that were read.
    return buf[:buf.index(chr(0))]
def _get_region(self, ipnum):
    """
    Seek and return the region info (dict containing country_code
    and region_name).

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict containing country_code and region_name; either value
        is '' when it could not be determined
    @rtype: dict
    """
    def get_region_name(offset):
        # Two upper-case letters: the base-26 digits of offset, 'A' == 0.
        return chr(offset // 26 + 65) + chr(offset % 26 + 65)

    region = ''
    country_code = ''
    seek_country = self._seek_country(ipnum)

    if self._databaseType == const.REGION_EDITION_REV0:
        seek_region = seek_country - const.STATE_BEGIN_REV0
        if seek_region >= 1000:
            country_code = 'US'
            region = get_region_name(seek_region - 1000)
        else:
            country_code = const.COUNTRY_CODES[seek_region]

    elif self._databaseType == const.REGION_EDITION_REV1:
        seek_region = seek_country - const.STATE_BEGIN_REV1
        if seek_region < const.US_OFFSET:
            # Below the US range: leave both fields empty.
            pass
        elif seek_region < const.CANADA_OFFSET:
            country_code = 'US'
            region = get_region_name(seek_region - const.US_OFFSET)
        elif seek_region < const.WORLD_OFFSET:
            country_code = 'CA'
            region = get_region_name(seek_region - const.CANADA_OFFSET)
        else:
            index = (seek_region - const.WORLD_OFFSET) // const.FIPS_RANGE
            # COUNTRY_CODES is indexed by integer elsewhere in this class,
            # so use a guarded lookup. A membership test ("index in ...")
            # on a sequence would compare the int against the stored
            # codes and never match.
            try:
                country_code = const.COUNTRY_CODES[index]
            except (IndexError, KeyError):
                pass

    elif self._databaseType in const.CITY_EDITIONS:
        rec = self._get_record(ipnum)
        region = rec.get('region_name', '')
        country_code = rec.get('country_code', '')

    return {'country_code': country_code, 'region_name': region}
def _get_record(self, ipnum):
    """
    Populate location dict for converted IP.

    @param ipnum: Converted IP address
    @type ipnum: int
    @return: dict with country_code, country_code3, country_name,
        continent, region_name, city, postal_code, latitude, longitude,
        dma_code, metro_code, area_code, time_zone; an empty dict when
        the lookup ends exactly at self._databaseSegments (no record)
    @rtype: dict
    """
    seek_country = self._seek_country(ipnum)
    if seek_country == self._databaseSegments:
        return {}

    read_length = (2 * self._recordLength - 1) * self._databaseSegments
    # "with" guarantees the lock is released even if seek/read raises.
    with self._lock:
        self._filehandle.seek(seek_country + read_length, os.SEEK_SET)
        buf = self._filehandle.read(const.FULL_RECORD_LENGTH)

    if PY3 and isinstance(buf, bytes):
        buf = buf.decode(ENCODING)

    def read_uint24(pos):
        # Three characters starting at pos, least-significant first.
        return sum(ord(buf[pos + j]) << (j * 8) for j in range(3))

    def get_data(buf, buf_pos):
        # Scan forward to the NUL terminator; return its position and the
        # text in front of it ('' when the field is empty).
        offset = buf_pos
        while ord(buf[offset]) != 0:
            offset += 1
        if offset > buf_pos:
            return (offset, buf[buf_pos:offset])
        return (offset, '')

    record = {
        'dma_code': 0,
        'area_code': 0,
        'metro_code': '',
        'postal_code': '',
    }

    # Get country: the first character indexes the const lookup tables.
    char = ord(buf[0])
    record['country_code'] = const.COUNTRY_CODES[char]
    record['country_code3'] = const.COUNTRY_CODES3[char]
    record['country_name'] = const.COUNTRY_NAMES[char]
    record['continent'] = const.CONTINENT_NAMES[char]

    # Three consecutive NUL-terminated strings follow the country byte.
    offset, record['region_name'] = get_data(buf, 1)
    offset, record['city'] = get_data(buf, offset + 1)
    offset, record['postal_code'] = get_data(buf, offset + 1)
    buf_pos = offset + 1

    latitude = read_uint24(buf_pos)
    longitude = read_uint24(buf_pos + 3)
    buf_pos += 6

    record['latitude'] = (latitude / 10000.0) - 180.0
    record['longitude'] = (longitude / 10000.0) - 180.0

    if self._databaseType in (const.CITY_EDITION_REV1,
                              const.CITY_EDITION_REV1_V6):
        if record['country_code'] == 'US':
            # One combined number: dma_code * 1000 + area_code.
            dmaarea_combo = read_uint24(buf_pos)
            record['dma_code'] = dmaarea_combo // 1000
            record['area_code'] = dmaarea_combo % 1000

    record['metro_code'] = const.DMA_MAP.get(record['dma_code'])
    params = (record['country_code'], record['region_name'])
    record['time_zone'] = time_zone_by_country_and_region(*params)

    return record
def _gethostbyname(self, hostname):
    """
    Resolve hostname to an address string matching the database family.

    For databases listed in const.IPV6_EDITIONS the first AF_INET6 result
    of getaddrinfo() is used and '' is returned if resolution fails; for
    all other databases socket.gethostbyname() is called directly.

    @param hostname: Hostname
    @type hostname: str
    @return: IP address
    @rtype: str
    """
    if self._databaseType not in const.IPV6_EDITIONS:
        return socket.gethostbyname(hostname)

    try:
        results = socket.getaddrinfo(hostname, 0, socket.AF_INET6)
        family, socktype, proto, canonname, sockaddr = results[0]
        address, port, flow, scope = sockaddr
    except socket.gaierror:
        return ''
    return address
2013-07-14 12:12:31 +02:00
def id_by_addr(self, addr):
    """
    Get the country index.

    Looks up the index for the country which is the key for
    the code and name.

    @param addr: The IP address
    @type addr: str
    @return: country index (seek result minus const.COUNTRY_BEGIN)
    @rtype: int
    @raise ValueError: if addr converts to a falsy number
    @raise GeoIPError: if the database is not a Country edition
    """
    ipnum = util.ip2long(addr)
    if not ipnum:
        raise ValueError("Invalid IP address: %s" % addr)

    country_editions = (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6)
    if self._databaseType in country_editions:
        return self._seek_country(ipnum) - const.COUNTRY_BEGIN

    raise GeoIPError('Invalid database type, expected Country')
def country_code_by_addr(self, addr):
    """
    Returns 2-letter country code (e.g. 'US') for specified IP address.
    Use this method if you have a Country, Region, or City database.

    @param addr: IP address
    @type addr: str
    @return: 2-letter country code
    @rtype: str
    @raise GeoIPError: on an unsupported database type, or when the
        lookup raises ValueError (including an address family that does
        not match the Country database)
    """
    try:
        db_type = self._databaseType

        if db_type in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6):
            # A colon anywhere in the text marks the address as IPv6.
            is_ipv6 = addr.find(':') >= 0
            if not is_ipv6 and db_type != const.COUNTRY_EDITION:
                raise ValueError(
                    'Invalid database type; expected IPv6 address')
            if is_ipv6 and db_type != const.COUNTRY_EDITION_V6:
                raise ValueError(
                    'Invalid database type; expected IPv4 address')
            return const.COUNTRY_CODES[self.id_by_addr(addr)]

        if db_type in const.REGION_CITY_EDITIONS:
            return self.region_by_addr(addr).get('country_code')

        raise GeoIPError(
            'Invalid database type, expected Country, City or Region')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
def country_code_by_name(self, hostname):
    """
    Returns 2-letter country code (e.g. 'US') for specified hostname.
    Use this method if you have a Country, Region, or City database.

    The hostname is resolved first and the result is handed to
    country_code_by_addr().

    @param hostname: Hostname
    @type hostname: str
    @return: 2-letter country code
    @rtype: str
    """
    return self.country_code_by_addr(self._gethostbyname(hostname))
def country_name_by_addr(self, addr):
    """
    Returns full country name for specified IP address.
    Use this method if you have a Country or City database.

    @param addr: IP address
    @type addr: str
    @return: country name, or None when a City database has no record
        for the address
    @rtype: str
    @raise GeoIPError: on an unsupported database type or when the
        lookup raises ValueError
    """
    try:
        db_type = self._databaseType

        if db_type in (const.COUNTRY_EDITION, const.COUNTRY_EDITION_V6):
            return const.COUNTRY_NAMES[self.id_by_addr(addr)]

        if db_type in const.CITY_EDITIONS:
            # record_by_addr() returns None when nothing is found, so
            # guard the .get() instead of failing with AttributeError.
            record = self.record_by_addr(addr)
            return record.get('country_name') if record else None

        raise GeoIPError('Invalid database type, expected Country or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
def country_name_by_name(self, hostname):
    """
    Returns full country name for specified hostname.
    Use this method if you have a Country database.

    The hostname is resolved first and the result is handed to
    country_name_by_addr().

    @param hostname: Hostname
    @type hostname: str
    @return: country name
    @rtype: str
    """
    return self.country_name_by_addr(self._gethostbyname(hostname))
def org_by_addr(self, addr):
    """
    Lookup Organization, ISP or ASNum for given IP address.
    Use this method if you have an Organization, ISP or ASNum database.

    @param addr: IP address
    @type addr: str
    @return: organization or ISP name
    @rtype: str
    @raise GeoIPError: on an unsupported database type or when the
        lookup raises ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        supported = (
            const.ORG_EDITION,
            const.ISP_EDITION,
            const.ASNUM_EDITION,
            const.ASNUM_EDITION_V6,
        )
        if self._databaseType in supported:
            return self._get_org(ipnum)

        raise GeoIPError('Invalid database type, expected Org, ISP or ASNum')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
def org_by_name(self, hostname):
    """
    Lookup the organization (or ISP) for hostname.
    Use this method if you have an Organization/ISP database.

    The hostname is resolved first and the result is handed to
    org_by_addr().

    @param hostname: Hostname
    @type hostname: str
    @return: Organization or ISP name
    @rtype: str
    """
    return self.org_by_addr(self._gethostbyname(hostname))
def record_by_addr(self, addr):
    """
    Look up the record for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary as built by _get_record(), or None when that
        dictionary is empty
    @rtype: dict
    @raise GeoIPError: on a non-City database or when the lookup raises
        ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            # An empty dict means "no record"; report that as None.
            return self._get_record(ipnum) or None

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
def record_by_name(self, hostname):
    """
    Look up the record for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first and the result is handed to
    record_by_addr().

    @param hostname: Hostname
    @type hostname: str
    @return: whatever record_by_addr() returns for the resolved address
    @rtype: dict
    """
    return self.record_by_addr(self._gethostbyname(hostname))
def region_by_addr(self, addr):
    """
    Lookup the region for given IP address.
    Use this method if you have a Region or City database.

    @param addr: IP address
    @type addr: str
    @return: Dictionary containing country_code and region_name
    @rtype: dict
    @raise GeoIPError: on an unsupported database type or when the
        lookup raises ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.REGION_CITY_EDITIONS:
            return self._get_region(ipnum)

        raise GeoIPError('Invalid database type, expected Region or City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
def region_by_name(self, hostname):
    """
    Lookup the region for given hostname.
    Use this method if you have a Region database.

    The hostname is resolved first and the result is handed to
    region_by_addr().

    @param hostname: Hostname
    @type hostname: str
    @return: whatever region_by_addr() returns for the resolved address
    @rtype: dict
    """
    return self.region_by_addr(self._gethostbyname(hostname))
def time_zone_by_addr(self, addr):
    """
    Look up the time zone for a given IP address.
    Use this method if you have a City database.

    @param addr: IP address
    @type addr: str
    @return: Time zone, or None when the record has no 'time_zone' entry
    @rtype: str
    @raise GeoIPError: on a non-City database or when the lookup raises
        ValueError
    """
    try:
        ipnum = util.ip2long(addr)
        if not ipnum:
            raise ValueError('Invalid IP address')

        if self._databaseType in const.CITY_EDITIONS:
            record = self._get_record(ipnum)
            return record.get('time_zone')

        raise GeoIPError('Invalid database type, expected City')
    except ValueError:
        raise GeoIPError('Failed to lookup address %s' % addr)
def time_zone_by_name(self, hostname):
    """
    Look up the time zone for a given hostname.
    Use this method if you have a City database.

    The hostname is resolved first and the result is handed to
    time_zone_by_addr().

    @param hostname: Hostname
    @type hostname: str
    @return: Time zone
    @rtype: str
    """
    return self.time_zone_by_addr(self._gethostbyname(hostname))