Skip to content
71 changes: 70 additions & 1 deletion extension/maxminddb.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,64 @@ static int Reader_init(PyObject *self, PyObject *args, PyObject *kwds)
static PyObject *Reader_get(PyObject *self, PyObject *args)
{
char *ip_address = NULL;
PyObject *object = NULL;
#if PY_MAJOR_VERSION >= 3
PyObject *bytes = NULL;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to declare the variables at the top of the function.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where do you propose I declare it? I don't think I can declare it inside the "if" condition since that will cause "bytes" to be scoped to that block which will mean it can't be used in the code further down.

Copy link
Author

@ericpruitt ericpruitt Aug 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also can't declare object inside the "if" block because I need to use object in the condition expression.

#endif

Reader_obj *mmdb_obj = (Reader_obj *)self;
if (!PyArg_ParseTuple(args, "s", &ip_address)) {

if (PyArg_ParseTuple(args, "s", &ip_address)) {
// pass
} else if (PyErr_Clear(), PyArg_ParseTuple(args, "O", &object)) {
PyObject* module = PyImport_ImportModule("ipaddress");
if (module == NULL) {
return NULL;
}

PyObject* module_dict = PyModule_GetDict(module);
if (module_dict == NULL) {
return NULL;
}

PyObject* ipaddress_IPv4Address = PyDict_GetItemString(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we be using PyObject_IsInstance or something here?

Copy link
Author

@ericpruitt ericpruitt Aug 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm calling PyObject_IsInstance below. PyObject_IsInstance requires a PyObject as its second argument, so I don't understand how I'm supposed to use PyObject_IsInstance without getting the IPv4Address class first.

module_dict, "IPv4Address");
int is_ipaddress_object = 0;

if (ipaddress_IPv4Address != NULL) {
is_ipaddress_object = PyObject_IsInstance(ipaddress_IPv4Address,
object);
}

PyObject* ipaddress_IPv6Address;
if (!is_ipaddress_object &&
(ipaddress_IPv6Address = PyDict_GetItemString(module_dict,
"IPv6Address")) != NULL) {
is_ipaddress_object = PyObject_IsInstance(ipaddress_IPv6Address,
object);
}

PyErr_Clear();
PyObject *str;

if (!is_ipaddress_object) {
PyErr_SetString(PyExc_TypeError, "IP address must be a string,"
" ipaddress.IPv4Address or ipaddress.IPv6Address object");
} else if ((str = PyObject_Str(object)) != NULL) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a little bit weird that we are doing all of these gymnastics and creating these heavy Python string and byte objects. I wonder if we could just get the packed address. Ideally, we wouldn't even bother converting that back to a *char, but it probably doesn't hurt too much to do that conversion.

Copy link
Author

@ericpruitt ericpruitt Aug 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only simpler method I could think of would be casting the argument to a str, but then you lose out on type safety.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we could just get the packed address

That is exposed:

~$ python
Python 3.5.3 (default, Sep 27 2018, 17:25:39)
[GCC 6.3.0 20170516] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import ipaddress
>>> i = ipaddress.ip_address("127.0.0.1")
>>> i.packed
b'\x7f\x00\x00\x01'

#if PY_MAJOR_VERSION >= 3
bytes = PyUnicode_AsEncodedString(str, "UTF-8", "strict");
ip_address = PyBytes_AS_STRING(bytes);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this code is kept, I think you can just decrement the reference count on bytes here and get rid of the ones below. There are also leaks from early returns as it is done now.

Copy link
Author

@ericpruitt ericpruitt Aug 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does Python guarantee that garbage collection will not immediately take place when the reference counter drops to 0? The reason I'm i'm decrementing the reference count in more than one place is because the ip_address pointer points into bytes, and if decrementing the reference counter frees that memory, it introduces a use-after-free bug.

#else
ip_address = PyString_AsString(str);
#endif
}

Py_DECREF(module);
} else {
return NULL;
}

if (ip_address == NULL) {
return NULL;
}

Expand All @@ -131,6 +186,10 @@ static PyObject *Reader_get(PyObject *self, PyObject *args)
PyErr_Format(PyExc_ValueError,
"'%s' does not appear to be an IPv4 or IPv6 address.",
ip_address);

#if PY_MAJOR_VERSION >= 3
Py_XDECREF(bytes);
#endif
return NULL;
}

Expand All @@ -143,6 +202,9 @@ static PyObject *Reader_get(PyObject *self, PyObject *args)
}
PyErr_Format(exception, "Error looking up %s. %s",
ip_address, MMDB_strerror(mmdb_error));
#if PY_MAJOR_VERSION >= 3
Py_XDECREF(bytes);
#endif
return NULL;
}

Expand All @@ -157,9 +219,16 @@ static PyObject *Reader_get(PyObject *self, PyObject *args)
"Error while looking up data for %s. %s",
ip_address, MMDB_strerror(status));
MMDB_free_entry_data_list(entry_data_list);
#if PY_MAJOR_VERSION >= 3
Py_XDECREF(bytes);
#endif
return NULL;
}

#if PY_MAJOR_VERSION >= 3
Py_XDECREF(bytes);
#endif

MMDB_entry_data_list_s *original_entry_data_list = entry_data_list;
PyObject *py_obj = from_entry_data_list(&entry_data_list);
MMDB_free_entry_data_list(original_entry_data_list);
Expand Down
10 changes: 7 additions & 3 deletions maxminddb/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# pylint: disable=invalid-name
mmap = None

import ipaddress
import struct

from maxminddb.compat import (byte_from_int, compat_ip_address, string_type,
Expand Down Expand Up @@ -105,11 +106,14 @@ def get(self, ip_address):
Arguments:
ip_address -- an IP address in the standard string notation
"""
if not isinstance(ip_address, string_type):
if isinstance(ip_address,
(ipaddress.IPv4Address, ipaddress.IPv6Address)):
address = ip_address
elif not isinstance(ip_address, string_type):
raise TypeError('argument 1 must be %s, not %s' %
(string_type_name, type(ip_address).__name__))

address = compat_ip_address(ip_address)
else:
address = compat_ip_address(ip_address)

if address.version == 6 and self._metadata.ip_version == 4:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion tests/data
Submodule data updated 59 files
+1 −0 .gitattributes
+2 −0 .gitconfig
+1 −0 .gitignore
+38 −26 MaxMind-DB-spec.md
+ MaxMind-DB-test-metadata-pointers.mmdb
+7 −0 bad-data/README.md
+ bad-data/libmaxminddb/libmaxminddb-offset-integer-overflow.mmdb
+ bad-data/maxminddb-golang/cyclic-data-structure.mmdb
+1 −0 bad-data/maxminddb-golang/invalid-bytes-length.mmdb
+ bad-data/maxminddb-golang/invalid-data-record-offset.mmdb
+ bad-data/maxminddb-golang/invalid-map-key-length.mmdb
+1 −0 bad-data/maxminddb-golang/invalid-string-length.mmdb
+1 −0 bad-data/maxminddb-golang/metadata-is-an-uint128.mmdb
+ bad-data/maxminddb-golang/unexpected-bytes.mmdb
+12 −0 perltidyrc
+14 −5 source-data/GeoIP2-Anonymous-IP-Test.json
+476 −0 source-data/GeoIP2-City-Test.json
+372 −0 source-data/GeoIP2-Country-Test.json
+14 −0 source-data/GeoIP2-DensityIncome-Test.json
+673 −0 source-data/GeoIP2-Enterprise-Test.json
+9 −1 source-data/GeoIP2-ISP-Test.json
+0 −12,454 source-data/GeoIP2-Precision-City-Test.json
+1,689 −0 source-data/GeoIP2-Precision-Enterprise-Test.json
+2,824 −0 source-data/GeoIP2-User-Count-Test.json
+37 −0 source-data/GeoLite2-ASN-Test.json
+10 −3 source-data/README
+ test-data/GeoIP2-Anonymous-IP-Test.mmdb
+ test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb
+ test-data/GeoIP2-City-Test-Invalid-Node-Count.mmdb
+ test-data/GeoIP2-City-Test.mmdb
+ test-data/GeoIP2-Connection-Type-Test.mmdb
+ test-data/GeoIP2-Country-Test.mmdb
+ test-data/GeoIP2-DensityIncome-Test.mmdb
+ test-data/GeoIP2-Domain-Test.mmdb
+ test-data/GeoIP2-Enterprise-Test.mmdb
+ test-data/GeoIP2-ISP-Test.mmdb
+ test-data/GeoIP2-Precision-City-Test.mmdb
+ test-data/GeoIP2-Precision-Enterprise-Test.mmdb
+ test-data/GeoIP2-User-Count-Test.mmdb
+ test-data/GeoLite2-ASN-Test.mmdb
+ test-data/MaxMind-DB-no-ipv4-search-tree.mmdb
+ test-data/MaxMind-DB-string-value-entries.mmdb
+ test-data/MaxMind-DB-test-broken-pointers-24.mmdb
+ test-data/MaxMind-DB-test-broken-search-tree-24.mmdb
+ test-data/MaxMind-DB-test-decoder.mmdb
+ test-data/MaxMind-DB-test-ipv4-24.mmdb
+ test-data/MaxMind-DB-test-ipv4-28.mmdb
+ test-data/MaxMind-DB-test-ipv4-32.mmdb
+ test-data/MaxMind-DB-test-ipv6-24.mmdb
+ test-data/MaxMind-DB-test-ipv6-28.mmdb
+ test-data/MaxMind-DB-test-ipv6-32.mmdb
+ test-data/MaxMind-DB-test-metadata-pointers.mmdb
+ test-data/MaxMind-DB-test-mixed-24.mmdb
+ test-data/MaxMind-DB-test-mixed-28.mmdb
+ test-data/MaxMind-DB-test-mixed-32.mmdb
+ test-data/MaxMind-DB-test-nested.mmdb
+17 −0 test-data/README.md
+180 −70 test-data/write-test-data.pl
+5 −0 tidyall.ini
18 changes: 14 additions & 4 deletions tests/reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import unicode_literals

import ipaddress
import logging
import mock
import os
Expand Down Expand Up @@ -125,9 +126,7 @@ def test_no_extension_exception(self):
def test_ip_object_lookup(self):
reader = open_database('tests/data/test-data/GeoIP2-City-Test.mmdb',
self.mode)
with self.assertRaisesRegex(TypeError,
"must be str(?:ing)?, not IPv6Address"):
reader.get(compat_ip_address('2001:220::'))
reader.get(compat_ip_address('2001:220::'))
reader.close()

def test_broken_database(self):
Expand Down Expand Up @@ -184,7 +183,7 @@ def test_no_constructor_args(self):
def test_too_many_get_args(self):
reader = open_database(
'tests/data/test-data/MaxMind-DB-test-decoder.mmdb', self.mode)
self.assertRaises(TypeError, reader.get, ('1.1.1.1', 'blah'))
self.assertRaises(TypeError, reader.get, '1.1.1.1', 'blah')
reader.close()

def test_no_get_args(self):
Expand Down Expand Up @@ -362,6 +361,11 @@ def _check_ip_v4(self, reader, file_name):
'found expected data record for ' + key_address + ' in ' +
file_name)

self.assertEqual(
data, reader.get(ipaddress.ip_address(key_address)),
'found expected data record for ' + key_address + ' in ' +
file_name)

for ip in ['1.1.1.33', '255.254.253.123']:
self.assertIsNone(reader.get(ip))

Expand Down Expand Up @@ -391,8 +395,14 @@ def _check_ip_v6(self, reader, file_name):
'found expected data record for ' + key_address +
' in ' + file_name)

self.assertEqual({'ip': value_address},
reader.get(ipaddress.ip_address(key_address)),
'found expected data record for ' + key_address +
' in ' + file_name)

for ip in ['1.1.1.33', '255.254.253.123', '89fa::']:
self.assertIsNone(reader.get(ip))
self.assertIsNone(reader.get(ipaddress.ip_address(ip)))


def has_maxminddb_extension():
Expand Down