-
Notifications
You must be signed in to change notification settings - Fork 19
[DB-40533] Add support for new type VECTOR(<dim>,DOUBLE) #182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -775,6 +775,39 @@ def putScaledCount2(self, value): | |
self.__output += data | ||
return self | ||
|
||
def putVectorDouble(self, value): | ||
# type: (datatype.Vector) -> EncodedSession | ||
"""Append a Vector with subtype Vector.DOUBLE to the message. | ||
|
||
:type value: datatype.Vector | ||
""" | ||
self.__output.append(protocol.VECTOR) | ||
# subtype | ||
self.__output.append(protocol.VECTOR_DOUBLE) | ||
# length in bytes in count notation, i.e. first | ||
# number of bytes needed for the length, then the | ||
# encoded length | ||
lengthStr = crypt.toByteString(len(value) * 8) | ||
self.__output.append(len(lengthStr)) | ||
self.__output += lengthStr | ||
|
||
# the actual vector: Each value as double in little endian encoding | ||
for val in value: | ||
self.__output += struct.pack('<d', float(val)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this work if the value is NULL? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A NULL should be set via But assuming someone would put some kind of NULL-value in here: What NULL representation would you expect there? I'll try to outline a few:
|
||
|
||
return self | ||
|
||
def putVector(self, value): | ||
# type: (datatype.Vector) -> EncodedSession | ||
"""Append a Vector type to the message. | ||
|
||
:type value: datatype.Vector | ||
""" | ||
if value.getSubtype() == datatype.Vector.DOUBLE: | ||
return self.putVectorDouble(value) | ||
|
||
raise DataError("unsupported value for VECTOR subtype: %d" % (value.getSubtype())) | ||
|
||
def putValue(self, value): # pylint: disable=too-many-return-statements | ||
# type: (Any) -> EncodedSession | ||
"""Call the supporting function based on the type of the value.""" | ||
|
@@ -806,6 +839,11 @@ def putValue(self, value): # pylint: disable=too-many-return-statements | |
if isinstance(value, bool): | ||
return self.putBoolean(value) | ||
|
||
# we don't want to autodetect lists as being VECTOR, so we | ||
# only bind double if it is the explicit type | ||
if isinstance(value, datatype.Vector): | ||
return self.putVector(value) | ||
|
||
# I find it pretty bogus that we pass str(value) here: why not value? | ||
return self.putString(str(value)) | ||
|
||
|
@@ -1035,6 +1073,36 @@ def getUUID(self): | |
|
||
raise DataError('Not a UUID') | ||
|
||
def getVector(self): | ||
# type: () -> datatype.Vector | ||
"""Read the next vector off the session. | ||
|
||
:rtype datatype.Vector | ||
""" | ||
if self._getTypeCode() == protocol.VECTOR: | ||
subtype = crypt.fromByteString(self._takeBytes(1)) | ||
if subtype == protocol.VECTOR_DOUBLE: | ||
# VECTOR(<dim>, DOUBLE) | ||
lengthBytes = crypt.fromByteString(self._takeBytes(1)) | ||
length = crypt.fromByteString(self._takeBytes(lengthBytes)) | ||
|
||
if length % 8 != 0: | ||
raise DataError("Invalid size for VECTOR DOUBLE data: %d" % (length)) | ||
|
||
dimension = length // 8 | ||
|
||
# VECTOR DOUBLE stores the data as little endian | ||
vector = datatype.Vector(datatype.Vector.DOUBLE, | ||
[struct.unpack('<d', self._takeBytes(8))[0] | ||
for _ in range(dimension)]) | ||
|
||
return vector | ||
else: | ||
raise DataError("Unknown VECTOR type: %d" % (subtype)) | ||
return 1 | ||
|
||
raise DataError('Not a VECTOR') | ||
|
||
def getScaledCount2(self): | ||
# type: () -> decimal.Decimal | ||
"""Read a scaled and signed decimal from the session. | ||
|
@@ -1110,6 +1178,9 @@ def getValue(self): | |
if code == protocol.UUID: | ||
return self.getUUID() | ||
|
||
if code == protocol.VECTOR: | ||
return self.getVector() | ||
|
||
if code == protocol.SCALEDCOUNT2: | ||
return self.getScaledCount2() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,8 @@ | |
import decimal | ||
import datetime | ||
|
||
from pynuodb import datatype | ||
|
||
from . import nuodb_base | ||
|
||
|
||
|
@@ -125,3 +127,87 @@ def test_null_type(self): | |
assert len(row) == 1 | ||
assert cursor.description[0][1] == null_type | ||
assert row[0] is None | ||
|
||
def test_vector_type(self): | ||
con = self._connect() | ||
cursor = con.cursor() | ||
|
||
# only activate this tests if tested against version 8 or above | ||
cursor.execute("select cast(substring_index(release_ver, '.', 1) as int)" | ||
" from system.nodes limit 1") | ||
Comment on lines
+136
to
+137
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't it be safer and easier to use geteffectiveversion() and compare based on that integer value, rather than parsing the version string? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just didn't know we have that function. Much better idea to use that. |
||
row = cursor.fetchone() | ||
database_major_version = row[0] | ||
if database_major_version < 8: | ||
return | ||
Comment on lines
+135
to
+141
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have a problem with this, per se. I think it would be cleaner to obtain this information once somewhere in conftest.py and make it available there, for example maybe in the DATABASE_FIXTURE since that already creates a connection. The current DATABASE_FIXTURE is the connection arguments but you could add this there and then extract it again in the nuodb_base.NuoBase._setup(). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll look into that. If it isn't extremely hard (which I doubt) I'll do that. |
||
|
||
cursor.execute("CREATE TEMPORARY TABLE tmp (" | ||
" vec3 VECTOR(3, DOUBLE)," | ||
" vec5 VECTOR(5, DOUBLE))") | ||
|
||
cursor.execute("INSERT INTO tmp VALUES (" | ||
" '[1.1,2.2,33.33]'," | ||
" '[-1,2,-3,4,-5]')") | ||
|
||
cursor.execute("SELECT * FROM tmp") | ||
|
||
# check metadata | ||
[name, type, _, _, precision, scale, _] = cursor.description[0] | ||
assert name == "VEC3" | ||
assert type == datatype.VECTOR_DOUBLE | ||
assert precision == 3 | ||
assert scale == 0 | ||
|
||
[name, type, _, _, precision, scale, _] = cursor.description[1] | ||
assert name == "VEC5" | ||
assert type == datatype.VECTOR_DOUBLE | ||
assert precision == 5 | ||
assert scale == 0 | ||
|
||
# check content | ||
row = cursor.fetchone() | ||
assert len(row) == 2 | ||
assert row[0] == [1.1, 2.2, 33.33] | ||
assert row[1] == [-1, 2, -3, 4, -5] | ||
assert cursor.fetchone() is None | ||
|
||
# check this is actually a Vector type, not just a list | ||
assert isinstance(row[0], datatype.Vector) | ||
assert row[0].getSubtype() == datatype.Vector.DOUBLE | ||
assert isinstance(row[1], datatype.Vector) | ||
assert row[1].getSubtype() == datatype.Vector.DOUBLE | ||
|
||
# check prepared parameters | ||
parameters = [datatype.Vector(datatype.Vector.DOUBLE, [11.11, -2.2, 3333.333]), | ||
datatype.Vector(datatype.Vector.DOUBLE, [-1.23, 2.345, -0.34, 4, -5678.9])] | ||
cursor.execute("TRUNCATE TABLE tmp") | ||
cursor.execute("INSERT INTO tmp VALUES (?, ?)", parameters) | ||
|
||
cursor.execute("SELECT * FROM tmp") | ||
|
||
# check content | ||
row = cursor.fetchone() | ||
assert len(row) == 2 | ||
assert row[0] == parameters[0] | ||
assert row[1] == parameters[1] | ||
assert cursor.fetchone() is None | ||
|
||
# check that the inserted values are interpreted correctly by the database | ||
cursor.execute("SELECT CAST(vec3 AS STRING) || ' - ' || CAST(vec5 AS STRING) AS strRep" | ||
" FROM tmp") | ||
|
||
row = cursor.fetchone() | ||
assert len(row) == 1 | ||
assert row[0] == "[11.11,-2.2,3333.333] - [-1.23,2.345,-0.34,4,-5678.9]" | ||
assert cursor.fetchone() is None | ||
|
||
# currently binding a list also works - this is done via implicit string | ||
# conversion of the passed argument in default bind case | ||
parameters = [[11.11, -2.2, 3333.333]] | ||
cursor.execute("SELECT VEC3 = ? FROM tmp", parameters) | ||
|
||
# check content | ||
row = cursor.fetchone() | ||
assert len(row) == 1 | ||
assert row[0] is True | ||
assert cursor.fetchone() is None | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is not quite right, is it? A Vector element doesn't have to be a DOUBLE (I mean, it does now but it might not in the future)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right. It is supposed to be usable with other element types as well. Will fix that.