nuodb · alampe3ds · Jun 17, 2025 · Jun 17, 2025 · Jun 23, 2025 · madscientist
diff --git a/pynuodb/datatype.py b/pynuodb/datatype.py
@@ -37,6 +37,8 @@
 from datetime import datetime as Timestamp, date as Date, time as Time
 from datetime import timedelta as TimeDelta
 
+from pynuodb import protocol
+
 try:
     from typing import Tuple, Union  # pylint: disable=unused-import
 except ImportError:
@@ -149,10 +151,37 @@ def __cmp__(self, other):
         return -1
 
 
+class Vector(list):
+    """List subclass representing a SQL VECTOR(<dim>, DOUBLE) value.
+
+    Wrapping the values in this type lets the driver detect that a
+    parameter should be bound as a VECTOR.  Apart from having to pass
+    the subtype as first constructor argument, an instance can be used
+    like a regular list.
+    """
+    DOUBLE = protocol.VECTOR_DOUBLE
+
+    def __init__(self, subtype, *args, **kwargs):
+        """Create a vector of the given subtype.
+
+        :param subtype: Vector subtype; only Vector.DOUBLE is accepted.
+        :param args: Forwarded to list.__init__; at least one is required.
+        :param kwargs: Forwarded to list.__init__.
+        :raises TypeError: If no further positional argument follows
+            subtype, or if subtype is not Vector.DOUBLE.
+        """
+        # Without further positional arguments the caller most likely
+        # passed the values where the subtype belongs.
+        if not args:
+            raise TypeError("Vector needs to be initialized with a subtype like Vector.DOUBLE as"
+                            " first argument")
+
+        if subtype != Vector.DOUBLE:
+            raise TypeError("Vector type only supported for subtype DOUBLE")
+
+        self.subtype = subtype
+        super(Vector, self).__init__(*args, **kwargs)
+
+    def getSubtype(self):
+        # type: () -> int
+        """Return the subtype stored on this vector at construction."""
+        return self.subtype
+
+
 STRING = TypeObject(str)
 BINARY = TypeObject(str)
 NUMBER = TypeObject(int, decimal.Decimal)
 DATETIME = TypeObject(Timestamp, Date, Time)
+VECTOR_DOUBLE = TypeObject(list)
 ROWID = TypeObject()
 NULL = TypeObject(None)
 
@@ -179,6 +208,7 @@ def __cmp__(self, other):
            "timestamp without time zone": DATETIME,
            "timestamp with time zone": DATETIME,
            "time without time zone": DATETIME,
+           "vector double": VECTOR_DOUBLE,
            # Old types used by NuoDB <2.0.3
            "binarystring": BINARY,
            "binaryvaryingstring": BINARY,

diff --git a/pynuodb/encodedsession.py b/pynuodb/encodedsession.py
@@ -775,6 +775,39 @@ def putScaledCount2(self, value):
         self.__output += data
         return self
 
+    def putVectorDouble(self, value):
+        # type: (datatype.Vector) -> EncodedSession
+        """Encode a VECTOR with subtype DOUBLE into the outgoing message.
+
+        Written in order: the VECTOR type code, the VECTOR_DOUBLE subtype,
+        the payload size in bytes in count notation, and every element
+        as an 8-byte little-endian double.
+
+        :type value: datatype.Vector
+        :returns: This session.
+        """
+        self.__output.append(protocol.VECTOR)
+        self.__output.append(protocol.VECTOR_DOUBLE)
+
+        # Count notation: one byte holding the width of the encoded
+        # length, followed by the encoded length itself.  Each element
+        # occupies 8 bytes.
+        payloadSize = len(value) * 8
+        encodedSize = crypt.toByteString(payloadSize)
+        self.__output.append(len(encodedSize))
+        self.__output += encodedSize
+
+        for element in value:
+            self.__output += struct.pack('<d', float(element))
+
+        return self
+
+    def putVector(self, value):
+        # type: (datatype.Vector) -> EncodedSession
+        """Encode a Vector into the message according to its subtype.
+
+        :type value: datatype.Vector
+        :raises DataError: If the subtype is not Vector.DOUBLE.
+        """
+        subtype = value.getSubtype()
+        if subtype != datatype.Vector.DOUBLE:
+            raise DataError("unsupported value for VECTOR subtype: %d" % (subtype))
+
+        return self.putVectorDouble(value)
+
     def putValue(self, value):  # pylint: disable=too-many-return-statements
         # type: (Any) -> EncodedSession
         """Call the supporting function based on the type of the value."""
@@ -806,6 +839,11 @@ def putValue(self, value):  # pylint: disable=too-many-return-statements
         if isinstance(value, bool):
             return self.putBoolean(value)
 
+        # we don't want to autodetect lists as being VECTOR, so we
+        # only bind double if it is the explicit type
+        if isinstance(value, datatype.Vector):
+            return self.putVector(value)
+
         # I find it pretty bogus that we pass str(value) here: why not value?
         return self.putString(str(value))
 
@@ -1035,6 +1073,36 @@ def getUUID(self):
 
         raise DataError('Not a UUID')
 
+    def getVector(self):
+        # type: () -> datatype.Vector
+        """Read the next vector off the session.
+
+        :rtype: datatype.Vector
+        :raises DataError: If the next value is not a VECTOR, its subtype
+            is not VECTOR_DOUBLE, or its byte length is not a multiple of 8.
+        """
+        if self._getTypeCode() != protocol.VECTOR:
+            raise DataError('Not a VECTOR')
+
+        subtype = crypt.fromByteString(self._takeBytes(1))
+        if subtype != protocol.VECTOR_DOUBLE:
+            raise DataError("Unknown VECTOR type: %d" % (subtype))
+
+        # VECTOR(<dim>, DOUBLE): the byte length is in count notation, i.e.
+        # one byte giving the width of the length, then the length itself.
+        lengthBytes = crypt.fromByteString(self._takeBytes(1))
+        length = crypt.fromByteString(self._takeBytes(lengthBytes))
+
+        if length % 8 != 0:
+            raise DataError("Invalid size for VECTOR DOUBLE data: %d" % (length))
+
+        dimension = length // 8
+
+        # VECTOR DOUBLE stores the data as little endian
+        return datatype.Vector(datatype.Vector.DOUBLE,
+                               [struct.unpack('<d', self._takeBytes(8))[0]
+                                for _ in range(dimension)])
+
     def getScaledCount2(self):
         # type: () -> decimal.Decimal
         """Read a scaled and signed decimal from the session.
@@ -1110,6 +1178,9 @@ def getValue(self):
         if code == protocol.UUID:
             return self.getUUID()
 
+        if code == protocol.VECTOR:
+            return self.getVector()
+
         if code == protocol.SCALEDCOUNT2:
             return self.getScaledCount2()
 

diff --git a/pynuodb/protocol.py b/pynuodb/protocol.py
@@ -45,7 +45,7 @@
 BLOBLEN4                          = 193
 CLOBLEN0                          = 194
 CLOBLEN4                          = 198
-SCALEDCOUNT1                      = 199
+VECTOR                            = 199
 UUID                              = 200
 SCALEDDATELEN0                    = 200
 SCALEDDATELEN1                    = 201
@@ -66,6 +66,9 @@
 DEBUGBARRIER                      = 240
 SCALEDTIMESTAMPNOTZ               = 241
 
+# subtypes of the VECTOR type
+VECTOR_DOUBLE                     = 0
+
 # Protocol Messages
 FAILURE                           = 0
 OPENDATABASE                      = 3

diff --git a/tests/nuodb_types_test.py b/tests/nuodb_types_test.py
@@ -8,6 +8,8 @@
 import decimal
 import datetime
 
+from pynuodb import datatype
+
 from . import nuodb_base
 
 
@@ -125,3 +127,87 @@ def test_null_type(self):
         assert len(row) == 1
         assert cursor.description[0][1] == null_type
         assert row[0] is None
+
+    def test_vector_type(self):
+        """Round-trip VECTOR(<dim>, DOUBLE) values through the driver.
+
+        Checks the column metadata, values inserted as SQL literals,
+        values bound as datatype.Vector parameters, and binding a plain
+        list.  Returns early when the server major version is below 8.
+        """
+        con = self._connect()
+        cursor = con.cursor()
+
+        # only run this test against database version 8 or above
+        cursor.execute("select cast(substring_index(release_ver, '.', 1) as int)"
+                       " from system.nodes limit 1")
+        row = cursor.fetchone()
+        database_major_version = row[0]
+        if database_major_version < 8:
+            return
+
+        cursor.execute("CREATE TEMPORARY TABLE tmp ("
+                       " vec3 VECTOR(3, DOUBLE),"
+                       " vec5 VECTOR(5, DOUBLE))")
+
+        cursor.execute("INSERT INTO tmp VALUES ("
+                       " '[1.1,2.2,33.33]',"
+                       " '[-1,2,-3,4,-5]')")
+
+        cursor.execute("SELECT * FROM tmp")
+
+        # check metadata: the vector dimension is reported as precision
+        expected_columns = [("VEC3", 3), ("VEC5", 5)]
+        for index, (expected_name, expected_dim) in enumerate(expected_columns):
+            [name, type_code, _, _, precision, scale, _] = cursor.description[index]
+            assert name == expected_name
+            assert type_code == datatype.VECTOR_DOUBLE
+            assert precision == expected_dim
+            assert scale == 0
+
+        # check content
+        row = cursor.fetchone()
+        assert len(row) == 2
+        assert row[0] == [1.1, 2.2, 33.33]
+        assert row[1] == [-1, 2, -3, 4, -5]
+        assert cursor.fetchone() is None
+
+        # check this is actually a Vector type, not just a list
+        for column in row:
+            assert isinstance(column, datatype.Vector)
+            assert column.getSubtype() == datatype.Vector.DOUBLE
+
+        # check prepared parameters
+        parameters = [datatype.Vector(datatype.Vector.DOUBLE, [11.11, -2.2, 3333.333]),
+                      datatype.Vector(datatype.Vector.DOUBLE, [-1.23, 2.345, -0.34, 4, -5678.9])]
+        cursor.execute("TRUNCATE TABLE tmp")
+        cursor.execute("INSERT INTO tmp VALUES (?, ?)", parameters)
+
+        cursor.execute("SELECT * FROM tmp")
+
+        # check content
+        row = cursor.fetchone()
+        assert len(row) == 2
+        assert row[0] == parameters[0]
+        assert row[1] == parameters[1]
+        assert cursor.fetchone() is None
+
+        # check that the inserted values are interpreted correctly by the database
+        cursor.execute("SELECT CAST(vec3 AS STRING) || ' - ' || CAST(vec5 AS STRING) AS strRep"
+                       " FROM tmp")
+
+        row = cursor.fetchone()
+        assert len(row) == 1
+        assert row[0] == "[11.11,-2.2,3333.333] - [-1.23,2.345,-0.34,4,-5678.9]"
+        assert cursor.fetchone() is None
+
+        # currently binding a list also works - this is done via implicit string
+        # conversion of the passed argument in the default bind case
+        parameters = [[11.11, -2.2, 3333.333]]
+        cursor.execute("SELECT VEC3 = ? FROM tmp", parameters)
+
+        # check content
+        row = cursor.fetchone()
+        assert len(row) == 1
+        assert row[0] is True
+        assert cursor.fetchone() is None
+