MonetDBSolutions · gijzelaerr · Jan 14, 2022 · Dec 21, 2021 · Dec 22, 2021 · Dec 22, 2021
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
@@ -35,7 +35,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ["3.6", "3.7"," 3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
         branch: [default, Jul2021, Jan2022]
     container: monetdb/dev-builds:${{ matrix.branch }}_manylinux
 

diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml
@@ -10,6 +10,9 @@ on:
   workflow_dispatch:
 
 jobs:
+  #
+  ## This uses a Homebrew bottle of the latest stable MonetDB, which is the Jul2021 branch for now.
+  #
   osx-monetdb-brew:
     runs-on: macos-10.15
     steps:
@@ -20,11 +23,15 @@ jobs:
           name: osx-monetdb-brew
           path: /usr/local/Cellar/monetdb
 
+#
+## This builds monetdb from source, so we can build for multiple branches.
+## Disabled for now, since it slows down CI.
+#
 #  osx-monetdb-build:
 #    runs-on: macos-10.15
 #    strategy:
 #      matrix:
-#        branch: [Jul2021]
+#        branch: [default, Jul2021, Jan2022]
 #    steps:
 #      -
 #        name: brew packages
@@ -60,8 +67,8 @@ jobs:
     runs-on: macos-10.15
     strategy:
       matrix:
-        python-version: [ "3.6", "3.7", "3.8", "3.9", "3.10"]
-        branch: [Jul2021]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        branch: [Jul2021]  # default and Jan2022 are not enabled here yet
     needs: [osx-monetdb-brew]
     steps:
       -
@@ -70,7 +77,7 @@ jobs:
       - 
         uses: actions/download-artifact@v2 
         with:
-          name: osx-monetdb-brew
+          name: osx-monetdb-brew  # ${{ matrix.branch }}
           path: /usr/local/Cellar/monetdb
       -
         name: Homebrew link monetdb

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
@@ -18,14 +18,14 @@ jobs:
       matrix:
         include:
           - MONETDB_BRANCH: default
-            MONETDB_WIN_PREFIX: 84056
-            MONETDB_WIN_VERSION: a19336b1e6fd
+            MONETDB_WIN_PREFIX: 84157
+            MONETDB_WIN_VERSION: 7a26d14b0fd2
           - MONETDB_BRANCH: Jul2021
             MONETDB_WIN_PREFIX: 84054
             MONETDB_WIN_VERSION: 6650381af78a
           - MONETDB_BRANCH: Jan2022
-            MONETDB_WIN_PREFIX: 84059
-            MONETDB_WIN_VERSION: c7bc6a69cf73
+            MONETDB_WIN_PREFIX: 84158
+            MONETDB_WIN_VERSION: 153f401628b1
     env:
       MSI: MonetDB5-SQL-Installer-x86_64-${{ matrix.MONETDB_WIN_VERSION }}.msi
     steps:
@@ -55,7 +55,7 @@ jobs:
     runs-on: windows-2019
     strategy:
       matrix:
-        python-version: ["3.6", "3.7"," 3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
         branch: [ default, Jul2021, Jan2022 ]
     env:
       MONETDB_BRANCH: ${{ matrix.branch }}

diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ you need:
 
  * An up-to-date Linux, OSX or Windows 
  * pip `>= 19.3`
- * Python `>= 3.6`
+ * Python `>= 3.7`
 
 to make sure you have a recent pip first upgrade pip:
 ```

diff --git a/doc/installation.rst b/doc/installation.rst
@@ -9,7 +9,7 @@ you need:
 
  * An up-to-date Linux, OSX or Windows, running on a 64-bit Intel compatible x86 architecture
  * pip ``>= 19.3``
- * Python ``>= 3.6``
+ * Python ``>= 3.7``
 
 to make sure you have a recent pip first upgrade pip::
 

diff --git a/monetdbe/_cffi/convert/__init__.py b/monetdbe/_cffi/convert/__init__.py
@@ -48,34 +48,33 @@ class MonetdbTypeInfo(NamedTuple):
     numpy_type: np.dtype
     c_string_type: str
     py_converter: Optional[Callable]
-    null_value: Optional[Union[int, np.floating]]
 
 
 inversable_type_infos: List[MonetdbTypeInfo] = [
-    MonetdbTypeInfo(lib.monetdbe_bool, "boolean", np.dtype(np.bool_), "bool", None, None),
-    MonetdbTypeInfo(lib.monetdbe_int8_t, "tinyint", np.dtype(np.int8), "int8_t", None, np.iinfo(np.int8).min),  # type: ignore
-    MonetdbTypeInfo(lib.monetdbe_int16_t, "smallint", np.dtype(np.int16), "int16_t", None, np.iinfo(np.int16).min),  # type: ignore
-    MonetdbTypeInfo(lib.monetdbe_int32_t, "int", np.dtype(np.int32), "int32_t", None, np.iinfo(np.int32).min),  # type: ignore
-    MonetdbTypeInfo(lib.monetdbe_int64_t, "bigint", np.dtype(np.int64), "int64_t", None, np.iinfo(np.int64).min),  # type: ignore
-    MonetdbTypeInfo(lib.monetdbe_float, "real", np.dtype(np.float32), "float", py_float, np.finfo(np.float32).min),
-    MonetdbTypeInfo(lib.monetdbe_double, "float", np.dtype(np.float64), "double", py_float, np.finfo(np.float64).min),
+    MonetdbTypeInfo(lib.monetdbe_bool, "boolean", np.dtype(np.bool_), "bool", None),
+    MonetdbTypeInfo(lib.monetdbe_int8_t, "tinyint", np.dtype(np.int8), "int8_t", None),  # type: ignore
+    MonetdbTypeInfo(lib.monetdbe_int16_t, "smallint", np.dtype(np.int16), "int16_t", None),  # type: ignore
+    MonetdbTypeInfo(lib.monetdbe_int32_t, "int", np.dtype(np.int32), "int32_t", None),  # type: ignore
+    MonetdbTypeInfo(lib.monetdbe_int64_t, "bigint", np.dtype(np.int64), "int64_t", None),  # type: ignore
+    MonetdbTypeInfo(lib.monetdbe_float, "real", np.dtype(np.float32), "float", py_float),
+    MonetdbTypeInfo(lib.monetdbe_double, "float", np.dtype(np.float64), "double", py_float),
 ]
 
 # things that can have a mapping from numpy to monetdb but not back
 numpy_to_monetdb_type_infos: List[MonetdbTypeInfo] = [
-    MonetdbTypeInfo(lib.monetdbe_int8_t, "tinyint", np.dtype(np.uint8), "int8_t", None, None),
-    MonetdbTypeInfo(lib.monetdbe_int16_t, "smallint", np.dtype(np.uint16), "int16_t", None, None),
-    MonetdbTypeInfo(lib.monetdbe_int32_t, "int", np.dtype(np.uint32), "int32_t", None, None),
-    MonetdbTypeInfo(lib.monetdbe_int64_t, "bigint", np.dtype(np.uint64), "int64_t", None, None),
+    MonetdbTypeInfo(lib.monetdbe_int8_t, "tinyint", np.dtype(np.uint8), "int8_t", None),
+    MonetdbTypeInfo(lib.monetdbe_int16_t, "smallint", np.dtype(np.uint16), "int16_t", None),
+    MonetdbTypeInfo(lib.monetdbe_int32_t, "int", np.dtype(np.uint32), "int32_t", None),
+    MonetdbTypeInfo(lib.monetdbe_int64_t, "bigint", np.dtype(np.uint64), "int64_t", None),
 ]
 
 # things that can have a mapping from monetdb to numpy but not back
 monetdb_to_numpy_type_infos: List[MonetdbTypeInfo] = [
-    MonetdbTypeInfo(lib.monetdbe_str, "string", np.dtype('=O'), "str", make_string, None),
-    MonetdbTypeInfo(lib.monetdbe_blob, "blob", np.dtype('=O'), "blob", make_blob, None),
-    MonetdbTypeInfo(lib.monetdbe_date, "date", np.dtype('=O'), "date", py_date, None),
-    MonetdbTypeInfo(lib.monetdbe_time, "time", np.dtype('=O'), "time", py_time, None),
-    MonetdbTypeInfo(lib.monetdbe_timestamp, "timestamp", np.dtype('=O'), "timestamp", py_timestamp, None),
+    MonetdbTypeInfo(lib.monetdbe_str, "string", np.dtype('=O'), "str", make_string),
+    MonetdbTypeInfo(lib.monetdbe_blob, "blob", np.dtype('=O'), "blob", make_blob),
+    MonetdbTypeInfo(lib.monetdbe_date, "date", np.dtype('=O'), "date", py_date),
+    MonetdbTypeInfo(lib.monetdbe_time, "time", np.dtype('=O'), "time", py_time),
+    MonetdbTypeInfo(lib.monetdbe_timestamp, "timestamp", np.dtype('=O'), "timestamp", py_timestamp),
 ]
 
 numpy_type_map: Mapping[np.dtype, MonetdbTypeInfo] = {i.numpy_type: i for i in
@@ -101,13 +100,26 @@ def numpy_monetdb_map(numpy_type: np.dtype):
         # this is an odd one, the numpy type string includes the width. Also, we don't format
         # monetdb string columns as fixed width numpy columns yet, so technically this type is
         # non-reversable for now.
-        return MonetdbTypeInfo(lib.monetdbe_str, "string", numpy_type, "char *", None, None)
+        return MonetdbTypeInfo(lib.monetdbe_str, "string", numpy_type, "char *", None)
+    if numpy_type.kind == 'M':
+        # TODO: another odd one, datetime64 dtypes of any unit are all mapped to a monetdb timestamp for now.
+        return MonetdbTypeInfo(lib.monetdbe_timestamp, "timestamp", np.dtype(np.datetime64), "int64_t", None)
 
     if numpy_type.kind in supported_numpy_types:  # type: ignore
         return numpy_type_map[numpy_type]
     raise ProgrammingError(f"append() called with unsupported type {numpy_type}")
 
 
+def timestamp_to_date():
+    return MonetdbTypeInfo(lib.monetdbe_date, "date", np.dtype(np.datetime64), "int64_t", None)
+
+
+def get_null_value(rcol: monetdbe_column):
+    type_info = monet_c_type_map[rcol.type]
+    col = ffi.cast(f"monetdbe_column_{type_info.c_string_type} *", rcol)
+    return col.null_value
+
+
 if newer_then_jul2021:
     def extract(rcol: monetdbe_column, r: int, text_factory: Optional[Callable[[str], Any]] = None):
         """

diff --git a/monetdbe/_cffi/embed.h.j2 b/monetdbe/_cffi/embed.h.j2
@@ -151,4 +151,5 @@ extern char* monetdbe_get_columns(monetdbe_database dbhdl, const char* schema_na
 extern char* monetdbe_dump_database(monetdbe_database dbhdl, const char *backupfile);
 extern char* monetdbe_dump_table(monetdbe_database dbhdl, const char *schema_name, const char *table_name, const char *backupfile);
 
-extern void initialize_string_array_from_numpy(char** restrict output, size_t size, char* restrict numpy_string_input, size_t stride_length);
+extern void initialize_string_array_from_numpy(char** restrict output, size_t size, char* restrict numpy_string_input, size_t stride_length, bool* restrict mask);
+extern void initialize_timestamp_array_from_numpy(monetdbe_database dbhdl, void* restrict output, const size_t size, int64_t* restrict numpy_datetime_input, char const *unit_string, const monetdbe_types type);
diff --git a/monetdbe/_cffi/internal.py b/monetdbe/_cffi/internal.py
@@ -6,10 +6,9 @@
 from collections import namedtuple
 
 import numpy as np
-
 from monetdbe._lowlevel import ffi, lib
 from monetdbe import exceptions
-from monetdbe._cffi.convert import make_string, monet_c_type_map, extract, numpy_monetdb_map, precision_warning
+from monetdbe._cffi.convert import make_string, monet_c_type_map, extract, numpy_monetdb_map, precision_warning, timestamp_to_date, get_null_value
 from monetdbe._cffi.convert.bind import monetdbe_decimal_to_bte, monetdbe_decimal_to_sht, monetdbe_decimal_to_int, monetdbe_decimal_to_lng, prepare_bind
 from monetdbe._cffi.errors import check_error
 from monetdbe._cffi.types_ import monetdbe_result, monetdbe_database, monetdbe_column, monetdbe_statement
@@ -33,28 +32,27 @@ def result_fetch_numpy(result: monetdbe_result) -> Mapping[str, np.ndarray]:
         name = make_string(rcol.name)
         type_info = monet_c_type_map[rcol.type]
 
+        np_mask = np.ma.nomask  # type: ignore[attr-defined]
         # for non float/int we for now first make a numpy object array which we then convert to the right numpy type
         if type_info.numpy_type.type == np.object_:
-            np_col: np.ndarray = np.array([extract(rcol, r) for r in range(result.nrows)])
+            values = [extract(rcol, r) for r in range(result.nrows)]
+            np_col: np.ndarray = np.array(values)
+            np_mask = np.array([v is None for v in values])
             if rcol.type == lib.monetdbe_str:
                 np_col = np_col.astype(str)
             elif rcol.type == lib.monetdbe_date:
                 np_col = np_col.astype('datetime64[D]')  # type: ignore
             elif rcol.type == lib.monetdbe_time:
                 warn("Not converting column with type column since no proper numpy equivalent")
             elif rcol.type == lib.monetdbe_timestamp:
-                np_col = np_col.astype('datetime64[ns]')  # type: ignore
+                np_col = np_col.astype('datetime64[ms]')  # type: ignore
         else:
             buffer_size = result.nrows * type_info.numpy_type.itemsize  # type: ignore
             c_buffer = ffi.buffer(rcol.data, buffer_size)
             np_col = np.frombuffer(c_buffer, dtype=type_info.numpy_type)  # type: ignore
+            np_mask = np_col == get_null_value(rcol)
 
-        if type_info.null_value:
-            mask = np_col == type_info.null_value
-        else:
-            mask = np.ma.nomask  # type: ignore[attr-defined]
-
-        masked: np.ndarray = np.ma.masked_array(np_col, mask=mask)
+        masked: np.ndarray = np.ma.masked_array(np_col, mask=np_mask)
 
         result_dict[name] = masked
     return result_dict
@@ -248,7 +246,6 @@ def append(self, table: str, data: Mapping[str, np.ndarray], schema: str = 'sys'
         """
         Directly append an array structure
         """
-
         self._switch()
         n_columns = len(data)
         existing_columns = list(self.get_columns(schema=schema, table=table))
@@ -259,31 +256,49 @@ def append(self, table: str, data: Mapping[str, np.ndarray], schema: str = 'sys'
             raise exceptions.ProgrammingError(error)
 
         work_columns = ffi.new(f'monetdbe_column * [{n_columns}]')
+        work_objs = []
         # cffi_objects assists to keep all in-memory native data structure alive during the execution of this call
-        cffi_objects = []
+        cffi_objects = list()
         for column_num, (column_name, existing_type) in enumerate(existing_columns):
             column_values = data[column_name]
             work_column = ffi.new('monetdbe_column *')
             type_info = numpy_monetdb_map(column_values.dtype)
 
             # try to convert the values if types don't match
             if type_info.c_type != existing_type:
-                precision_warning(type_info.c_type, existing_type)
-                to_numpy_type = monet_c_type_map[existing_type].numpy_type
-                try:
-                    column_values = column_values.astype(to_numpy_type)
-                    type_info = numpy_monetdb_map(column_values.dtype)
-                except Exception as e:
-                    existing_type_string = monet_c_type_map[existing_type].c_string_type
-                    error = f"Can't convert '{type_info.c_string_type}' " \
-                            f"to type '{existing_type_string}' for column '{column_name}': {e} "
-                    raise ValueError(error)
+                if type_info.c_type == lib.monetdbe_timestamp and existing_type == lib.monetdbe_date and np.issubdtype(column_values.dtype, np.datetime64):
+                    """
+                    We are going to cast to a monetdbe_date and
+                    consider monetdbe_timestamp as a 'base type' to signal this.
+                    """
+                    type_info = timestamp_to_date()
+                else:
+                    precision_warning(type_info.c_type, existing_type)
+                    to_numpy_type = monet_c_type_map[existing_type].numpy_type
+                    try:
+                        column_values = column_values.astype(to_numpy_type)
+                        type_info = numpy_monetdb_map(column_values.dtype)
+                    except Exception as e:
+                        existing_type_string = monet_c_type_map[existing_type].c_string_type
+                        error = f"Can't convert '{type_info.c_string_type}' " \
+                                f"to type '{existing_type_string}' for column '{column_name}': {e} "
+                        raise ValueError(error)
 
             work_column.type = type_info.c_type
             work_column.count = column_values.shape[0]
             work_column.name = ffi.new('char[]', column_name.encode())
-            if type_info.numpy_type.kind == 'U':
+            if type_info.numpy_type.kind == 'M':
+                t = ffi.new('monetdbe_data_timestamp[]', work_column.count)
+                cffi_objects.append(t)
+                unit = np.datetime_data(column_values.dtype)[0].encode()
+                p = ffi.from_buffer("int64_t*", column_values)
+
+                lib.initialize_timestamp_array_from_numpy(self._monetdbe_database, t, work_column.count, p, unit, existing_type)
+                work_column.data = t
+            elif type_info.numpy_type.kind == 'U':
                 # first massage the numpy array of unicode into a matrix of null terminated rows of bytes.
+                m = ffi.from_buffer("bool*", column_values.mask) if np.ma.isMaskedArray(column_values) else 0  # type: ignore[attr-defined]
+                cffi_objects.append(m)
                 v = np.char.encode(column_values).view('b').reshape((work_column.count, -1))
                 v = np.c_[v, np.zeros(work_column.count, dtype=np.int8)]
                 stride_length = v.shape[1]
@@ -292,12 +307,14 @@ def append(self, table: str, data: Mapping[str, np.ndarray], schema: str = 'sys'
                 cffi_objects.append(t)
                 p = ffi.from_buffer("char*", v)
                 cffi_objects.append(p)
-                lib.initialize_string_array_from_numpy(t, work_column.count, p, stride_length)
+                lib.initialize_string_array_from_numpy(t, work_column.count, p, stride_length, ffi.cast("bool*", m))
                 work_column.data = t
             else:
-                work_column.data = ffi.from_buffer(f"{type_info.c_string_type}*", column_values)
+                p = ffi.from_buffer(f"{type_info.c_string_type}*", column_values)
+                cffi_objects.append(p)
+                work_column.data = p
             work_columns[column_num] = work_column
-            cffi_objects.append(work_column)
+            work_objs.append(work_column)
         check_error(lib.monetdbe_append(self._monetdbe_database, schema.encode(),
                                         table.encode(), work_columns, n_columns))