diff --git a/src/core_functions/scalar/map/map.cpp b/src/core_functions/scalar/map/map.cpp index 62f54cdd25d0..664946267cee 100644 --- a/src/core_functions/scalar/map/map.cpp +++ b/src/core_functions/scalar/map/map.cpp @@ -21,21 +21,28 @@ static void MapFunctionEmptyInput(Vector &result, const idx_t row_count) { result.Verify(row_count); } +static bool MapIsNull(const LogicalType &map) { + D_ASSERT(map.id() == LogicalTypeId::MAP); + auto &key = MapType::KeyType(map); + auto &value = MapType::ValueType(map); + return (key.id() == LogicalTypeId::SQLNULL && value.id() == LogicalTypeId::SQLNULL); +} + static void MapFunction(DataChunk &args, ExpressionState &, Vector &result) { // internal MAP representation // - LIST-vector that contains STRUCTs as child entries // - STRUCTs have exactly two fields, a key-field, and a value-field // - key names are unique + D_ASSERT(result.GetType().id() == LogicalTypeId::MAP); - if (result.GetType().id() == LogicalTypeId::SQLNULL) { + if (MapIsNull(result.GetType())) { auto &validity = FlatVector::Validity(result); validity.SetInvalid(0); result.SetVectorType(VectorType::CONSTANT_VECTOR); return; } - D_ASSERT(result.GetType().id() == LogicalTypeId::MAP); auto row_count = args.size(); // early-out, if no data @@ -162,16 +169,20 @@ static unique_ptr MapBind(ClientContext &, ScalarFunction &bound_f MapVector::EvalMapInvalidReason(MapInvalidReason::INVALID_PARAMS); } - // bind an empty MAP + bool is_null = false; if (arguments.empty()) { - bound_function.return_type = LogicalType::MAP(LogicalTypeId::SQLNULL, LogicalTypeId::SQLNULL); - return make_uniq(bound_function.return_type); + is_null = true; + } + if (!is_null) { + auto key_id = arguments[0]->return_type.id(); + auto value_id = arguments[1]->return_type.id(); + if (key_id == LogicalTypeId::SQLNULL || value_id == LogicalTypeId::SQLNULL) { + is_null = true; + } } - auto key_id = arguments[0]->return_type.id(); - auto value_id = arguments[1]->return_type.id(); - if (key_id == LogicalTypeId::SQLNULL || value_id == LogicalTypeId::SQLNULL) { - bound_function.return_type = LogicalTypeId::SQLNULL; + if (is_null) { + bound_function.return_type = LogicalType::MAP(LogicalTypeId::SQLNULL, LogicalTypeId::SQLNULL); return make_uniq(bound_function.return_type); } diff --git a/tools/pythonpkg/src/native/python_conversion.cpp b/tools/pythonpkg/src/native/python_conversion.cpp index cbdbb9f57f00..133d3fb768f1 100644 --- a/tools/pythonpkg/src/native/python_conversion.cpp +++ b/tools/pythonpkg/src/native/python_conversion.cpp @@ -37,6 +37,20 @@ vector TransformStructKeys(py::handle keys, idx_t size, const LogicalTyp return res; } +static bool IsValidMapComponent(const py::handle &component) { + // The component is either NULL + if (py::none().is(component)) { + return true; + } + if (!py::hasattr(component, "__getitem__")) { + return false; + } + if (!py::hasattr(component, "__len__")) { + return false; + } + return true; +} + bool DictionaryHasMapFormat(const PyDictionary &dict) { if (dict.len != 2) { return false; @@ -51,13 +65,19 @@ bool DictionaryHasMapFormat(const PyDictionary &dict) { return false; } - // Dont check for 'py::list' to allow ducktyping - if (!py::hasattr(keys, "__getitem__") || !py::hasattr(keys, "__len__")) { + if (!IsValidMapComponent(keys)) { return false; } - if (!py::hasattr(values, "__getitem__") || !py::hasattr(values, "__len__")) { + if (!IsValidMapComponent(values)) { return false; } + + // If either of the components is NULL, return early + if (py::none().is(keys) || py::none().is(values)) { + return true; + } + + // Verify that both the keys and values are of the same length auto size = py::len(keys); if (size != py::len(values)) { return false; @@ -91,6 +111,11 @@ Value TransformStructFormatDictionaryToMap(const PyDictionary &dict, const Logic if (target_type.id() != LogicalTypeId::MAP) { throw InvalidInputException("Please provide a valid target type for transform from Python to Value"); } + + if (py::none().is(dict.keys) || py::none().is(dict.values)) { + return Value(LogicalType::MAP(LogicalTypeId::SQLNULL, LogicalTypeId::SQLNULL)); + } + auto size = py::len(dict.keys); D_ASSERT(size == py::len(dict.values)); @@ -130,12 +155,18 @@ Value TransformDictionaryToMap(const PyDictionary &dict, const LogicalType &targ auto keys = dict.values.attr("__getitem__")(0); auto values = dict.values.attr("__getitem__")(1); + if (py::none().is(keys) || py::none().is(values)) { + // Either 'key' or 'value' is None, return early with a NULL value + return Value(LogicalType::MAP(LogicalTypeId::SQLNULL, LogicalTypeId::SQLNULL)); + } + auto key_size = py::len(keys); D_ASSERT(key_size == py::len(values)); if (key_size == 0) { // dict == { 'key': [], 'value': [] } return EmptyMapValue(); } + // dict == { 'key': [ ... ], 'value' : [ ... ] } LogicalType key_target = LogicalTypeId::UNKNOWN; LogicalType value_target = LogicalTypeId::UNKNOWN; diff --git a/tools/pythonpkg/src/pandas/analyzer.cpp b/tools/pythonpkg/src/pandas/analyzer.cpp index 508270894403..660d1fb2b3d2 100644 --- a/tools/pythonpkg/src/pandas/analyzer.cpp +++ b/tools/pythonpkg/src/pandas/analyzer.cpp @@ -331,6 +331,10 @@ LogicalType PandasAnalyzer::DictToMap(const PyDictionary &dict, bool &can_conver auto keys = dict.values.attr("__getitem__")(0); auto values = dict.values.attr("__getitem__")(1); + if (py::none().is(keys) || py::none().is(values)) { + return LogicalType::MAP(LogicalTypeId::SQLNULL, LogicalTypeId::SQLNULL); + } + auto key_type = GetListType(keys, can_convert); if (!can_convert) { return EmptyMap(); diff --git a/tools/pythonpkg/tests/fast/pandas/test_df_object_resolution.py b/tools/pythonpkg/tests/fast/pandas/test_df_object_resolution.py index 0f20f9fe0309..1a07e47fc8f4 100644 --- a/tools/pythonpkg/tests/fast/pandas/test_df_object_resolution.py +++ b/tools/pythonpkg/tests/fast/pandas/test_df_object_resolution.py @@ -324,7 +324,7 @@ def test_map_duplicate(self, pandas, duckdb_cursor): with pytest.raises( duckdb.InvalidInputException, match="Dict->Map conversion failed because 'key' list contains duplicates" ): - converted_col = duckdb_cursor.sql("select * from x").df() + duckdb_cursor.sql("select * from x").show() @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) def test_map_nullkey(self, pandas, duckdb_cursor): @@ -337,9 +337,8 @@ def test_map_nullkey(self, pandas, duckdb_cursor): @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()]) def test_map_nullkeylist(self, pandas, duckdb_cursor): x = pandas.DataFrame([[{'key': None, 'value': None}]]) - # Isn't actually converted to MAP because isinstance(None, list) != True converted_col = duckdb_cursor.sql("select * from x").df() - duckdb_col = duckdb_cursor.sql("SELECT {key: NULL, value: NULL} as '0'").df() + duckdb_col = duckdb_cursor.sql("SELECT MAP(NULL, NULL) as '0'").df() pandas.testing.assert_frame_equal(duckdb_col, converted_col) @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])