daphne-eu · corepointer · Sep 11, 2023 · Sep 4, 2023 · Sep 8, 2023 · corepointer
diff --git a/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp b/src/compiler/lowering/SpecializeGenericFunctionsPass.cpp
@@ -188,6 +188,7 @@ namespace {
         std::multimap<std::string, func::FuncOp> specializedVersions;
         std::set<func::FuncOp> visited;
         std::set<func::FuncOp> called;
+        std::set<func::FuncOp> templateFunctions;
 
         const DaphneUserConfig& userConfig;
         std::shared_ptr<spdlog::logger> logger;
@@ -304,6 +305,7 @@ namespace {
                 calledFunction->getLoc().print(stream);
                 logger->debug("calledFunction\n\tname: {}\n\tlocation: {}", calledFunction.getSymName().str(), s);
             }
+            templateFunctions.insert(calledFunction);
             return specializedFunc;
         }
 
@@ -417,7 +419,7 @@ void SpecializeGenericFunctionsPass::runOnOperation() {
         entryFunctions.push_back(entry.second);
     }
     for(const auto &function : entryFunctions) {
-        if(isFunctionTemplate(function) || visited.count(function))
+        if(isFunctionTemplate(function) || visited.count(function) || templateFunctions.count(function))
             continue;
         if(!inferTypesInFunction(function)) {
             return signalPassFailure();
@@ -431,7 +433,7 @@ void SpecializeGenericFunctionsPass::runOnOperation() {
             continue;
         // Remove a function that was present before creating specializations,
         // if it is never called.
-        if(!called.count(f.second))
+        if(!called.count(f.second) || templateFunctions.count(f.second))
             f.second.erase();
     }
 }

diff --git a/src/compiler/utils/CompilerUtils.cpp b/src/compiler/utils/CompilerUtils.cpp
@@ -20,6 +20,25 @@
 
 #include <string>
 
+// **************************************************************************************************
+// Specializations of isConstantHelper for string types
+// **************************************************************************************************
+
+template<>
+std::pair<bool, std::string> CompilerUtils::isConstantHelper<std::string, mlir::StringAttr>(mlir::Value v, std::function<std::string(const mlir::StringAttr&)> func) {
+    // v counts as a constant string if it is defined by a DaphneIR or an arith
+    // constant op holding a string attribute; func converts that attribute.
+    if(auto daphneConst = v.getDefiningOp<mlir::daphne::ConstantOp>())
+        if(auto strAttr = daphneConst.getValue().dyn_cast<mlir::StringAttr>())
+            return {true, func(strAttr)};
+
+    if(auto arithConst = v.getDefiningOp<mlir::arith::ConstantOp>())
+        if(auto strAttr = arithConst.getValue().dyn_cast<mlir::StringAttr>())
+            return {true, func(strAttr)};
+
+    return {false, std::string()};
+}
+
 // **************************************************************************************************
 // Specializations of isConstant for various types
 // **************************************************************************************************

diff --git a/src/ir/daphneir/DaphneInferFrameLabelsOpInterface.cpp b/src/ir/daphneir/DaphneInferFrameLabelsOpInterface.cpp
@@ -48,6 +48,24 @@ void inferFrameLabels_ExtractOrFilterRowOp(ExtractOrFilterRowOp * op) {
 // Frame label inference implementations
 // ****************************************************************************
 
+void daphne::ReadOp::inferFrameLabels() {
+    // Labels are only inferred if the result is a frame.
+    if (!getRes().getType().dyn_cast<daphne::FrameType>())
+        return;
+    // The meta data is only looked up for a compile-time constant file name.
+    if (!CompilerUtils::isConstant<std::string>(getFileName()).first)
+        return;
+
+    FileMetaData fmd = CompilerUtils::getFileMetaData(getFileName());
+    // Meta data without labels is passed on as a null pointer.
+    std::vector<std::string> * labels = nullptr;
+    if (!fmd.labels.empty())
+        labels = new std::vector<std::string>(fmd.labels);
+
+    Value res = getResult();
+    res.setType(res.getType().dyn_cast<daphne::FrameType>().withLabels(labels));
+}
+
 void daphne::ColBindOp::inferFrameLabels() {
     auto ftLhs = getLhs().getType().dyn_cast<daphne::FrameType>();
     auto ftRhs = getRhs().getType().dyn_cast<daphne::FrameType>();

diff --git a/src/ir/daphneir/DaphneInferShapeOpInterface.cpp b/src/ir/daphneir/DaphneInferShapeOpInterface.cpp
@@ -206,8 +206,13 @@ std::vector<std::pair<ssize_t, ssize_t>> daphne::MatMulOp::inferShape() {
 }
 
 std::vector<std::pair<ssize_t, ssize_t>> daphne::ReadOp::inferShape() {
-    FileMetaData fmd = CompilerUtils::getFileMetaData(getFileName());
-    return {{fmd.numRows, fmd.numCols}};
+    // Without a compile-time constant file name, the meta data is not looked
+    // up and both dimensions are returned as -1 (unknown).
+    if (!CompilerUtils::isConstant<std::string>(getFileName()).first)
+        return {{-1, -1}};
+
+    FileMetaData fmd = CompilerUtils::getFileMetaData(getFileName());
+    return {{fmd.numRows, fmd.numCols}};
 }
 
 std::vector<std::pair<ssize_t, ssize_t>> daphne::OrderOp::inferShape() {

diff --git a/src/ir/daphneir/DaphneInferTypesOpInterface.cpp b/src/ir/daphneir/DaphneInferTypesOpInterface.cpp
@@ -276,6 +276,60 @@ std::vector<Type> daphne::OrderOp::inferTypes() {
     return {t};
 }
 
+// Maps a ValueTypeCode to the MLIR type created via builder; throws std::runtime_error for any code not listed below.
+mlir::Type mlirTypeForCode(ValueTypeCode type, Builder builder) {
+    switch(type) {
+        case ValueTypeCode::SI8:  return builder.getIntegerType(8, true);
+        case ValueTypeCode::SI32: return builder.getIntegerType(32, true);
+        case ValueTypeCode::SI64: return builder.getIntegerType(64, true);
+        case ValueTypeCode::UI8:  return builder.getIntegerType(8, false);
+        case ValueTypeCode::UI32: return builder.getIntegerType(32, false);
+        case ValueTypeCode::UI64: return builder.getIntegerType(64, false);
+        case ValueTypeCode::F32: return builder.getF32Type();
+        case ValueTypeCode::F64: return builder.getF64Type();
+        default: throw std::runtime_error("mlirTypeForCode: unknown value type code");
+    }
+}
+
+// Infers the result type from the meta data of the file to read. If the file
+// name is not a compile-time constant, the value/column type stays unknown.
+std::vector<Type> daphne::ReadOp::inferTypes() {
+    Builder builder(getContext());
+    Type resTy = getRes().getType();
+    const bool fileNameKnown = CompilerUtils::isConstant<std::string>(getFileName()).first;
+
+    if (resTy.dyn_cast<daphne::MatrixType>()) {
+        if (!fileNameKnown)
+            return {mlir::daphne::MatrixType::get(getContext(), daphne::UnknownType::get(getContext()))};
+
+        // If the meta data specifies an individual value type per column
+        // (fmd.isSingleValueType == false), the type of the first column
+        // is silently used for the entire matrix.
+        // TODO: add sparsity information here already (if present), currently not possible as many other ops
+        //  just take input types as output types, which is incorrect for sparsity
+        FileMetaData fmd = CompilerUtils::getFileMetaData(getFileName());
+        return {mlir::daphne::MatrixType::get(getContext(), mlirTypeForCode(fmd.schema[0], builder))};
+    }
+
+    if (resTy.dyn_cast<daphne::FrameType>()) {
+        if (!fileNameKnown)
+            return {mlir::daphne::FrameType::get(builder.getContext(), {daphne::UnknownType::get(getContext())})};
+
+        FileMetaData fmd = CompilerUtils::getFileMetaData(getFileName());
+        std::vector<mlir::Type> colTypes;
+        if (fmd.isSingleValueType) {
+            for (size_t c = 0; c < fmd.numCols; c++)
+                colTypes.push_back(mlirTypeForCode(fmd.schema[0], builder));
+        } else {
+            for (ValueTypeCode code : fmd.schema)
+                colTypes.push_back(mlirTypeForCode(code, builder));
+        }
+        return {mlir::daphne::FrameType::get(builder.getContext(), colTypes)};
+    }
+    // Neither a matrix nor a frame result: the type stays unknown.
+    return {daphne::UnknownType::get(getContext())};
+}
+
 std::vector<Type> daphne::SliceColOp::inferTypes() {
     Type u = daphne::UnknownType::get(getContext());
     Type srcTy = getSource().getType();

diff --git a/src/ir/daphneir/DaphneOps.td b/src/ir/daphneir/DaphneOps.td
@@ -1293,7 +1293,9 @@ def Daphne_PrintOp : Daphne_Op<"print"> {
 
 // TODO Take asynchronous read into account.
 def Daphne_ReadOp : Daphne_Op<"read", [
+    DeclareOpInterfaceMethods<InferTypesOpInterface>,
     DeclareOpInterfaceMethods<InferShapeOpInterface>,
+    DeclareOpInterfaceMethods<InferFrameLabelsOpInterface>,
     DeclareOpInterfaceMethods<InferSparsityOpInterface>
 ]> {
     // TODO We might add arguments for a UDF later.

diff --git a/src/parser/ParserUtils.h b/src/parser/ParserUtils.h
@@ -188,20 +188,6 @@ class ParserUtils {
         throw std::runtime_error("unsupported value type: " + name);
     }
 
-    mlir::Type mlirTypeForCode(ValueTypeCode type) {
-        switch(type) {
-            case ValueTypeCode::SI8:  return builder.getIntegerType(8, true);
-            case ValueTypeCode::SI32: return builder.getIntegerType(32, true);
-            case ValueTypeCode::SI64: return builder.getIntegerType(64, true);
-            case ValueTypeCode::UI8:  return builder.getIntegerType(8, false);
-            case ValueTypeCode::UI32: return builder.getIntegerType(32, false);
-            case ValueTypeCode::UI64: return builder.getIntegerType(64, false);
-            case ValueTypeCode::F32: return builder.getF32Type();
-            case ValueTypeCode::F64: return builder.getF64Type();
-            default: throw std::runtime_error("ParserUtils::mlirTypeForCode: unknown value type code");
-        }
-    }
-
     // ************************************************************************
     // Misc
     // ************************************************************************

diff --git a/src/parser/daphnedsl/DaphneDSLBuiltins.cpp b/src/parser/daphnedsl/DaphneDSLBuiltins.cpp
@@ -1009,47 +1009,19 @@ antlrcpp::Any DaphneDSLBuiltins::build(mlir::Location loc, const std::string & f
                 loc, arg, newline, err
         );
     }
-    if(func == "readFrame" || func == "readMatrix") {
-        checkNumArgsExact(func, numArgs, 1);
-
-        mlir::Value filename = args[0];
-        FileMetaData fmd = CompilerUtils::getFileMetaData(filename);
-
-        mlir::Type resType;
 
-        if(func == "readFrame") {
-            std::vector<mlir::Type> cts;
-            if(fmd.isSingleValueType)
-                for(size_t i = 0; i < fmd.numCols; i++)
-                    cts.push_back(utils.mlirTypeForCode(fmd.schema[0]));
-            else
-                for(ValueTypeCode vtc : fmd.schema)
-                    cts.push_back(utils.mlirTypeForCode(vtc));
-
-            std::vector<std::string> * labels;
-            if(fmd.labels.empty())
-                labels = nullptr;
-            else
-                labels = new std::vector<std::string>(fmd.labels);
+    if (func == "readMatrix") {
+        checkNumArgsExact(func, numArgs, 1);
+        mlir::Type resType = mlir::daphne::MatrixType::get(builder.getContext(), utils.unknownType);
+        return static_cast<mlir::Value>(builder.create<ReadOp>(loc, resType, /*filename = */ args[0]));
+    }
 
-            resType = mlir::daphne::FrameType::get(
-                    // TODO Inserting #rows/#cols here could cause problems, if
-                    // the frame is involved in any SCF ops (if/while/for).
-                    builder.getContext(), cts, fmd.numRows, fmd.numCols, labels
-            );
-        }
-        else // func == "read.matrix"
-            // If an individual value type was specified per column
-            // (fmd.isSingleValueType == false), then this silently uses the
-            // type of the first column.
-            // TODO: add sparsity information here already (if present), currently not possible as many other ops
-            //  just take input types as output types, which is incorrect for sparsity
-            resType = utils.matrixOf(utils.mlirTypeForCode(fmd.schema[0]));
-
-        return static_cast<mlir::Value>(builder.create<ReadOp>(
-                loc, resType, filename
-        ));
+    if (func == "readFrame") {
+        checkNumArgsExact(func, numArgs, 1);
+        mlir::Type resType = mlir::daphne::FrameType::get(builder.getContext(), {utils.unknownType});
+        return static_cast<mlir::Value>(builder.create<ReadOp>(loc, resType, /*filename = */ args[0]));
     }
+
     if(func == "writeFrame" || func == "writeMatrix" || func == "write") {
         // Note that the type of arg already indicates if it is a frame or a
         // matrix.

diff --git a/test/api/cli/io/ReadCsv1.csv b/test/api/cli/io/ReadCsv1.csv
@@ -0,0 +1,2 @@
+-0.1,-0.2,0.1,0.2
+3.14,5.41,6.22216,5
diff --git a/test/api/cli/io/ReadCsv1.csv.meta b/test/api/cli/io/ReadCsv1.csv.meta
@@ -0,0 +1,6 @@
+{
+    "numRows": 2,
+    "numCols": 4,
+    "valueType": "f64",
+    "numNonZeros": 0
+}
diff --git a/test/api/cli/io/ReadTest.cpp b/test/api/cli/io/ReadTest.cpp
@@ -38,4 +38,20 @@ TEST_CASE("readSparse", TAG_IO) {
         "--args",
         arg.c_str());
 }
-#endif
+#endif
+
+// Reads a frame inside a UDF that receives the file path as a parameter.
+TEST_CASE("readFrameFromCSV", TAG_IO) {
+    compareDaphneToRef(dirPath + "testReadFrame.txt", dirPath + "testReadFrame.daphne");
+}
+
+// Reads a matrix inside a UDF that receives the file path as a parameter.
+TEST_CASE("readMatrixFromCSV", TAG_IO) {
+    compareDaphneToRef(dirPath + "testReadMatrix.txt", dirPath + "testReadMatrix.daphne");
+}
+
+// TODO Enable once reading from a dynamically computed file path works.
+// TEST_CASE("readMatrixFromCSV_DynamicPath", TAG_IO)
+// {
+//     compareDaphneToRef(dirPath + "testReadMatrix.txt", dirPath + "testReadMatrix_DynamicPath.daphne");
+// }
diff --git a/test/api/cli/io/testReadFrame.daphne b/test/api/cli/io/testReadFrame.daphne
@@ -0,0 +1,6 @@
+# Test reading from a file when the file path is not trivially constant (i.e., a parameter to a UDF)
+def readFrameFromCSV(path: str) {
+    print(readFrame(path));
+}
+
+readFrameFromCSV("test/api/cli/io/ReadCsv1.csv");
diff --git a/test/api/cli/io/testReadFrame.txt b/test/api/cli/io/testReadFrame.txt
@@ -0,0 +1,3 @@
+Frame(2x4, [col_0:double, col_1:double, col_2:double, col_3:double])
+-0.1 -0.2 0.1 0.2
+3.14 5.41 6.22216 5
diff --git a/test/api/cli/io/testReadMatrix.daphne b/test/api/cli/io/testReadMatrix.daphne
@@ -0,0 +1,6 @@
+# Test reading from a file when the file path is not trivially constant (i.e., a parameter to a UDF)
+def readMatrixFromCSV(path: str) {
+    print(readMatrix(path));
+}
+
+readMatrixFromCSV("test/api/cli/io/ReadCsv1.csv");
diff --git a/test/api/cli/io/testReadMatrix.txt b/test/api/cli/io/testReadMatrix.txt
@@ -0,0 +1,3 @@
+DenseMatrix(2x4, double)
+-0.1 -0.2 0.1 0.2
+3.14 5.41 6.22216 5
diff --git a/test/api/cli/io/testReadMatrix_DynamicPath.daphne b/test/api/cli/io/testReadMatrix_DynamicPath.daphne
@@ -0,0 +1,5 @@
+# Test dynamic computation of string path -> does not yet work!
+i = 1;
+filename = "test/api/cli/io/ReadCsv" + i + ".csv";
+m = readMatrix(filename);
+print(m);