36 improve api errors on wrong datastructures (#41)
* Added a DataStructure typo error message. Added a hint to the error message when the typo is close enough to a valid input.

* Fixed mypy and flake8 errors.

---------

Co-authored-by: Francisco Javier Hernández del Caño <javier.hernandez@meaningfuldata.eu>
mla2001 and javihern98 authored Oct 29, 2024
1 parent 70932d5 commit 2e82bfa
Showing 27 changed files with 661 additions and 19 deletions.
14 changes: 9 additions & 5 deletions src/vtlengine/API/_InternalApi.py
@@ -8,7 +8,9 @@
 from vtlengine.AST import PersistentAssignment, Start
 from vtlengine.DataTypes import SCALAR_TYPES
-from vtlengine.Model import ValueDomain, Dataset, Scalar, Component, Role, ExternalRoutine
+from vtlengine.Exceptions import check_key
+from vtlengine.Model import (ValueDomain, Dataset, Scalar, Component, Role,
+                             ExternalRoutine, Role_keys)
 from vtlengine.files.parser import _validate_pandas, _fill_dataset_empty_data

 base_path = Path(__file__).parent
@@ -30,15 +32,17 @@ def _load_dataset_from_structure(structures: Dict[str, Any]) -> Dict[str, Any]:
     if "datasets" in structures:
         for dataset_json in structures["datasets"]:
             dataset_name = dataset_json["name"]
-            components = {
-                component["name"]: Component(
+            components = {}
+
+            for component in dataset_json["DataStructure"]:
+                check_key("data_type", SCALAR_TYPES.keys(), component["type"])
+                check_key("role", Role_keys, component["role"])
+                components[component["name"]] = Component(
                     name=component["name"],
                     data_type=SCALAR_TYPES[component["type"]],
                     role=Role(component["role"]),
                     nullable=component["nullable"],
                 )
-                for component in dataset_json["DataStructure"]
-            }

             datasets[dataset_name] = Dataset(name=dataset_name, components=components, data=None)
     if "scalars" in structures:
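As context for this change, here is a minimal usage sketch (not part of the commit) of the stricter structure loading. It assumes the private helper _load_dataset_from_structure and SemanticError are importable as shown, and that "Integer" is a valid SCALAR_TYPES key; the dict is a trimmed version of tests/DataLoad/data/DataStructure/input/IK-1-1.json below.

```python
from vtlengine.API._InternalApi import _load_dataset_from_structure
from vtlengine.Exceptions import SemanticError  # assumed to be exposed from this module

structure = {
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                # "Identfier" is a deliberate typo for "Identifier"
                {"name": "Id_1", "role": "Identfier", "type": "Integer", "nullable": False},
                {"name": "Me_1", "role": "Measure", "type": "String", "nullable": True},
            ],
        }
    ]
}

try:
    _load_dataset_from_structure(structure)
except SemanticError as err:
    # Expected to carry message 0-1-1-13, e.g.
    # "Invalid key on role field: Identfier. Did you mean Identifier?."
    print(err)
```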
44 changes: 44 additions & 0 deletions src/vtlengine/Exceptions/__init__.py
@@ -130,3 +130,47 @@ def __init__(
             super().__init__(message, lino, colno, code)
         else:
             super().__init__(message, lino, colno)
+
+
+def check_key(field: str, dict_keys: Any, key: str) -> None:
+    if key not in dict_keys:
+        closest_key = find_closest_key(dict_keys, key)
+        message_append = f". Did you mean {closest_key}?" if closest_key else ""
+        raise SemanticError("0-1-1-13", field=field, key=key, closest_key=message_append)
+
+
+def find_closest_key(dict_keys: Any, key: str) -> Optional[str]:
+    closest_key = None
+    max_distance = 3
+    min_distance = float('inf')
+
+    for dict_key in dict_keys:
+        distance = key_distance(key, dict_key)
+        if distance < min_distance:
+            min_distance = distance
+            closest_key = dict_key
+
+    if min_distance <= max_distance:
+        return closest_key
+    return None
+
+
+def key_distance(key: str, objetive: str) -> int:
+    dp = [[0] * (len(objetive) + 1) for _ in range(len(key) + 1)]
+
+    for i in range(len(key) + 1):
+        dp[i][0] = i
+    for j in range(len(objetive) + 1):
+        dp[0][j] = j
+
+    for i in range(1, len(key) + 1):
+        for j in range(1, len(objetive) + 1):
+            if key[i - 1] == objetive[j - 1]:
+                cost = 0
+            else:
+                cost = 1
+            dp[i][j] = min(dp[i - 1][j] + 1,
+                           dp[i][j - 1] + 1,
+                           dp[i - 1][j - 1] + cost)
+
+    return dp[-1][-1]
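A few illustrative checks (not part of the commit) of what the new helpers compute, assuming they are imported from vtlengine.Exceptions as added above: key_distance is a plain Levenshtein edit distance, and find_closest_key only returns a suggestion within distance 3.

```python
from vtlengine.Exceptions import check_key, find_closest_key, key_distance
from vtlengine.Model import Role_keys  # ["Identifier", "Attribute", "Measure"]

# key_distance computes the classic Levenshtein (edit) distance.
assert key_distance("Identfier", "Identifier") == 1  # one missing character
assert key_distance("Masure", "Measure") == 1
assert key_distance("blabla", "Identifier") > 3      # too far to suggest anything

# find_closest_key returns the nearest candidate only when it is at most 3 edits away.
assert find_closest_key(Role_keys, "Identfier") == "Identifier"
assert find_closest_key(Role_keys, "blabla") is None

# check_key passes silently for valid keys and raises SemanticError 0-1-1-13 otherwise,
# appending the "Did you mean ...?" hint when a close match exists.
check_key("role", Role_keys, "Measure")
```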
16 changes: 8 additions & 8 deletions src/vtlengine/Exceptions/messages.py
@@ -20,21 +20,21 @@
     # Infer Data Structure errors
     # "0-1-1-1": "A csv file or a dataframe is required.",
     "0-1-1-2": "The provided {source} must have data to can infer the data structure.",
-    "0-1-1-3": "Can not infer data structure: {errors}",
+    "0-1-1-3": "Can not infer data structure: {errors}.",
     "0-1-1-4": "On Dataset {name} loading: An identifier cannot have null values, found null "
               "values on {null_identifier}.",
     "0-1-1-5": "On Dataset {name} loading: Datasets without identifiers must have 0 or "
               "1 datapoints.",
     "0-1-1-6": "Duplicated records. Combination of identifiers are repeated.",
-    "0-1-1-7": "G1 - The provided CSV file is empty",
-    "0-1-1-8": "The following identifiers {ids} were not found , review file {file}",
+    "0-1-1-7": "G1 - The provided CSV file is empty.",
+    "0-1-1-8": "The following identifiers {ids} were not found , review file {file}.",
     "0-1-1-9": "You have a problem related with commas, review rfc4180 standard, review file "
-               "{file}",
+               "{file}.",
     "0-1-1-10": "On Dataset {name} loading: Component {comp_name} is missing in Datapoints.",
-    "0-1-1-11": "Wrong data in the file for this scalardataset {name}",
-    "0-1-1-12": "On Dataset {name} loading: not possible to cast column {column} to {type}",
-    #
-    "0-1-0-1": " Trying to redefine input datasets {dataset}", # Semantic Error
+    "0-1-1-11": "Wrong data in the file for this scalardataset {name}.",
+    "0-1-1-12": "On Dataset {name} loading: not possible to cast column {column} to {type}.",
+    "0-1-1-13": "Invalid key on {field} field: {key}{closest_key}.",
+    "0-1-0-1": " Trying to redefine input datasets {dataset}.", # Semantic Error
     # ------------Operators-------------
     # General Semantic errors
     # "1-1-1-1": "At op {op}. Unable to validate types.",
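For reference, a tiny rendering sketch (illustrative only) of the shape of the new 0-1-1-13 message, assuming the template is filled with str.format-style fields like the surrounding entries:

```python
template = "Invalid key on {field} field: {key}{closest_key}."
hint = ". Did you mean Identifier?"  # built by check_key as message_append
print(template.format(field="role", key="Identfier", closest_key=hint))
# Invalid key on role field: Identfier. Did you mean Identifier?.
```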
7 changes: 7 additions & 0 deletions src/vtlengine/Model/__init__.py
@@ -41,6 +41,13 @@ def __eq__(self, other: Any) -> bool:
         return same_name and same_type and same_value


+Role_keys = [
+    "Identifier",
+    "Attribute",
+    "Measure",
+]
+
+
 class Role(Enum):
     """
     Enum class for the role of a component (Identifier, Attribute, Measure)
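A small sanity check (not part of the commit) relating the new Role_keys list to the Role enum; it assumes the enum's values are exactly these strings, as implied by the Role(component["role"]) call in _InternalApi.py:

```python
from vtlengine.Model import Role, Role_keys

for key in Role_keys:
    # Constructing the enum member by value should round-trip the string.
    assert Role(key).value == key  # "Identifier", "Attribute", "Measure"
```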
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_1-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_2-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_3-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_4-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_5-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_6-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
2 changes: 2 additions & 0 deletions tests/DataLoad/data/DataSet/input/IK_7-1.csv
@@ -0,0 +1,2 @@
Id_1,Id_2,Id_3,Id_4,Id_5,Id_6,Id_7,Id_8,Me_1
1,0.2e-3,TRUE,"2010-01-01/2010-12-01","2014-01-01",2015M03,blabla,"Q",blabla
63 changes: 63 additions & 0 deletions tests/DataLoad/data/DataStructure/input/IK-1-1.json
@@ -0,0 +1,63 @@
{
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {
                    "name": "Id_1",
                    "role": "Identfier",
                    "type": "Integer",
                    "nullable": false
                },
                {
                    "name": "Id_2",
                    "role": "Identifier",
                    "type": "Number",
                    "nullable": false
                },
                {
                    "name": "Id_3",
                    "role": "Identifier",
                    "type": "Boolean",
                    "nullable": false
                },
                {
                    "name": "Id_4",
                    "role": "Identifier",
                    "type": "Time",
                    "nullable": false
                },
                {
                    "name": "Id_5",
                    "role": "Identifier",
                    "type": "Date",
                    "nullable": false
                },
                {
                    "name": "Id_6",
                    "role": "Identifier",
                    "type": "Time_Period",
                    "nullable": false
                },
                {
                    "name": "Id_7",
                    "role": "Identifier",
                    "type": "String",
                    "nullable": false
                },
                {
                    "name": "Id_8",
                    "role": "Identifier",
                    "type": "Duration",
                    "nullable": false
                },
                {
                    "name": "Me_1",
                    "role": "Measure",
                    "type": "String",
                    "nullable": true
                }
            ]
        }
    ]
}
63 changes: 63 additions & 0 deletions tests/DataLoad/data/DataStructure/input/IK-2-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
{
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {
                    "name": "Id_1",
                    "role": "Identifier",
                    "type": "Integer",
                    "nullable": false
                },
                {
                    "name": "Id_2",
                    "role": "Identifier",
                    "type": "Number",
                    "nullable": false
                },
                {
                    "name": "Id_3",
                    "role": "Identifier",
                    "type": "Boolean",
                    "nullable": false
                },
                {
                    "name": "Id_4",
                    "role": "Identifier",
                    "type": "Time",
                    "nullable": false
                },
                {
                    "name": "Id_5",
                    "role": "Identifier",
                    "type": "Date",
                    "nullable": false
                },
                {
                    "name": "Id_6",
                    "role": "Identifier",
                    "type": "Time_Period",
                    "nullable": false
                },
                {
                    "name": "Id_7",
                    "role": "Identifier",
                    "type": "String",
                    "nullable": false
                },
                {
                    "name": "Id_8",
                    "role": "Identifier",
                    "type": "Duration",
                    "nullable": false
                },
                {
                    "name": "Me_1",
                    "role": "Masure",
                    "type": "String",
                    "nullable": true
                }
            ]
        }
    ]
}
63 changes: 63 additions & 0 deletions tests/DataLoad/data/DataStructure/input/IK-3-1.json
@@ -0,0 +1,63 @@
{
    "datasets": [
        {
            "name": "DS_1",
            "DataStructure": [
                {
                    "name": "Id_1",
                    "role": "Identifier",
                    "type": "Integer",
                    "nullable": false
                },
                {
                    "name": "Id_2",
                    "role": "Identifier",
                    "type": "Numver",
                    "nullable": false
                },
                {
                    "name": "Id_3",
                    "role": "Identifier",
                    "type": "Boolean",
                    "nullable": false
                },
                {
                    "name": "Id_4",
                    "role": "Identifier",
                    "type": "Time",
                    "nullable": false
                },
                {
                    "name": "Id_5",
                    "role": "Identifier",
                    "type": "Date",
                    "nullable": false
                },
                {
                    "name": "Id_6",
                    "role": "Identifier",
                    "type": "Time_Period",
                    "nullable": false
                },
                {
                    "name": "Id_7",
                    "role": "Identifier",
                    "type": "String",
                    "nullable": false
                },
                {
                    "name": "Id_8",
                    "role": "Identifier",
                    "type": "Duration",
                    "nullable": false
                },
                {
                    "name": "Me_1",
                    "role": "Measure",
                    "type": "String",
                    "nullable": true
                }
            ]
        }
    ]
}