Skip to content

Commit 5aa06f7

Browse files
authored
Merge pull request #183 from yingmanwumen/feat_read_csv
feat(io): complete read_csv
2 parents b6c2f10 + 99960e0 commit 5aa06f7

File tree

12 files changed

+304
-35
lines changed

12 files changed

+304
-35
lines changed

tests/test_csv/00_test_int.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
int32, int64
2+
-2147483648, -9223372036854774808
3+
2147483647, 9223372036854774807
4+
+2147483647, +9223372036854774807

tests/test_csv/01_test_float.csv

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
float32, float64
2+
3.14159, 3.14159
3+
3.14e-1, 3.14e+1
4+
3.14E-1, 3.14E+1

tests/test_csv/02_test_bool.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
bool
2+
true
3+
false
4+
True
5+
False

tests/test_csv/03_test_string.csv

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
string
2+
String
3+
"Hello, ""World"""
4+
""
5+
"Long
6+
String"

tests/test_csv/04_test_nan.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
int, float, string, bool
2+
, 1.0,"1", True
3+
2, ,"2", False
4+
3, 3.0, , True
5+
4, 4.0,"4",

tests/test_exceptions.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,44 @@ class _Foo:
151151
{"fn": lambda x: x < 2, "then": 1.0},
152152
TypeError
153153
),
154+
(
155+
ul.read_csv, # mismatch dtype
156+
{
157+
"path": "./test_csv/00_test_int.csv",
158+
"schema": {"int32": "bool", "int64": "string"}
159+
},
160+
TypeError
161+
),
162+
(
163+
ul.read_csv, # mismatch dtype test case 2
164+
{
165+
"path": "./test_csv/04_test_nan.csv",
166+
"schema": {"int": "bool",
167+
"float": "int",
168+
"string": "float",
169+
"bool": "int"}
170+
},
171+
TypeError
172+
),
173+
(
174+
ul.read_csv,
175+
{
176+
"path": "./non_exists_csv.csv",
177+
"schema": {"whatever": "int32"}
178+
},
179+
IOError
180+
),
181+
(
182+
ul.read_csv, # wrong dtype
183+
{
184+
"path": "./test_csv/04_test_nan.csv",
185+
"schema": {"int": "integer",
186+
"float": "double",
187+
"string": "str",
188+
"bool": "boolean"}
189+
},
190+
ValueError
191+
)
154192
],
155193
)
156194
def test_exceptions(

tests/test_inputs.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from typing import Callable, Dict, List
2+
3+
import pytest
4+
import ulist as ul
5+
from ulist.utils import check_test_result
6+
7+
8+
@pytest.mark.parametrize(
    "test_method, args, kwargs, expected_value",
    [
        # Signed 32/64-bit integers, including an explicit leading "+".
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/00_test_int.csv",
                "schema": {"int32": "int32", "int64": "int64"},
            },
            {
                "int32": [-2147483648, 2147483647, +2147483647],
                "int64": [
                    -9223372036854774808,
                    9223372036854774807,
                    +9223372036854774807,
                ],
            },
        ),
        # Plain and exponent-notation floats.
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/01_test_float.csv",
                "schema": {"float32": "float32", "float64": "float64"},
            },
            {
                # Precision problem in float32
                "float32": [3.14159, 0.314, 0.314],
                "float64": [3.14159, 31.4, 31.4],
            },
        ),
        # Lower-case and capitalised boolean spellings.
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/02_test_bool.csv",
                "schema": {"bool": "bool"},
            },
            {
                "bool": [True, False, True, False],
            },
        ),
        # Quoted strings: escaped quotes, empty field, embedded newline.
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/03_test_string.csv",
                "schema": {"string": "string"},
            },
            {
                "string": [
                    "String",
                    'Hello, "World"',
                    None,
                    "Long\nString",
                ],
            },
        ),
        # One missing value per column.
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/04_test_nan.csv",
                "schema": {
                    "int": "int",
                    "float": "float",
                    "string": "string",
                    "bool": "bool",
                },
            },
            {
                "int": [None, 2, 3, 4],
                "float": [1.0, None, 3.0, 4.0],
                "string": ["1", "2", None, "4"],
                "bool": [True, False, True, None],
            },
        ),
        # schema.len() < field.len()
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/04_test_nan.csv",
                "schema": {
                    "int": "int",
                    "bool": "bool",
                },
            },
            {
                "int": [None, 2, 3, 4],
                "bool": [True, False, True, None],
            },
        ),
        # schema.len() > field.len()
        (
            ul.read_csv,
            (),
            {
                "path": "./test_csv/02_test_bool.csv",
                "schema": {
                    "foo": "int",
                    "bar": "bool",
                    "bool": "bool",
                },
            },
            {
                "foo": [],
                "bar": [],
                "bool": [True, False, True, False],
            },
        ),
    ],
)
def test_constructors(
    test_method: Callable,
    args: tuple,
    kwargs: dict,
    expected_value: Dict[str, List],
) -> None:
    """Run the reader under test and hand its output to the shared checker.

    Each case calls `test_method(*args, **kwargs)` and passes the result,
    together with the expected per-column values, to `check_test_result`.
    The csv path is passed as the checker's first argument (presumably as
    the label of the case; confirm against `ulist.utils`).
    """
    actual = test_method(*args, **kwargs)
    check_test_result(kwargs["path"], test_method, actual, expected_value)

ulist/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ name = "ulist"
1313
# crate-type = ["cdylib", "rlib"]
1414
crate-type = ["cdylib"]
1515

16+
[dependencies]
17+
csv = "1.1"
18+
1619
[dependencies.pyo3]
1720
version = "0.16.4"
1821
features = ["extension-module"]

ulist/python/ulist/io.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,28 @@
11
from __future__ import annotations # To avoid circular import.
22
from .ulist import read_csv as _read_csv
3-
from typing import List, TYPE_CHECKING
3+
from typing import Dict, TYPE_CHECKING
44

55
if TYPE_CHECKING: # To avoid circular import.
66
from . import UltraFastList
77

88

9-
def read_csv() -> List[UltraFastList]:
9+
def read_csv(path: str, schema: Dict[str, str]) -> Dict[str, UltraFastList]:
    """Read the csv file.

    Args:
        path (str):
            The path of the csv file.
        schema (Dict[str, str]):
            The structure of the csv file, mapping each column name to
            the name of its dtype, such as
            `{"foo": "int", "bar": "bool"}`.

    Returns:
        Dict[str, UltraFastList]:
            One `UltraFastList` per schema entry, keyed by column name
            and inserted in the same order as `schema`.
    """
    from . import UltraFastList  # To avoid circular import.

    # The extension receives the schema as an ordered sequence of
    # (name, dtype) pairs; its i-th result is paired with the i-th entry.
    schema_seq = list(schema.items())
    columns = _read_csv(path, schema_seq)
    # Index into `columns` (rather than zip) so that a result shorter than
    # the schema still fails loudly instead of silently dropping columns.
    return {
        name: UltraFastList(columns[index])
        for index, (name, _dtype) in enumerate(schema_seq)
    }

ulist/python/ulist/ulist.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from lib2to3.pgen2.token import OP
2-
from typing import List, Sequence, Dict, Set, Optional
2+
from typing import List, Sequence, Dict, Set, Optional, Tuple
33

44
from .typedef import ELEM, LIST_PY, NUM, NUM_LIST_RS, LIST_RS, ELEM_OPT
55

@@ -327,7 +327,7 @@ def arange32(start: int, stop: int, step: int) -> IntegerList32: ...
327327
def arange64(start: int, stop: int, step: int) -> IntegerList64: ...
328328

329329

330-
def read_csv() -> list: ...
330+
# Read the csv file at `path`.
# `schema` is an ordered sequence of (column name, dtype name) pairs; the
# Python wrapper in io.py builds it from the items of its schema dict and
# pairs the i-th returned list with the i-th schema entry.
def read_csv(path: str, schema: Sequence[Tuple[str, str]]) -> List[LIST_RS]: ...
331331

332332

333333
def select_bool(

0 commit comments

Comments
 (0)