-
Notifications
You must be signed in to change notification settings - Fork 3
/
split.py
71 lines (52 loc) · 2.06 KB
/
split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from typing import Dict
import marshmallow as ma
import pandas as pd
from .base import DataFrameSchemaMeta, DataFrameSchemaOpts
from .converters import dtype_to_field
class SplitDataFrameSchemaMeta(DataFrameSchemaMeta):
    """Metaclass that builds the ``data``/``index``/``columns`` fields of a
    split-oriented schema from the schema options."""

    @classmethod
    def get_fields(
        mcs, opts: DataFrameSchemaOpts, dict_cls
    ) -> Dict[str, ma.fields.Field]:
        """Build the marshmallow fields described by ``opts``.

        Args:
            opts: Schema options. This method reads ``opts.dtypes`` (and its
                ``.dtypes`` / ``.columns`` attributes) and ``opts.index_dtype``.
            dict_cls: Callable returning the mapping used to hold the fields.

        Returns:
            An empty ``dict_cls()`` when ``opts.dtypes`` is ``None``; otherwise
            a mapping with the keys ``"data"``, ``"index"`` and ``"columns"``
            (inserted in that order).
        """
        # Nothing to derive without dtypes: hand back an empty mapping.
        if opts.dtypes is None:
            return dict_cls()

        idx_dtype = opts.index_dtype
        has_index_dtype = idx_dtype is not None
        schema_fields: Dict[str, ma.fields.Field] = dict_cls()

        # Each row is a tuple holding one converted field per entry in
        # ``opts.dtypes.dtypes``.
        row_field = ma.fields.Tuple(
            [dtype_to_field(column_dtype) for column_dtype in opts.dtypes.dtypes]
        )
        schema_fields["data"] = ma.fields.List(row_field, required=True)

        # The index is only required (and typed) when an index dtype was
        # configured; otherwise any raw value is accepted.
        if has_index_dtype:
            idx_field = dtype_to_field(idx_dtype)
        else:
            idx_field = ma.fields.Raw()
        schema_fields["index"] = ma.fields.List(
            idx_field, required=has_index_dtype
        )

        # The incoming column list must equal the configured columns.
        expected_columns = ma.validate.Equal(opts.dtypes.columns)
        schema_fields["columns"] = ma.fields.List(
            ma.fields.String,
            required=True,
            validate=expected_columns,
        )
        return schema_fields
class SplitDataFrameSchema(ma.Schema, metaclass=SplitDataFrameSchemaMeta):
    """Schema that loads split-oriented JSON into a pandas DataFrame."""

    OPTIONS_CLASS = DataFrameSchemaOpts

    @ma.validates_schema(skip_on_field_errors=True)
    def validate_index_data_length(self, data: dict, **kwargs) -> None:
        """Check that ``index``, when given, has as many entries as ``data``.

        Raises:
            ma.ValidationError: if the two lengths differ; ``"data"`` is
                passed as the error's second argument.
        """
        index_values = data.get("index")
        # A missing (or None) index is allowed, so there is nothing to compare.
        if index_values is None:
            return
        if len(index_values) != len(data["data"]):
            raise ma.ValidationError(
                "Length of `index` and `data` must be equal.", "data"
            )

    @ma.post_load
    def make_df(self, data: dict, **kwargs) -> pd.DataFrame:
        """Turn the loaded payload into a DataFrame cast to the configured dtypes.

        The loaded ``data`` dict is forwarded as keyword arguments to
        ``pd.DataFrame``; the result is then cast column by column using the
        columns/dtypes pairs from ``self.opts.dtypes``.
        """
        frame = pd.DataFrame(dtype=None, **data)
        column_dtypes = dict(
            zip(self.opts.dtypes.columns, self.opts.dtypes.dtypes)
        )
        return frame.astype(column_dtypes)