Skip to content

Commit

Permalink
fix infer column data types and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
nick-gorman committed May 6, 2024
1 parent e6aaeda commit 7330d2d
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 14 deletions.
6 changes: 6 additions & 0 deletions nemosis/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,12 @@

months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]

# Datetime layouts tried when parsing date columns. The order is significant:
# value_parser._parse_datetime indexes this list positionally ([0], [1], [2])
# and falls through to the next entry when parsing raises ValueError.
date_formats = [
    "%Y/%m/%d %H:%M:%S",  # the most common format
    "%Y/%m/%d %H:%M:%S.%f",  # fractional seconds; used in some bidding columns
    "%Y-%m-%d %H:%M:%S",  # used in some 4-second FCAS data
]

nem_data_model_start_time = "2009/07/01 00:00:00"


Expand Down
18 changes: 11 additions & 7 deletions nemosis/value_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import pandas as pd

from . import defaults as _defaults


def _parse_datetime(series):
"""
Attempts to parse a column into a datetime
Expand All @@ -10,18 +13,19 @@ def _parse_datetime(series):
Returns:
series (np.Array)
"""

try:
# this first format is the most common
return pd.to_datetime(series, format="%Y/%m/%d %H:%M:%S")
return pd.to_datetime(series, format=_defaults.date_formats[0])
except ValueError as e:
try:
# this format with milliseconds is used in some bidding columns
return pd.to_datetime(series, format=date_formats[1])
return pd.to_datetime(series, format=_defaults.date_formats[1])
except ValueError as e:
# this format is used in some 4-second FCAS data
return pd.to_datetime(series, format=date_formats[2])

return pd.to_datetime(series, format=_defaults.date_formats[2])


def _parse_column(series):
"""
Attempts to parse a column into a datetime or numeric.
Expand All @@ -43,7 +47,7 @@ def _parse_column(series):
return series


def _infer_column_data_types(series):
def _infer_column_data_types(data):
"""
Infer datatype of DataFrame assuming inference need only be carried out
for any columns with dtype "object". Adapted from StackOverflow.
Expand All @@ -56,6 +60,6 @@ def _infer_column_data_types(series):
"""

for col in data:
data[col] = _parse_column(series)
data[col] = _parse_column(data[col])

return data
15 changes: 8 additions & 7 deletions tests/test_data_fetch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,8 @@ def setUp(self):

def test_dispatch_tables_start_of_month(self):
start_time = "2022/11/01 00:00:00"
end_time = "2022/11/01 05:15:00"
start_time = "2024/04/01 00:00:00"
end_time = "2024/04/01 05:15:00"
for table in self.table_names:
print(f"Testing {table} returning values at start of month one.")
dat_col = defaults.primary_date_columns[table]
Expand Down Expand Up @@ -856,8 +857,8 @@ def test_dispatch_tables_start_of_month(self):
print("Passed")

def test_dispatch_tables_middle_of_month_and_day(self):
start_time = "2022/11/05 12:00:00"
end_time = "2022/11/05 17:15:00"
start_time = "2024/04/05 12:00:00"
end_time = "2024/04/05 17:15:00"
for table in self.table_names:
print(f"Testing {table} returning values at start of month one.")
dat_col = defaults.primary_date_columns[table]
Expand Down Expand Up @@ -885,8 +886,8 @@ def test_dispatch_tables_middle_of_month_and_day(self):
print("Passed")

def test_dispatch_tables_start_market_day(self):
start_time = "2022/11/05 04:00:00"
end_time = "2022/11/05 04:05:00"
start_time = "2024/04/05 04:00:00"
end_time = "2024/04/05 04:05:00"
for table in self.table_names:
print(f"Testing {table} returning values at start of month one.")
dat_col = defaults.primary_date_columns[table]
Expand Down Expand Up @@ -914,8 +915,8 @@ def test_dispatch_tables_start_market_day(self):
print("Passed")

def test_dispatch_tables_end_market_day(self):
start_time = "2022/11/05 03:55:00"
end_time = "2022/11/05 04:00:00"
start_time = "2024/04/05 03:55:00"
end_time = "2024/04/05 04:00:00"
for table in self.table_names:
print(f"Testing {table} returning values at start of month one.")
dat_col = defaults.primary_date_columns[table]
Expand Down
1 change: 1 addition & 0 deletions tests/test_processing_info_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def test_all_no_duplication_between_batches(self):
"INTERCONNECTORCONSTRAINT",
"DUDETAIL",
"MARKET_PRICE_THRESHOLDS",
"PARTICIPANT"
]:
print(
"{} is known to contain duplicate entries and is exempted from this test, a finalise "
Expand Down

0 comments on commit 7330d2d

Please sign in to comment.