-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathtransform_monash.py
64 lines (54 loc) · 1.66 KB
/
transform_monash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from samay.utils import arrow_to_csv
import os
import pandas as pd
# Sampling frequency of every Monash dataset handled by this script.
#
# Keys are dataset names; the conversion script looks them up using the
# directory names found under the Monash data folder. Values are the frequency
# strings handed to ``samay.utils.arrow_to_csv``.
#
# The strings are written as pandas offset aliases in their pandas >= 2.2
# spelling ("h", "ME", "QE", "YE") so the table is internally consistent and
# does not rely on the deprecated "H" / "M" / "Q" aliases.
# NOTE(review): assumes arrow_to_csv forwards the value to pandas as an offset
# alias (e.g. pd.date_range) -- confirm against samay.utils.
FREQS = {
    "weather": "1D",
    "tourism_yearly": "1YE",
    "tourism_quarterly": "1QE",
    "tourism_monthly": "1ME",
    "cif_2016": "1ME",
    "london_smart_meters": "30min",
    "australian_electricity_demand": "30min",
    "wind_farms_minutely": "1min",
    "bitcoin": "1D",
    "pedestrian_counts": "1h",
    "vehicle_trips": "1D",
    "kdd_cup_2018": "1h",
    "nn5_daily": "1D",
    "nn5_weekly": "1W",
    "kaggle_web_traffic": "1D",
    "kaggle_web_traffic_weekly": "1W",
    "solar_10_minutes": "10min",
    "solar_weekly": "1W",
    "car_parts": "1ME",
    "fred_md": "1ME",
    "traffic_hourly": "1h",
    "traffic_weekly": "1W",
    "hospital": "1ME",
    "covid_deaths": "1D",
    "sunspot": "1D",
    "saugeenday": "1D",
    "us_births": "1D",
    "solar_4_seconds": "4s",
    "wind_4_seconds": "4s",
    "rideshare": "1h",
    "oikolab_weather": "1h",
    "temperature_rain": "1D",
}
def main(monash_dir="data/monash"):
    """Convert every Monash dataset split from Arrow to a NaN-free CSV.

    For each dataset directory under ``monash_dir`` and each of the
    ``train`` / ``validation`` / ``test`` splits, this calls
    ``arrow_to_csv`` on the split directory, then re-reads the resulting
    ``data.csv``, replaces missing values with 0 and writes it back in place.

    Entries of ``monash_dir`` that are not directories, or that have no
    frequency registered in ``FREQS``, are skipped instead of aborting the
    whole run with a ``KeyError``.

    Args:
        monash_dir: Root folder holding one sub-directory per dataset.
    """
    splits = ("train", "validation", "test")
    for dataset in os.listdir(monash_dir):
        dataset_dir = os.path.join(monash_dir, dataset)
        # os.listdir also returns plain files (e.g. hidden OS files); only
        # directories can be datasets.
        if not os.path.isdir(dataset_dir):
            continue

        # The frequency is the same for all splits, so resolve it once.
        freq = FREQS.get(dataset)
        if freq is None:
            print(f"Skipping {dataset}: no frequency registered in FREQS")
            continue

        print(f"Converting {dataset} dataset")
        for split in splits:
            arrow_dir = os.path.join(dataset_dir, split)
            arrow_to_csv(arrow_dir, freq)

            # The CSV is read back from inside the split directory right
            # after the conversion call.
            # NOTE(review): presumably arrow_to_csv writes data.csv there --
            # confirm against samay.utils.
            csv_file = os.path.join(arrow_dir, "data.csv")
            df = pd.read_csv(csv_file)
            # fill missing values with 0
            df.fillna(0, inplace=True)
            df.to_csv(csv_file, index=False)


if __name__ == "__main__":
    main()