Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit a965404

Browse files
author
Icaro Felipe de Lima Ribeiro
committed Mar 20, 2023
Finishing all modifications to training
1 parent 6525351 commit a965404

File tree

168 files changed

+137327
-6249
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

168 files changed

+137327
-6249
lines changed
 

‎src/dataset_generator.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,11 @@ def read_dataset(path, target_name = 'temp'):
1010
columns = ['date', 'hour', 'temp', 'dwpt', 'rhum', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres', 'tsun', 'coco']
1111
dataframe = pd.read_csv(path, names=columns)
1212
dataframe = dataframe.loc[(dataframe['date'] >= '2010-01-01')]
13-
X_dataframe = dataframe.drop(columns=['wpgt', 'prcp', 'snow', 'tsun', 'coco'], axis=1)
13+
X_dataframe = dataframe.drop(columns=['wpgt', 'snow', 'tsun', 'coco'], axis=1)
14+
15+
print(f"Dataset {path} | Orifinal size {len(X_dataframe['date'].unique())}")
16+
X_dataframe.dropna(subset=[target_name], inplace=True)
17+
print(f"Dataset {path} | New size {len(X_dataframe['date'].unique())}")
1418

1519
y_dataframe = X_dataframe[['date', 'hour', target_name]].copy()
1620
X_dataframe.pop(target_name)
@@ -163,15 +167,15 @@ def download_file(path, city, _id):
163167
shutil.copyfileobj(f_in, f_out)
164168

165169

166-
def generate_datasets(path, city, _id):
170+
def generate_datasets(path, city, _id, target_name=""):
167171
fullpath = f'{path}/{city}_{_id}/'
168172
csv_file = f'{fullpath}/{_id}.csv'
169-
ts_file = f'{city}_{_id}'
173+
ts_file = f'{city}_{_id}_{target_name}'
170174

171-
X_train, X_test, y_train, y_test = read_dataset(csv_file)
172-
X_train, X_test = clean_data(X_train, X_test)
173-
train_instances, test_instances = format_file(X_train, y_train, X_test, y_test)
174-
write_ts_file(fullpath, ts_file, train_instances, test_instances)
175+
X_train, X_test, y_train, y_test = read_dataset(csv_file, target_name)
176+
# X_train, X_test = clean_data(X_train, X_test)
177+
# train_instances, test_instances = format_file(X_train, y_train, X_test, y_test, target_name)
178+
# write_ts_file(fullpath, ts_file, train_instances, test_instances)
175179

176180

177181
if __name__ == "__main__":
@@ -190,5 +194,5 @@ def generate_datasets(path, city, _id):
190194
path = "./datasets/files"
191195
for key, value in cities.items():
192196
#download_file(path, key, value)
193-
generate_datasets(path, key, value)
197+
generate_datasets(path, key, value, target_name="prcp")
194198

‎src/datasets/files/curitiba_83842/Curitiba83242_TEST.ts

-645
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.