-
Notifications
You must be signed in to change notification settings - Fork 0
/
file_convertor.py
251 lines (202 loc) · 7.56 KB
/
file_convertor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# %%
import re
import pandas as pd
from typing import List, Tuple
class FileConverter:
    """
    Converts IGC flight data that has been converted to CSV via KML into a
    cleaned-up CSV file.

    The program is designed to be used with the processed output of the
    following websites:
    Utilities:
    - https://igc2kml.com/
    - https://products.aspose.app/gis/conversion/kml-to-csv

    The processing steps read the input columns 'altitudeMode', 'name' and
    'WKT'. Every processing step returns a new DataFrame and leaves the
    DataFrame it was given untouched.

    To run this script use the execute_file_convertor.ipynb notebook in executor folder.
    """

    # Names given to the whitespace-separated fields of the 'name' column, in order.
    _NAME_FIELDS: List[str] = [
        "timestamp",
        "altitude",
        "horizontal",
        "vertical",
        "distance",
    ]

    # Per-column regex character classes: every occurrence of any listed
    # character is stripped before the remaining text is parsed as a float.
    _UNIT_PATTERNS = {
        "altitude": r"[m]",
        "horizontal": r"[kmh]",
        "vertical": r"[m/s]",
        "distance": r"[km]",
    }

    # Characters left over from the stringified coordinate tuples.
    _COORDINATE_NOISE: str = r"[\[\]()',]"

    # Header row written to the output CSV, in output column order.
    _CSV_HEADERS: List[str] = [
        "timestamp [UTC]",
        "relative altitude [m]",
        "horizontal velocity [m/s]",
        "vertical velocity [m/s]",
        "distance to takeoff [km]",
        "longitude",
        "latitude",
    ]

    def __init__(self, input_file: str, output_file: str) -> None:
        """
        Initializes the FileConverter class.

        Parameters:
        - input_file: the path to the input file to be converted (xlsx or csv)
        - output_file: the path to the CSV file to be created

        Returns:
        - None
        """
        self.input_file: str = input_file
        self.output_file: str = output_file

    def read_input_file(self) -> pd.DataFrame:
        """
        Reads the input file and returns a Pandas DataFrame.

        The reader is chosen from the file extension, compared
        case-insensitively ('.csv' or '.xlsx').

        Parameters:
        - None

        Returns:
        - A Pandas DataFrame

        Raises:
        - ValueError: if the input file has neither a .csv nor a .xlsx extension
        """
        path: str = str(self.input_file)
        lowered: str = path.lower()
        if lowered.endswith(".csv"):
            return pd.read_csv(self.input_file)
        if lowered.endswith(".xlsx"):
            return pd.read_excel(self.input_file)
        # Fail loudly instead of returning None, which would only surface
        # later as an unrelated TypeError in the first processing step.
        raise ValueError(
            f"Unsupported input file type: {path!r} (expected .csv or .xlsx)"
        )

    def filter_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Filters the DataFrame by keeping only the rows whose 'altitudeMode'
        equals 'clampToGround'; all other rows are dropped.

        Parameters:
        - df: the DataFrame to be filtered

        Returns:
        - A filtered DataFrame (an independent copy)
        """
        # .copy() detaches the result from df so later column assignments do
        # not raise SettingWithCopyWarning.
        return df[df["altitudeMode"] == "clampToGround"].copy()

    def split_and_reorder_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Splits the 'name' column on whitespace into the columns 'timestamp',
        'altitude', 'horizontal', 'vertical' and 'distance', and returns them
        followed by the 'WKT' column.

        Parameters:
        - df: the DataFrame containing the 'name' and 'WKT' columns

        Returns:
        - A new DataFrame with the split columns and 'WKT'

        Raises:
        - ValueError: if splitting 'name' does not yield exactly five fields
        """
        fields: List[str] = list(self._NAME_FIELDS)
        parts: pd.DataFrame = df["name"].str.split(expand=True)
        if parts.shape[1] != len(fields):
            raise ValueError(
                f"Expected {len(fields)} whitespace-separated fields in 'name', "
                f"found {parts.shape[1]}"
            )
        parts.columns = fields
        parts["WKT"] = df["WKT"]
        return parts

    def extract_coordinates_raw(self, row: pd.Series) -> pd.Series:
        """
        Extracts the coordinates from the 'WKT' value of a single row.

        The text inside the first parenthesis is split on commas into points;
        the first two whitespace-separated tokens of each point are kept.

        Parameters:
        - row: a row of the DataFrame

        Returns:
        - A Pandas Series with the entries 'coordinates_a' and 'coordinates_b',
          each a tuple of strings

        Raises:
        - ValueError: if the value contains no parenthesised coordinate list
          or does not contain exactly two points
        """
        line_string: str = str(row["WKT"])
        match = re.search(r"\(([^)]+)", line_string)
        if match is None:
            raise ValueError(
                f"No coordinate list found in WKT value: {line_string!r}"
            )
        coordinates: List[Tuple[str, ...]] = [
            tuple(point.split()[:2]) for point in match.group(1).split(",")
        ]
        if len(coordinates) != 2:
            raise ValueError(
                f"Expected exactly 2 points in WKT value, "
                f"found {len(coordinates)}: {line_string!r}"
            )
        return pd.Series(coordinates, index=["coordinates_a", "coordinates_b"])

    def extract_coordinates(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Extracts coordinates from the 'WKT' column and adds them to the DataFrame.

        Only the first point ('coordinates_a') is kept; the 'WKT' column and
        the second point are dropped.

        Parameters:
        - df: the input DataFrame

        Returns:
        - A new DataFrame with 'coordinates_a' added and 'WKT' removed
        """
        frame: pd.DataFrame = df.copy()
        frame["WKT"] = frame["WKT"].astype(str)
        coordinates_raw: pd.DataFrame = frame.apply(
            self.extract_coordinates_raw, axis=1
        )
        combined: pd.DataFrame = pd.concat([frame, coordinates_raw], axis=1)
        return combined.drop(["WKT", "coordinates_b"], axis=1)

    def extract_coordinates_a(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Splits the 'coordinates_a' column into 'longitude' and 'latitude' columns.

        The column is converted to text and split on single spaces; the first
        piece becomes 'longitude' and the second 'latitude'. Leftover
        punctuation is not removed here (see clean_up_coordinates).

        Parameters:
        - df: the input DataFrame

        Returns:
        - A new DataFrame with 'longitude' and 'latitude' columns added and
          'coordinates_a' removed
        """
        pieces: pd.DataFrame = (
            df["coordinates_a"].astype(str).str.split(" ", expand=True)
        )
        result: pd.DataFrame = df.drop("coordinates_a", axis=1)
        result["longitude"] = pieces[0]
        result["latitude"] = pieces[1]
        return result

    def clean_up_coordinates(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Cleans up 'longitude' and 'latitude' columns by removing unwanted
        characters (brackets, parentheses, quotes and commas) and converting
        the remaining text to float.

        Parameters:
        - df: the input DataFrame

        Returns:
        - A new DataFrame with 'longitude' and 'latitude' columns cleaned up
        """
        result: pd.DataFrame = df.copy()
        for column in ("longitude", "latitude"):
            result[column] = (
                result[column]
                .str.replace(self._COORDINATE_NOISE, "", regex=True)
                .astype(float)
            )
        return result

    def remove_units(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Removes the units from the 'altitude', 'horizontal', 'vertical', and
        'distance' columns of the DataFrame and converts them to floats
        rounded to two decimals.

        Parameters:
        - df: input DataFrame

        Returns:
        - A new DataFrame with units removed
        """
        result: pd.DataFrame = df.copy()
        for column, pattern in self._UNIT_PATTERNS.items():
            result[column] = (
                result[column]
                .str.replace(pattern, "", regex=True)
                .astype(float)
                .round(2)
            )
        return result

    def remove_static_speeds(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Removes rows with static horizontal speed (a 'horizontal' value of 0).

        Parameters:
        - df: the DataFrame to be filtered

        Returns:
        - A filtered DataFrame (an independent copy)
        """
        return df[df["horizontal"] != 0].copy()

    def convert_horizontal_speed(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Converts horizontal speed to meters per second by dividing the
        'horizontal' column by 3.6 and rounding to two decimals.

        Parameters:
        - df: the input DataFrame

        Returns:
        - A new DataFrame with horizontal speed converted
        """
        result: pd.DataFrame = df.copy()
        result["horizontal"] = (result["horizontal"] / 3.6).round(2)
        return result

    # AI content (GitHub Copilot, 01/25/2024), verified and adapted by Nicolas Huber.
    def export_to_csv(self, df: pd.DataFrame, custom_headers: List[str]) -> None:
        """
        Exports the DataFrame to a CSV file with custom headers.

        The file is written to the output path given at construction time,
        without the index column.

        Parameters:
        - df: the input DataFrame
        - custom_headers: a list of custom headers, one per DataFrame column

        Returns:
        - None
        """
        df.to_csv(self.output_file, index=False, header=custom_headers)

    # AI content (GitHub Copilot, 01/25/2024), verified and adapted by Nicolas Huber.
    def process_csv(
        self,
    ) -> None:
        """
        Reads the input file into a Pandas DataFrame, cleans up the data, and
        exports the DataFrame to a CSV file.

        Parameters:
        - None

        Returns:
        - None
        """
        df: pd.DataFrame = self.read_input_file()

        # Process steps. Order matters: units must be removed before the
        # numeric speed filter and conversion, and the coordinate text is
        # cleaned only after it has been split into longitude/latitude.
        df = self.filter_dataframe(df)
        df = self.split_and_reorder_columns(df)
        df = self.extract_coordinates(df)
        df = self.extract_coordinates_a(df)
        df = self.remove_units(df)
        df = self.remove_static_speeds(df)
        df = self.convert_horizontal_speed(df)
        df = self.clean_up_coordinates(df)

        # Export DataFrame to CSV
        custom_headers: List[str] = list(self._CSV_HEADERS)
        self.export_to_csv(df, custom_headers)
# %%