-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdatacleaner_helper.py
78 lines (60 loc) · 2.42 KB
/
datacleaner_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import builtins
import sys
import pandas as pd
import re
def get_first_num_from_str(s):
    """Return the first run of consecutive digit characters in *s*, as a string.

    Characters are classified with ``str.isdigit``. A string made up only of
    digits is therefore returned unchanged.

    Args:
        s: Text to scan.

    Returns:
        The first contiguous run of digits, e.g. ``"128"`` for ``"128GB"``.

    Raises:
        IndexError: If *s* contains no digit at all. This exception type is
            kept for backward compatibility with the previous implementation.
    """
    digits = []
    for ch in s:
        if ch.isdigit():
            digits.append(ch)
        elif digits:
            # The first run has just ended; nothing after it matters.
            break
    if not digits:
        raise IndexError(f"no digits found in {s!r}")
    # Joining here also covers a run that extends to the very end of the
    # string, which previously was never recorded.
    return "".join(digits)
def clean_builtin(memory_string):
    """Return the first number in the first comma-separated field as an int.

    Only the text before the first comma is inspected; the digits are
    extracted by ``get_first_num_from_str``.
    NOTE(review): presumably this field holds the built-in storage size;
    confirm against the data this is applied to.

    Args:
        memory_string: Comma-separated text.

    Returns:
        The first number of the first field, converted to ``int``.
    """
    first_field = memory_string.split(",")[0]
    leading_number = get_first_num_from_str(first_field)
    return int(leading_number)
def clean_ram(memory_string):
    """Return the first number in the second comma-separated field as an int.

    Only the text between the first and second comma is inspected; the digits
    are extracted by ``get_first_num_from_str``.
    NOTE(review): presumably this field holds the RAM size; confirm against
    the data this is applied to.

    Args:
        memory_string: Comma-separated text with at least two fields.

    Returns:
        The first number of the second field, converted to ``int``.

    Raises:
        IndexError: If *memory_string* contains no comma.
    """
    fields = memory_string.split(",")
    second_field = fields[1]
    leading_number = get_first_num_from_str(second_field)
    return int(leading_number)
def clean_cams(cams_string):
    """Extract the number that precedes every ``"MP"`` marker in *cams_string*.

    The string is cut at each occurrence of the literal ``"MP"``. For every
    piece that ends at a marker, the last space-separated token containing at
    least one ASCII digit is taken, its non-digit characters are dropped and
    the remainder is converted to ``int``.

    The number no longer has to be followed by a space, so ``"13MP + 2MP"``
    yields ``[13, 2]`` instead of raising.

    NOTE(review): all digits inside one token are concatenated, so a decimal
    value such as ``"12.2 MP"`` yields ``122``. This matches the previous
    behaviour; confirm whether decimals can occur in the data.

    Args:
        cams_string: Camera description, e.g. ``"13 MP + 2 MP"``.

    Returns:
        A list of ints, one per ``"MP"`` marker, in order of appearance.
        Empty when the string contains no ``"MP"``.

    Raises:
        IndexError: If an ``"MP"`` marker has no digits anywhere before it
            (after the previous marker). This exception type is kept for
            backward compatibility with the previous implementation.
    """

    def _last_number(segment):
        # Walk the tokens right-to-left so the one closest to "MP" wins.
        for token in reversed(segment.split(" ")):
            digits = re.sub("[^0-9]", "", token)
            if digits:
                return digits
        raise IndexError(f"no number found before 'MP' in {segment!r}")

    # Everything after the final "MP" belongs to no camera, so drop it.
    segments = cams_string.split("MP")[:-1]
    return [int(_last_number(segment)) for segment in segments]
# df1 = pd.read_excel("MobileDB.xlsx")
# rear_cams = (df1["Front"])
# # rear_cams = rear_cams.apply(lambda cams_str: cams_str.split("+")) # separate every camera for every row (based on +)
# # # rear_cams = rear_cams.apply(lambda cams_lst: [cam[ cam.find("+") + 1 : ] for cam in cams_lst]) # separate every camera for every row (based on +)
# # rear_cams = rear_cams.apply(lambda cam_list: [
# # (
# # re.sub("[^0-9]", "", cam[:cam.find("MP")])
# # )
# # for cam in cam_list]) # for every separate camera string, truncate it at "MP", remove non numeric chars (re) and convert to int to get MPS
# rear_cams = rear_cams.apply(clean_cams)
# [print(i, (x)) for i, x in enumerate(rear_cams)]
def clean_sensors(sensors_string):
    """Split a comma-separated sensor string into a list, ignoring ``(...)`` notes.

    Every parenthesised group (an opening ``(`` up to the next ``)``) is
    removed before splitting, so commas inside such a group do not create
    extra items. An opening parenthesis with no closing one is left in place.

    Whitespace around the items is not stripped, so the pieces are returned
    exactly as they appear between the commas.

    Args:
        sensors_string: Comma-separated text, optionally containing
            parenthesised remarks.

    Returns:
        A list of the comma-separated pieces. ``[""]`` for an empty string.
    """
    # Remove all groups, not only the first one. A missing ")" simply means
    # nothing matches, so the text is never duplicated.
    without_notes = re.sub(r"\([^)]*\)", "", sensors_string)
    return without_notes.split(",")