generated from subhalingamd/mypy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
44 lines (33 loc) · 1.34 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def read_test_file(path):
tweets = []
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
for line in f:
line = line.strip()
tweets.append(line)
return tweets
def read_train_file(path):
tweets, sentiments = [], []
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
for line in f:
line = line.strip()
if line.startswith('"'):
sentiment = 0 if line[1] == "0" else 1
sentiments.append(sentiment)
tweets.append(line[5:-1])
else:
tweets.append(line)
return tweets, sentiments
def read_file(path):
return read_train_file(path)
def data_split(X, y):
from sklearn.model_selection import train_test_split
from constants import RANDOM_SEED
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, stratify=y_test, random_state=RANDOM_SEED)
return (X_train, y_train), (X_val, y_val), (X_test, y_test)
def read_file_and_split(path):
X, y = read_file(path)
return data_split(X, y)
def write_predictions(path, arr):
with open(path, 'w', encoding='utf-8', errors='ignore') as f:
print(*arr, sep='\n', file=f)