Skip to content

Commit a45a3b2

Browse files
small improvement
1 parent ce6aba1 commit a45a3b2

File tree

6 files changed

+64
-63
lines changed

6 files changed

+64
-63
lines changed

ann_logistic_extra/ann_predict.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
from process import get_data
99

10-
X, Y = get_data()
10+
X, Y, _, _ = get_data()
1111

1212
# randomly initialize weights
1313
M = 5

ann_logistic_extra/ann_train.py

+6 -12
Original file line number | Diff line number | Diff line change
@@ -17,19 +17,13 @@ def y2indicator(y, K):
1717
ind[i, y[i]] = 1
1818
return ind
1919

20-
X, Y = get_data()
21-
X, Y = shuffle(X, Y)
22-
Y = Y.astype(np.int32)
23-
M = 5
24-
D = X.shape[1]
25-
K = len(set(Y))
26-
27-
# create train and test sets
28-
Xtrain = X[:-100]
29-
Ytrain = Y[:-100]
20+
Xtrain, Ytrain, Xtest, Ytest = get_data()
21+
D = Xtrain.shape[1]
22+
K = len(set(Ytrain) | set(Ytest))
23+
M = 5 # num hidden units
24+
25+
# convert to indicator
3026
Ytrain_ind = y2indicator(Ytrain, K)
31-
Xtest = X[-100:]
32-
Ytest = Y[-100:]
3327
Ytest_ind = y2indicator(Ytest, K)
3428

3529
# randomly initialize weights

ann_logistic_extra/logistic_predict.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
import numpy as np
88
from process import get_binary_data
99

10-
X, Y = get_binary_data()
10+
X, Y, _, _ = get_binary_data()
1111

1212
# randomly initialize weights
1313
D = X.shape[1]

ann_logistic_extra/logistic_softmax_train.py

+5 -11
Original file line number | Diff line number | Diff line change
@@ -17,18 +17,12 @@ def y2indicator(y, K):
1717
ind[i, y[i]] = 1
1818
return ind
1919

20-
X, Y = get_data()
21-
X, Y = shuffle(X, Y)
22-
Y = Y.astype(np.int32)
23-
D = X.shape[1]
24-
K = len(set(Y))
25-
26-
# create train and test sets
27-
Xtrain = X[:-100]
28-
Ytrain = Y[:-100]
20+
Xtrain, Ytrain, Xtest, Ytest = get_data()
21+
D = Xtrain.shape[1]
22+
K = len(set(Ytrain) | set(Ytest))
23+
24+
# convert to indicator
2925
Ytrain_ind = y2indicator(Ytrain, K)
30-
Xtest = X[-100:]
31-
Ytest = Y[-100:]
3226
Ytest_ind = y2indicator(Ytest, K)
3327

3428
# randomly initialize weights

ann_logistic_extra/logistic_train.py

+3 -9
Original file line number | Diff line number | Diff line change
@@ -10,17 +10,11 @@
1010
from sklearn.utils import shuffle
1111
from process import get_binary_data
1212

13-
X, Y = get_binary_data()
14-
X, Y = shuffle(X, Y)
15-
16-
# create train and test sets
17-
Xtrain = X[:-100]
18-
Ytrain = Y[:-100]
19-
Xtest = X[-100:]
20-
Ytest = Y[-100:]
13+
# get the data
14+
Xtrain, Ytrain, Xtest, Ytest = get_binary_data()
2115

2216
# randomly initialize weights
23-
D = X.shape[1]
17+
D = Xtrain.shape[1]
2418
W = np.random.randn(D)
2519
b = 0 # bias term
2620

ann_logistic_extra/process.py

+48 -29
Original file line number | Diff line number | Diff line change
@@ -15,43 +15,62 @@
1515
# one-hot categorical columns
1616

1717
def get_data():
18-
df = pd.read_csv(dir_path + '/ecommerce_data.csv')
18+
df = pd.read_csv(dir_path + '/ecommerce_data.csv')
1919

20-
# just in case you're curious what's in it
21-
# df.head()
20+
# just in case you're curious what's in it
21+
# df.head()
2222

23-
# easier to work with numpy array
24-
data = df.as_matrix()
23+
# easier to work with numpy array
24+
data = df.as_matrix()
2525

26-
X = data[:,:-1]
27-
Y = data[:,-1]
26+
# shuffle it
27+
np.random.shuffle(data)
2828

29-
# normalize columns 1 and 2
30-
X[:,1] = (X[:,1] - X[:,1].mean()) / X[:,1].std()
31-
X[:,2] = (X[:,2] - X[:,2].mean()) / X[:,2].std()
29+
# split features and labels
30+
X = data[:,:-1]
31+
Y = data[:,-1].astype(np.int32)
3232

33-
# create a new matrix X2 with the correct number of columns
34-
N, D = X.shape
35-
X2 = np.zeros((N, D+3))
36-
X2[:,0:(D-1)] = X[:,0:(D-1)] # non-categorical
33+
# one-hot encode the categorical data
34+
# create a new matrix X2 with the correct number of columns
35+
N, D = X.shape
36+
X2 = np.zeros((N, D+3))
37+
X2[:,0:(D-1)] = X[:,0:(D-1)] # non-categorical
3738

38-
# one-hot
39-
for n in range(N):
40-
t = int(X[n,D-1])
41-
X2[n,t+D-1] = 1
39+
# one-hot
40+
for n in range(N):
41+
t = int(X[n,D-1])
42+
X2[n,t+D-1] = 1
4243

43-
# method 2
44-
# Z = np.zeros((N, 4))
45-
# Z[np.arange(N), X[:,D-1].astype(np.int32)] = 1
46-
# # assign: X2[:,-4:] = Z
47-
# assert(np.abs(X2[:,-4:] - Z).sum() < 1e-10)
44+
# method 2
45+
# Z = np.zeros((N, 4))
46+
# Z[np.arange(N), X[:,D-1].astype(np.int32)] = 1
47+
# # assign: X2[:,-4:] = Z
48+
# assert(np.abs(X2[:,-4:] - Z).sum() < 1e-10)
4849

49-
return X2, Y
50+
# assign X2 back to X, since we don't need original anymore
51+
X = X2
52+
53+
# split train and test
54+
Xtrain = X[:-100]
55+
Ytrain = Y[:-100]
56+
Xtest = X[-100:]
57+
Ytest = Y[-100:]
58+
59+
# normalize columns 1 and 2
60+
for i in (1, 2):
61+
m = Xtrain[:,i].mean()
62+
s = Xtrain[:,i].std()
63+
Xtrain[:,i] = (Xtrain[:,i] - m) / s
64+
Xtest[:,i] = (Xtest[:,i] - m) / s
65+
66+
return Xtrain, Ytrain, Xtest, Ytest
5067

5168

5269
def get_binary_data():
53-
# return only the data from the first 2 classes
54-
X, Y = get_data()
55-
X2 = X[Y <= 1]
56-
Y2 = Y[Y <= 1]
57-
return X2, Y2
70+
# return only the data from the first 2 classes
71+
Xtrain, Ytrain, Xtest, Ytest = get_data()
72+
X2train = Xtrain[Ytrain <= 1]
73+
Y2train = Ytrain[Ytrain <= 1]
74+
X2test = Xtest[Ytest <= 1]
75+
Y2test = Ytest[Ytest <= 1]
76+
return X2train, Y2train, X2test, Y2test

0 commit comments

Comments (0)