Skip to content

Commit

Permalink
change as_matrix to values
Browse files Browse the repository at this point in the history
  • Loading branch information
bob7783 committed Aug 4, 2018
1 parent 94c4328 commit b8b97e3
Show file tree
Hide file tree
Showing 16 changed files with 34 additions and 25 deletions.
4 changes: 2 additions & 2 deletions ab_testing/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
df = pd.read_csv('advertisement_clicks.csv')
a = df[df['advertisement_id'] == 'A']
b = df[df['advertisement_id'] == 'B']
a = a['action'].as_matrix()
b = b['action'].as_matrix()
a = a['action'].values
b = b['action'].values

print("a.mean:", a.mean())
print("b.mean:", b.mean())
Expand Down
2 changes: 1 addition & 1 deletion ann_logistic_extra/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def get_data():
# df.head()

# easier to work with numpy array
data = df.as_matrix()
data = df.values

# shuffle it
np.random.shuffle(data)
Expand Down
7 changes: 2 additions & 5 deletions cnn_class2/class_activation_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,10 @@
# Note: you may need to update your version of future
# sudo pip install -U future

from keras.layers import Input, Lambda, Dense, Flatten
from keras.models import Model
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
# from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import confusion_matrix
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
Expand All @@ -24,7 +20,7 @@



# useful for getting number of files
# get the image files
image_files = glob('../large_files/256_ObjectCategories/*/*.jp*g')
image_files += glob('../large_files/101_ObjectCategories/*/*.jp*g')

Expand Down Expand Up @@ -72,6 +68,7 @@
cam = fmaps.dot(w)

# upsample to 224 x 224
# 7 x 32 = 224
cam = sp.ndimage.zoom(cam, (32, 32), order=1)

plt.subplot(1,2,1)
Expand Down
2 changes: 1 addition & 1 deletion cnn_class2/fashion.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def y2indicator(Y):
# get the data
# https://www.kaggle.com/zalando-research/fashionmnist
data = pd.read_csv('../large_files/fashionmnist/fashion-mnist_train.csv')
data = data.as_matrix()
data = data.values
np.random.shuffle(data)

X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0
Expand Down
2 changes: 1 addition & 1 deletion cnn_class2/fashion2.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def y2indicator(Y):
# get the data
# https://www.kaggle.com/zalando-research/fashionmnist
data = pd.read_csv('../large_files/fashionmnist/fashion-mnist_train.csv')
data = data.as_matrix()
data = data.values
np.random.shuffle(data)

X = data[:, 1:].reshape(-1, 28, 28, 1) / 255.0
Expand Down
4 changes: 4 additions & 0 deletions hmm_class/hmmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime


def random_normalized(d1, d2):
Expand All @@ -22,6 +23,7 @@ def __init__(self, M):
self.M = M # number of hidden states

def fit(self, X, max_iter=30):
t0 = datetime.now()
np.random.seed(123)
# train the HMM model using the Baum-Welch algorithm
# a specific instance of the expectation-maximization algorithm
Expand Down Expand Up @@ -136,6 +138,8 @@ def fit(self, X, max_iter=30):
print("B:", self.B)
print("pi:", self.pi)

print("Fit duration:", (datetime.now() - t0))

plt.plot(costs)
plt.show()

Expand Down
2 changes: 1 addition & 1 deletion linear_regression_class/systolic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import pandas as pd

df = pd.read_excel('mlr02.xls')
X = df.as_matrix()
X = df.values

# using age to predict systolic blood pressure
plt.scatter(X[:,1], X[:,0])
Expand Down
2 changes: 1 addition & 1 deletion nlp_class/nb.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# it will work for other types of "counts", like tf-idf, so it should
# also work for our "word proportions"

data = pd.read_csv('spambase.data').as_matrix() # use pandas for convenience
data = pd.read_csv('spambase.data').values # use pandas for convenience
np.random.shuffle(data) # shuffle the rows in-place (row order changes, but each row's contents stay intact)

X = data[:,:48]
Expand Down
5 changes: 3 additions & 2 deletions nlp_class/spam2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from wordcloud import WordCloud


Expand All @@ -32,7 +33,7 @@

# create binary labels
df['b_labels'] = df['labels'].map({'ham': 0, 'spam': 1})
Y = df['b_labels'].as_matrix()
Y = df['b_labels'].values

# try multiple ways of calculating features
# tfidf = TfidfVectorizer(decode_error='ignore')
Expand All @@ -49,7 +50,7 @@
model.fit(Xtrain, Ytrain)
print("train score:", model.score(Xtrain, Ytrain))
print("test score:", model.score(Xtest, Ytest))

exit()


# visualize the data
Expand Down
2 changes: 1 addition & 1 deletion nlp_class3/bilstm_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_mnist(limit=None):

print("Reading in and transforming data...")
df = pd.read_csv('../large_files/train.csv')
data = df.as_matrix()
data = df.values
np.random.shuffle(data)
X = data[:, 1:].reshape(-1, 28, 28) / 255.0 # data is from 0..255
Y = data[:, 0]
Expand Down
7 changes: 7 additions & 0 deletions supervised_class/bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


import numpy as np
import matplotlib.pyplot as plt
from util import get_data
from datetime import datetime
from scipy.stats import norm
Expand Down Expand Up @@ -60,3 +61,9 @@ def predict(self, X):
t0 = datetime.now()
print("Test accuracy:", model.score(Xtest, Ytest))
print("Time to compute test accuracy:", (datetime.now() - t0), "Test size:", len(Ytest))

This comment has been minimized.

Copy link
@arashmath

arashmath Jul 21, 2020

Dear @lazyprogrammer
I think there is a problem with this part that needs to be fixed. When I run the code, I get this error:
ValueError: cannot reshape array of size 20 into shape (28,28)
And then I changed the numbers in the reshape function so that their product would be 20 (i.e. (1,20) or (4,5)), but then I couldn't understand it. Here is a screenshot of the result though:
scrn

This comment has been minimized.

Copy link
@lazyprogrammer

lazyprogrammer Jul 21, 2020

Owner

Dear @lazyprogrammer
I think there is a problem with this part that needs to be fixed. When I run the code, I get this error:
ValueError: cannot reshape array of size 20 into shape (28,28)
And then I changed the numbers in the reshape function so that their product would be 20 (i.e. (1,20) or (4,5)), but then I couldn't understand it. Here is a screenshot of the result though:
scrn

You'll have to take the course if you want to know which dataset was used and to see the working code demonstrated on video.

This comment has been minimized.

Copy link
@lazyprogrammer

lazyprogrammer Jul 21, 2020

Owner

Dear @lazyprogrammer
I think there is a problem with this part that needs to be fixed. When I run the code, I get this error:
ValueError: cannot reshape array of size 20 into shape (28,28)
And then I changed the numbers in the reshape function so that their product would be 20 (i.e. (1,20) or (4,5)), but then I couldn't understand it. Here is a screenshot of the result though:
scrn

You can get the course here: https://deeplearningcourses.com/c/data-science-supervised-machine-learning-in-python

# plot the mean of each class
for c, g in iteritems(model.gaussians):
plt.imshow(g['mean'].reshape(28, 28))
plt.title(c)
plt.show()
2 changes: 1 addition & 1 deletion supervised_class/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
def get_data(limit=None):
print("Reading in and transforming data...")
df = pd.read_csv('../large_files/train.csv')
data = df.as_matrix()
data = df.values
np.random.shuffle(data)
X = data[:, 1:] / 255.0 # data is from 0..255
Y = data[:, 0]
Expand Down
4 changes: 2 additions & 2 deletions supervised_class2/rf_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def transform(self, df):
X = np.zeros((N, self.D))
i = 0
for col, scaler in iteritems(self.scalers):
X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
X[:,i] = scaler.transform(df[col].values.reshape(-1, 1)).flatten()
i += 1

for col, encoder in iteritems(self.labelEncoders):
Expand Down Expand Up @@ -98,7 +98,7 @@ def get_data():
transformer = DataTransformer()

X = transformer.fit_transform(df)
Y = df[0].as_matrix()
Y = df[0].values
return X, Y


Expand Down
8 changes: 4 additions & 4 deletions supervised_class2/rf_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def fit(self, df):
self.scalers = {}
for col in NUMERICAL_COLS:
scaler = StandardScaler()
scaler.fit(df[col].as_matrix().reshape(-1, 1))
scaler.fit(df[col].values.reshape(-1, 1))
self.scalers[col] = scaler

def transform(self, df):
Expand All @@ -53,7 +53,7 @@ def transform(self, df):
X = np.zeros((N, D))
i = 0
for col, scaler in iteritems(self.scalers):
X[:,i] = scaler.transform(df[col].as_matrix().reshape(-1, 1)).flatten()
X[:,i] = scaler.transform(df[col].values.reshape(-1, 1)).flatten()
i += 1
for col in NO_TRANSFORM:
X[:,i] = df[col]
Expand Down Expand Up @@ -96,9 +96,9 @@ def get_data():
df_test = df.loc[test_idx]

Xtrain = transformer.fit_transform(df_train)
Ytrain = np.log(df_train['medv'].as_matrix())
Ytrain = np.log(df_train['medv'].values)
Xtest = transformer.transform(df_test)
Ytest = np.log(df_test['medv'].as_matrix())
Ytest = np.log(df_test['medv'].values)
return Xtrain, Ytrain, Xtest, Ytest


Expand Down
4 changes: 2 additions & 2 deletions unsupervised_class/kmeans_mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from .kmeans import plot_k_means, get_simple_data
from kmeans import plot_k_means, get_simple_data
from datetime import datetime

def get_data(limit=None):
print("Reading in and transforming data...")
df = pd.read_csv('../large_files/train.csv')
data = df.as_matrix()
data = df.values
np.random.shuffle(data)
X = data[:, 1:] / 255.0 # data is from 0..255
Y = data[:, 0]
Expand Down
2 changes: 1 addition & 1 deletion unsupervised_class2/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def getKaggleMNIST():
# column 0 is labels
# columns 1-784 are data (28 x 28 = 784 pixels), with values 0 .. 255
# total size of CSV: (42000, 785); viewed as images: (42000, 1, 28, 28)
train = pd.read_csv('../large_files/train.csv').as_matrix().astype(np.float32)
train = pd.read_csv('../large_files/train.csv').values.astype(np.float32)
train = shuffle(train)

Xtrain = train[:-1000,1:] / 255
Expand Down

0 comments on commit b8b97e3

Please sign in to comment.