-
Notifications
You must be signed in to change notification settings - Fork 13
/
python_demo.py
63 lines (46 loc) · 1.79 KB
/
python_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 30 11:00:14 2017
@author: MC
"""
from os import chdir
from os.path import isfile

import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# import data
# The project path below is relative, so an unconditional chdir only works
# once per session. Skip it when the data file is already reachable from the
# current directory so the script can be re-run.
if not isfile('data/wine.csv'):
    chdir('Documents/Stats/Repositories/Docs/introduction-to-machine-learning')
wine = pd.read_csv('data/wine.csv')

# data preprocessing
# No random_state is passed to the split or the forest below; the global
# NumPy seed is set here instead.
np.random.seed(1234)
# Drop the target ('good') along with the other columns excluded from the
# predictors.
X = wine.drop(
    ['free.sulfur.dioxide', 'density', 'quality', 'color', 'white', 'good'],
    axis=1,
)
# NOTE(review): the scaler is fit on every row before the train/test split
# below, so the test rows influence the scaling.
X = MinMaxScaler().fit_transform(X)  # by default on 0, 1 scale
y = wine['good']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# train model
rf = RandomForestClassifier(n_estimators=1000)
rf_train = rf.fit(X_train, y_train)

# get test predictions
rf_predict = rf_train.predict(X_test)

# create confusion matrix, and accuracy
cm = confusion_matrix(y_test, rf_predict)
cm_prob = cm / np.sum(cm)  # each cell as a share of all test rows
# Print explicitly: a bare expression statement displays nothing when the
# file is run as a script rather than typed into an interactive console.
print(cm_prob)
acc = accuracy_score(y_test, rf_predict)
acc = pd.DataFrame(np.array([acc]), columns=['Accuracy'])
print(acc)
import tensorflow.contrib.learn as skflow
from sklearn import metrics
# Deep neural network classifier on the same wine data.
# NOTE(review): `skflow` is tensorflow.contrib.learn, which is not available
# in TensorFlow 2.x; this section needs a 1.x install.

# Rebuild the predictors from the raw frame so the scaler can be fit on the
# training rows only. Fitting it on all rows before splitting lets the test
# rows influence the preprocessing and biases the reported accuracy.
features = wine.drop(
    ['free.sulfur.dioxide', 'density', 'quality', 'color', 'white', 'good'],
    axis=1,
)
y = wine['good'] == 'Good'  # boolean target: True where the label is 'Good'
X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=8675309
)

# Learn the min/max from the training rows, then apply the same mapping to
# both partitions (test values may fall slightly outside [0, 1]).
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

feats = skflow.infer_real_valued_columns_from_input(X_train)
classifier_tf = skflow.DNNClassifier(feature_columns=feats,
                                     hidden_units=[50, 50, 50, 40, 30, 20, 10],
                                     dropout=.2,
                                     n_classes=2)
classifier_tf.fit(X_train, y_train, steps=10000)

# predict() is asked for an iterable here; materialise it for the metric.
predictions = list(classifier_tf.predict(X_test, as_iterable=True))
score = metrics.accuracy_score(y_test, predictions)
print("Accuracy: %f" % score)