Skip to content
This repository was archived by the owner on Jun 29, 2019. It is now read-only.
Merged
9 changes: 5 additions & 4 deletions Misc/PythonSQL/DeserializeSavePlots.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import pyodbc
import pickle
import os

cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER={SERVER_NAME};DATABASE={DB_NAME};UID={USER_NAME};PWD={PASSWORD}')
cursor = cnxn.cursor()
cursor.execute("EXECUTE [dbo].[SerializePlots]")
tables = cursor.fetchall()
for i in range(0, len(tables)):
fig = pickle.loads(tables[i][0])
fig.savefig(str(i)+'.png');
for i, table in enumerate(tables):
fig = pickle.loads(table[0])
fig.savefig(str(i)+'.png')

print("The plots are saved in directory: ",os.getcwd())
print("The plots are saved in directory:", os.getcwd())
7 changes: 4 additions & 3 deletions Misc/PythonSQL/DeserializeSavePlots2.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import pyodbc
import pickle
import os

cnxn = pyodbc.connect('DRIVER={SQL Server};SERVER={SERVER_NAME};DATABASE={DB_NAME};Trusted_Connection=yes;')
cursor = cnxn.cursor()
cursor.execute("EXECUTE [dbo].[SerializePlots]")
tables = cursor.fetchall()
for i in range(0, len(tables)):
fig = pickle.loads(tables[i][0])
for i, table in enumerate(tables):
fig = pickle.loads(table[0])
fig.savefig(str(i)+'.png')

print("The plots are saved in directory: ",os.getcwd())
print("The plots are saved in directory:", os.getcwd())
24 changes: 11 additions & 13 deletions Misc/PythonSQL/PredictTipRxPy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,24 @@ BEGIN
EXEC sp_execute_external_script
@language = N'Python',
@script = N'
import pickle;
import numpy;
import pandas;
import pickle
import numpy
import pandas
from sklearn import metrics
from revoscalepy.functions.RxPredict import rx_predict_ex;
from revoscalepy.functions.RxPredict import rx_predict_ex

mod = pickle.loads(lmodel2)
X = InputDataSet[["passenger_count", "trip_distance", "trip_time_in_secs", "direct_distance"]]
y = numpy.ravel(InputDataSet[["tipped"]])

probArray = rx_predict_ex(mod, X)
probList = []
for i in range(len(probArray._results["tipped_Pred"])):
probList.append((probArray._results["tipped_Pred"][i]))
# Run the prediction, then copy the "tipped_Pred" entry of its results into a plain list.
# The name must match the prob_array assigned on the previous statement; a misspelled
# name here raises NameError when the script runs.
prob_array = rx_predict_ex(mod, X)
prob_list = list(prob_array._results["tipped_Pred"])

probArray = numpy.asarray(probList)
fpr, tpr, thresholds = metrics.roc_curve(y, probArray)
aucResult = metrics.auc(fpr, tpr)
print ("AUC on testing data is: " + str(aucResult))
OutputDataSet = pandas.DataFrame(data = probList, columns = ["predictions"])
prob_array = numpy.asarray(prob_list)
fpr, tpr, thresholds = metrics.roc_curve(y, prob_array)
auc_result = metrics.auc(fpr, tpr)
print("AUC on testing data is:", auc_result)
OutputDataSet = pandas.DataFrame(data=prob_list, columns=["predictions"])
',
@input_data_1 = @inquery,
@input_data_1_name = N'InputDataSet',
Expand Down
22 changes: 10 additions & 12 deletions Misc/PythonSQL/PredictTipSciKitPy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,27 +21,25 @@ BEGIN
EXEC sp_execute_external_script
@language = N'Python',
@script = N'
import pickle;
import numpy;
import pandas;
import pickle
import numpy
import pandas
from sklearn import metrics

mod = pickle.loads(lmodel2)

X = InputDataSet[["passenger_count", "trip_distance", "trip_time_in_secs", "direct_distance"]]
y = numpy.ravel(InputDataSet[["tipped"]])

probArray = mod.predict_proba(X)
probList = []
for i in range(len(probArray)):
probList.append((probArray[i])[1])
prob_array = mod.predict_proba(X)
prob_list = [item[1] for item in prob_array]

probArray = numpy.asarray(probList)
fpr, tpr, thresholds = metrics.roc_curve(y, probArray)
aucResult = metrics.auc(fpr, tpr)
print ("AUC on testing data is: " + str(aucResult))
prob_array = numpy.asarray(prob_list)
fpr, tpr, thresholds = metrics.roc_curve(y, prob_array)
auc_result = metrics.auc(fpr, tpr)
print("AUC on testing data is:", auc_result)

OutputDataSet = pandas.DataFrame(data = probList, columns = ["predictions"])
OutputDataSet = pandas.DataFrame(data=prob_list, columns=["predictions"])
',
@input_data_1 = @inquery,
@input_data_1_name = N'InputDataSet',
Expand Down
17 changes: 8 additions & 9 deletions Misc/PythonSQL/PredictTipSingleModeRxPy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,25 @@ BEGIN
EXEC sp_execute_external_script
@language = N'Python',
@script = N'
import pickle;
import numpy;
import pandas;
from revoscalepy.functions.RxPredict import rx_predict_ex;
import pickle
import numpy
import pandas
from revoscalepy.functions.RxPredict import rx_predict_ex

# Load model and unserialize
mod = pickle.loads(model)

# Get features for scoring from input data
X = InputDataSet[["passenger_count", "trip_distance", "trip_time_in_secs", "direct_distance"]]
x = InputDataSet[["passenger_count", "trip_distance", "trip_time_in_secs", "direct_distance"]]

# Score data to get tip prediction probability as a list (of float)

probArray = rx_predict_ex(mod, X)
prob_array = rx_predict_ex(mod, x)

probList = []
probList.append(probArray._results["tipped_Pred"])
prob_list = [prob_array._results["tipped_Pred"]]

# Create output data frame
OutputDataSet = pandas.DataFrame(data = probList, columns = ["predictions"])
OutputDataSet = pandas.DataFrame(data=prob_list, columns=["predictions"])
',
@input_data_1 = @inquery,
@params = N'@model varbinary(max),@passenger_count int,@trip_distance float,
Expand Down
11 changes: 5 additions & 6 deletions Misc/PythonSQL/PredictTipSingleModeSciKitPy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ BEGIN
EXEC sp_execute_external_script
@language = N'Python',
@script = N'
import pickle;
import numpy;
import pandas;
import pickle
import numpy
import pandas

# Load model and unserialize
mod = pickle.loads(model)
Expand All @@ -42,11 +42,10 @@ mod = pickle.loads(model)
X = InputDataSet[["passenger_count", "trip_distance", "trip_time_in_secs", "direct_distance"]]

# Score data to get tip prediction probability as a list (of float)
probList = []
probList.append((mod.predict_proba(X)[0])[1])
prob = [mod.predict_proba(X)[0][1]]

# Create output data frame
OutputDataSet = pandas.DataFrame(data = probList, columns = ["predictions"])
OutputDataSet = pandas.DataFrame(data=prob, columns=["predictions"])
',
@input_data_1 = @inquery,
@params = N'@model varbinary(max),@passenger_count int,@trip_distance float,
Expand Down
8 changes: 4 additions & 4 deletions Misc/PythonSQL/SerializePlots.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,28 @@ plt.hist(InputDataSet.tipped)
plt.xlabel("Tipped")
plt.ylabel("Counts")
plt.title("Histogram, Tipped")
plot0 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns =["plot"])
plot0 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns=["plot"])
plt.clf()

plt.hist(InputDataSet.tip_amount)
plt.xlabel("Tip amount ($)")
plt.ylabel("Counts")
plt.title("Histogram, Tip amount")
plot1 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns =["plot"])
plot1 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns=["plot"])
plt.clf()

plt.hist(InputDataSet.fare_amount)
plt.xlabel("Fare amount ($)")
plt.ylabel("Counts")
plt.title("Histogram, Fare amount")
plot2 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns =["plot"])
plot2 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns=["plot"])
plt.clf()

plt.scatter( InputDataSet.fare_amount, InputDataSet.tip_amount)
plt.xlabel("Fare Amount ($)")
plt.ylabel("Tip Amount ($)")
plt.title("Tip amount by Fare amount")
plot3 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns =["plot"])
plot3 = pd.DataFrame(data =[pickle.dumps(fig_handle)], columns=["plot"])
plt.clf()

OutputDataSet = plot0.append(plot1, ignore_index=True).append(plot2, ignore_index=True).append(plot3, ignore_index=True)
Expand Down
6 changes: 3 additions & 3 deletions Misc/PythonSQL/TrainTipPredictionModelRxPy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ BEGIN
import numpy
import pickle
import pandas
from revoscalepy.functions.RxLogit import rx_logit_ex;
from revoscalepy.functions.RxPredict import rx_predict_ex;
from revoscalepy.functions.RxLogit import rx_logit_ex
from revoscalepy.functions.RxPredict import rx_predict_ex

logitObj = rx_logit_ex("tipped ~ passenger_count + trip_distance + trip_time_in_secs + direct_distance", data = InputDataSet);
logitObj = rx_logit_ex("tipped ~ passenger_count + trip_distance + trip_time_in_secs + direct_distance", data=InputDataSet);

## Serialize model
trained_model = pickle.dumps(logitObj)
Expand Down