@@ -91,3 +91,78 @@ def test_bqml_getting_started(random_model_id):
91
91
replace = True ,
92
92
)
93
93
# [END bigquery_dataframes_bqml_getting_started_tutorial]
94
+
95
+ # [START bigquery_dataframes_bqml_getting_started_tutorial_evaluate]
96
+ import bigframes .pandas as bpd
97
+
98
+ # Select the model you'll use for evaluation. `read_gbq_model` loads model data from
99
+ # BigQuery, but you could also use the `model` object from the previous steps.
100
+ model = bpd .read_gbq_model (
101
+ your_model_id , # For example: "bqml_tutorial.sample_model",
102
+ )
103
+
104
+ # The WHERE clause — _TABLE_SUFFIX BETWEEN '20170701' AND '20170801' —
105
+ # limits the number of tables scanned by the query. The date range scanned is
106
+ # July 1, 2017 to August 1, 2017. This is the data you're using to evaluate the predictive performance
107
+ # of the model. It was collected in the month immediately following the time
108
+ # period spanned by the training data.
109
+
110
+ df = bpd .read_gbq (
111
+ """
112
+ SELECT GENERATE_UUID() AS rowindex, *
113
+ FROM
114
+ `bigquery-public-data.google_analytics_sample.ga_sessions_*`
115
+ WHERE
116
+ _TABLE_SUFFIX BETWEEN '20170701' AND '20170801'
117
+ """ ,
118
+ index_col = "rowindex" ,
119
+ )
120
+ transactions = df ["totals" ].struct .field ("transactions" )
121
+ label = transactions .notnull ().map ({True : 1 , False : 0 })
122
+ operatingSystem = df ["device" ].struct .field ("operatingSystem" )
123
+ operatingSystem = operatingSystem .fillna ("" )
124
+ isMobile = df ["device" ].struct .field ("isMobile" )
125
+ country = df ["geoNetwork" ].struct .field ("country" ).fillna ("" )
126
+ pageviews = df ["totals" ].struct .field ("pageviews" ).fillna (0 )
127
+ features = bpd .DataFrame (
128
+ {
129
+ "os" : operatingSystem ,
130
+ "is_mobile" : isMobile ,
131
+ "country" : country ,
132
+ "pageviews" : pageviews ,
133
+ }
134
+ )
135
+
136
+ # Some models include a convenient .score(X, y) method for evaluation with a preset accuracy metric:
137
+
138
+ # Because you performed a logistic regression, the results include the following columns:
139
+
140
+ # - precision — A metric for classification models. Precision identifies the frequency with
141
+ # which a model was correct when predicting the positive class.
142
+
143
+ # - recall — A metric for classification models that answers the following question:
144
+ # Out of all the possible positive labels, how many did the model correctly identify?
145
+
146
+ # - accuracy — Accuracy is the fraction of predictions that a classification model got right.
147
+
148
+ # - f1_score — A measure of the accuracy of the model. The f1 score is the harmonic average of
149
+ # the precision and recall. An f1 score's best value is 1. The worst value is 0.
150
+
151
+ # - log_loss — The loss function used in a logistic regression. This is the measure of how far the
152
+ # model's predictions are from the correct labels.
153
+
154
+ # - roc_auc — The area under the ROC curve. This is the probability that a classifier is more confident that
155
+ # a randomly chosen positive example
156
+ # is actually positive than that a randomly chosen negative example is positive. For more information,
157
+ # see [Classification](https://developers.google.com/machine-learning/crash-course/classification/video-lecture)
158
+ # in the Machine Learning Crash Course.
159
+
160
+ model .score (features , label )
161
+ # precision recall accuracy f1_score log_loss roc_auc
162
+ # 0 0.412621 0.079143 0.985074 0.132812 0.049764 0.974285
163
+ # [1 rows x 6 columns]
164
+ # [END bigquery_dataframes_bqml_getting_started_tutorial_evaluate]
165
+
166
+ # [START bigquery_dataframes_bqml_getting_started_tutorial_predict]
167
+
168
+ # [END bigquery_dataframes_bqml_getting_started_tutorial_predict]
0 commit comments