3 changes: 3 additions & 0 deletions .development.env
@@ -1,4 +1,7 @@
LAUCHDARKLY_SDK_KEY=
+oauth_url=https://deepchecks-monitoring.eu.auth0.com
+OAUTH_CLIENT_ID=
+OAUTH_CLIENT_SECRET=
email_smtp_host=email-smtp.eu-west-1.amazonaws.com
email_smtp_port=25
email_smtp_username=
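The new oauth_url / OAUTH_CLIENT_ID / OAUTH_CLIENT_SECRET entries configure the Auth0 tenant used for login, and the makefile changes below forward them into the test containers. As a minimal sketch only (not the actual deepchecks-monitoring settings code), a service could read and sanity-check these variables like so:

import os

# Illustrative sketch -- variable names mirror the additions to .development.env above.
oauth_url = os.environ.get("oauth_url", "")
oauth_client_id = os.environ.get("OAUTH_CLIENT_ID", "")
oauth_client_secret = os.environ.get("OAUTH_CLIENT_SECRET", "")

missing = [name for name, value in [("oauth_url", oauth_url),
                                    ("OAUTH_CLIENT_ID", oauth_client_id),
                                    ("OAUTH_CLIENT_SECRET", oauth_client_secret)] if not value]
if missing:
    raise RuntimeError(f"Missing OAuth configuration: {', '.join(missing)}")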
2 changes: 1 addition & 1 deletion backend/VERSION
@@ -1 +1 @@
-0.12.0
+0.15.1
31 changes: 13 additions & 18 deletions docs/source/user-guide/demos/plot_rent_prediction.py
@@ -77,15 +77,16 @@
# We'll start by downloading the training data from the deepchecks testing package. This training data will be used
# to set the reference for the model version. We'll also download the pre-calculated predictions for this data.

-from deepchecks.tabular.datasets.regression.airbnb import load_data_large
+from deepchecks.tabular.datasets.regression.airbnb import load_data, load_pre_calculated_prediction

-train_df, ref_predictions = load_data_large(data_format='DataFrame')
-train_df.head(2)
+ref_df, _ = load_data(data_format='Dataframe')
+ref_predictions, _ = load_pre_calculated_prediction()
+ref_df.head(2)

# %%
# So what do we have? Let's note the special columns in our data:
#
-# 1. datestamp - The timestamp of the sample (seconds since epoch)
+# 1. timestamp - The timestamp of the sample (seconds since epoch)
# 2. price - Our label
#
# All the other columns are features that can be used by our model to predict the price. We note that there are some
@@ -106,13 +107,12 @@
# together with metadata about the role of each column.

from deepchecks.tabular import Dataset
-timestamp, label_col = 'datestamp', 'price'
+timestamp, label_col = 'timestamp', 'price'
train_dataset = Dataset(
-    train_df, label=label_col,
+    ref_df, label=label_col,
    features=['room_type', 'neighbourhood', 'neighbourhood_group', 'has_availability', 'minimum_nights',
              'number_of_reviews', 'reviews_per_month', 'calculated_host_listings_count', 'availability_365'],
-    cat_features=['neighbourhood_group', 'neighbourhood', 'room_type', 'has_availability'],
-    datetime_name=timestamp)
+    cat_features=['neighbourhood_group', 'neighbourhood', 'room_type', 'has_availability'])

# %%
# We'll create the schema file, and print it to show (and validate) the schema that was created.
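The schema-creation code itself is collapsed in this diff. For context, a minimal sketch of that step, assuming the create_schema and read_schema helpers from the deepchecks_client package (the output file name here is just an example, and argument names may differ slightly), looks roughly like:

from deepchecks_client import create_schema, read_schema

# Sketch only -- the exact call lives in the collapsed part of the demo script.
schema_file_path = 'schema_file.yaml'  # example output path
create_schema(train_dataset, schema_file_path)  # infer column types and roles from the Dataset
read_schema(schema_file_path)                   # load it back to inspect and validate the schema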
@@ -174,14 +174,8 @@
# read more, refer to the :doc:`Production Data Guide </user-guide/tabular/tabular-production>`. Here we'll
# show how to use the batch upload method.

-prod_data, prod_predictions = load_data_large(data_format='DataFrame', load_train=False)
-
-# %%
-# We'll change the original timestamps so the samples are recent
-
-import datetime
-yesterdays_timestamp = int(datetime.datetime.now().timestamp()) - 3600*24
-prod_data[timestamp] = prod_data[timestamp] + (yesterdays_timestamp - prod_data[timestamp].max())
+_, prod_data = load_data(data_format='DataFrame')
+_, prod_predictions = load_pre_calculated_prediction()

# %%
# Uploading a Batch of Data
@@ -193,11 +187,12 @@
#
# Let's start by uploading the first part of the dataset

+prod_data[timestamp] = prod_data[timestamp].astype(int) // 10 ** 9 # Convert to second-based epoch time
timestamps = prod_data[timestamp].unique()
end_of_first_half = timestamps[3 * int(len(timestamps) // 4)] # This is the first 3 weeks of the production data

-first_half_df = prod_data[prod_data.datestamp < end_of_first_half]
-second_half_df = prod_data[prod_data.datestamp >= end_of_first_half]
+first_half_df = prod_data[prod_data.timestamp < end_of_first_half]
+second_half_df = prod_data[prod_data.timestamp >= end_of_first_half]

model_version.log_batch(sample_ids=first_half_df.index,
data=first_half_df.drop([timestamp, label_col], axis=1),
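The remainder of the log_batch call is collapsed above. As an illustrative continuation only (the timestamps and predictions keyword names are assumed from the deepchecks client docs, and the prediction slicing is schematic, not the demo's exact code), the second half of the data would later be logged the same way, so the monitoring system sees samples arriving over time:

# Sketch only -- keyword names and prediction slicing are assumptions, not the demo's exact code.
second_half_predictions = prod_predictions[len(first_half_df):]

model_version.log_batch(sample_ids=second_half_df.index,
                        data=second_half_df.drop([timestamp, label_col], axis=1),
                        timestamps=second_half_df[timestamp],
                        predictions=second_half_predictions)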
10 changes: 5 additions & 5 deletions makefile
@@ -315,12 +315,12 @@ external-services-setup:
@sleep 2

env-setup: external-services-setup
-@docker run -d --env-file .development.env --network deepchecks -p 8000:8000 deepchecks-enterprise-testing start-test.sh
+@docker run -d --env-file .development.env -e LAUCHDARKLY_SDK_KEY -e OAUTH_CLIENT_ID -e OAUTH_CLIENT_SECRET --network deepchecks -p 8000:8000 deepchecks-enterprise-testing start-test.sh
@sleep 15
-@docker run -d --env-file .development.env --network deepchecks deepchecks-enterprise-testing start-alert-scheduler.sh
-@docker run -d --env-file .development.env --network deepchecks deepchecks-enterprise-testing start-worker.sh
-@docker run -d --env-file .development.env --network deepchecks deepchecks-enterprise-testing start-task-queuer.sh
-@docker run -d --env-file .development.env --network deepchecks deepchecks-enterprise-testing start-task-runner.sh
+@docker run -d --env-file .development.env -e LAUCHDARKLY_SDK_KEY --network deepchecks deepchecks-enterprise-testing start-alert-scheduler.sh
+@docker run -d --env-file .development.env -e LAUCHDARKLY_SDK_KEY --network deepchecks deepchecks-enterprise-testing start-worker.sh
+@docker run -d --env-file .development.env -e LAUCHDARKLY_SDK_KEY --network deepchecks deepchecks-enterprise-testing start-task-queuer.sh
+@docker run -d --env-file .development.env -e LAUCHDARKLY_SDK_KEY --network deepchecks deepchecks-enterprise-testing start-task-runner.sh
@sleep 10

cypress: env-setup