Skip to content

Commit 7cbf94c

Browse files
committed
Make readme examples more brief
1 parent c870afe commit 7cbf94c

File tree

1 file changed

+14
-65
lines changed

1 file changed

+14
-65
lines changed

README.md

+14-65
Original file line numberDiff line numberDiff line change
@@ -34,35 +34,26 @@ To track changes and version everything about data Cascade has Datasets - specia
3434
that encapsulate changes that are done during preprocessing.
3535

3636
```python
37+
from pprint import pprint
3738
from cascade import data as cdd
38-
3939
from sklearn.datasets import load_digits
4040
import numpy as np
4141

4242

43-
# Load dataset
4443
X, y = load_digits(return_X_y=True)
4544
pairs = [(x, y) for (x, y) in zip(X, y)]
4645

47-
# To track all preparation stages we wrap cdd.Dataset over
48-
# collection of items and targets
46+
# To track all preparation stages we wrap cdd.Dataset
4947
ds = cdd.Wrapper(pairs)
5048

51-
# Let's make a pipeline - shuffle the dataset
49+
# This creates pipeline
5250
ds = cdd.RandomSampler(ds)
53-
54-
# Splitting the data is also tracked in pipeline's metadata
5551
train_ds, test_ds = cdd.split(ds)
56-
57-
# Add small noise to images
5852
train_ds = cdd.ApplyModifier(
5953
train_ds,
6054
lambda pair: pair + np.random.random() * 0.1 - 0.05
6155
)
6256

63-
# Let's see the metadata we got
64-
from pprint import pprint
65-
6657
pprint(train_ds.get_meta())
6758
```
6859

@@ -103,72 +94,36 @@ See all use-cases in [documentation](https://oxid15.github.io/cascade/quickstart
10394
Not only data and pipelines change over time. Models change more frequently and require a special system to handle experiments and artifacts.
10495

10596
```python
97+
import random
10698
from cascade import models as cdm
10799
from cascade import data as cdd
108100

109-
from sklearn.datasets import load_breast_cancer
110-
from sklearn.model_selection import train_test_split
111-
from sklearn.metrics import accuracy_score, f1_score
112-
113-
114-
X, y = load_breast_cancer(return_X_y=True)
115-
X_train, X_test, y_train, y_test = train_test_split(X, y)
116-
117-
# Define a simple model using
118-
# basic methods from cdm.BasicModel
119-
class BaselineModel(cdm.BasicModel):
120-
def __init__(self, const=0, *args, **kwargs) -> None:
121-
self.const = const
122-
super().__init__(const=const, *args, **kwargs)
123-
124-
def predict(self, x, *args, **kwargs):
125-
return [self.const for _ in range(len(x))]
126-
127-
# Models define the way they are trained, loaded and saved
128-
# we don't use these here, but they exist
129-
def fit(self, *args, **kwargs):
130-
pass
131-
132-
def save(self, path):
133-
pass
101+
model = cdm.Model()
102+
model.metrics.update({
103+
'acc': random.random()
104+
})
134105

135-
136-
model = BaselineModel(1)
137-
138-
# Fit and evaluate do not return anything
139-
model.fit(X_train, y_train)
140-
model.evaluate(X_test, y_test, {'acc': accuracy_score, 'f1': f1_score})
141-
142-
# Model repository is the solution for experiment and artifact storage
106+
# Repo is the collection of model lines
143107
repo = cdm.ModelRepo('repos/use_case_repo')
144108

145-
# Repo is the collection of model lines
146109
# Line can be a bunch of experiments on one model type
147110
line = repo.add_line('baseline')
148-
149-
# We save the model - everything is held automatically
150111
line.save(model, only_meta=True)
151-
152-
from pprint import pprint
153-
pprint(model.get_meta())
154112
```
155113

156114
Let's see what is saved as metadata of this experiment.
157115

158116
```json
159117
[
160118
{
161-
"name": "<__main__.BaselineModel object at 0x000001F69F493820>",
162-
"created_at": "2023-01-02T16:36:59.041979+00:00",
119+
"name": "cascade.models.model.Model",
120+
"created_at": "2023-05-29T21:06:23.341752+00:00",
163121
"metrics": {
164-
"acc": 0.6293706293706294,
165-
"f1": 0.7725321888412017
166-
},
167-
"params": {
168-
"const": 1
122+
"acc": 0.6745652975946803
169123
},
124+
"params": {},
170125
"type": "model",
171-
"saved_at": "2023-01-02T16:36:59.103781+00:00"
126+
"saved_at": "2023-05-29T21:06:25.977728+00:00"
172127
}
173128
]
174129
```
@@ -189,24 +144,20 @@ from sklearn.datasets import load_digits
189144
import numpy as np
190145

191146

192-
# Load data
193147
X, y = load_digits(return_X_y=True)
194148
pairs = [(x, y) for (x, y) in zip(X, y)]
195149

196-
# Let's define a pipeline
197150
ds = cdd.Wrapper(pairs)
198151
ds = cdd.RandomSampler(ds)
199152
train_ds, test_ds = cdd.split(ds)
200153

201-
# Validate using this tool
202154
cme.PredicateValidator(
203155
train_ds,
204156
[
205157
lambda pair: all(pair[0] < 20),
206158
lambda pair: pair[1] in (i for i in range(10))
207159
]
208160
)
209-
210161
```
211162

212163
See all use-cases in [documentation](https://oxid15.github.io/cascade/quickstart.html)
@@ -221,7 +172,6 @@ metrics of all models in repository.
221172
from cascade import meta as cme
222173
from cascade import models as cdm
223174

224-
# Open the existing repo
225175
repo = cdm.ModelRepo('repos/use_case_repo')
226176

227177
# This runs web-server that relies on optional dependency
@@ -244,7 +194,6 @@ cme.HistoryViewer(repo).plot()
244194

245195
# This runs a server and allows to see changes in real time (for example while models are trained)
246196
cme.HistoryViewer(repo).serve()
247-
248197
```
249198

250199
See all use-cases in [documentation](https://oxid15.github.io/cascade/quickstart.html)

0 commit comments

Comments
 (0)