# catalog.yml
---
# Here you can define all your data sets by using simple YAML syntax.
#
# Documentation for this file format can be found in "The Data Catalog"
# Link: https://kedro.readthedocs.io/en/stable/05_data/01_data_catalog.html
#
# We support interacting with a variety of data stores including local file systems, cloud, network and HDFS
#
# An example data set definition can look as follows:
#
# bikes:
#   type: pandas.CSVDataSet
#   filepath: "data/01_raw/bikes.csv"
#
# weather:
#   type: spark.SparkDataSet
#   filepath: s3a://your_bucket/data/01_raw/weather*
#   file_format: csv
#   credentials: dev_s3
#   load_args:
#     header: True
#     inferSchema: True
#   save_args:
#     sep: '|'
#     header: True
#
# scooters:
#   type: pandas.SQLTableDataSet
#   credentials: scooters_credentials
#   table_name: scooters
#   load_args:
#     index_col: ['name']
#     columns: ['name', 'gear']
#   save_args:
#     if_exists: 'replace'
#     # if_exists: 'fail'
#     # if_exists: 'append'
#
# The Data Catalog supports being able to reference the same file using two different DataSet implementations
# (transcoding), templating and a way to reuse arguments that are frequently repeated. See more here:
# https://kedro.readthedocs.io/en/stable/05_data/01_data_catalog.html
#
# Data set read by the "Hello World" example pipeline that ships with the
# project template. It can be removed together with that example pipeline.
example_iris_data:
  type: pandas.CSVDataSet
  filepath: data/01_raw/iris.csv
# Partitioned collection of images stored under `path`; `filename_suffix`
# names the ".jpg" extension of the partition files. Each partition is handled
# by the nested `dataset` definition.
imageset:
  type: PartitionedDataSet
  path: data/01_raw/imageset
  filename_suffix: ".jpg"
  dataset:
    type: kedro_tf_image.extras.datasets.tf_image_dataset.TfImageDataSet
    # NOTE(review): presumably the target image edge length in pixels —
    # confirm against TfImageDataSet.
    imagedim: 224
    # Dotted path to the preprocessing callable, passed as a plain string.
    preprocess_input: tensorflow.keras.applications.resnet50.preprocess_input
# CSV table located at data/01_raw/skintype.csv.
skintype_data:
  type: pandas.CSVDataSet
  filepath: data/01_raw/skintype.csv
# Image folder data set backed by a directory on disk.
imagefolder:
  type: kedro_tf_image.extras.datasets.tf_image_folder.TfImageFolder
  # NOTE(review): absolute, user-specific path — it will not resolve on other
  # machines. Consider a project-relative path or a templated/global value.
  folderpath: "/home/a/archer/beapen/scratch/dermnet/train/rosacea-pd/tf"
  imagedim: 224
  # Arguments applied when the data set is loaded.
  # NOTE(review): nesting of the three keys below was reconstructed from a
  # flattened source — confirm they all belong under load_args.
  load_args:
    validation_split: 0.2
    seed: 123
    batch_size: 1
# Processed image data set stored in the intermediate data layer.
processeddataset:
  type: kedro_tf_image.extras.datasets.tf_image_processed.TfImageProcessed
  folderpath: data/02_intermediate/
  imagedim: 224
# In-memory data set; `copy_mode: assign` is set explicitly rather than left
# to the default.
datasetinmemory:
  type: MemoryDataSet
  copy_mode: assign
# Model weights file (.h5) loaded for the architecture named below.
chexnet_weights:
  type: kedro_tf_image.extras.datasets.tf_model_weights.TfModelWeights
  filepath: data/03_primary/brucechou1983_CheXNet_Keras_0.3.0_weights.h5
  architecture: DenseNet121
  # NOTE(review): class_num presumably sets the number of output classes —
  # confirm against TfModelWeights.
  load_args:
    class_num: 14