-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Components - CatBoost * Train classifier * Predict class * Added CatBoost converter components * Added a sample pipeline * Fixed bug * Fixed the style * Updated the converter components * Added the Train_regression component * Added the Predict_values component * Added the Predict_class_probabilities component * Added the additional_training_options input to the trainers * Changed the output type of Train_classifier to CatBoostModel * Renamed Predict_class to Predict_classes * Updated the sample pipeline * Moved some components to the from_CSV subdirectories * Updated the sample pipeline * Fixed the sample * Sample - Simplified the training data referencing * Sample - Added training_data_for_classification calculation * Updated a stale component.yaml file * Fixed the input type * Fixed the default loss function for the regression training * Fixed the CSV reading in the predictor components * Fixed the prediction_type * Fixed the class predictions saving * Update sample_pipeline.py * Finalized the sample
- Loading branch information
Showing
15 changed files
with
1,393 additions
and
0 deletions.
There are no files selected for viewing
58 changes: 58 additions & 0 deletions
58
components/CatBoost/Predict_class_probabilities/from_CSV/component.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from kfp.components import InputPath, OutputPath, create_component_from_func | ||
|
||
def catboost_predict_class_probabilities(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict class probabilities with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format. The file is read as
            comma-delimited with a header row.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. When omitted, no
            column description file is passed to CatBoost.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    # Imports are kept function-local, as in the rest of this component code.
    import os
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    column_description_path = None
    # Compare against None explicitly: 0 is falsy, so a plain truthiness test
    # would treat label_column=0 as "not provided".
    if label_column is not None:
        # Write and close the file through a single handle instead of
        # creating it, discarding the handle and reopening it by name.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name

    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )

        model = CatBoost()
        model.load_model(model_path)

        predictions = model.predict(eval_data, prediction_type='Probability')
        numpy.savetxt(predictions_path, predictions)
    finally:
        # The column description file was created with delete=False, so it
        # has to be removed explicitly, even when prediction fails.
        if column_description_path is not None:
            os.remove(column_description_path)
|
||
|
||
if __name__ == '__main__':
    # Running this file as a script builds the component from the function
    # above and writes its definition to component.yaml (a relative path,
    # so it lands in the current working directory).
    component_options = dict(
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'],
    )
    catboost_predict_class_probabilities_op = create_component_from_func(
        catboost_predict_class_probabilities,
        **component_options
    )
108 changes: 108 additions & 0 deletions
108
components/CatBoost/Predict_class_probabilities/from_CSV/component.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
name: Catboost predict class probabilities | ||
description: |- | ||
Predict class probabilities with a CatBoost model. | ||
Args: | ||
data_path: Path for the data in CSV format. | ||
model_path: Path for the trained model in binary CatBoostModel format. | ||
label_column: Column containing the label data. | ||
predictions_path: Output path for the predictions. | ||
Outputs: | ||
predictions: Predictions in text format. | ||
Annotations: | ||
author: Alexey Volkov <alexey.volkov@ark-kun.com> | ||
inputs: | ||
- {name: data, type: CSV} | ||
- {name: model, type: CatBoostModel} | ||
- {name: label_column, type: Integer, optional: true} | ||
outputs: | ||
- {name: predictions} | ||
implementation: | ||
container: | ||
image: python:3.7 | ||
command: | ||
- sh | ||
- -c | ||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location | ||
'catboost==0.23' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet | ||
--no-warn-script-location 'catboost==0.23' --user) && "$0" "$@" | ||
- python3 | ||
- -u | ||
- -c | ||
- | | ||
def _make_parent_dirs_and_return_path(file_path: str): | ||
import os | ||
os.makedirs(os.path.dirname(file_path), exist_ok=True) | ||
return file_path | ||
def catboost_predict_class_probabilities(
    data_path,
    model_path,
    predictions_path,
    label_column = None,
):
    '''Predict class probabilities with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format. The file is read as
            comma-delimited with a header row.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. When omitted, no
            column description file is passed to CatBoost.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile
    from catboost import CatBoost, Pool
    import numpy
    column_description_path = None
    # Compare against None explicitly: 0 is falsy, so a plain truthiness test
    # would treat label_column=0 as "not provided".
    if label_column is not None:
        # Write and close the file through a single handle instead of
        # creating it, discarding the handle and reopening it by name.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name
    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
        model = CatBoost()
        model.load_model(model_path)
        predictions = model.predict(eval_data, prediction_type='Probability')
        numpy.savetxt(predictions_path, predictions)
    finally:
        # The column description file was created with delete=False, so it
        # has to be removed explicitly, even when prediction fails.
        if column_description_path is not None:
            os.remove(column_description_path)
import argparse | ||
_parser = argparse.ArgumentParser(prog='Catboost predict class probabilities', description='Predict class probabilities with a CatBoost model.\n\n Args:\n data_path: Path for the data in CSV format.\n model_path: Path for the trained model in binary CatBoostModel format.\n label_column: Column containing the label data.\n predictions_path: Output path for the predictions.\n\n Outputs:\n predictions: Predictions in text format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>') | ||
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS) | ||
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) | ||
_parsed_args = vars(_parser.parse_args()) | ||
_outputs = catboost_predict_class_probabilities(**_parsed_args) | ||
args: | ||
- --data | ||
- {inputPath: data} | ||
- --model | ||
- {inputPath: model} | ||
- if: | ||
cond: {isPresent: label_column} | ||
then: | ||
- --label-column | ||
- {inputValue: label_column} | ||
- --predictions | ||
- {outputPath: predictions} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from kfp.components import InputPath, OutputPath, create_component_from_func | ||
|
||
def catboost_predict_classes(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict classes using the CatBoost classifier model.

    Args:
        data_path: Path for the data in CSV format. The file is read as
            comma-delimited with a header row.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. When omitted, no
            column description file is passed to CatBoost.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Class predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    # Imports are kept function-local, as in the rest of this component code.
    import os
    import tempfile

    from catboost import CatBoostClassifier, Pool
    import numpy

    column_description_path = None
    # Compare against None explicitly: 0 is falsy, so a plain truthiness test
    # would treat label_column=0 as "not provided".
    if label_column is not None:
        # Write and close the file through a single handle instead of
        # creating it, discarding the handle and reopening it by name.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name

    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )

        model = CatBoostClassifier()
        model.load_model(model_path)

        predictions = model.predict(eval_data)
        # fmt='%s' writes each prediction via its string form rather than
        # savetxt's default float format.
        numpy.savetxt(predictions_path, predictions, fmt='%s')
    finally:
        # The column description file was created with delete=False, so it
        # has to be removed explicitly, even when prediction fails.
        if column_description_path is not None:
            os.remove(column_description_path)
|
||
|
||
if __name__ == '__main__':
    # Running this file as a script builds the component from the function
    # above and writes its definition to component.yaml (a relative path,
    # so it lands in the current working directory).
    # NOTE(review): other CatBoost predictor components pin catboost==0.23;
    # confirm that 0.22 is intentional here.
    component_options = dict(
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.22'],
    )
    catboost_predict_classes_op = create_component_from_func(
        catboost_predict_classes,
        **component_options
    )
108 changes: 108 additions & 0 deletions
108
components/CatBoost/Predict_classes/from_CSV/component.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
name: Catboost predict classes | ||
description: |- | ||
Predict classes using the CatBoost classifier model. | ||
Args: | ||
data_path: Path for the data in CSV format. | ||
model_path: Path for the trained model in binary CatBoostModel format. | ||
label_column: Column containing the label data. | ||
predictions_path: Output path for the predictions. | ||
Outputs: | ||
predictions: Class predictions in text format. | ||
Annotations: | ||
author: Alexey Volkov <alexey.volkov@ark-kun.com> | ||
inputs: | ||
- {name: data, type: CSV} | ||
- {name: model, type: CatBoostModel} | ||
- {name: label_column, type: Integer, optional: true} | ||
outputs: | ||
- {name: predictions} | ||
implementation: | ||
container: | ||
image: python:3.7 | ||
command: | ||
- sh | ||
- -c | ||
- (PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location | ||
'catboost==0.22' || PIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet | ||
--no-warn-script-location 'catboost==0.22' --user) && "$0" "$@" | ||
- python3 | ||
- -u | ||
- -c | ||
- | | ||
def _make_parent_dirs_and_return_path(file_path: str): | ||
import os | ||
os.makedirs(os.path.dirname(file_path), exist_ok=True) | ||
return file_path | ||
def catboost_predict_classes(
    data_path,
    model_path,
    predictions_path,
    label_column = None,
):
    '''Predict classes using the CatBoost classifier model.

    Args:
        data_path: Path for the data in CSV format. The file is read as
            comma-delimited with a header row.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. When omitted, no
            column description file is passed to CatBoost.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Class predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    import os
    import tempfile
    from catboost import CatBoostClassifier, Pool
    import numpy
    column_description_path = None
    # Compare against None explicitly: 0 is falsy, so a plain truthiness test
    # would treat label_column=0 as "not provided".
    if label_column is not None:
        # Write and close the file through a single handle instead of
        # creating it, discarding the handle and reopening it by name.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name
    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )
        model = CatBoostClassifier()
        model.load_model(model_path)
        predictions = model.predict(eval_data)
        # fmt='%s' writes each prediction via its string form rather than
        # savetxt's default float format.
        numpy.savetxt(predictions_path, predictions, fmt='%s')
    finally:
        # The column description file was created with delete=False, so it
        # has to be removed explicitly, even when prediction fails.
        if column_description_path is not None:
            os.remove(column_description_path)
import argparse | ||
_parser = argparse.ArgumentParser(prog='Catboost predict classes', description='Predict classes using the CatBoost classifier model.\n\n Args:\n data_path: Path for the data in CSV format.\n model_path: Path for the trained model in binary CatBoostModel format.\n label_column: Column containing the label data.\n predictions_path: Output path for the predictions.\n\n Outputs:\n predictions: Class predictions in text format.\n\n Annotations:\n author: Alexey Volkov <alexey.volkov@ark-kun.com>') | ||
_parser.add_argument("--data", dest="data_path", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--model", dest="model_path", type=str, required=True, default=argparse.SUPPRESS) | ||
_parser.add_argument("--label-column", dest="label_column", type=int, required=False, default=argparse.SUPPRESS) | ||
_parser.add_argument("--predictions", dest="predictions_path", type=_make_parent_dirs_and_return_path, required=True, default=argparse.SUPPRESS) | ||
_parsed_args = vars(_parser.parse_args()) | ||
_outputs = catboost_predict_classes(**_parsed_args) | ||
args: | ||
- --data | ||
- {inputPath: data} | ||
- --model | ||
- {inputPath: model} | ||
- if: | ||
cond: {isPresent: label_column} | ||
then: | ||
- --label-column | ||
- {inputValue: label_column} | ||
- --predictions | ||
- {outputPath: predictions} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from kfp.components import InputPath, OutputPath, create_component_from_func | ||
|
||
def catboost_predict_values(
    data_path: InputPath('CSV'),
    model_path: InputPath('CatBoostModel'),
    predictions_path: OutputPath(),

    label_column: int = None,
):
    '''Predict values with a CatBoost model.

    Args:
        data_path: Path for the data in CSV format. The file is read as
            comma-delimited with a header row.
        model_path: Path for the trained model in binary CatBoostModel format.
        label_column: Column containing the label data. When omitted, no
            column description file is passed to CatBoost.
        predictions_path: Output path for the predictions.

    Outputs:
        predictions: Predictions in text format.

    Annotations:
        author: Alexey Volkov <alexey.volkov@ark-kun.com>
    '''
    # Imports are kept function-local, as in the rest of this component code.
    import os
    import tempfile

    from catboost import CatBoost, Pool
    import numpy

    column_description_path = None
    # Compare against None explicitly: 0 is falsy, so a plain truthiness test
    # would treat label_column=0 as "not provided".
    if label_column is not None:
        # Write and close the file through a single handle instead of
        # creating it, discarding the handle and reopening it by name.
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as column_description_file:
            column_description_file.write('{}\t{}\n'.format(label_column, 'Label'))
            column_description_path = column_description_file.name

    try:
        eval_data = Pool(
            data_path,
            column_description=column_description_path,
            has_header=True,
            delimiter=',',
        )

        model = CatBoost()
        model.load_model(model_path)

        predictions = model.predict(eval_data, prediction_type='RawFormulaVal')
        numpy.savetxt(predictions_path, predictions)
    finally:
        # The column description file was created with delete=False, so it
        # has to be removed explicitly, even when prediction fails.
        if column_description_path is not None:
            os.remove(column_description_path)
|
||
|
||
if __name__ == '__main__':
    # Running this file as a script builds the component from the function
    # above and writes its definition to component.yaml (a relative path,
    # so it lands in the current working directory).
    component_options = dict(
        output_component_file='component.yaml',
        base_image='python:3.7',
        packages_to_install=['catboost==0.23'],
    )
    catboost_predict_values_op = create_component_from_func(
        catboost_predict_values,
        **component_options
    )
Oops, something went wrong.