code-kern-ai · LeonardPuettmannKern · Aug 15, 2023 · Jun 23, 2023 · Jun 23, 2023 · Jun 23, 2023
diff --git a/classifiers/active_learner/bayesian_optimization/config.py b/classifiers/active_learner/bayesian_optimization/config.py
@@ -1,5 +1,6 @@
 from util.configs import build_classifier_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
+
 from . import bayesian_optimization
 
 
@@ -20,6 +21,40 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyBayesian",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "ITERATIONS": {
+                    "selectionType": SelectionType.INTEGER.value,
+                    "defaultValue": 100,
+                    "description": "this can be modified by the user",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_INT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
-    )
+)
+
diff --git a/classifiers/active_learner/decision_tree/code_snippet_refinery.md b/classifiers/active_learner/decision_tree/code_snippet_refinery.md
@@ -3,25 +3,25 @@ from sklearn.tree import DecisionTreeClassifier
 from typing import List
 # you can find further models here: https://scikit-learn.org/stable/supervised_learning.html#supervised-learning
 
-YOUR_EMBEDDING: str = "text-classification-distilbert-base-uncased" 
-YOUR_MIN_CONFIDENCE: float = 0.8
-YOUR_LABELS: List[str] = None # optional, you can specify a list to filter the predictions
+EMBEDDING: str = "text-classification-distilbert-base-uncased" 
+MIN_CONFIDENCE: float = 0.8
+LABELS: List[str] = None # optional, you can specify a list to filter the predictions (e.g. ["label-a", "label-b"])
 
 class MyDT(LearningClassifier):
 
     def __init__(self):
         self.model = DecisionTreeClassifier()
 
     @params_fit(
-        embedding_name = YOUR_EMBEDDING, 
+        embedding_name = EMBEDDING, 
         train_test_split = 0.5 # we have this fixed at the moment, but you'll soon be able to specify this individually! 
     )
     def fit(self, embeddings, labels):
         self.model.fit(embeddings, labels)
 
     @params_inference(
-        min_confidence = YOUR_MIN_CONFIDENCE,
-        label_names = YOUR_LABELS 
+        min_confidence = MIN_CONFIDENCE,
+        label_names = LABELS 
     )
     def predict_proba(self, embeddings):
         return self.model.predict_proba(embeddings)

diff --git a/classifiers/active_learner/decision_tree/config.py b/classifiers/active_learner/decision_tree/config.py
@@ -1,5 +1,5 @@
 from util.configs import build_classifier_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
 from . import decision_tree
 
 
@@ -20,6 +20,34 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyDT",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "optional": "true",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
     )
diff --git a/classifiers/active_learner/grid_search/code_snippet_refinery.md b/classifiers/active_learner/grid_search/code_snippet_refinery.md
@@ -35,5 +35,4 @@ class MyGrid(LearningClassifier):
 
     def predict_proba(self, embeddings):
         return self.model.predict_proba(embeddings)
-
 ```
diff --git a/classifiers/active_learner/grid_search/config.py b/classifiers/active_learner/grid_search/config.py
@@ -1,5 +1,5 @@
 from util.configs import build_classifier_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, SelectionType, BricksVariableType
 from . import grid_search
 
 
@@ -20,6 +20,34 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyGrid",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "optional": "true",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
-    )
+    )
diff --git a/classifiers/active_learner/logistic_regression/config.py b/classifiers/active_learner/logistic_regression/config.py
@@ -1,5 +1,5 @@
 from util.configs import build_classifier_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, SelectionType, BricksVariableType
 from . import logistic_regression
 
 
@@ -20,6 +20,34 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyLR",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "optional": "true",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
     )
diff --git a/classifiers/active_learner/random_forest/config.py b/classifiers/active_learner/random_forest/config.py
@@ -1,5 +1,5 @@
 from util.configs import build_classifier_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, SelectionType, BricksVariableType
 from . import random_forest
 
 
@@ -20,6 +20,34 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyRF",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "optional": "true",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
-    )
+    )
diff --git a/classifiers/active_learner/random_search/config.py b/classifiers/active_learner/random_search/config.py
@@ -1,5 +1,5 @@
 from util.configs import build_classifier_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, SelectionType, BricksVariableType
 from . import random_search
 
 
@@ -20,6 +20,42 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyRandom",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "ITERATIONS": {
+                    "selectionType": SelectionType.INTEGER.value,
+                    "defaultValue": 100,
+                    "description": "this can be modified by the user",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_INT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "optional": "true",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
     )
diff --git a/extractors/active_learner/crf_tagger/code_snippet_refinery.md b/extractors/active_learner/crf_tagger/code_snippet_refinery.md
@@ -7,7 +7,7 @@ EMBEDDING: str = "text-extraction-distilbert-base-uncased" # pick this from the
 MIN_CONFIDENCE: float = 0.8
 LABELS: List[str] = None # optional, you can specify a list to filter the predictions (e.g. ["label-a", "label-b"])
 
-class MyActiveLearner(LearningExtractor):
+class MyCRF(LearningExtractor):
 
     def __init__(self):
         self.model = CRFTagger(
@@ -31,5 +31,4 @@ class MyActiveLearner(LearningExtractor):
     )
     def predict_proba(self, embeddings):
         return self.model.predict_proba(embeddings)
-
 ```
diff --git a/extractors/active_learner/crf_tagger/config.py b/extractors/active_learner/crf_tagger/config.py
@@ -1,5 +1,5 @@
 from util.configs import build_extractor_learner_config
-from util.enums import State
+from util.enums import State, RefineryDataType, BricksVariableType, SelectionType
 from . import crf_tagger
 
 
@@ -20,6 +20,34 @@ def get_config():
             "gdpr_compliant",
         ],
         integrator_inputs={
-            "input": "coming soon"
+            "name": "MyCRF",
+            "refineryDataType": RefineryDataType.TEXT.value,
+            "variables": {
+                "EMBEDDING": {
+                    "selectionType": SelectionType.STRING.value,
+                    "defaultValue": "text-classification-distilbert-base-uncased",
+                    "description": "pick this from the options above",
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                },
+                "MIN_CONFIDENCE": {
+                    "selectionType": SelectionType.FLOAT.value,
+                    "defaultValue": 0.8,
+                    "optional": "false",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_FLOAT.value
+                    ]
+                },
+                "LABELS": {
+                    "selectionType": SelectionType.STRING.value,
+                    "description": "optional, you can specify a list to filter the predictions (e.g. [\"label-a\", \"label-b\"])",
+                    "optional": "true",
+                    "addInfo": [
+                        BricksVariableType.GENERIC_STRING.value
+                    ]
+                }
+            }
         }
     )
Original file line number	Diff line number	Diff line change
Expand Up		@@ -35,5 +35,4 @@ class MyGrid(LearningClassifier):

		def predict_proba(self, embeddings):
		return self.model.predict_proba(embeddings)

		```