Commit 7b170c8

Merge pull request #73 from togethercomputer/Vprov/add_dpo
Add support for the training_method and dpo_beta parameters
2 parents ac319b9 + 5be7c01 commit 7b170c8

File tree

1 file changed: +22 −0 lines

openapi.yaml

Lines changed: 22 additions & 0 deletions
@@ -484,6 +484,18 @@ paths:
           type: boolean
           default: auto
           description: Whether to mask the user messages in conversational data or prompts in instruction data.
+        training_method:
+          type: string
+          enum:
+            - sft
+            - dpo
+          default: sft
+          description: The training method to use. 'sft' for Supervised Fine-Tuning or 'dpo' for Direct Preference Optimization.
+        dpo_beta:
+          type: number
+          format: float
+          default: 0.1
+          description: The beta parameter for DPO training. Only applicable when training_method is 'dpo'.
         training_type:
           type: object
           oneOf:
@@ -2337,6 +2349,16 @@ components:
         enum:
           - auto
         default: auto
+        training_method:
+          type: string
+          enum:
+            - sft
+            - dpo
+          default: sft
+        dpo_beta:
+          type: number
+          format: float
+          default: 0.1
         training_type:
           type: object
           oneOf:
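The two hunks above add the same pair of request-body fields: `training_method` (enum `sft`/`dpo`, default `sft`) and `dpo_beta` (float, default `0.1`, only meaningful for DPO). A minimal sketch of how a client might assemble a request body using them; the helper name and the `model`/`training_file` fields are assumptions for illustration and are not part of this diff:

```python
import json

def build_finetune_payload(model, training_file,
                           training_method="sft", dpo_beta=0.1):
    """Build a fine-tune request body.

    Per the schema in this diff, training_method is one of 'sft' or 'dpo'
    (default 'sft'), and dpo_beta (default 0.1) only applies to 'dpo'.
    """
    if training_method not in ("sft", "dpo"):
        raise ValueError("training_method must be 'sft' or 'dpo'")
    payload = {
        "model": model,               # assumed field, not from this diff
        "training_file": training_file,  # assumed field, not from this diff
        "training_method": training_method,
    }
    if training_method == "dpo":
        # Include dpo_beta only when it is applicable.
        payload["dpo_beta"] = dpo_beta
    return payload

print(json.dumps(build_finetune_payload("my-model", "file-123",
                                        training_method="dpo")))
```

Omitting `dpo_beta` for SFT runs keeps the body consistent with the schema's note that the parameter is only applicable when `training_method` is `'dpo'`.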

0 commit comments
