diff --git a/eval/final/components.py b/eval/final/components.py
index 1ca53e4..b2aad8e 100644
--- a/eval/final/components.py
+++ b/eval/final/components.py
@@ -20,7 +20,6 @@ def run_final_eval_op(
     candidate_model: str = None,
     taxonomy_path: str = "/input/taxonomy",
     sdg_path: str = "/input/sdg",
-    use_tls: bool = False,
 ):
     import json
     import os
@@ -35,8 +34,9 @@ def run_final_eval_op(
     judge_api_key = os.getenv("JUDGE_API_KEY", "")
     judge_model_name = os.getenv("JUDGE_NAME")
     judge_endpoint = os.getenv("JUDGE_ENDPOINT")
-    judge_ca_cert = os.getenv("JUDGE_CA_CERT_PATH")
-    judge_http_client = httpx.Client(verify=judge_ca_cert) if use_tls else None
+    judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
+    use_tls = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path) > 0)
+    judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
 
     print("Starting Final Eval...")
 
diff --git a/eval/mt_bench/components.py b/eval/mt_bench/components.py
index da79154..5c2bdbb 100644
--- a/eval/mt_bench/components.py
+++ b/eval/mt_bench/components.py
@@ -17,7 +17,6 @@ def run_mt_bench_op(
     models_folder: str,
     output_path: str = "/output/mt_bench_data.json",
     best_score_file: Optional[str] = None,
-    use_tls: bool = False,
 ) -> NamedTuple("outputs", best_model=str, best_score=float):
     import json
     import os
@@ -30,8 +29,9 @@ def run_mt_bench_op(
     judge_api_key = os.getenv("JUDGE_API_KEY", "")
     judge_model_name = os.getenv("JUDGE_NAME")
     judge_endpoint = os.getenv("JUDGE_ENDPOINT")
-    judge_ca_cert = os.getenv("JUDGE_CA_CERT_PATH")
-    judge_http_client = httpx.Client(verify=judge_ca_cert) if use_tls else None
+    judge_ca_cert_path = os.getenv("JUDGE_CA_CERT_PATH")
+    use_tls = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path) > 0)
+    judge_http_client = httpx.Client(verify=judge_ca_cert_path) if use_tls else None
 
     def launch_vllm(
         model_path: str, gpu_count: int, retries: int = 120, delay: int = 10
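The two hunks above replace an explicit `use_tls` flag with detection based on the mounted CA bundle: TLS is on exactly when `JUDGE_CA_CERT_PATH` points at a non-empty file. A minimal standalone sketch of that logic, with one defensive addition of mine: defaulting the variable to `""`, since `os.path.exists(None)` raises `TypeError` when the variable is unset.

```python
import os
from typing import Optional

import httpx


def judge_http_client_from_env() -> Optional[httpx.Client]:
    # The "" default is my addition: os.path.exists(None) raises TypeError,
    # so an unset JUDGE_CA_CERT_PATH should simply mean "no TLS".
    ca_path = os.getenv("JUDGE_CA_CERT_PATH", "")
    # TLS only when the mounted file exists and is non-empty; an empty
    # ca.crt key in the ConfigMap therefore behaves like no cert at all.
    use_tls = os.path.exists(ca_path) and os.path.getsize(ca_path) > 0
    return httpx.Client(verify=ca_path) if use_tls else None
```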
diff --git a/pipeline.py b/pipeline.py
index b64ce1d..401c3c4 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -104,7 +104,6 @@ def pipeline(
     sdg_pipeline: str = "full",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L122
     sdg_max_batch_len: int = 5000,  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L334
     sdg_sample_size: float = 1.0,  # FIXME: Not present in default config. Not configurable upstream at this point, capability added via https://github.com/instructlab/sdg/pull/432
-    sdg_use_tls: bool = False,
     # Training phase
     train_nproc_per_node: int = 2,  # FIXME: Not present in default config. Arbitrary value chosen to demonstrate multi-node multi-gpu capabilities. Needs proper reference architecture justification.
@@ -123,14 +122,12 @@ def pipeline(
     # MT Bench
     mt_bench_max_workers: str = "auto",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L74
     mt_bench_merge_system_user_message: bool = False,  # https://github.com/instructlab/instructlab/blob/v0.21.2/src/instructlab/model/evaluate.py#L474
-    mt_bench_use_tls: bool = False,
     # Final evaluation
     final_eval_max_workers: str = "auto",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L74
     final_eval_few_shots: int = 5,  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L56
     final_eval_batch_size: str = "auto",  # https://github.com/instructlab/instructlab/blob/v0.21.2/tests/testdata/default_config.yaml#L52
     final_eval_merge_system_user_message: bool = False,  # https://github.com/instructlab/instructlab/blob/v0.21.2/src/instructlab/model/evaluate.py#L474
-    final_eval_use_tls: bool = False,
     # Other options
     k8s_storage_class_name: str = "standard",  # FIXME: https://github.com/kubeflow/pipelines/issues/11396, https://issues.redhat.com/browse/RHOAIRFE-470
@@ -146,7 +143,6 @@ def pipeline(
         sdg_pipeline: SDG parameter. Data generation pipeline to use. Available: 'simple', 'full', or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.
         sdg_max_batch_len: SDG parameter. Maximum tokens per gpu for each batch that will be handled in a single step.
         sdg_sample_size: SDG parameter. Represents the sdg skills recipe sampling size as percentage in decimal form.
-        sdg_use_tls: SDG parameter. Use TLS Certs (defined in the ConfigMap 'teacher-server' under key 'ca.crt') to connect to the Teacher model
 
         train_nproc_per_node: Training parameter. Number of GPUs per each node/worker to use for training.
         train_nnodes: Training parameter. Number of nodes/workers to train on.
@@ -164,13 +160,11 @@ def pipeline(
         mt_bench_max_workers: MT Bench parameter. Number of workers to use for evaluation with mt_bench or mt_bench_branch. Must be a positive integer or 'auto'.
         mt_bench_merge_system_user_message: MT Bench parameter. Boolean indicating whether to merge system and user messages (required for Mistral based judges)
-        mt_bench_use_tls: MT Bench parameter. Use TLS Certs (defined in the ConfigMap 'judge-server' under key 'ca.crt') to connect to the Judge model
 
         final_eval_max_workers: Final model evaluation parameter for MT Bench Branch. Number of workers to use for evaluation with mt_bench or mt_bench_branch. Must be a positive integer or 'auto'.
         final_eval_few_shots: Final model evaluation parameter for MMLU. Number of question-answer pairs provided in the context preceding the question used for evaluation.
         final_eval_batch_size: Final model evaluation parameter for MMLU. Batch size for evaluation. Valid values are a positive integer or 'auto' to select the largest batch size that will fit in memory.
         final_eval_merge_system_user_message: Final model evaluation parameter for MT Bench Branch. Boolean indicating whether to merge system and user messages (required for Mistral based judges)
-        mt_bench_use_tls: Final model evaluation parameter. Use TLS Certs (defined in the ConfigMap 'judge-server' under key 'ca.crt') to connect to the Judge model
 
         k8s_storage_class_name: A Kubernetes StorageClass name for persistent volumes. Selected StorageClass must support RWX PersistentVolumes.
""" @@ -200,7 +194,6 @@ def pipeline( repo_branch=sdg_repo_branch, repo_pr=sdg_repo_pr, sdg_sampling_size=sdg_sample_size, - use_tls=sdg_use_tls, ) sdg_task.set_env_variable("HOME", "/tmp") sdg_task.set_env_variable("HF_HOME", "/tmp") @@ -354,7 +347,6 @@ def pipeline( models_folder="/output/phase_2/model/hf_format", max_workers=mt_bench_max_workers, merge_system_user_message=mt_bench_merge_system_user_message, - use_tls=mt_bench_use_tls, ) mount_pvc( task=run_mt_bench_task, @@ -390,7 +382,6 @@ def pipeline( merge_system_user_message=final_eval_merge_system_user_message, few_shots=final_eval_few_shots, batch_size=final_eval_batch_size, - use_tls=final_eval_use_tls, ) mount_pvc( task=final_eval_task, pvc_name=output_pvc_task.output, mount_path="/output" @@ -624,10 +615,10 @@ def gen_standalone(): # The list of executor names to extract details from to generate the standalone script executors = { "exec-data-processing-op": 'data_processing_op(max_seq_len={MAX_SEQ_LEN}, max_batch_len={MAX_BATCH_LEN}, sdg_path="{DATA_PVC_SDG_PATH}", model_path="{DATA_PVC_MODEL_PATH}", skills_path="{PREPROCESSED_DATA_SKILLS_PATH}", knowledge_path="{PREPROCESSED_DATA_KNOWLEDGE_PATH}")', - "exec-sdg-op": 'sdg_op(num_instructions_to_generate={num_instructions_to_generate}, pipeline="{sdg_pipeline}", repo_branch="{exec_git_clone_op_repo_branch or ""}", repo_pr={exec_git_clone_op_repo_pr or 0}, taxonomy_path="{TAXONOMY_DATA_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", sdg_sampling_size={sdg_sampling_size}, use_tls={sdg_use_tls})', + "exec-sdg-op": 'sdg_op(num_instructions_to_generate={num_instructions_to_generate}, pipeline="{sdg_pipeline}", repo_branch="{exec_git_clone_op_repo_branch or ""}", repo_pr={exec_git_clone_op_repo_pr or 0}, taxonomy_path="{TAXONOMY_DATA_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", sdg_sampling_size={sdg_sampling_size})', "exec-git-clone-op": {}, - "exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",output_path="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, use_tls={mt_bench_use_tls})', - "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy_path="{TAXONOMY_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, few_shots={FEW_SHOTS}, batch_size="{BATCH_SIZE}", use_tls={final_eval_use_tls})', + "exec-run-mt-bench-op": 'run_mt_bench_op(best_score_file="{MT_BENCH_SCORES_PATH}",output_path="{MT_BENCH_OUTPUT_PATH}",models_folder="{CANDIDATE_MODEL_PATH_PREFIX}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE})', + "exec-run-final-eval-op": 'run_final_eval_op(mmlu_branch_output="{MMLU_BRANCH_SCORES_PATH}", mt_bench_branch_output="{MT_BENCH_BRANCH_SCORES_PATH}", candidate_model="{CANDIDATE_MODEL_PATH}", taxonomy_path="{TAXONOMY_PATH}", sdg_path="{DATA_PVC_SDG_PATH}", base_branch="", candidate_branch="", base_model_dir="{DATA_PVC_MODEL_PATH}", max_workers="{MAX_WORKERS}", merge_system_user_message={MERGE_SYSTEM_USER_MESSAGE}, few_shots={FEW_SHOTS}, batch_size="{BATCH_SIZE}")', } details = {} diff --git a/pipeline.yaml b/pipeline.yaml index 47e8f28..75b0d38 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -6,11 +6,9 @@ # final_eval_few_shots: int 
diff --git a/pipeline.yaml b/pipeline.yaml
index 47e8f28..75b0d38 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -6,11 +6,9 @@
 # final_eval_few_shots: int [Default: 5.0]
 # final_eval_max_workers: str [Default: 'auto']
 # final_eval_merge_system_user_message: bool [Default: False]
-# final_eval_use_tls: bool [Default: False]
 # k8s_storage_class_name: str [Default: 'standard']
 # mt_bench_max_workers: str [Default: 'auto']
 # mt_bench_merge_system_user_message: bool [Default: False]
-# mt_bench_use_tls: bool [Default: False]
 # sdg_base_model: str [Default: 's3:///']
 # sdg_max_batch_len: int [Default: 5000.0]
 # sdg_pipeline: str [Default: 'full']
@@ -19,7 +17,6 @@
 # sdg_repo_url: str [Default: 'https://github.com/instructlab/taxonomy.git']
 # sdg_sample_size: float [Default: 1.0]
 # sdg_scale_factor: int [Default: 30.0]
-# sdg_use_tls: bool [Default: False]
 # train_effective_batch_size_phase_1: int [Default: 128.0]
 # train_effective_batch_size_phase_2: int [Default: 3840.0]
 # train_learning_rate_phase_1: float [Default: 2e-05]
@@ -475,10 +472,6 @@ components:
         defaultValue: /input/taxonomy
         isOptional: true
         parameterType: STRING
-      use_tls:
-        defaultValue: false
-        isOptional: true
-        parameterType: BOOLEAN
   outputDefinitions:
     artifacts:
       mmlu_branch_output:
@@ -506,10 +499,6 @@ components:
         defaultValue: /output/mt_bench_data.json
         isOptional: true
         parameterType: STRING
-      use_tls:
-        defaultValue: false
-        isOptional: true
-        parameterType: BOOLEAN
   outputDefinitions:
     parameters:
       best_model:
@@ -540,10 +529,6 @@ components:
         defaultValue: /data/taxonomy
         isOptional: true
         parameterType: STRING
-      use_tls:
-        defaultValue: false
-        isOptional: true
-        parameterType: BOOLEAN
   comp-sdg-to-artifact-op:
     executorLabel: exec-sdg-to-artifact-op
     inputDefinitions:
@@ -1158,15 +1143,16 @@ deploymentSpec:
           \ base_branch: str,\n    candidate_branch: str,\n    max_workers: str,\n\
           \    few_shots: int,\n    batch_size: str,\n    merge_system_user_message:\
           \ bool,\n    candidate_model: str = None,\n    taxonomy_path: str = \"/input/taxonomy\"\
-          ,\n    sdg_path: str = \"/input/sdg\",\n    use_tls: bool = False,\n):\n\
-          \    import json\n    import os\n    import httpx\n    import subprocess\n\
-          \n    import torch\n    from instructlab.eval.mmlu import MMLUBranchEvaluator\n\
-          \    from instructlab.eval.mt_bench import MTBenchBranchEvaluator\n    from\
-          \ instructlab.model.evaluate import qa_pairs_to_qna_to_avg_scores, sort_score\n\
-          \n    judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n    judge_model_name\
-          \ = os.getenv(\"JUDGE_NAME\")\n    judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\"\
-          )\n    judge_ca_cert = os.getenv(\"JUDGE_CA_CERT_PATH\")\n    judge_http_client\
-          \ = httpx.Client(verify=judge_ca_cert) if use_tls else None\n\n    print(\"\
+          ,\n    sdg_path: str = \"/input/sdg\",\n):\n    import json\n    import\
+          \ os\n    import httpx\n    import subprocess\n\n    import torch\n    from\
+          \ instructlab.eval.mmlu import MMLUBranchEvaluator\n    from instructlab.eval.mt_bench\
+          \ import MTBenchBranchEvaluator\n    from instructlab.model.evaluate import\
+          \ qa_pairs_to_qna_to_avg_scores, sort_score\n\n    judge_api_key = os.getenv(\"\
+          JUDGE_API_KEY\", \"\")\n    judge_model_name = os.getenv(\"JUDGE_NAME\"\
+          )\n    judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\")\n    judge_ca_cert_path\
+          \ = os.getenv(\"JUDGE_CA_CERT_PATH\")\n    use_tls = os.path.exists(judge_ca_cert_path)\
+          \ and (os.path.getsize(judge_ca_cert_path) > 0)\n    judge_http_client =\
+          \ httpx.Client(verify=judge_ca_cert_path) if use_tls else None\n\n    print(\"\
           Starting Final Eval...\")\n\n    def launch_vllm(\n        model_path: str,\
           \ gpu_count: int, retries: int = 120, delay: int = 10\n    ) -> tuple:\n\
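The embedded executor source above bakes the same `use_tls` expression into the compiled YAML. Its edge-case behavior is easy to verify in isolation; a small self-check of mine, where temp files stand in for the mounted ConfigMap (nothing here comes from the pipeline):

```python
import os
import tempfile


def tls_wanted(ca_path: str) -> bool:
    # The exact expression the ops embed above.
    return os.path.exists(ca_path) and (os.path.getsize(ca_path) > 0)


with tempfile.TemporaryDirectory() as d:
    missing = os.path.join(d, "absent-ca.crt")
    empty = os.path.join(d, "empty-ca.crt")
    filled = os.path.join(d, "ca.crt")
    open(empty, "w").close()
    with open(filled, "w", encoding="utf-8") as f:
        f.write("-----BEGIN CERTIFICATE-----\n")

    assert tls_wanted(missing) is False  # nothing mounted -> plain HTTP client
    assert tls_wanted(empty) is False    # empty ca.crt key -> plain HTTP client
    assert tls_wanted(filled) is True    # non-empty bundle -> verified TLS
```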
@@ -1415,21 +1401,22 @@ deploymentSpec:
           \ calculated based on environment\n    # https://github.com/instructlab/eval/blob/main/src/instructlab/eval/mt_bench.py#L36\n\
           \    max_workers: str,\n    models_folder: str,\n    output_path: str =\
           \ \"/output/mt_bench_data.json\",\n    best_score_file: Optional[str] =\
-          \ None,\n    use_tls: bool = False,\n) -> NamedTuple(\"outputs\", best_model=str,\
-          \ best_score=float):\n    import json\n    import os\n    import httpx\n\
-          \    import subprocess\n\n    import torch\n    from instructlab.eval.mt_bench\
-          \ import MTBenchEvaluator\n\n    judge_api_key = os.getenv(\"JUDGE_API_KEY\"\
-          , \"\")\n    judge_model_name = os.getenv(\"JUDGE_NAME\")\n    judge_endpoint\
-          \ = os.getenv(\"JUDGE_ENDPOINT\")\n    judge_ca_cert = os.getenv(\"JUDGE_CA_CERT_PATH\"\
-          )\n    judge_http_client = httpx.Client(verify=judge_ca_cert) if use_tls\
-          \ else None\n\n    def launch_vllm(\n        model_path: str, gpu_count:\
-          \ int, retries: int = 120, delay: int = 10\n    ) -> tuple:\n        import\
-          \ subprocess\n        import sys\n        import time\n\n        import\
-          \ requests\n        from instructlab.model.backends.common import free_tcp_ipv4_port\n\
-          \n        free_port = free_tcp_ipv4_port(\"127.0.0.1\")\n        port =\
-          \ str(free_port)\n        vllm_server = f\"http://127.0.0.1:{port}/v1\"\n\
-          \n        command = [\n            sys.executable,\n            \"-m\",\n\
-          \            \"vllm.entrypoints.openai.api_server\",\n            \"--port\"\
+          \ None,\n) -> NamedTuple(\"outputs\", best_model=str, best_score=float):\n\
+          \    import json\n    import os\n    import httpx\n    import subprocess\n\
+          \n    import torch\n    from instructlab.eval.mt_bench import MTBenchEvaluator\n\
+          \n    judge_api_key = os.getenv(\"JUDGE_API_KEY\", \"\")\n    judge_model_name\
+          \ = os.getenv(\"JUDGE_NAME\")\n    judge_endpoint = os.getenv(\"JUDGE_ENDPOINT\"\
+          )\n    judge_ca_cert_path = os.getenv(\"JUDGE_CA_CERT_PATH\")\n    use_tls\
+          \ = os.path.exists(judge_ca_cert_path) and (os.path.getsize(judge_ca_cert_path)\
+          \ > 0)\n    judge_http_client = httpx.Client(verify=judge_ca_cert_path)\
+          \ if use_tls else None\n\n    def launch_vllm(\n        model_path: str,\
+          \ gpu_count: int, retries: int = 120, delay: int = 10\n    ) -> tuple:\n\
+          \        import subprocess\n        import sys\n        import time\n\n\
+          \        import requests\n        from instructlab.model.backends.common\
+          \ import free_tcp_ipv4_port\n\n        free_port = free_tcp_ipv4_port(\"\
+          127.0.0.1\")\n        port = str(free_port)\n        vllm_server = f\"http://127.0.0.1:{port}/v1\"\
+          \n\n        command = [\n            sys.executable,\n            \"-m\"\
+          ,\n            \"vllm.entrypoints.openai.api_server\",\n            \"--port\"\
           ,\n            port,\n            \"--model\",\n            model_path,\n\
           \        ]\n        if gpu_count > 0:\n            command += [\n    \
           \            \"--tensor-parallel-size\",\n                str(gpu_count),\n\
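The final deploymentSpec hunk below applies the same detection to `sdg_op`, where the client is an OpenAI SDK object rather than a bare `httpx.Client`. A standalone sketch of that construction, again with a `""` default as my own guard against an unset variable:

```python
import os

import httpx
import openai


def teacher_client_from_env(endpoint: str, api_key: str) -> openai.OpenAI:
    # Sketch of sdg_op's client construction; the "" default is my guard,
    # since os.path.exists(None) raises TypeError when the var is unset.
    ca_path = os.getenv("SDG_CA_CERT_PATH", "")
    if os.path.exists(ca_path) and os.path.getsize(ca_path) > 0:
        return openai.OpenAI(
            base_url=endpoint,
            api_key=api_key,
            http_client=httpx.Client(verify=ca_path),
        )
    return openai.OpenAI(base_url=endpoint, api_key=api_key)
```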
getenv(\"\ + SDG_CA_CERT_PATH\")\n use_tls = os.path.exists(sdg_ca_cert_path) and\ + \ (os.path.getsize(sdg_ca_cert_path) > 0)\n if use_tls:\n import\ + \ httpx\n\n custom_http_client = httpx.Client(verify=sdg_ca_cert_path)\n\ + \ client = openai.OpenAI(\n base_url=endpoint, api_key=api_key,\ \ http_client=custom_http_client\n )\n else:\n client =\ \ openai.OpenAI(base_url=endpoint, api_key=api_key)\n\n taxonomy_base\ \ = \"main\" if repo_branch or (repo_pr and int(repo_pr) > 0) else \"empty\"\ @@ -1997,8 +1985,6 @@ root: componentInputParameter: final_eval_max_workers merge_system_user_message: componentInputParameter: final_eval_merge_system_user_message - use_tls: - componentInputParameter: final_eval_use_tls taskInfo: name: run-final-eval-op run-mt-bench-op: @@ -2017,8 +2003,6 @@ root: models_folder: runtimeValue: constant: /output/phase_2/model/hf_format - use_tls: - componentInputParameter: mt_bench_use_tls taskInfo: name: run-mt-bench-op sdg-op: @@ -2040,8 +2024,6 @@ root: componentInputParameter: sdg_repo_pr sdg_sampling_size: componentInputParameter: sdg_sample_size - use_tls: - componentInputParameter: sdg_use_tls taskInfo: name: sdg-op sdg-to-artifact-op: @@ -2104,10 +2086,6 @@ root: based judges) isOptional: true parameterType: BOOLEAN - final_eval_use_tls: - defaultValue: false - isOptional: true - parameterType: BOOLEAN k8s_storage_class_name: defaultValue: standard description: A Kubernetes StorageClass name for persistent volumes. Selected @@ -2126,13 +2104,6 @@ root: and user messages (required for Mistral based judges) isOptional: true parameterType: BOOLEAN - mt_bench_use_tls: - defaultValue: false - description: Final model evaluation parameter. Use TLS Certs (defined in - the ConfigMap 'teacher-server' under key 'ca.crt') to connect to the Judge - model - isOptional: true - parameterType: BOOLEAN sdg_base_model: defaultValue: s3:/// description: SDG parameter. LLM model used to generate the synthetic dataset @@ -2177,12 +2148,6 @@ root: description: SDG parameter. The total number of instructions to be generated. isOptional: true parameterType: NUMBER_INTEGER - sdg_use_tls: - defaultValue: false - description: SDG parameter. Use TLS Certs (defined in the ConfigMap 'teacher-server' - under key 'ca.crt') to connect to the Teacher model - isOptional: true - parameterType: BOOLEAN train_effective_batch_size_phase_1: defaultValue: 128.0 description: Training parameter for in Phase 1. The number of samples in a diff --git a/sdg/components.py b/sdg/components.py index e248b86..3461751 100644 --- a/sdg/components.py +++ b/sdg/components.py @@ -36,23 +36,24 @@ def sdg_op( taxonomy_path: str = "/data/taxonomy", sdg_path: str = "/data/sdg", sdg_sampling_size: float = 1.0, - use_tls: bool = False, ): from os import getenv, path import instructlab.sdg import openai import yaml + import os api_key = getenv("api_key") model = getenv("model") endpoint = getenv("endpoint") + sdg_ca_cert_path = getenv("SDG_CA_CERT_PATH") + use_tls = os.path.exists(sdg_ca_cert_path) and (os.path.getsize(sdg_ca_cert_path) > 0) if use_tls: import httpx - sdg_ca_cert = getenv("SDG_CA_CERT_PATH") - custom_http_client = httpx.Client(verify=sdg_ca_cert) + custom_http_client = httpx.Client(verify=sdg_ca_cert_path) client = openai.OpenAI( base_url=endpoint, api_key=api_key, http_client=custom_http_client )