
Commit

update precision to compute_dtype in forgotten places (#106)
francoishernandez authored Sep 18, 2024
1 parent 2daa5da commit 00b6bad
Showing 9 changed files with 11 additions and 11 deletions.
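Since this commit renames the `precision` setting to `compute_dtype`, any config written against the old key needs the same one-line change. A minimal migration sketch, assuming configs are plain YAML files with a `precision:` key; the `recipes/` glob and helper name are illustrative assumptions, not part of this commit:

```python
# Hypothetical helper: rename the 'precision' key to 'compute_dtype' in
# YAML configs. Illustrative only; not part of this commit.
import re
from pathlib import Path


def migrate_config(path: Path) -> None:
    """Rewrite 'precision:' keys to 'compute_dtype:' in place."""
    text = path.read_text()
    # Match the key at the start of a (possibly indented) line so that
    # values such as fp16/fp32 and surrounding lines stay untouched.
    migrated = re.sub(
        r"^([ \t]*)precision[ \t]*:", r"\1compute_dtype:", text, flags=re.M
    )
    if migrated != text:
        path.write_text(migrated)
        print(f"updated {path}")


if __name__ == "__main__":
    for cfg in Path("recipes").rglob("*.yaml"):  # assumed config layout
        migrate_config(cfg)
```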
4 changes: 2 additions & 2 deletions docs/docusaurus_tsx/docs/quickstart.md
@@ -163,7 +163,7 @@ max_length: 256
 gpu: 0
 batch_type: sents
 batch_size: 1
-precision: fp16
+compute_dtype: fp16
 #random_sampling_topk: 40
 #random_sampling_topp: 0.75
 #random_sampling_temp: 0.1
@@ -195,7 +195,7 @@ max_length: 1
 gpu: 0
 batch_type: sents
 batch_size: 1
-precision: fp16
+compute_dtype: fp16
 #random_sampling_topk: 40
 #random_sampling_topp: 0.75
 #random_sampling_temp: 0.8
2 changes: 1 addition & 1 deletion eole/inputters/dynamic_iterator.py
@@ -204,7 +204,7 @@ def from_config(
         if running_config.batch_size_multiple is not None:
             batch_size_multiple = running_config.batch_size_multiple
         else:
-            batch_size_multiple = 8 if running_config.precision == "fp16" else 1
+            batch_size_multiple = 8 if running_config.compute_dtype == "fp16" else 1
         corpora_info = config.data
         bucket_size = (
             running_config.bucket_size
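The renamed attribute feeds the same fallback as before: when `batch_size_multiple` is not set explicitly, batch sizes are padded to a multiple of 8 under fp16, which keeps tensor dimensions aligned with sizes that fp16 matrix kernels (e.g. Tensor Cores) handle efficiently, and to 1 otherwise. A minimal sketch of that fallback, with the running config mocked as a plain dataclass (the class name and defaults are assumptions):

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class RunningConfig:
    """Stand-in for the real running config; field names mirror the diff."""
    compute_dtype: str = "fp16"
    batch_size_multiple: Optional[int] = None


def resolve_batch_size_multiple(cfg: RunningConfig) -> int:
    # An explicit setting wins; otherwise pad to 8 for fp16 and
    # fall back to 1 for other dtypes, as in the hunk above.
    if cfg.batch_size_multiple is not None:
        return cfg.batch_size_multiple
    return 8 if cfg.compute_dtype == "fp16" else 1


assert resolve_batch_size_multiple(RunningConfig()) == 8
assert resolve_batch_size_multiple(RunningConfig(compute_dtype="fp32")) == 1
```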
2 changes: 1 addition & 1 deletion recipes/cometkiwi/cometkiwi-xl-inference.yaml
@@ -21,7 +21,7 @@ gpu_ranks: [0]
 #parallel_mode: "tensor_parallel"
 #quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 #quant_type: "bnb_NF4"
-precision: fp16
+compute_dtype: fp16
 report_time: true
 src: None

2 changes: 1 addition & 1 deletion recipes/cometkiwi/cometkiwi-xxl-inference.yaml
@@ -21,7 +21,7 @@ gpu_ranks: [0]
 #parallel_mode: "tensor_parallel"
 quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 quant_type: "bnb_NF4"
-precision: fp16
+compute_dtype: fp16
 report_time: true
 src: None

2 changes: 1 addition & 1 deletion recipes/gpt2/inference.yaml
@@ -22,7 +22,7 @@ n_best: 5

 seed: 42
 self_attn_backend: "pytorch"
-precision: fp32
+compute_dtype: fp32

 max_length: 30

4 changes: 2 additions & 2 deletions recipes/llama2/llama-inference-tp-2gpu.yaml
@@ -5,7 +5,7 @@ transforms_configs:
 tgt_subword_model: "${EOLE_MODEL_DIR}/llama2-7b-chat-hf/tokenizer.model"

 # Model
-model_path: "${EOLE_MODEL_DIR}/llama2-7b-chat-hf/model.pt"
+model_path: "${EOLE_MODEL_DIR}/llama2-7b-chat-hf"

 # Inference
 seed: 42
@@ -18,7 +18,7 @@ gpu_ranks: [0, 1]
 parallel_mode: "tensor_parallel"
 quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 quant_type: "bnb_NF4"
-precision: fp16
+compute_dtype: fp16
 random_sampling_topk: 5
 random_sampling_topp: 0.8
 random_sampling_temp: 0.9
2 changes: 1 addition & 1 deletion recipes/llama2/llama-inference.yaml
@@ -18,7 +18,7 @@ gpu_ranks: [0]
 #parallel_mode: "tensor_parallel"
 quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 quant_type: "bnb_NF4"
-precision: fp16
+compute_dtype: fp16
 #random_sampling_topk: 1
 #random_sampling_topp: 0.0
 #random_sampling_temp: 0.9
2 changes: 1 addition & 1 deletion recipes/llama3/llama-mmlu.yaml
@@ -25,7 +25,7 @@ gpu_ranks: [0]
 # parallel_mode: "tensor_parallel"
 # quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 # quant_type: "bnb_NF4"
-precision: fp16
+compute_dtype: fp16
 #random_sampling_topk: 1
 #random_sampling_topp: 0.0
 #random_sampling_temp: 0.9
2 changes: 1 addition & 1 deletion recipes/mixtral/mixtral-inference-awq.yaml
@@ -19,7 +19,7 @@ parallel_mode: "tensor_parallel"
 #quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 #quant_layers: ['gate_up_proj', 'down_proj', 'up_proj']
 #quant_type: "bnb_sparse"
-precision: fp16
+compute_dtype: fp16
 #random_sampling_topk: 1
 #random_sampling_topp: 0.6
 #random_sampling_temp: 0.9
