Merge pull request #4 from retrieva/add-ir-training

Add IR training script
retrieva · Feb 22, 2024 · c49255d
2 parents 59b1fa6 + 3e7363c
commit c49255d
Show file tree

Hide file tree

Showing 9 changed files with 645 additions and 0 deletions.
diff --git a/examples/training/swallow-tart/args.py b/examples/training/swallow-tart/args.py
@@ -0,0 +1,37 @@
+import json
+from dataclasses import dataclass, field
+from typing import Optional
+
+from peft import get_peft_config
+from transformers import TrainingArguments as STTrainingArguments
+
+__all__ = ["STModelArguments", "STDataArgumnets", "STTrainingArguments"]
+
+
+@dataclass
+class STModelArguments:
+    model_name: str = "bert-base-uncased"
+    peft_config_path: Optional[str] = None
+    use_flash_attention: bool = False
+
+    def __post_init__(self):
+        if self.peft_config_path is not None:
+            with open(self.peft_config_path, "r") as f:
+                peft_config_data = json.load(f)
+            self.peft_config = get_peft_config(peft_config_data)
+        else:
+            self.peft_config = None
+
+
+@dataclass  # Data-related options: two required directory strings plus defaulted file names and limits.
+class STDataArgumnets:  # NOTE(review): "Argumnets" is a typo of "Arguments"; the name is listed in __all__, so renaming needs a coordinated change.
+    data_dir: str  # required (no default)
+    hf_dataset_dir: str  # required (no default)
+    task_names: list[str] = field(default_factory=list)  # each instance gets its own empty list by default
+    max_length: int = 512  # presumably a maximum sequence length in tokens — TODO confirm against the caller
+    n_dev_sample: int = 100  # presumably the number of samples held out for dev — TODO confirm
+    query_file_name: str = "tuple_beir/queries.jsonl"  # default relative path; presumably resolved under data_dir — verify
+    corpus_file_name: str = "tuple_beir/corpus.jsonl"  # default relative path; presumably resolved under data_dir — verify
+    qrel_file_name: str = "tuple_beir/qrels/train.tsv"  # default relative path; presumably resolved under data_dir — verify
+    hard_negatives_file_name: str = "negatives/hard_negative.jsonl"  # default relative path; presumably resolved under data_dir — verify
+    num_proc: int = 1  # presumably the worker-process count for dataset preprocessing — TODO confirm
diff --git a/examples/training/swallow-tart/configs/ds_config_zero3.json b/examples/training/swallow-tart/configs/ds_config_zero3.json
@@ -0,0 +1,60 @@
+{
+    "fp16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 10,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
+    "bf16": {
+        "enabled": "auto",
+        "loss_scale": 0,
+        "loss_scale_window": 1000,
+        "initial_scale_power": 10,
+        "hysteresis": 2,
+        "min_loss_scale": 1
+    },
+    "optimizer": {
+        "type": "AdamW",
+        "params": {
+            "lr": "auto",
+            "weight_decay": "auto"
+        }
+    },
+    "scheduler": {
+        "type": "WarmupDecayLR",
+        "params": {
+            "warmup_min_lr": "auto",
+            "warmup_max_lr": "auto",
+            "warmup_num_steps": "auto",
+            "total_num_steps": "auto"
+        }
+    },
+    "zero_optimization": {
+        "stage": 3,
+        "offload_optimizer": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "offload_param": {
+            "device": "cpu",
+            "pin_memory": true
+        },
+        "overlap_comm": true,
+        "contiguous_gradients": true,
+        "reduce_bucket_size": "auto",
+        "stage3_prefetch_bucket_size": "auto",
+        "stage3_param_persistence_threshold": "auto",
+        "sub_group_size": 1e9,
+        "stage3_max_live_parameters": 1e9,
+        "stage3_max_reuse_distance": 1e9,
+        "stage3_gather_16bit_weights_on_model_save": "auto"
+    },
+    "gradient_accumulation_steps": "auto",
+    "gradient_clipping": "auto",
+    "steps_per_print": 2000,
+    "train_batch_size": "auto",
+    "train_micro_batch_size_per_gpu": "auto",
+    "wall_clock_breakdown": false
+}
diff --git a/examples/training/swallow-tart/configs/lora_config.json b/examples/training/swallow-tart/configs/lora_config.json
@@ -0,0 +1,27 @@
+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "tokyotech-llm/Swallow-7b-hf",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": false,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 256,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 128,
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "down_proj",
+    "up_proj",
+    "gate_proj"
+  ],
+  "task_type": "FEATURE_EXTRACTION",
+  "use_rslora": true
+}