Commit

Merge pull request FLock-io#1 from FLock-io/feat/make-a-fully-automate-script

feat: add a fully automated training script
nickcom007 authored May 9, 2024
2 parents 92e27d5 + a71db29 commit b5e13e2
Showing 3 changed files with 174 additions and 70 deletions.
17 changes: 14 additions & 3 deletions README.md
@@ -20,8 +20,11 @@ pip install -r requirements.txt
- [`demo_data.jsonl`](demo_data.jsonl) - Follows the shareGPT format. The training data you receive from the `fed-ledger` is in exactly the same format.
- [`merge.py`](merge.py) - Contains the utility function for merging LoRA weights. If you are training with LoRA, please ensure you merge the adapter before uploading to your Hugging Face repository.
- [`demo.py`](demo.py) - A training script that implements LoRA fine-tuning for a Gemma-2B model.
- [`full_automation.py`](full_automation.py) - A script that automates everything: getting a task, downloading the training data, fine-tuning Gemma-2B on that data, merging the LoRA weights, uploading the model to your Hugging Face repo, and submitting the result to `fed-ledger`.

### Start the Training
### Play with demo.py

#### Start the Training

Execute the following command to start the training:

@@ -33,11 +36,11 @@ The HF token is required due to the Gemma License.

This command initiates fine-tuning on the demo dataset, saves the fine-tuned model, merges the adapter to the base model, and saves the final model.
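Equivalently, the same steps can be driven from Python. The sketch below is not part of this README; it simply calls `train_and_merge()` from `demo.py` with the defaults defined in this commit (3 epochs, batch size 1, context length 512) and assumes `HF_TOKEN` is set, since `demo.py` reads it from the environment.

```python
# Sketch: run the demo pipeline from Python instead of the command line.
# "<your-hf-token>" is a placeholder; demo.py reads HF_TOKEN from the environment.
import os

os.environ.setdefault("HF_TOKEN", "<your-hf-token>")

from demo import train_and_merge

# Uses this commit's defaults: num_train_epochs=3, per_device_train_batch_size=1,
# gradient_accumulation_steps=8, context_length=512.
train_and_merge()
```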

### Upload the model folder to your HuggingFace repo
#### Upload the model folder to your HuggingFace repo

[HuggingFace Models Uploading](https://huggingface.co/docs/hub/en/models-uploading)
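The linked docs cover git-, web-, and API-based uploads. As one hedged example (not part of this README), the `merged_model` folder produced above can be pushed with `huggingface_hub`; the repo id below is a placeholder.

```python
# Sketch: upload the merged model folder via the huggingface_hub API.
# "your-username/gemma-2b-flock-demo" is a hypothetical repo id.
from huggingface_hub import HfApi

api = HfApi(token="<your-hf-token>")
api.create_repo(repo_id="your-username/gemma-2b-flock-demo", exist_ok=True)
api.upload_folder(
    folder_path="merged_model",
    repo_id="your-username/gemma-2b-flock-demo",
    repo_type="model",
)
```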

### Submit the model
#### Submit the model

```bash

@@ -52,3 +55,11 @@ curl --location 'https://fed-ledger-prod.flock.io/api/v1/tasks/submit-result' \
}
}'
```
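The header and payload fields omitted by the truncated hunk above mirror `submit_task()` in `full_automtion.py` below. A Python sketch of the same call (the task id and repo id are placeholders):

```python
# Sketch of the submission request, mirroring submit_task() in full_automtion.py.
import json
import os

import requests

response = requests.post(
    "https://fed-ledger-prod.flock.io/api/v1/tasks/submit-result",
    headers={
        "flock-api-key": os.environ["FLOCK_API_KEY"],
        "Content-Type": "application/json",
    },
    data=json.dumps(
        {"task_id": 1, "data": {"hg_repo_id": "your-username/your-model", "base_model": "gemma"}}
    ),
)
print(response.json())
```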

### Full Automation

Simply run

```bash
TASK_ID=<task-id> FLOCK_API_KEY="<your-flock-api-key-stakes-as-node-for-the-task>" HF_TOKEN="<your-hf-token>" CUDA_VISIBLE_DEVICES=0 python full_automtion.py
```
147 changes: 80 additions & 67 deletions demo.py
@@ -6,80 +6,93 @@
BitsAndBytesConfig, TrainingArguments)
from trl import SFTTrainer

from dataset import SFTDataCollator, GemmaSFTDataset
from dataset import GemmaSFTDataset, SFTDataCollator
from merge import merge_lora_to_base_model

lora_config = LoraConfig(
r=8,
target_modules=[
"q_proj",
"o_proj",
"k_proj",
"v_proj",
"gate_proj",
"up_proj",
"down_proj",
],
task_type="CAUSAL_LM",
)

def train_and_merge(
num_train_epochs: int = 3,
per_device_train_batch_size: int = 1,
gradient_accumulation_steps: int = 8,
context_length: int = 512,
):
lora_config = LoraConfig(
r=8,
target_modules=[
"q_proj",
"o_proj",
"k_proj",
"v_proj",
"gate_proj",
"up_proj",
"down_proj",
],
task_type="CAUSAL_LM",
)

model_id = "google/gemma-2b"
# Load model in 4-bit to do qLoRA
bnb_config = BitsAndBytesConfig(
load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)
model_id = "google/gemma-2b"
# Load model in 4-bit to do qLoRA
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)

training_args = TrainingArguments(
per_device_train_batch_size=1,
gradient_accumulation_steps=8,
warmup_steps=2,
max_steps=10,
learning_rate=2e-4,
bf16=True,
logging_steps=1,
output_dir="outputs",
optim="paged_adamw_8bit",
remove_unused_columns=False,
)
tokenizer = AutoTokenizer.from_pretrained(
model_id,
use_fast=True,
)
model = AutoModelForCausalLM.from_pretrained(
model_id,
quantization_config=bnb_config,
device_map={"": 0},
token=os.environ["HF_TOKEN"],
)
training_args = TrainingArguments(
per_device_train_batch_size=per_device_train_batch_size,
gradient_accumulation_steps=gradient_accumulation_steps,
warmup_steps=100,
learning_rate=2e-4,
bf16=True,
logging_steps=20,
output_dir="outputs",
optim="paged_adamw_8bit",
remove_unused_columns=False,
num_train_epochs=num_train_epochs,
)
tokenizer = AutoTokenizer.from_pretrained(
model_id,
use_fast=True,
)
model = AutoModelForCausalLM.from_pretrained(
model_id,
quantization_config=bnb_config,
device_map={"": 0},
token=os.environ["HF_TOKEN"],
)

# Load dataset
dataset = GemmaSFTDataset(
file="demo_data.jsonl",
tokenizer=tokenizer,
max_seq_length=512,
)
# Load dataset
dataset = GemmaSFTDataset(
file="demo_data.jsonl",
tokenizer=tokenizer,
max_seq_length=context_length,
)

# Define trainer
trainer = SFTTrainer(
model=model,
train_dataset=dataset,
args=training_args,
peft_config=lora_config,
packing=True,
data_collator=SFTDataCollator(tokenizer, max_seq_length=512),
max_seq_length=512,
)
# Define trainer
trainer = SFTTrainer(
model=model,
train_dataset=dataset,
args=training_args,
peft_config=lora_config,
packing=True,
data_collator=SFTDataCollator(tokenizer, max_seq_length=context_length),
max_seq_length=context_length,
)

# Train model
trainer.train()
# Train model
trainer.train()

# save model
trainer.save_model("outputs")
# save model
trainer.save_model("outputs")

# merge lora to base model
merge_lora_to_base_model(
model_name_or_path="google/gemma-2b",
adapter_name_or_path="outputs",
save_path="merged_model",
)
# merge lora to base model
print("Training Completed. Start to merge the weights....")
merge_lora_to_base_model(
model_name_or_path="google/gemma-2b",
adapter_name_or_path="outputs",
save_path="merged_model",
)


if __name__ == "__main__":
train_and_merge()
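Before uploading, it can be worth sanity-checking the merged output. The snippet below is a suggestion, not part of the commit: it loads `merged_model` (as `full_automtion.py` does) and generates a short completion on the GPU.

```python
# Optional smoke test (not in this commit): load the merged model and generate once.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("merged_model")
model = AutoModelForCausalLM.from_pretrained(
    "merged_model",
    torch_dtype=torch.float16,
    device_map={"": 0},  # single-GPU placement, matching the training script
)

inputs = tokenizer("Hello, how are you today?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```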
80 changes: 80 additions & 0 deletions full_automtion.py
@@ -0,0 +1,80 @@
import json
import os
import time

import requests
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from demo import train_and_merge

FLOCK_API_KEY = os.environ["FLOCK_API_KEY"]
FED_LEDGER_BASE_URL = "https://fed-ledger-prod.flock.io/api/v1"


def get_task(task_id: int):
response = requests.request(
"GET", f"{FED_LEDGER_BASE_URL}/tasks/get?task_id={task_id}"
)
return response.json()


def submit_task(task_id: int, hg_repo_id: str):
payload = json.dumps(
{"task_id": task_id, "data": {"hg_repo_id": hg_repo_id, "base_model": "gemma"}}
)
headers = {
"flock-api-key": FLOCK_API_KEY,
"Content-Type": "application/json",
}
response = requests.request(
"POST",
f"{FED_LEDGER_BASE_URL}/tasks/submit-result",
headers=headers,
data=payload,
)
return response.json()


if __name__ == "__main__":
task_id = os.environ["TASK_ID"]
task = get_task(task_id)
# download data from a presigned url
data_url = task["data"]["training_set_url"]
context_length = task["data"]["context_length"]
# download in chunks
response = requests.get(data_url, stream=True)
with open("demo_data.jsonl", "wb") as f:
for chunk in response.iter_content(chunk_size=128):
f.write(chunk)
# train and merge
print("Start to train the model...")
train_and_merge(num_train_epochs=1, context_length=256)

# generate a random repo id based on timestamp
hg_repo_id = "gemma-2b-flock-" + str(int(time.time()))

# load the merged model
model = AutoModelForCausalLM.from_pretrained(
"merged_model",
trust_remote_code=True,
low_cpu_mem_usage=True,
torch_dtype=torch.float16,
device_map={"": "cpu"},
)

# upload
print("Start to push the model to the hub...")
model.push_to_hub(
repo_id=hg_repo_id, use_temp_dir=True, token=os.environ["HF_TOKEN"]
)
# upload tokenizer as well
tokenizer = AutoTokenizer.from_pretrained(
"merged_model",
)
tokenizer.push_to_hub(
repo_id=hg_repo_id, use_temp_dir=True, token=os.environ["HF_TOKEN"]
)
# submit
submit_task(task_id, hg_repo_id)
print("Task submitted successfully")
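A small hardening sketch (hypothetical, not part of this commit): the script assumes every HTTP call succeeds, so raising on error statuses in the data download makes an expired or invalid presigned URL fail loudly; the same `raise_for_status()` check applies to the request made in `submit_task()`.

```python
# Hardening sketch (not in this commit): fail fast on HTTP errors.
import requests


def download_training_data(data_url: str, out_path: str = "demo_data.jsonl") -> None:
    response = requests.get(data_url, stream=True, timeout=60)
    response.raise_for_status()  # abort if the presigned URL is expired or invalid
    with open(out_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
```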
