Commit d84ae46

Merge branch 'main' into easydata-poc
2 parents 6a42e86 + 2ef6927


46 files changed: +1585 −412 lines
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@

port: 8000
name: mpt-7b
route_prefix: /mpt-7b
precision: 'bf16'
cpus_per_worker: 24
gpus_per_worker: 0
deepspeed: false
workers_per_group: 2
ipex: false
device: "cpu"
model_description:
  model_id_or_path: mosaicml/mpt-7b
  tokenizer_name_or_path: EleutherAI/gpt-neox-20b
  chat_processor: ChatModelGptJ
  peft_model_id_or_path: nathan0/mpt-7b-deltatuner-model
  peft_type: deltatuner
  prompt:
    intro: 'Below is an instruction that describes a task, paired with an input that
      provides further context. Write a response that appropriately completes the request.

      '
    human_id: '

      ### Instruction'
    bot_id: '

      ### Response'
    stop_words: []
  config:
    trust_remote_code: true

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@

port: 8000
name: mpt-7b
route_prefix: /mpt-7b
precision: 'bf16'
cpus_per_worker: 24
gpus_per_worker: 0
deepspeed: true
workers_per_group: 2
ipex: false
device: "cpu"
model_description:
  model_id_or_path: mosaicml/mpt-7b
  tokenizer_name_or_path: EleutherAI/gpt-neox-20b
  chat_processor: ChatModelGptJ
  peft_model_id_or_path: nathan0/mpt-7b-deltatuner-model
  peft_type: deltatuner
  prompt:
    intro: 'Below is an instruction that describes a task, paired with an input that
      provides further context. Write a response that appropriately completes the request.

      '
    human_id: '

      ### Instruction'
    bot_id: '

      ### Response'
    stop_words: []
  config:
    trust_remote_code: true
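
These two new files appear to be the MPT-7B Deltatuner serving configs referenced by the updated inference workflow below, differing only in the `deepspeed` flag. Their exact paths are hidden in this view, so the `.github/workflows/config/mpt_deltatuner*.yaml` paths used here are assumptions taken from that workflow. A minimal sketch of how the CI consumes them:

```bash
# Assumed config paths, matching the commands added to workflow_inference.yml below.
docker exec "inference" bash -c \
  "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml"
docker exec "inference" bash -c \
  "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/mpt-7b"
# The DeepSpeed-enabled variant is served the same way with mpt_deltatuner_deepspeed.yaml.
```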

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@

import argparse


def update_finetune_config(base_model):
    conf_file = "finetune/finetune.conf"
    with open(conf_file) as f:
        config = eval(f.read())
    # due to compute node can't connect network
    # base models are downloaded as local files in directory ~/models/
    # avaiable base models are:
    #
    # Mistral-7B-v0.1
    # Llama-2-7b
    # pythia-1.4b
    # pythia-2.8b
    # pythia-70m
    # gpt-j-6b
    # pythia-160m
    # pythia-410m
    # pythia-12b
    # pythia-1b
    # pythia-6.9b

    config["General"]["base_model"] = base_model
    # config["General"]["base_model"] = "pythia-70m"
    config["Training"]["device"] = "GPU"
    config["Training"]["resources_per_worker"]["CPU"] = 1
    config["Training"]["resources_per_worker"]["GPU"] = 1
    config["Training"]["accelerate_mode"] = "GPU_DDP"

    with open(conf_file, "w") as f:
        f.write(str(config))


def get_parser():
    parser = argparse.ArgumentParser(description="Finetuning on Intel GPU")
    parser.add_argument("--base_model", type=str, required=True, default=None)
    return parser


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()

    update_finetune_config(args.base_model)
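
A hedged usage sketch of this helper (the file's path is hidden in this view, so `update_finetune_config.py` is a placeholder name; the base model must be one of the locally downloaded models listed in the comments above):

```bash
# Placeholder script name; rewrites finetune/finetune.conf in place for an Intel GPU run.
python update_finetune_config.py --base_model Llama-2-7b
```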

.github/workflows/workflow_finetune.yml

Lines changed: 13 additions & 2 deletions
@@ -45,7 +45,7 @@ jobs:

      - name: Run Finetune Test
        run: |
-         docker exec "finetune" bash -c "RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
+         docker exec "finetune" bash -c "source \$(python -c 'import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)')/env/setvars.sh; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --head --node-ip-address 127.0.0.1 --ray-debugger-external; RAY_SERVE_ENABLE_EXPERIMENTAL_STREAMING=1 ray start --address='127.0.0.1:6379' --ray-debugger-external"
          CMD=$(cat << EOF
          conf_path = "finetune/finetune.conf"
          with open(conf_path, encoding="utf-8") as reader:
@@ -64,7 +64,11 @@ jobs:
          else:
              result['General']["config"]["use_auth_token"] = None
          result['Training']['epochs'] = 1
-         result['Training']['num_training_workers'] = 1
+         if "${{ matrix.model }}" == "gpt2":
+             # to verify oneccl
+             result['Training']['num_training_workers'] = 2
+         else:
+             result['Training']['num_training_workers'] = 1
          result['General']['lora_config'] = None
          with open(conf_path, 'w') as output:
              print(result, file=output)
@@ -123,6 +127,13 @@ jobs:
            docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
          fi

+     - name: Stop Ray
+       run: |
+         cid=$(docker ps -q --filter "name=finetune")
+         if [[ ! -z "$cid" ]]; then
+           docker exec "finetune" bash -c "ray stop"
+         fi
+
      - name: Stop Container
        if: success() || failure()
        run: |
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@

name: Finetune on Intel GPU

on:
  workflow_call:

jobs:
  finetune:
    name: finetune on gpu test
    strategy:
      matrix:
        model: [ pythia-6.9b, gpt-j-6b ]
    runs-on: self-hosted
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Running task on Intel GPU
        run: |
          rm ~/borealis-runner/llm-ray.tar.gz -f
          tar zcf ~/borealis-runner/llm-ray.tar.gz -C ~/actions-runner/_work/llm-ray .
          cd ~/borealis-runner/
          python3 finetune_on_pvc.py --base_model "${{ matrix.model }}"

      - name: Test Summary
        run: echo "to be continued"

.github/workflows/workflow_inference.yml

Lines changed: 14 additions & 7 deletions
@@ -12,10 +12,10 @@ jobs:
    name: inference test
    strategy:
      matrix:
-       model: [ gpt-j-6B, gpt2, bloom, opt, mpt ]
+       model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b ]
        include:
-         - dtuner_model: /root/.cache/huggingface/hub/mpt-7b-deltatuner-model
-           model: mpt
+         - dtuner_model: nathan0/mpt-7b-deltatuner-model
+           model: mpt-7b
    runs-on: self-hosted
    steps:
      - name: Checkout
@@ -43,13 +43,13 @@ jobs:
      - name: Run Inference Test with Deltatuner
        if: ${{ matrix.dtuner_model }}
        run: |
-         docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deltatuner_model ${{ matrix.dtuner_model }}"
+         docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml"
          docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
          docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"

      - name: Run Inference Test with DeepSpeed
        run: |
-         if [[ ${{ matrix.model }} =~ ^(gpt2|mpt)$ ]]; then
+         if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b)$ ]]; then
            echo ${{ matrix.model }} is not supported!
          else
            docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deepspeed"
@@ -60,14 +60,21 @@ jobs:
      - name: Run Inference Test with DeepSpeed and Deltatuner
        if: ${{ matrix.dtuner_model }}
        run: |
-         if [[ ${{ matrix.model }} =~ ^(gpt2|mpt)$ ]]; then
+         if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b)$ ]]; then
            echo ${{ matrix.model }} is not supported!
          else
-           docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deepspeed --deltatuner_model ${{ matrix.dtuner_model }}"
+           docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml"
            docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
            docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
          fi

+     - name: Stop Ray
+       run: |
+         cid=$(docker ps -q --filter "name=inference")
+         if [[ ! -z "$cid" ]]; then
+           docker exec "inference" bash -c "ray stop"
+         fi
+
      - name: Stop Container
        if: success() || failure()
        run: |

.github/workflows/workflow_orders_nightly.yml

Lines changed: 4 additions & 1 deletion
@@ -2,7 +2,7 @@ name: llm-ray inference & finetune

on:
  schedule:
-   - cron: "* 21 * * *"
+   - cron: "0 21 * * *"

jobs:

@@ -13,3 +13,6 @@ jobs:
    uses: ./.github/workflows/workflow_finetune.yml
    with:
      ci_type: nightly
+
+  call-finetune-on-intel-gpu:
+    uses: ./.github/workflows/workflow_finetune_gpu.yml
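
For context on the schedule fix (standard crontab semantics): the old expression fired every minute of the 21:00 UTC hour, while the corrected one fires once per night.

```bash
# "* 21 * * *"  -> every minute from 21:00 to 21:59 UTC (60 runs per night)
# "0 21 * * *"  -> once per day at 21:00 UTC
```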

README.md

Lines changed: 20 additions & 12 deletions
@@ -45,7 +45,9 @@ Workflow has been tested on Linux-4.18.0-408.el8.x86_64 and Ubuntu 22.04
  ```bash
  git clone https://github.com/intel-sandbox/llm-ray.git
  cd llm-ray
- pip install -r ./requirements.txt -f https://developer.intel.com/ipex-whl-stable-cpu -f https://download.pytorch.org/whl/torch_stable.html
+ pip install .[cpu] -f https://developer.intel.com/ipex-whl-stable-cpu -f https://download.pytorch.org/whl/torch_stable.html
+ # Dynamic link oneCCL and Intel MPI libraries
+ source $(python -c "import oneccl_bindings_for_pytorch as torch_ccl;print(torch_ccl.cwd)")/env/setvars.sh
  ```

  #### 2. Launch ray cluster
@@ -64,18 +66,23 @@ If deploying a ray cluster on multiple nodes, please download the workflow repos

  #### 1. Prepare Dataset

- Now, the workflow only supports datasets in the specified format
-
- The format of dataset similar to [databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k). This type of data is used for finetuning in prompt mode and this type of data is characterized by containing `instruction` `context` and `response` fields where `instruction` and `response` are required fields and `context` is an optional field. In the data preprocessing stage, the three fields will be concatenated to the corresponding format according to [dolly](https://github.com/databrickslabs/dolly/blob/master/training/trainer.py#LL93).
+ The workflow only supports datasets with JSONL (JSON Lines) format, where each line is a separate JSON object. Here’s the structure each line should follow:

+ ``` json
+ {"instruction":"<User Input>", "context":"<Additional Information>", "response":"<Expected Output>"}
+ ```

- The meaning of the above three columns:
- + Instruction Column: The column in the dataset is the user input, such as a question or a command.
- + Context Column: This column is other information used by instruction, such as the options used in the question and so on. It can be empty.
- + Response: The column in the dataset containing the expected output.
+ - Instruction: This is the user's input, such as a question, command, or prompt for content generation.
+ - Context: Supplementary information that aids the instruction. This can include previous conversation parts, background details, or specificities influencing the response. It's optional and can be left empty.
+ - Response: The model's expected output in response to the 'instruction', considering the 'context' if provided.

+ ##### Examples:
+ ``` json
+ {"instruction":"Which is a species of fish? Tope or Rope", "context":"", "response":"Tope"}
+ {"instruction":"What is the average lifespan of a Golden Retriever?","context":"Golden Retrievers are a generally healthy breed; they have an average lifespan of 12 to 13 years. Irresponsible breeding to meet high demand has led to the prevalence of inherited health problems in some breed lines, including allergic skin conditions, eye problems and sometimes snappiness. These problems are rarely encountered in dogs bred from responsible breeders.","response":"The average lifespan of a Golden Retriever is 12 to 13 years."}
+ ```

- Therefore, if the your data meets the above two formats, you can use the data by configuring the local data path or huggingface dataset. If not, please refer to the following **Adopt to Your Dataset**.
+ An example dataset can be accessed at `examples/data/sample_finetune_data.jsonl`. Ensure each line in your dataset follows the above format.

  #### 2. Finetune

@@ -147,15 +154,16 @@ A specific model can be deployed by specifying the model path and tokenizer path
  # If you dont' want to view serve logs, you can set env var, "KEEP_SERVE_TERMINAL" to false

  # Run model serve with specified model and tokenizer
- python inference/run_model_serve.py --model $model --tokenizer $tokenizer --streaming_response
+ python inference/run_model_serve.py --model $model --tokenizer $tokenizer

  # INFO - Deployment 'custom-model_PredictDeployment' is ready at `http://127.0.0.1:8000/custom-model`. component=serve deployment=custom-model_PredictDeployment
  # Service is deployed successfully

  # Verfiy the inference on deployed model
- python inference/run_model_infer.py --model_endpoint http://127.0.0.1:8000/custom-model
+ python inference/run_model_infer.py --model_endpoint http://127.0.0.1:8000/custom-model --streaming_response
  ```
- Otherwise, all the models configured in `inference/config.py` will be deployed by default. If you want to choose a specific model to deploy, you can set env var, "MODEL_TO_SERVE", to your choice. You can add customized models in it as needed.
+ Otherwise, all the models placed under `inference/models` folder will be deployed by default. If you want to choose a specific model to deploy, you can set env var, "MODEL_TO_SERVE", to your choice. You can also specify your model by either `--model` or `--config_file`.
+ For `--config_file`, you can copy one of them from `inference/models` and make necessary changes.

  Llm-ray also supports serving with deepspeed. Please follow the [guide](inference/deepspeed/README.md) under inference/deepspeed folder.
common/trainer/default_trainer.py

Lines changed: 10 additions & 8 deletions
@@ -49,7 +49,7 @@ def recovery(self, config):

        # update lr_scheduler status
        if Path.exists(checkpoint_dir / "lr_scheduler.pt") and hasattr(self, "lr_scheduler"):
-           scheduler_state = torch.load(checkpoint_dir / "lr_schduler.pt", map_location="cpu")
+           scheduler_state = torch.load(checkpoint_dir / "lr_scheduler.pt", map_location="cpu")
            self.lr_scheduler.load_state_dict(scheduler_state)

        # update current epoch
@@ -111,12 +111,14 @@ def prepare(self, model, tokenizer, dataset, optimizer, accelerator):
            lr_scheduler = None

        model.train()
-       self.model, self.optimizer, self.lr_scheduler = accelerator.prepare(
-           model, optimizer, lr_scheduler
-       )

-       self.train_dataloader, self.eval_dataloader = accelerator.prepare(
-           train_dataloader, eval_dataloader,
+       # self.model, self.optimizer, self.lr_scheduler, ..., are prepared with 2 steps
+       # because it is recommended way to prepare model and optimizer while using FSDP.
+       # https://huggingface.co/docs/accelerate/usage_guides/fsdp#a-few-caveats-to-be-aware-of
+       self.model = accelerator.prepare(model)
+
+       self.optimizer, self.train_dataloader, self.eval_dataloader, self.lr_scheduler = accelerator.prepare(
+           optimizer, train_dataloader, eval_dataloader, lr_scheduler
        )

        checkpoint = self.config.get("checkpoint")
@@ -144,7 +146,7 @@ def train(self):
                    self.lr_scheduler.step()
                    self.optimizer.zero_grad()
                if step % log_step == 0:
-                   logger.info(f"train epoch:[{idx}/{num_train_epochs}]\tstep:[{step}/{total_steps}]\tloss:{loss}\tppl:{math.exp(loss)}\ttime:{time.time()-start}")
+                   logger.info(f"train epoch:[{idx}/{num_train_epochs}]\tstep:[{step}/{total_steps}]\tloss:{loss:.6f}\tppl:{math.exp(loss):.6f}\ttime:{time.time()-start:.6f}")
                    report({"train_epoch": idx, "total_epochs": num_train_epochs, "train_step": step, "total_steps": min(max_train_step, total_steps) if max_train_step else total_steps})
                    start = time.time()
                if max_train_step is not None:
@@ -207,7 +209,7 @@ def save(self, config, epoch = 0):
            torch.save(self.optimizer.state_dict(), os.path.join(tmpdir, "optim.pt"))
            torch.save({"epoch": epoch}, os.path.join(tmpdir, "epoch.pt"))
            if self.lr_scheduler:
-               torch.save(self.lr_scheduler.state_dict(), os.path.join(tmpdir, "lr_schduler.pt"))
+               torch.save(self.lr_scheduler.state_dict(), os.path.join(tmpdir, "lr_scheduler.pt"))
            checkpoint = Checkpoint.from_directory(tmpdir)
            checkpoint.to_directory(local_checkpoint_path)
            logger.info(f"save checkpoint to {local_checkpoint_path} finished")

dev/docker/Dockerfile

Lines changed: 2 additions & 5 deletions
@@ -23,13 +23,10 @@ RUN conda init bash && \
      conda config --add channels intel && \
      conda install python==3.9

- COPY ./requirements.txt /tmp
- RUN pip install -r /tmp/requirements.txt -f https://developer.intel.com/ipex-whl-stable-cpu \
+ COPY ./ .
+ RUN pip install -e .[cpu,deepspeed] -f https://developer.intel.com/ipex-whl-stable-cpu \
      -f https://download.pytorch.org/whl/torch_stable.html

- # For serving with DeepSpeed
- COPY ./inference/deepspeed/requirements.cpu.txt /tmp
- RUN pip install -r /tmp/requirements.cpu.txt
  RUN ds_report

  # Used to invalidate docker build cache with --build-arg CACHEBUST=$(date +%s)
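
A hedged build sketch for this Dockerfile (the image tag is illustrative; the build context is assumed to be the repository root because of the `COPY ./ .` and `pip install -e .[cpu,deepspeed]` steps):

```bash
# CACHEBUST invalidates the cached layers, as noted in the Dockerfile comment above.
docker build . -f dev/docker/Dockerfile -t llm-ray:latest --build-arg CACHEBUST=$(date +%s)
```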
