From caec4eaf55dced581a5605525f67e7d41c68b726 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Fri, 4 Aug 2023 20:22:42 +0800
Subject: [PATCH 01/29] fix tokenizer load bug in latest transformers

---
 src/lmflow/models/hf_decoder_model.py | 45 ++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py
index 76cd353b9..aed7f751d 100644
--- a/src/lmflow/models/hf_decoder_model.py
+++ b/src/lmflow/models/hf_decoder_model.py
@@ -140,18 +140,41 @@ def __init__(
             "revision": model_args.model_revision,
             "use_auth_token": True if model_args.use_auth_token else None,
         }
-        if model_args.tokenizer_name:
-            tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
-        elif model_args.model_name_or_path:
-            tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
-        else:
-            raise ValueError(
-                "You are instantiating a new tokenizer from scratch. This is"
-                " not supported by this script. You can do it from another"
-                " script, save it, and load it from here, using"
-                " --tokenizer_name."
-            )
+
+        try:
+            if model_args.tokenizer_name:
+                tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
+            elif model_args.model_name_or_path:
+                tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
+            else:
+                raise ValueError(
+                    "You are instantiating a new tokenizer from scratch. This is"
+                    " not supported by this script. You can do it from another"
+                    " script, save it, and load it from here, using"
+                    " --tokenizer_name."
+                )
+        except RecursionError:
+            if model_args.tokenizer_name:
+                tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, unk_token="<unk>",
+                                                          bos_token="<s>",
+                                                          eos_token="</s>",
+                                                          **tokenizer_kwargs)
+            elif model_args.model_name_or_path:
+                tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, unk_token="<unk>",
+                                                          bos_token="<s>",
+                                                          eos_token="</s>",
+                                                          **tokenizer_kwargs)
+            else:
+                raise ValueError(
+                    "You are instantiating a new tokenizer from scratch. This is"
+                    " not supported by this script. You can do it from another"
+                    " script, save it, and load it from here, using"
+                    " --tokenizer_name."
+                )
+
+            logger.warning("The tokenizer_config.json file doesn't set the special tokens. Using default values: <unk>, <s>, </s>")
+
         self.tokenizer = tokenizer
 
         torch_dtype = (
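Stripped of LMFlow's argument plumbing, the fallback this patch introduces behaves like the sketch below (illustrative only; the model name is just a placeholder):

```python
from transformers import AutoTokenizer

model_name = "pinkmanlove/llama-7b-hf"  # placeholder
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except RecursionError:
    # A tokenizer_config.json that omits the special-token definitions can
    # send recent transformers releases into infinite recursion while those
    # tokens are resolved; passing LLaMA's conventional defaults explicitly
    # breaks the cycle.
    tokenizer = AutoTokenizer.from_pretrained(
        model_name, unk_token="<unk>", bos_token="<s>", eos_token="</s>"
    )
```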
From eab5c21497505765003c126a3a92bb87bfa98f9f Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sat, 5 Aug 2023 23:59:16 +0800
Subject: [PATCH 02/29] add position interpolation readme

---
 Position_Interpolation_README.md | 40 ++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 Position_Interpolation_README.md

diff --git a/Position_Interpolation_README.md b/Position_Interpolation_README.md
new file mode 100644
index 000000000..df0d469f0
--- /dev/null
+++ b/Position_Interpolation_README.md
@@ -0,0 +1,40 @@
+# Position Interpolation
+Now LMFlow supports the latest Linear & NTK (Neural Tangent Kernel) scaling techniques for LLaMA models. \
+For more details of these techniques, you can check out the links below:
+* Linear scaling: \
+https://arxiv.org/abs/2306.15595
+* NTK scaling: \
+https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
+# Usage
+To use the Position Interpolation Techniques, you need to set the following options:
+```
+--truncate_to_model_max_length False
+--do_rope_scaling True
+```
+For linear scaling, set the extending ratio by:
+```
+--rope_pi_ratio 4
+```
+For NTK scaling, set the extending ratio by:
+```
+--rope_ntk_ratio 4
+```
+Here is an example of evaluation bash code:
+```
+#!/bin/bash
+
+CUDA_VISIBLE_DEVICES=0 \
+    deepspeed examples/evaluation.py \
+    --answer_type text \
+    --model_name_or_path pinkmanlove/llama-7b-hf \
+    --dataset_path data/wiki_en_eval \
+    --deepspeed examples/ds_config.json \
+    --inference_batch_size_per_device 1 \
+    --truncate_to_model_max_length False \
+    --block_size 4096 \
+    --use_flash_attention True \
+    --do_rope_scaling True \
+    --rope_pi_ratio 2 \
+    --rope_ntk_ratio 4 \
+    --metric ppl
+```
\ No newline at end of file
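For intuition, the two scaling schemes touch rotary position embeddings (RoPE) at different points: linear scaling compresses the position indices, while NTK-aware scaling enlarges the rotary base. A minimal sketch of the math (not LMFlow's actual implementation):

```python
import torch

def rope_angles(positions, dim, base=10000.0):
    # Standard RoPE: angle[t, i] = t / base**(2i/dim)
    inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
    return torch.outer(positions, inv_freq)

def linear_scaled(positions, dim, pi_ratio=4.0):
    # --rope_pi_ratio: squeeze a pi_ratio-times longer context back into
    # the position range the model was trained on.
    return rope_angles(positions / pi_ratio, dim)

def ntk_scaled(positions, dim, ntk_ratio=4.0, base=10000.0):
    # --rope_ntk_ratio: keep positions intact but stretch the base, which
    # mostly dilates the low-frequency (long-range) components.
    return rope_angles(positions, dim, base * ntk_ratio ** (dim / (dim - 2)))
```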
From 162e0cfec4a4b2fa25f4e80281a46868ace6137e Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 00:13:01 +0800
Subject: [PATCH 03/29] add vocab_extension readme

---
 vocab_extension.md | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 vocab_extension.md

diff --git a/vocab_extension.md b/vocab_extension.md
new file mode 100644
index 000000000..bb5217d1c
--- /dev/null
+++ b/vocab_extension.md
@@ -0,0 +1,22 @@
+# Train & Merge Tokenizer
+To automatically convert data, train a SentencePiece tokenizer, and merge the tokenizer, you can run the following script:
+```
+bash scripts/vocab_extension/train_merge_tokenizer.sh
+```
+Alternatively, you can run each of the three steps separately:
+
+# Convert JSON Data to TXT
+To convert JSON data to TXT for sentencepiece tokenizer training, run:
+```
+bash scripts/vocab_extension/convert_json_to_txt.sh
+```
+# Train SentencePiece Tokenizer
+To train a SentencePiece tokenizer, run:
+```
+bash scripts/vocab_extension/train_tokenizer.sh
+```
+# Merge New Tokenizer with the Original One
+To merge a new tokenizer with the original one, run:
+```
+bash scripts/vocab_extension/merge_tokenizer.sh
+```
\ No newline at end of file
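The three steps the README lists map onto the `sentencepiece` API roughly as follows (a sketch assuming the `sentencepiece_model_pb2` protobuf bindings that ship with the `sentencepiece` package; the file paths are placeholders):

```python
import sentencepiece as spm
from sentencepiece import sentencepiece_model_pb2 as sp_pb2

# Train a new tokenizer on plain text (one sentence per line).
spm.SentencePieceTrainer.train(
    input="data/corpus.txt", model_prefix="new_tokenizer",
    model_type="bpe", vocab_size=20000,
)

# Merge: append every piece the new model learned that the original
# vocabulary does not already contain.
old, new = sp_pb2.ModelProto(), sp_pb2.ModelProto()
old.ParseFromString(open("old_tokenizer.model", "rb").read())
new.ParseFromString(open("new_tokenizer.model", "rb").read())
existing = {p.piece for p in old.pieces}
for p in new.pieces:
    if p.piece not in existing:
        old.pieces.append(sp_pb2.ModelProto.SentencePiece(piece=p.piece, score=0.0))
with open("merged_tokenizer.model", "wb") as f:
    f.write(old.SerializeToString())
```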
From 3467db70ae493c5a1906eaed1fe010264ec482f6 Mon Sep 17 00:00:00 2001
From: shizhediao <654745845@qq.com>
Date: Sun, 6 Aug 2023 00:23:50 +0800
Subject: [PATCH 04/29] update wechat qrcode

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index eb14f529b..9cc09360f 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@
 [![Doc](https://img.shields.io/badge/Website-Doc-ff69b4.svg)](https://optimalscale.github.io/LMFlow/)
 [![Embark](https://img.shields.io/badge/Discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
 [![slack badge](https://img.shields.io/badge/Slack-Join-blueviolet?logo=slack&)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
-[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&)](https://i.imgloc.com/2023/07/13/VgJyaZ.jpeg)
+[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&)](https://s1.ax1x.com/2023/08/06/pPAQTPI.jpg)
 
 An extensible, convenient, and efficient toolbox for finetuning large machine learning models, designed to be user-friendly, speedy and reliable, and accessible to the entire community.
@@ -395,7 +395,7 @@ Whether you are a beginner or an expert, we believe that you can benefit from th
 [![Embark](https://img.shields.io/badge/discord-LMFlow-%237289da.svg?logo=discord)](https://discord.gg/u9VJNpzhvA)
 [![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&)](https://join.slack.com/t/lmflow/shared_invite/zt-1wju9nicy-woXbNtS~5MavHSAtiMxmxQ)
-[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&)](https://i.imgloc.com/2023/07/13/VgJyaZ.jpeg)
+[![WeChat badge](https://img.shields.io/badge/WeChat-Join-brightgreen?logo=wechat&)](https://s1.ax1x.com/2023/08/06/pPAQTPI.jpg)

From 1d9bb83f973bdc088bbb7111680432413a0c463a Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 00:26:22 +0800
Subject: [PATCH 05/29] changed readme position

---
 README.md                                               | 4 ++++
 vocab_extension.md => scripts/vocab_extension/README.md | 0
 2 files changed, 4 insertions(+)
 rename vocab_extension.md => scripts/vocab_extension/README.md (100%)

diff --git a/README.md b/README.md
index eb14f529b..5aa45c0bd 100644
--- a/README.md
+++ b/README.md
@@ -336,6 +336,10 @@ You can config the deepspeed under configs. Details can be referred at [DeepSpee
 
 Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cpp). It is possible for everyone to run their LLaMA models on CPU by 4-bit quantization. We provide a script to convert LLaMA LoRA weights to `.pt` files. You only need to use `convert-pth-to-ggml.py` in llama.cpp to perform quantization.
 
+### 4.4 Vocabulary List Extension
+
+Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/scripts/vocab_extension) for more details.
+
 ## 5. Model Release

diff --git a/vocab_extension.md b/scripts/vocab_extension/README.md
similarity index 100%
rename from vocab_extension.md
rename to scripts/vocab_extension/README.md

From 90c7cf86e1a6ee480ec96a54bad0701b5bbcd142 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 00:28:09 +0800
Subject: [PATCH 06/29] fix link bug

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5aa45c0bd..ae393a437 100644
--- a/README.md
+++ b/README.md
@@ -338,7 +338,7 @@ Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cp
 
 ### 4.4 Vocabulary List Extension
 
-Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/scripts/vocab_extension) for more details.
+Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/tree/main/scripts/vocab_extension) for more details.
 
 ## 5. Model Release
From 083a69d7b363780f1a73a963b156022710313168 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 00:32:54 +0800
Subject: [PATCH 07/29] change readme position

---
 README.md                        | 4 +++-
 .../Position_Interpolation.md    | 0
 2 files changed, 3 insertions(+), 1 deletion(-)
 rename Position_Interpolation_README.md => readme/Position_Interpolation.md (100%)

diff --git a/README.md b/README.md
index eb14f529b..26762357a 100644
--- a/README.md
+++ b/README.md
@@ -336,7 +336,9 @@ You can config the deepspeed under configs. Details can be referred at [DeepSpee
 
 Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cpp). It is possible for everyone to run their LLaMA models on CPU by 4-bit quantization. We provide a script to convert LLaMA LoRA weights to `.pt` files. You only need to use `convert-pth-to-ggml.py` in llama.cpp to perform quantization.
-
+### 4.5 Position Interpolation for LLaMA Models
+Now LMFlow supports the latest Linear & NTK (Neural Tangent Kernel) scaling techniques for LLaMA models. Check [position_interpolation](
+https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details
 ## 5. Model Release
 
 ### 5.1 Medical Model Checkpoints

diff --git a/Position_Interpolation_README.md b/readme/Position_Interpolation.md
similarity index 100%
rename from Position_Interpolation_README.md
rename to readme/Position_Interpolation.md

From c42568f0954ad5f6ce74333b4156697135adaf17 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 00:35:11 +0800
Subject: [PATCH 08/29] fix link

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ae393a437..b3b388dfa 100644
--- a/README.md
+++ b/README.md
@@ -338,7 +338,7 @@ Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cp
 
 ### 4.4 Vocabulary List Extension
 
-Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/tree/main/scripts/vocab_extension) for more details.
+Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) for more details.
 
 ## 5. Model Release
From 23e7a67dfe27b07fb3a17e79bfd02b102d91a568 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 10:51:30 +0800
Subject: [PATCH 09/29] change font size of new readmes

---
 readme/Position_Interpolation.md  | 2 +-
 scripts/vocab_extension/README.md | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/readme/Position_Interpolation.md b/readme/Position_Interpolation.md
index df0d469f0..e89e961a8 100644
--- a/readme/Position_Interpolation.md
+++ b/readme/Position_Interpolation.md
@@ -5,7 +5,7 @@ For more details of these techniques, you can check out the links below:
 https://arxiv.org/abs/2306.15595
 * NTK scaling: \
 https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/
-# Usage
+## Usage
 To use the Position Interpolation Techniques, you need to set the following options:
 ```
 --truncate_to_model_max_length False
diff --git a/scripts/vocab_extension/README.md b/scripts/vocab_extension/README.md
index bb5217d1c..5582d7d16 100644
--- a/scripts/vocab_extension/README.md
+++ b/scripts/vocab_extension/README.md
@@ -1,21 +1,22 @@
-# Train & Merge Tokenizer
+# Vocab Extension
+## Train & Merge Tokenizer
 To automatically convert data, train a SentencePiece tokenizer, and merge the tokenizer, you can run the following script:
 ```
 bash scripts/vocab_extension/train_merge_tokenizer.sh
 ```
 Alternatively, you can run each of the three steps separately:
 
-# Convert JSON Data to TXT
+## Convert JSON Data to TXT
 To convert JSON data to TXT for sentencepiece tokenizer training, run:
 ```
 bash scripts/vocab_extension/convert_json_to_txt.sh
 ```
-# Train SentencePiece Tokenizer
+## Train SentencePiece Tokenizer
 To train a SentencePiece tokenizer, run:
 ```
 bash scripts/vocab_extension/train_tokenizer.sh
 ```
-# Merge New Tokenizer with the Original One
+## Merge New Tokenizer with the Original One
 To merge a new tokenizer with the original one, run:
 ```
 bash scripts/vocab_extension/merge_tokenizer.sh

From ee4205e517594ee0c69be2303bc6f6658443a85a Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Sun, 6 Aug 2023 10:57:56 +0800
Subject: [PATCH 10/29] fix details in hf_decoder_model load tokenizer

---
 src/lmflow/models/hf_decoder_model.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py
index aed7f751d..571bbc447 100644
--- a/src/lmflow/models/hf_decoder_model.py
+++ b/src/lmflow/models/hf_decoder_model.py
@@ -155,6 +155,7 @@ def __init__(
             )
         except RecursionError:
+            logger.warning("The tokenizer_config.json file doesn't set the special tokens. Using default values: <unk>, <s>, </s> for unknown token, bos token and eos token respectively.")
             if model_args.tokenizer_name:
                 tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, unk_token="<unk>",
                                                           bos_token="<s>",
                                                           eos_token="</s>",
                                                           **tokenizer_kwargs)
             elif model_args.model_name_or_path:
                 tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, unk_token="<unk>",
                                                           bos_token="<s>",
                                                           eos_token="</s>",
                                                           **tokenizer_kwargs)
             else:
                 raise ValueError(
                     "You are instantiating a new tokenizer from scratch. This is"
                     " not supported by this script. You can do it from another"
                     " script, save it, and load it from here, using"
                     " --tokenizer_name."
                 )
-
-            logger.warning("The tokenizer_config.json file doesn't set the special tokens. Using default values: <unk>, <s>, </s>")
 
         self.tokenizer = tokenizer
From d5dc68232bed954d1112532791d56a627430ebce Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 11:13:34 +0800
Subject: [PATCH 11/29] fix vocab extension bugs when using new transformers

---
 scripts/vocab_extension/merge_tokenizer.sh       |  2 +-
 scripts/vocab_extension/train_merge_tokenizer.sh |  2 +-
 utils/merge_tokenizer.py                         | 22 ++++++++++++++-----
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/scripts/vocab_extension/merge_tokenizer.sh b/scripts/vocab_extension/merge_tokenizer.sh
index 4dbb98896..0cd1722c0 100644
--- a/scripts/vocab_extension/merge_tokenizer.sh
+++ b/scripts/vocab_extension/merge_tokenizer.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 mkdir -p ./output_models/new_tokenizer
-python utils/merge_tokenizer.py --tokenizer_dir pinkmanlove/llama-7b-hf \
+python utils/merge_tokenizer.py --tokenizer_dir openlm-research/open_llama_3b \
     --chinese_sp_model_file ./output_models/new_tokenizer/example.model \
     --output_dir ./output_models/merged_tokenizer \
\ No newline at end of file
diff --git a/scripts/vocab_extension/train_merge_tokenizer.sh b/scripts/vocab_extension/train_merge_tokenizer.sh
index 943c81ebe..43398613a 100644
--- a/scripts/vocab_extension/train_merge_tokenizer.sh
+++ b/scripts/vocab_extension/train_merge_tokenizer.sh
@@ -19,5 +19,5 @@ python utils/train_tokenizer.py --dataset_path ./data/wiki_zh_eval/converted_dat
 # merge the new tokenizer with the old one
 mkdir -p ./output_models/merged_tokenizer
 python utils/merge_tokenizer.py --chinese_sp_model_file ./output_models/new_tokenizer/example.model \
-    --tokenizer_dir pinkmanlove/llama-7b-hf \
+    --tokenizer_dir openlm-research/open_llama_3b \
     --output_dir ./output_models/merged_tokenizer
\ No newline at end of file
diff --git a/utils/merge_tokenizer.py b/utils/merge_tokenizer.py
index 17488f5c7..81cd0109e 100644
--- a/utils/merge_tokenizer.py
+++ b/utils/merge_tokenizer.py
@@ -20,7 +20,7 @@
     os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"]="python"
 
     parser = argparse.ArgumentParser()
-    parser.add_argument('--tokenizer_dir', default='pinkmanlove/llama-7b-hf', type=str, required=False)
+    parser.add_argument('--tokenizer_dir', default='openlm-research/open_llama_3b', type=str, required=False)
     parser.add_argument('--chinese_sp_model_file', default='./output_models/new_tokenizer/example.model', type=str)
     parser.add_argument('--output_dir', default='./output_models/merged_tokenizer', type=str, required=False)
     args = parser.parse_args()
@@ -30,7 +30,12 @@
     output_dir = args.output_dir
 
     # load
-    old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
+    try:
+        old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=False)
+    except RecursionError:
+        old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, unk_token="<unk>",
+                                                      bos_token="<s>",
+                                                      eos_token="</s>", use_fast=False)
     chinese_sp_model = spm.SentencePieceProcessor()
     chinese_sp_model.Load(chinese_sp_model_file)
 
@@ -56,15 +61,20 @@
     with open(output_sp_dir+'/merged_tokenizer.model', 'wb') as f:
         f.write(old_spm.SerializeToString())
 
-    tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,vocab_file=output_sp_dir+'/merged_tokenizer.model')
-
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,vocab_file=output_sp_dir+'/merged_tokenizer.model', use_fast=False)
+    except RecursionError:
+        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,unk_token="<unk>",
+                                                  bos_token="<s>",
+                                                  eos_token="</s>",
+                                                  vocab_file=output_sp_dir+'/merged_tokenizer.model',
+                                                  use_fast=False)
     tokenizer.save_pretrained(output_hf_dir)
     logging.info(f"Merged tokenizer has been saved to %s",output_dir)
 
     # Test
-    old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
-    new_tokenizer = AutoTokenizer.from_pretrained(output_hf_dir)
+    new_tokenizer = tokenizer
     logging.info(f"Old tokenizer vocab size: %d",len(old_tokenizer))
     logging.info(f"New tokenizer vocab size: %d",len(new_tokenizer))
From 137c65c56013dfbfb90821580f0dc50d4e5af118 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 12:18:03 +0800
Subject: [PATCH 12/29] improve performance of training tokenizer

---
 scripts/vocab_extension/train_merge_tokenizer.sh | 3 ++-
 scripts/vocab_extension/train_tokenizer.sh       | 3 ++-
 utils/merge_tokenizer.py                         | 4 ++--
 utils/train_tokenizer.py                         | 6 ++++--
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/scripts/vocab_extension/train_merge_tokenizer.sh b/scripts/vocab_extension/train_merge_tokenizer.sh
index 43398613a..2e63e84b6 100644
--- a/scripts/vocab_extension/train_merge_tokenizer.sh
+++ b/scripts/vocab_extension/train_merge_tokenizer.sh
@@ -14,7 +14,8 @@ python utils/train_tokenizer.py --dataset_path ./data/wiki_zh_eval/converted_dat
     --model_type bpe \
     --output_dir ./output_models/new_tokenizer \
     --user_defined_symbols 0,1,2,3,4,5,6,7,8,9,% \
-    --vocab_size 20000
+    --vocab_size 20000 \
+    --max_sentencepiece_length 4
 
 # merge the new tokenizer with the old one
 mkdir -p ./output_models/merged_tokenizer
 python utils/merge_tokenizer.py --chinese_sp_model_file ./output_models/new_tokenizer/example.model \
diff --git a/scripts/vocab_extension/train_tokenizer.sh b/scripts/vocab_extension/train_tokenizer.sh
index f58347b6b..d61275499 100644
--- a/scripts/vocab_extension/train_tokenizer.sh
+++ b/scripts/vocab_extension/train_tokenizer.sh
@@ -4,4 +4,5 @@ python utils/train_tokenizer.py --dataset_path ./data/wiki_zh_eval/converted_dat
     --model_type bpe \
     --output_dir ./output_models/new_tokenizer \
     --user_defined_symbols 0,1,2,3,4,5,6,7,8,9,% \
-    --vocab_size 20000
\ No newline at end of file
+    --vocab_size 20000 \
+    --max_sentencepiece_length 4
\ No newline at end of file
diff --git a/utils/merge_tokenizer.py b/utils/merge_tokenizer.py
index 81cd0109e..e0fdd87e7 100644
--- a/utils/merge_tokenizer.py
+++ b/utils/merge_tokenizer.py
@@ -81,5 +81,5 @@
     text='''白日依山尽,黄河入海流。欲穷千里目,更上一层楼。
 The primary use of LLaMA is research on large language models, including'''
     logging.info(f"Test text:\n %s",text)
-    logging.info(f"Tokenized by LLaMA tokenizer:%s",old_tokenizer.tokenize(text))
-    logging.info(f"Tokenized by Chinese-LLaMA tokenizer:%s",new_tokenizer.tokenize(text))
+    logging.info(f"Tokenized by original tokenizer:%s",old_tokenizer.tokenize(text))
+    logging.info(f"Tokenized by merged tokenizer:%s",new_tokenizer.tokenize(text))
\ No newline at end of file
diff --git a/utils/train_tokenizer.py b/utils/train_tokenizer.py
index 31b1f79b5..48ce3c2b2 100644
--- a/utils/train_tokenizer.py
+++ b/utils/train_tokenizer.py
@@ -13,6 +13,7 @@
     parser.add_argument('--vocab_size', default=20000, type=int, required=False)
     parser.add_argument('--model_type', default='bpe', type=str, required=False)
     parser.add_argument('--user_defined_symbols', default='0,1,2,3,4,5,6,7,8,9,%', type=str, required=False)
+    parser.add_argument('--max_sentencepiece_length', default=4, type=int, required=False)
     args = parser.parse_args()
 
     dataset_path = args.dataset_path
@@ -20,10 +21,11 @@
     vocab_size = args.vocab_size
     model_type = args.model_type
     user_defined_symbols = args.user_defined_symbols
-
+    max_sentencepiece_length=args.max_sentencepiece_length
+
     def mkdir(path):
         if not os.path.exists(path):
             os.makedirs(path)
 
     mkdir(output_dir)
-    spm.SentencePieceTrainer.train('--input={} --model_prefix={} --model_type={} --vocab_size={} --user_defined_symbols={} --minloglevel=1'.format(dataset_path,output_dir+'/example',model_type,vocab_size,user_defined_symbols))
+    spm.SentencePieceTrainer.train('--input={} --model_prefix={} --model_type={} --vocab_size={} --user_defined_symbols={} --max_sentencepiece_length={} --minloglevel=1'.format(dataset_path,output_dir+'/example',model_type,vocab_size,user_defined_symbols,max_sentencepiece_length))
\ No newline at end of file

From 596790c28b311a0666ef3455b040f64feb11b7f0 Mon Sep 17 00:00:00 2001
From: rpan
Date: Mon, 7 Aug 2023 14:51:43 +0800
Subject: [PATCH 13/29] Use fp16 instead of bf16 in default scripts

Since only Ampere-architecture GPUs support bf16, while fp16 is supported
by more types of GPUs.
---
 scripts/run_finetune.sh                                   | 2 +-
 scripts/run_finetune_with_lora.sh                         | 2 +-
 scripts/run_finetune_with_lora_save_aggregated_weights.sh | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/run_finetune.sh b/scripts/run_finetune.sh
index e6287bcd5..a9e766aa6 100755
--- a/scripts/run_finetune.sh
+++ b/scripts/run_finetune.sh
@@ -27,7 +27,7 @@ deepspeed ${deepspeed_args} \
     --block_size 512 \
     --per_device_train_batch_size 1 \
     --deepspeed configs/ds_config_zero3.json \
-    --bf16 \
+    --fp16 \
     --run_name finetune \
     --validation_split_percentage 0 \
     --logging_steps 20 \
diff --git a/scripts/run_finetune_with_lora.sh b/scripts/run_finetune_with_lora.sh
index 57190d600..b57bc58b9 100755
--- a/scripts/run_finetune_with_lora.sh
+++ b/scripts/run_finetune_with_lora.sh
@@ -28,7 +28,7 @@ deepspeed ${deepspeed_args} \
     --lora_r 8 \
     --save_aggregated_lora 0\
     --deepspeed configs/ds_config_zero2.json \
-    --bf16 \
+    --fp16 \
     --run_name finetune_with_lora \
     --validation_split_percentage 0 \
     --logging_steps 20 \
diff --git a/scripts/run_finetune_with_lora_save_aggregated_weights.sh b/scripts/run_finetune_with_lora_save_aggregated_weights.sh
index 46249c40e..70e907d95 100755
--- a/scripts/run_finetune_with_lora_save_aggregated_weights.sh
+++ b/scripts/run_finetune_with_lora_save_aggregated_weights.sh
@@ -29,7 +29,7 @@ deepspeed ${deepspeed_args} \
     --lora_r 8 \
     --save_aggregated_lora 1\
     --deepspeed configs/ds_config_zero2.json \
-    --bf16 \
+    --fp16 \
     --run_name finetune_with_lora \
     --validation_split_percentage 0 \
     --logging_steps 20 \
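If you do run on bf16-capable hardware, the capability check is a single call, so a launcher could pick the dtype instead of hard-coding it (a sketch, not part of the patch):

```python
import torch

# bf16 requires Ampere-class GPUs (e.g., A100/A40) or newer; fp16 works on
# far more models, which is why the default scripts now pass --fp16.
dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16
print(f"mixed-precision dtype: {dtype}")
```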
From db915332dacd61f9f0e302f60afc61b9bcebe6f2 Mon Sep 17 00:00:00 2001
From: rpan
Date: Mon, 7 Aug 2023 17:24:04 +0800
Subject: [PATCH 14/29] Add data download for finetune scripts

---
 scripts/run_finetune.sh                                   | 3 +++
 scripts/run_finetune_with_lora.sh                         | 3 +++
 scripts/run_finetune_with_lora_save_aggregated_weights.sh | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/scripts/run_finetune.sh b/scripts/run_finetune.sh
index a9e766aa6..4cc56aa5c 100755
--- a/scripts/run_finetune.sh
+++ b/scripts/run_finetune.sh
@@ -14,6 +14,9 @@ output_dir=${project_dir}/output_models/${exp_id}
 log_dir=${project_dir}/log/${exp_id}
 
 dataset_path=${project_dir}/data/alpaca/train
+if [ ! -d ${dataset_path} ]; then
+  cd data && ./download.sh alpaca && cd -
+fi
 
 mkdir -p ${output_dir} ${log_dir}
 
diff --git a/scripts/run_finetune_with_lora.sh b/scripts/run_finetune_with_lora.sh
index b57bc58b9..696e3ca5b 100755
--- a/scripts/run_finetune_with_lora.sh
+++ b/scripts/run_finetune_with_lora.sh
@@ -12,6 +12,9 @@ output_dir=${project_dir}/output_models/${exp_id}
 log_dir=${project_dir}/log/${exp_id}
 
 dataset_path=${project_dir}/data/alpaca/train
+if [ ! -d ${dataset_path} ]; then
+  cd data && ./download.sh alpaca && cd -
+fi
 
 mkdir -p ${output_dir} ${log_dir}
 
diff --git a/scripts/run_finetune_with_lora_save_aggregated_weights.sh b/scripts/run_finetune_with_lora_save_aggregated_weights.sh
index 70e907d95..087db8af1 100755
--- a/scripts/run_finetune_with_lora_save_aggregated_weights.sh
+++ b/scripts/run_finetune_with_lora_save_aggregated_weights.sh
@@ -13,6 +13,9 @@ log_dir=${project_dir}/log/${exp_id}
 
 dataset_path=${project_dir}/data/alpaca/train
 eval_dataset_path=${project_dir}/data/alpaca/test
+if [ ! -d ${dataset_path} ]; then
+  cd data && ./download.sh alpaca && cd -
+fi
 
 mkdir -p ${output_dir} ${log_dir}
From 74183fe4a94e504e8010a681bab8d9f2183d757a Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 17:25:57 +0800
Subject: [PATCH 15/29] fix import bugs when using flash_attn1

---
 src/lmflow/models/hf_decoder_model.py                      | 7 +------
 src/lmflow/utils/flash_attention/gpt2_flash_attention.py   | 7 +++----
 .../utils/flash_attention/gpt_neo_flash_attention.py       | 7 +++----
 .../utils/flash_attention/llama_flash_attention.py         | 9 ++++-----
 4 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/src/lmflow/models/hf_decoder_model.py b/src/lmflow/models/hf_decoder_model.py
index 571bbc447..4e2f594af 100644
--- a/src/lmflow/models/hf_decoder_model.py
+++ b/src/lmflow/models/hf_decoder_model.py
@@ -75,12 +75,7 @@
         "A100": ["LlamaForCausalLM", "GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"],
         "A40": ["LlamaForCausalLM","GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"]
     }
-    if int(flash_attn.__version__.split(".")[0]) == 1:
-        GPU_SUPPORT_FLASH_ATTENTION = {
-            "A100": ["LlamaForCausalLM", "GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"],
-            "A40": ["GPTNeoForCausalLM", "GPT2ForCausalLM", "BloomForCausalLM"]
-        }
-except ImportError:
+except:
     pass
 
 class HFDecoderModel(DecoderModel, Tunable):
diff --git a/src/lmflow/utils/flash_attention/gpt2_flash_attention.py b/src/lmflow/utils/flash_attention/gpt2_flash_attention.py
index f9d46ff8a..25ad44390 100644
--- a/src/lmflow/utils/flash_attention/gpt2_flash_attention.py
+++ b/src/lmflow/utils/flash_attention/gpt2_flash_attention.py
@@ -8,11 +8,10 @@
 from einops import rearrange
 
-import flash_attn
-if int(flash_attn.__version__.split(".")[0]) == 1:
-    from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
-if int(flash_attn.__version__.split(".")[0]) == 2:
+try:
     from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
+except:
+    from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
 
 from flash_attn.bert_padding import unpad_input, pad_input
 
diff --git a/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py b/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py
index 0af820299..20c9e4783 100644
--- a/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py
+++ b/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py
@@ -4,11 +4,10 @@
 import transformers
 from einops import rearrange
 
-import flash_attn
-if int(flash_attn.__version__.split(".")[0]) == 1:
-    from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
-if int(flash_attn.__version__.split(".")[0]) == 2:
+try:
     from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
+except:
+    from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
 
 from flash_attn.bert_padding import unpad_input, pad_input
 
diff --git a/src/lmflow/utils/flash_attention/llama_flash_attention.py b/src/lmflow/utils/flash_attention/llama_flash_attention.py
index 91bdc828a..e55eab949 100644
--- a/src/lmflow/utils/flash_attention/llama_flash_attention.py
+++ b/src/lmflow/utils/flash_attention/llama_flash_attention.py
@@ -8,12 +8,11 @@
 from einops import rearrange
 
-import flash_attn
-if int(flash_attn.__version__.split(".")[0]) == 1:
-    from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
-if int(flash_attn.__version__.split(".")[0]) == 2:
+try:
     from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
-
+except:
+    from flash_attn.flash_attn_interface import flash_attn_unpadded_qkvpacked_func
+
 from flash_attn.bert_padding import unpad_input, pad_input
 

From 1854d129c5b7e821e8a2f7f08adb339398b8716a Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 17:28:59 +0800
Subject: [PATCH 16/29] code comments for flash_attn2

---
 src/lmflow/utils/flash_attention/gpt2_flash_attention.py    | 1 +
 src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py | 1 +
 src/lmflow/utils/flash_attention/llama_flash_attention.py   | 1 +
 3 files changed, 3 insertions(+)

diff --git a/src/lmflow/utils/flash_attention/gpt2_flash_attention.py b/src/lmflow/utils/flash_attention/gpt2_flash_attention.py
index 25ad44390..bac90b447 100644
--- a/src/lmflow/utils/flash_attention/gpt2_flash_attention.py
+++ b/src/lmflow/utils/flash_attention/gpt2_flash_attention.py
@@ -8,6 +8,7 @@
 from einops import rearrange
 
+#try to import flash_attn 2.x.x, if not, import flash_attn 1.x.x
 try:
     from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
 except:
diff --git a/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py b/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py
index 20c9e4783..49c3d50a0 100644
--- a/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py
+++ b/src/lmflow/utils/flash_attention/gpt_neo_flash_attention.py
@@ -4,6 +4,7 @@
 import transformers
 from einops import rearrange
 
+#try to import flash_attn 2.x.x, if not, import flash_attn 1.x.x
 try:
     from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
 except:
diff --git a/src/lmflow/utils/flash_attention/llama_flash_attention.py b/src/lmflow/utils/flash_attention/llama_flash_attention.py
index e55eab949..4159629c6 100644
--- a/src/lmflow/utils/flash_attention/llama_flash_attention.py
+++ b/src/lmflow/utils/flash_attention/llama_flash_attention.py
@@ -8,6 +8,7 @@
 from einops import rearrange
 
+#try to import flash_attn 2.x.x, if not, import flash_attn 1.x.x
 try:
     from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
 except:
From dff1f92eebc13443e80ef9171617412a5862a001 Mon Sep 17 00:00:00 2001
From: rpan
Date: Mon, 7 Aug 2023 19:32:33 +0800
Subject: [PATCH 17/29] Add auto data download for all scripts

---
 scripts/run_evaluation.sh             | 4 ++++
 scripts/run_evaluation_accelerator.sh | 4 ++++
 scripts/run_evaluation_with_lora.sh   | 5 +++++
 scripts/run_multistage_finetune.sh    | 3 +++
 scripts/run_raft_align.sh             | 4 ++++
 scripts/run_reward_modeling.sh        | 3 +++
 6 files changed, 23 insertions(+)

diff --git a/scripts/run_evaluation.sh b/scripts/run_evaluation.sh
index d94346af0..a1e786ec4 100755
--- a/scripts/run_evaluation.sh
+++ b/scripts/run_evaluation.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+if [ ! -d data/MedQA-USMLE ]; then
+  cd data && ./download.sh MedQA-USMLE && cd -
+fi
+
 CUDA_VISIBLE_DEVICES=0 \
     deepspeed examples/evaluation.py \
     --answer_type medmcqa \
diff --git a/scripts/run_evaluation_accelerator.sh b/scripts/run_evaluation_accelerator.sh
index 1820c0239..8959f6f4b 100644
--- a/scripts/run_evaluation_accelerator.sh
+++ b/scripts/run_evaluation_accelerator.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
 
+if [ ! -d data/MedQA-USMLE ]; then
+  cd data && ./download.sh MedQA-USMLE && cd -
+fi
+
 CUDA_VISIBLE_DEVICES=0 accelerate launch --config_file configs/accelerator_singlegpu_config.yaml examples/evaluation.py \
     --answer_type usmle \
     --model_name_or_path gpt2-large \
diff --git a/scripts/run_evaluation_with_lora.sh b/scripts/run_evaluation_with_lora.sh
index d8a02162a..b83ad074c 100755
--- a/scripts/run_evaluation_with_lora.sh
+++ b/scripts/run_evaluation_with_lora.sh
@@ -3,6 +3,11 @@
 # --model_name_or_path specifies the original huggingface model
 # --lora_model_path specifies the model difference introduced by finetuning,
 #   i.e. the one saved by ./scripts/run_finetune_with_lora.sh
+
+if [ ! -d data/alpaca ]; then
+  cd data && ./download.sh alpaca && cd -
+fi
+
 CUDA_VISIBLE_DEVICES=0 \
     deepspeed examples/evaluation.py \
     --answer_type text \
diff --git a/scripts/run_multistage_finetune.sh b/scripts/run_multistage_finetune.sh
index 9d30746c6..701540e8f 100755
--- a/scripts/run_multistage_finetune.sh
+++ b/scripts/run_multistage_finetune.sh
@@ -11,6 +11,9 @@ project_dir=$(cd "$(dirname $0)"/..; pwd)
 output_dir=${project_dir}/output_models/${exp_id}
 log_dir=${project_dir}/log/${exp_id}
 dataset_path="${project_dir}/data/example_dataset/train"
+if [ ! -d ${dataset_path} ]; then
+  cd data && ./download.sh example_dataset && cd -
+fi
 
 mkdir -p ${output_dir} ${log_dir}
 
diff --git a/scripts/run_raft_align.sh b/scripts/run_raft_align.sh
index 29d18ff9f..9cb5a9717 100755
--- a/scripts/run_raft_align.sh
+++ b/scripts/run_raft_align.sh
@@ -11,6 +11,10 @@ project_dir=$(cd "$(dirname $0)"/..; pwd)
 output_dir=${project_dir}/output_models/${exp_id}
 log_dir=${project_dir}/log/${exp_id}
 
+if [ ! -d data/hh_rlhf ]; then
+  cd data && ./download.sh hh_rlhf && cd -
+fi
+
 mkdir -p ${output_dir} ${log_dir}
 
 export PYTHONPATH=.
diff --git a/scripts/run_reward_modeling.sh b/scripts/run_reward_modeling.sh
index 476661cde..7d9347851 100644
--- a/scripts/run_reward_modeling.sh
+++ b/scripts/run_reward_modeling.sh
@@ -14,6 +14,9 @@ output_dir=${project_dir}/output_models/${exp_id}
 log_dir=${project_dir}/log/${exp_id}
 
 dataset_path=${project_dir}/data/hh_rlhf/rm/hh_rlhf_rm_training.json
+if [ ! -d data/hh_rlhf ]; then
+  cd data && ./download.sh hh_rlhf && cd -
+fi
 
 mkdir -p ${output_dir} ${log_dir}
From 21c2923114339b81f662b7cd93f18b633ee4f4b6 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 20:30:10 +0800
Subject: [PATCH 18/29] fix style issues

---
 utils/merge_tokenizer.py | 13 +++++++++++--
 utils/train_tokenizer.py |  9 ++++++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/utils/merge_tokenizer.py b/utils/merge_tokenizer.py
index e0fdd87e7..6354dd1e9 100644
--- a/utils/merge_tokenizer.py
+++ b/utils/merge_tokenizer.py
@@ -11,7 +11,7 @@
     import torch
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer,LlamaTokenizer
 
 logging.basicConfig(level=logging.INFO)
 
@@ -36,6 +36,10 @@
         old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, unk_token="<unk>",
                                                       bos_token="<s>",
                                                       eos_token="</s>", use_fast=False)
+
+    if not isinstance(old_tokenizer,LlamaTokenizer):
+        raise ValueError("The tokenizer is not a LlamaTokenizer, we only support LlamaTokenizer for now.")
+
     chinese_sp_model = spm.SentencePieceProcessor()
     chinese_sp_model.Load(chinese_sp_model_file)
 
@@ -62,13 +66,18 @@
         f.write(old_spm.SerializeToString())
 
     try:
-        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,vocab_file=output_sp_dir+'/merged_tokenizer.model', use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrain(
+            pretrained_model_name_or_path=tokenizer_dir,
+            vocab_file=output_sp_dir+'/merged_tokenizer.model',
+            use_fast=False
+        )
     except RecursionError:
         tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,unk_token="<unk>",
                                                   bos_token="<s>",
                                                   eos_token="</s>",
                                                   vocab_file=output_sp_dir+'/merged_tokenizer.model',
                                                   use_fast=False)
+
     tokenizer.save_pretrained(output_hf_dir)
     logging.info(f"Merged tokenizer has been saved to %s",output_dir)
 
diff --git a/utils/train_tokenizer.py b/utils/train_tokenizer.py
index 48ce3c2b2..98d081123 100644
--- a/utils/train_tokenizer.py
+++ b/utils/train_tokenizer.py
@@ -28,4 +28,11 @@
     def mkdir(path):
         if not os.path.exists(path):
             os.makedirs(path)
 
     mkdir(output_dir)
-    spm.SentencePieceTrainer.train('--input={} --model_prefix={} --model_type={} --vocab_size={} --user_defined_symbols={} --max_sentencepiece_length={} --minloglevel=1'.format(dataset_path,output_dir+'/example',model_type,vocab_size,user_defined_symbols,max_sentencepiece_length))
+    spm.SentencePieceTrainer.train(
+        f'--input={dataset_path}'
+        f' --model_prefix={output_dir}/example'
+        f' --model_type={model_type}'
+        f' --vocab_size={vocab_size}'
+        f' --user_defined_symbols={user_defined_symbols}'
+        f' --max_sentencepiece_length={max_sentencepiece_length}'
+    )
\ No newline at end of file
From 5ca18daae906bac755acb2b9034a8ac6f73ed908 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 20:35:38 +0800
Subject: [PATCH 19/29] change loglevel to 1

---
 utils/train_tokenizer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/train_tokenizer.py b/utils/train_tokenizer.py
index 98d081123..7b67f6c40 100644
--- a/utils/train_tokenizer.py
+++ b/utils/train_tokenizer.py
@@ -35,4 +35,5 @@
         f' --vocab_size={vocab_size}'
         f' --user_defined_symbols={user_defined_symbols}'
         f' --max_sentencepiece_length={max_sentencepiece_length}'
+        f' --minloglevel=1'
     )
\ No newline at end of file

From 3919c3a89e3ddbdafe1b9c0d1142d76f0bf390b8 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 20:43:32 +0800
Subject: [PATCH 20/29] optimize code style in merge_tokenizer.py

---
 utils/merge_tokenizer.py | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/utils/merge_tokenizer.py b/utils/merge_tokenizer.py
index 6354dd1e9..9931edcf6 100644
--- a/utils/merge_tokenizer.py
+++ b/utils/merge_tokenizer.py
@@ -33,9 +33,11 @@
     try:
         old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, use_fast=False)
     except RecursionError:
-        old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir, unk_token="<unk>",
+        old_tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir,
+                                                      unk_token="<unk>",
                                                       bos_token="<s>",
-                                                      eos_token="</s>", use_fast=False)
+                                                      eos_token="</s>",
+                                                      use_fast=False)
 
@@ -68,17 +70,18 @@
         f.write(old_spm.SerializeToString())
 
     try:
-        tokenizer = AutoTokenizer.from_pretrain(
+        tokenizer = AutoTokenizer.from_pretrained(
             pretrained_model_name_or_path=tokenizer_dir,
             vocab_file=output_sp_dir+'/merged_tokenizer.model',
             use_fast=False
         )
     except RecursionError:
-        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,unk_token="<unk>",
-                                                  bos_token="<s>",
-                                                  eos_token="</s>",
-                                                  vocab_file=output_sp_dir+'/merged_tokenizer.model',
-                                                  use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=tokenizer_dir,
+                                                  unk_token="<unk>",
+                                                  bos_token="<s>",
+                                                  eos_token="</s>",
+                                                  vocab_file=output_sp_dir+'/merged_tokenizer.model',
+                                                  use_fast=False)

From 89b5e48d2c5047f3cb99eb8d7de42a8894eef351 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 21:42:55 +0800
Subject: [PATCH 21/29] update install.sh & README

---
 README.md  | 2 +-
 install.sh | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 install.sh

diff --git a/README.md b/README.md
index 092026356..64ef6b251 100644
--- a/README.md
+++ b/README.md
@@ -213,7 +213,7 @@
 cd LMFlow
 conda create -n lmflow python=3.9 -y
 conda activate lmflow
 conda install mpi4py
-pip install -e .
+./install.sh
 ```
 
 ## 2.Prepare Dataset
diff --git a/install.sh b/install.sh
new file mode 100644
index 000000000..6452e5053
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+pip install -e .
+
+gpu_state="$(nvidia-smi --query-gpu=name --format=csv,noheader)"
+if [[ *"A100"* == "${gpu_state}" -o *"A40"* == "${gpu_state}" ]]; then
+  echo "YES!!!!!"
+  pip install flash-attn==2.0.2
+fi
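The same hardware gate expressed in Python (a sketch; `flash-attn==2.0.2` is the pin `install.sh` uses, and A100/A40 are the two GPU models the repository's support table lists):

```python
import subprocess

def gpu_supports_flash_attn() -> bool:
    # Mirror install.sh: only install flash-attn on the GPU models that
    # LMFlow's scripts have validated (A100 and A40).
    names = subprocess.run(
        ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
        capture_output=True, text=True, check=True,
    ).stdout
    return any(g in names for g in ("A100", "A40"))

if gpu_supports_flash_attn():
    subprocess.run(["pip", "install", "flash-attn==2.0.2"], check=True)
```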
+if [[ "${gpu_state}" == *"A100"* || "${gpu_state}" == *"A40"* ]]; then pip install flash-attn==2.0.2 -fi +fi \ No newline at end of file From f005663e099edc7736fee960a5157d0d37c00246 Mon Sep 17 00:00:00 2001 From: yaoguany <89233842+yaoguany@users.noreply.github.com> Date: Mon, 7 Aug 2023 22:43:42 +0800 Subject: [PATCH 23/29] add flash_attn2 readme --- README.md | 8 +++++--- readme/flash_attn2.md | 8 ++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 readme/flash_attn2.md diff --git a/README.md b/README.md index 64ef6b251..fe4aa31eb 100644 --- a/README.md +++ b/README.md @@ -338,12 +338,14 @@ Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cp ### 4.4 Vocabulary List Extension -Now you can train your own sentencepiece tokenizer and merge it with model's origin hf tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) for more details. +Now you can train your own sentencepiece tokenizer and merge it with model's origin hf tokenizer. Check out [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) for more details. ### 4.5 Position Interpolation for LLaMA Models -Now LMFlow supports the latest Linear & NTK (Neural Kernel theory) scaling techniques for LLaMA models. Check [postion_interpolation]( -https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details +Now LMFlow supports the latest Linear & NTK (Neural Kernel theory) scaling techniques for LLaMA models. Check out [postion_interpolation]( +https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details. +### 4.6 Flash Attention 2.0 +Now LMFlow supports the latest Flash Attention 2.0. Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details. ## 5. Model Release ### 5.1 Medical Model Checkpoints diff --git a/readme/flash_attn2.md b/readme/flash_attn2.md new file mode 100644 index 000000000..5e1edaf90 --- /dev/null +++ b/readme/flash_attn2.md @@ -0,0 +1,8 @@ +# Falsh Attention 2.0 +We're thrilled to announce that LMFlow now supports training and inference using flash attn2! This cutting-edge feature will take your language modeling to the next level. To use it, simply add ``` --use_flash_attention True ``` to the corresponding bash script. + +But that's not all - we're also excited to share that LMFlow now supports GPU models A40 and A100! This means faster and more efficient training for your models. + +And as if that wasn't enough, we've also expanded our supported model architectures to include the powerful ["LlamaForCausalLM", "GPTNeoForCausalLM", "BloomForCausalLM"]. With LMFlow, the possibilities for language modeling are endless. + +Upgrade to LMFlow now and experience the future of language modeling! \ No newline at end of file From 17cf48ea594cf1b6941f287059b7bb9fd66f83f5 Mon Sep 17 00:00:00 2001 From: yaoguany <89233842+yaoguany@users.noreply.github.com> Date: Mon, 7 Aug 2023 23:29:01 +0800 Subject: [PATCH 24/29] improve flash_attn2 readme --- readme/flash_attn2.md | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/readme/flash_attn2.md b/readme/flash_attn2.md index 5e1edaf90..bdc94fd45 100644 --- a/readme/flash_attn2.md +++ b/readme/flash_attn2.md @@ -1,8 +1,17 @@ -# Falsh Attention 2.0 -We're thrilled to announce that LMFlow now supports training and inference using flash attn2! 
From f005663e099edc7736fee960a5157d0d37c00246 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 22:43:42 +0800
Subject: [PATCH 23/29] add flash_attn2 readme

---
 README.md             | 8 +++++---
 readme/flash_attn2.md | 8 ++++++++
 2 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 readme/flash_attn2.md

diff --git a/README.md b/README.md
index 64ef6b251..fe4aa31eb 100644
--- a/README.md
+++ b/README.md
@@ -338,12 +338,14 @@ Thanks to the great efforts of [llama.cpp](https://github.com/ggerganov/llama.cp
 
 ### 4.4 Vocabulary List Extension
 
-Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) for more details.
+Now you can train your own sentencepiece tokenizer and merge it with the model's original HF tokenizer. Check out [vocab_extension](https://github.com/OptimalScale/LMFlow/blob/main/scripts/vocab_extension) for more details.
 
 ### 4.5 Position Interpolation for LLaMA Models
-Now LMFlow supports the latest Linear & NTK (Neural Tangent Kernel) scaling techniques for LLaMA models. Check [position_interpolation](
-https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details
+Now LMFlow supports the latest Linear & NTK (Neural Tangent Kernel) scaling techniques for LLaMA models. Check out [position_interpolation](
+https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details.
 
+### 4.6 Flash Attention 2.0
+Now LMFlow supports the latest Flash Attention 2.0. Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details.
 ## 5. Model Release
 
 ### 5.1 Medical Model Checkpoints
diff --git a/readme/flash_attn2.md b/readme/flash_attn2.md
new file mode 100644
index 000000000..5e1edaf90
--- /dev/null
+++ b/readme/flash_attn2.md
@@ -0,0 +1,8 @@
+# Falsh Attention 2.0
+We're thrilled to announce that LMFlow now supports training and inference using flash attn2! This cutting-edge feature will take your language modeling to the next level. To use it, simply add ``` --use_flash_attention True ``` to the corresponding bash script.
+
+But that's not all - we're also excited to share that LMFlow now supports GPU models A40 and A100! This means faster and more efficient training for your models.
+
+And as if that wasn't enough, we've also expanded our supported model architectures to include the powerful ["LlamaForCausalLM", "GPTNeoForCausalLM", "BloomForCausalLM"]. With LMFlow, the possibilities for language modeling are endless.
+
+Upgrade to LMFlow now and experience the future of language modeling!
\ No newline at end of file

From 17cf48ea594cf1b6941f287059b7bb9fd66f83f5 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 23:29:01 +0800
Subject: [PATCH 24/29] improve flash_attn2 readme

---
 readme/flash_attn2.md | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/readme/flash_attn2.md b/readme/flash_attn2.md
index 5e1edaf90..bdc94fd45 100644
--- a/readme/flash_attn2.md
+++ b/readme/flash_attn2.md
@@ -1,8 +1,17 @@
-# Falsh Attention 2.0
-We're thrilled to announce that LMFlow now supports training and inference using flash attn2! This cutting-edge feature will take your language modeling to the next level. To use it, simply add ``` --use_flash_attention True ``` to the corresponding bash script.
-
-But that's not all - we're also excited to share that LMFlow now supports GPU models A40 and A100! This means faster and more efficient training for your models.
-
-And as if that wasn't enough, we've also expanded our supported model architectures to include the powerful ["LlamaForCausalLM", "GPTNeoForCausalLM", "BloomForCausalLM"]. With LMFlow, the possibilities for language modeling are endless.
-
+# Flash Attention 2.0
+We're thrilled to announce that LMFlow now supports training and inference using **FlashAttention-2**! This cutting-edge feature will take your language modeling to the next level. To use it, simply add ``` --use_flash_attention True ``` to the corresponding bash script.
+Here is an example of how to use it:
+```
+#!/bin/bash
+
+deepspeed examples/evaluation.py \
+    --answer_type text \
+    --model_name_or_path pinkmanlove/llama-7b-hf \
+    --dataset_path data/wiki_en_eval \
+    --deepspeed examples/ds_config.json \
+    --inference_batch_size_per_device 1 \
+    --block_size 2048 \
+    --use_flash_attention True \
+    --metric ppl
+```
 Upgrade to LMFlow now and experience the future of language modeling!
\ No newline at end of file
From a9fb5c075c445c7c882734c3d2f6b5b870d84c4b Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 23:38:40 +0800
Subject: [PATCH 25/29] update readme

---
 readme/flash_attn2.md | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/readme/flash_attn2.md b/readme/flash_attn2.md
index bdc94fd45..0ee3beec7 100644
--- a/readme/flash_attn2.md
+++ b/readme/flash_attn2.md
@@ -3,15 +3,23 @@ We're thrilled to announce that LMFlow now supports training and inference using
 Here is an example of how to use it:
 ```
 #!/bin/bash
+pip install flash_attn==2.0.2
 
-deepspeed examples/evaluation.py \
-    --answer_type text \
-    --model_name_or_path pinkmanlove/llama-7b-hf \
-    --dataset_path data/wiki_en_eval \
-    --deepspeed examples/ds_config.json \
-    --inference_batch_size_per_device 1 \
-    --block_size 2048 \
-    --use_flash_attention True \
-    --metric ppl
+model=pinkmanlove/llama-7b-hf
+lora_args=""
+if [ $# -ge 1 ]; then
+  model=$1
+fi
+if [ $# -ge 2 ]; then
+  lora_args="--lora_model_path $2"
+fi
+
+CUDA_VISIBLE_DEVICES=0 \
+    deepspeed examples/chatbot.py \
+    --deepspeed configs/ds_config_chatbot.json \
+    --model_name_or_path ${model} \
+    --use_flash_attention True \
+    ${lora_args}
 ```
 
 Upgrade to LMFlow now and experience the future of language modeling!
\ No newline at end of file

From 7d53bf94ff7b325382f4f457a8c1a98fd115135c Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Mon, 7 Aug 2023 23:49:12 +0800
Subject: [PATCH 26/29] update readme bash code

---
 readme/flash_attn2.md | 23 ++++++++----------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/readme/flash_attn2.md b/readme/flash_attn2.md
index 0ee3beec7..3b9970c11 100644
--- a/readme/flash_attn2.md
+++ b/readme/flash_attn2.md
@@ -5,21 +5,14 @@
 #!/bin/bash
 pip install flash_attn==2.0.2
 
-model=pinkmanlove/llama-7b-hf
-lora_args=""
-if [ $# -ge 1 ]; then
-  model=$1
-fi
-if [ $# -ge 2 ]; then
-  lora_args="--lora_model_path $2"
-fi
-
-CUDA_VISIBLE_DEVICES=0 \
-    deepspeed examples/chatbot.py \
-    --deepspeed configs/ds_config_chatbot.json \
-    --model_name_or_path ${model} \
-    --use_flash_attention True \
-    ${lora_args}
+deepspeed --master_port=11000 \
+    examples/chatbot.py \
+    --deepspeed configs/ds_config_chatbot.json \
+    --model_name_or_path LMFlow/Full-Robin-7b-v2 \
+    --max_new_tokens 1024 \
+    --prompt_structure "###Human: {input_text}###Assistant:" \
+    --end_string "#" \
+    --use_flash_attention True
 ```
 
 Upgrade to LMFlow now and experience the future of language modeling!
\ No newline at end of file
+### 4.6 FlashAttention-2 +Now LMFlow supports the latest FlashAttention-2. Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details. ## 5. Model Release ### 5.1 Medical Model Checkpoints From 3fe99153169a66627cc17acc9d52e67322813171 Mon Sep 17 00:00:00 2001 From: shizhediao <654745845@qq.com> Date: Tue, 8 Aug 2023 03:05:36 +0800 Subject: [PATCH 28/29] reorg --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7449ae700..47eb32199 100644 --- a/README.md +++ b/README.md @@ -346,7 +346,8 @@ Now LMFlow supports the latest Linear & NTK (Neural Kernel theory) scaling techn https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details. ### 4.6 FlashAttention-2 -Now LMFlow supports the latest FlashAttention-2. Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details. +Now LMFlow supports the latest [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html). Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details. + ## 5. Model Release ### 5.1 Medical Model Checkpoints From 9337a724649b3dbfc6298805135930ea6255aeda Mon Sep 17 00:00:00 2001 From: shizhediao <654745845@qq.com> Date: Tue, 8 Aug 2023 03:06:24 +0800 Subject: [PATCH 29/29] remove space --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 47eb32199..d85303ee9 100644 --- a/README.md +++ b/README.md @@ -346,7 +346,7 @@ Now LMFlow supports the latest Linear & NTK (Neural Kernel theory) scaling techn https://github.com/OptimalScale/LMFlow/blob/main/readme/Position_Interpolation.md) for more details. ### 4.6 FlashAttention-2 -Now LMFlow supports the latest [Flash Attention-2](https://crfm.stanford.edu/2023/07/17/flash2.html). Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details. +Now LMFlow supports the latest [FlashAttention-2](https://crfm.stanford.edu/2023/07/17/flash2.html). Check out [flash_attention](https://github.com/OptimalScale/LMFlow/blob/main/readme/flash_attn2.md) for more details. ## 5. Model Release