
Commit 382e87c

[misc] run pre-commit on all files
1 parent 2961038 commit 382e87c

82 files changed, +848 -865 lines changed
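The per-file diffs below are mechanical formatting fixes rather than behavioral changes: imports re-sorted and regrouped, single quotes normalized to double quotes, long call signatures reflowed one argument per line, and trailing whitespace and missing end-of-file newlines repaired. This is the kind of output typically produced by running the repository's configured hooks over the whole tree with `pre-commit run --all-files`; the exact hook set lives in the repo's .pre-commit-config.yaml and is not shown in this commit.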


.gitignore

Lines changed: 1 addition & 1 deletion
@@ -162,4 +162,4 @@ coverage.xml
 
 # log, test files - ColossalChat
 applications/ColossalChat/logs
-applications/ColossalChat/tests/logs
+applications/ColossalChat/tests/logs

LICENSE

Lines changed: 1 addition & 1 deletion
@@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+THE SOFTWARE.

applications/Colossal-LLaMA-2/colossal_llama2/model/init_model.py

Lines changed: 1 addition & 2 deletions
@@ -8,11 +8,10 @@
 
 import numpy as np
 import torch
-from transformers import LlamaTokenizer, LlamaForCausalLM
+from transformers import LlamaForCausalLM, LlamaTokenizer
 
 from colossalai.logging import get_dist_logger
 
-
 logger = get_dist_logger()
 
 
applications/Colossal-LLaMA-2/colossal_llama2/utils/ckpt_io.py

Lines changed: 1 addition & 1 deletion
@@ -10,8 +10,8 @@
 from typing import Any, Dict, Tuple, Union
 
 import torch
-from torch.optim.optimizer import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.optimizer import Optimizer
 
 from colossalai.booster import Booster
 from colossalai.cluster import DistCoordinator
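The reshuffled imports here and in init_model.py follow the grouping and alphabetical ordering that an isort-style hook enforces. A minimal illustrative sketch of that convention (the hook choice is an assumption and the module names are only examples, not taken from this commit):

# Illustration only (assuming an isort-style hook): standard library first,
# then third-party, then first-party imports, each group alphabetized by module path.
import os

import torch
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.optimizer import Optimizer

from colossalai.booster import Booster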

applications/Colossal-LLaMA-2/colossal_llama2/utils/stream_chat_patch.py

Lines changed: 62 additions & 57 deletions
@@ -1,20 +1,19 @@
 from copy import deepcopy
-from typing import Optional, List, Dict, Tuple, Callable, Any
+from typing import Any, Callable, Dict, List, Optional, Tuple
 
 import torch
 from torch import nn
-
 from transformers import PreTrainedTokenizer
-from transformers.utils import logging
 from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList
-
+from transformers.utils import logging
+
 logger = logging.get_logger(__name__)
 
 
 def get_prompt_template(
-    input_query:str,
-    history:List[Dict]= None,
-    roles:list = ["", "Human", "Assistant"],
+    input_query: str,
+    history: List[Dict] = None,
+    roles: list = ["", "Human", "Assistant"],
 ) -> str:
     """
     Generates a prompt template for chat models based on input and history.
@@ -32,7 +31,7 @@ def get_prompt_template(
         new_history = []
     else:
         new_history = deepcopy(history)
-
+
     new_history.append({"role": roles[1], "message": input_query.strip()})
     new_history.append({"role": roles[2], "message": None})
 
@@ -48,22 +47,23 @@
         prompt += f"{role}: <s>"
     return prompt
 
+
 @torch.inference_mode()
 def streaming_chat(
-    model: Any,
+    model: Any,
     tokenizer: PreTrainedTokenizer,
-    input_query: str,
-    history: List[Dict] = None,
-    roles: list = ["", "Human", "Assistant"],
-    past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
-    temperature: float = 0.8,
-    top_p: float = 0.95,
-    top_k: int = 50,
-    do_sample: bool = True,
+    input_query: str,
+    history: List[Dict] = None,
+    roles: list = ["", "Human", "Assistant"],
+    past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
+    temperature: float = 0.8,
+    top_p: float = 0.95,
+    top_k: int = 50,
+    do_sample: bool = True,
     length_penalty: float = 1.2,
-    max_new_tokens: int = 512,
-    logits_processor: LogitsProcessorList = None,
-    return_past_key_values: bool = False,
+    max_new_tokens: int = 512,
+    logits_processor: LogitsProcessorList = None,
+    return_past_key_values: bool = False,
     **kwargs,
 ):
     """
@@ -87,7 +87,7 @@ def streaming_chat(
         **kwargs: Additional keyword arguments for generation.
 
     Yields:
-        Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
+        Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
             optionally the updated past key values if `return_past_key_values` is True.
 
     Ensures padding is on the left side for the tokenizer.
@@ -97,63 +97,68 @@ def streaming_chat(
         history = []
     if logits_processor is None:
         logits_processor = LogitsProcessorList()
-
+
     generation_kwargs = {
-        'temperature': temperature,
-        'top_p': top_p,
-        'top_k': top_k,
-        'do_sample': do_sample,
-        'max_new_tokens': max_new_tokens,
-        'length_penalty': length_penalty,
-        'use_cache': True,
-        **kwargs
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "do_sample": do_sample,
+        "max_new_tokens": max_new_tokens,
+        "length_penalty": length_penalty,
+        "use_cache": True,
+        **kwargs,
     }
 
     prompt_str = get_prompt_template(input_query, history=history, roles=roles)
-
+
     eos_token_id = [tokenizer.eos_token_id]
     inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
     history.append({"role": roles[1], "message": input_query.strip()})
     history.append({"role": roles[2], "message": None})
 
-    for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
-                                   eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
-                                   **generation_kwargs):
+    for outputs in stream_generate(
+        model,
+        **inputs,
+        past_key_values=past_key_values,
+        eos_token_id=eos_token_id,
+        return_past_key_values=return_past_key_values,
+        **generation_kwargs,
+    ):
         if return_past_key_values:
             outputs, past_key_values = outputs
 
-        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
+        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
         response = tokenizer.decode(outputs)
 
         history[-1]["message"] = response.strip()
         if return_past_key_values:
            yield response, history, past_key_values
        else:
            yield response, history
-
+
 
 @torch.inference_mode()
 def stream_generate(
-    model: Any,
-    input_ids: torch.Tensor,
+    model: Any,
+    input_ids: torch.Tensor,
     generation_config: Optional[GenerationConfig] = None,
     logits_processor: Optional[LogitsProcessorList] = None,
     stopping_criteria: Optional[StoppingCriteriaList] = None,
     prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
-    return_past_key_values: bool = False,
+    return_past_key_values: bool = False,
     **kwargs,
 ):
     """
     Generates sequences of token ids using the specified model and generation parameters.
     Adapted from https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py
-
+
     Args:
         model (Any): The model used for generating sequences of token ids.
-        input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
+        input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
        generation_config (Optional[GenerationConfig]): The generation configuration to be used as base parametrization for the generation call.
        logits_processor (Optional[LogitsProcessorList]): Custom logits processors that complement the default logits processors built from arguments
            and generation config.
-        stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
+        stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
            and a generation config.
        prefix_allowed_tokens_fn (Optional[Callable[[int, torch.Tensor], List[int]]]): Function to constrain token generation.
        return_past_key_values (bool): Whether to return past key values for further incremental decoding, defaults to False.
@@ -169,33 +174,33 @@ def stream_generate(
         generation_config = model.generation_config
     generation_config = deepcopy(generation_config)
     model_kwargs = generation_config.update(**kwargs)
-
+
     eos_token_id = generation_config.eos_token_id
     if isinstance(eos_token_id, int):
         eos_token_id = [eos_token_id]
     eos_token_id_tensor = torch.tensor(eos_token_id).to(input_ids.device) if eos_token_id is not None else None
 
     if generation_config.max_new_tokens is not None:
         generation_config.max_length = generation_config.max_new_tokens + input_ids_len
-
+
     if input_ids_len >= generation_config.max_length:
         input_ids_string = "decoder_input_ids" if model.config.is_encoder_decoder else "input_ids"
         logger.warning(
-            f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
-            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
-            " increasing `max_new_tokens`."
-        )
+            f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
+            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
+            " increasing `max_new_tokens`."
+        )
     logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
     stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
-
+
     # prepare distribution pre_processing samplers
     logits_processor = model._get_logits_processor(
-        generation_config=generation_config,
-        input_ids_seq_length=input_ids_len,
-        encoder_input_ids=input_ids,
-        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
-        logits_processor=logits_processor,
-    )
+        generation_config=generation_config,
+        input_ids_seq_length=input_ids_len,
+        encoder_input_ids=input_ids,
+        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
+        logits_processor=logits_processor,
+    )
 
     # prepare stopping criteria
     stopping_criteria = model._get_stopping_criteria(
@@ -205,7 +210,7 @@ def stream_generate(
     logits_warper = model._get_logits_warper(generation_config)
     unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
     scores = None
-
+
     while True:
         model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
         # forward pass to get next token
@@ -244,4 +249,4 @@ def stream_generate(
         yield input_ids
         # stop when each sentence is finished, or if exceed the maximum length
         if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
-            break
+            break
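For reference, a minimal usage sketch of the streaming_chat helper reformatted above. The checkpoint path, device, dtype, and query string are placeholders and not part of this commit; loading via transformers' Auto classes and the import path of the patched module are assumptions.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from colossal_llama2.utils.stream_chat_patch import streaming_chat  # assumed import path

model_path = "path/to/Colossal-LLaMA-2-base"  # placeholder checkpoint path
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")  # left padding, as the docstring requires

history = []
# streaming_chat is a generator: each step yields the partially decoded response and the
# updated history (plus past key values when return_past_key_values=True).
for response, history in streaming_chat(model, tokenizer, "What is Colossal-AI?", history=history, max_new_tokens=256):
    print(response, end="\r")  # stream the growing partial answer
print()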

applications/Colossal-LLaMA-2/docs/example_13b.md

Lines changed: 3 additions & 3 deletions
@@ -43,7 +43,7 @@ if __name__ == '__main__':
     model.to(device)
 
     tokenizer = AutoTokenizer.from_pretrained("hpcai-tech/Colossal-LLaMA-2-13b-base")
-
+
     question = "xxx" # Your question.
     prompt = (f"请对下面的问题,给出准确、可靠、安全的回答\n"
               f"{question}"
@@ -111,7 +111,7 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
 | 整车热管理主要包括什么内容 | 整车热管理主要包括:1、发动机冷却系统;2、散热系统;3、空调冷却系统;4、采暖系统;5、动力转向器冷却系统;6、润滑油冷却系统;7、变速箱冷却系统;8、燃油冷却系统;9、冷却系统排放液系统。 |
 | 简述下汽车空调-热泵循环系统的原理 | 汽车空调系统主要有三种形式,其中热泵循环系统是其中的一种。其原理是利用发动机排放的热量,通过压缩机将冷媒压缩成高温高压的状态,然后通过冷却器释放到空气中,吸收周围的热量,再通过膨胀阀释放到蒸发器中,蒸发器在压缩机的作用下将冷媒压缩成高温高压的状态,再次释放到空气中,从而实现制冷的效果。 |
 
-#### Finance
+#### Finance
 | Question | <center>Colossal-LLaMA-2-13b-base</center> |
 | :------: | :----------------------- |
 | 如何看待当前提前还贷热 | 提前还贷是一种常见的还贷方式,在市场中存在着许多的因素和影响,需要具体分析当前的经济形势和人们心理状况的影响。提前还贷可以帮助借款人早日摆脱债务负担,但也存在一些风险和不确定性。从经济形势来看,当前利率下探、还款压力减轻等因素可能会导致一部分借款人提前还贷。此外,一些购房者可能会受到市场不景气的影响,担心未来房产价值会下降,因此提前还贷以避免资产贬值。从借款人心理状况来看,提前还贷也可能会由于借款人对未来经济预期的不确定性而产生压力和不满。因此,提前还贷并不能一概而论,需要根据具体情况进行分析。如果提前还贷对借款人的经济情况和未来规划造成不利影响,建议谨慎考虑是否要提前还贷。|
@@ -150,4 +150,4 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
 
 
 ## Conclusion
-The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.
+The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.

applications/Colossal-LLaMA-2/docs/example_7b.md

Lines changed: 1 addition & 1 deletion
@@ -242,4 +242,4 @@ To comprehensively assess the performance of the Colossal-LLaMA-2-7B-base model,
 ## Conclusion
 In general, the Colossal-LLaMA-2-7B-base model not only enhances its understanding of English but also exhibits significant improvements in its comprehension of Chinese. It boasts a broad spectrum of general knowledge, encompassing various fields such as food, sports, technology, literature, games, and more. Regarding text generation tasks, the Colossal-LLaMA-2-7B-base model excels in writing performance; however, its ability to generate specific formats like code, emails, tables, etc., needs enhancement due to the scarcity of relevant training data during our training phase. When compared to the Qwen-7b-base model, the Colossal-LLaMA-2-7B-base model outperforms it in answering most English questions and some Chinese questions, as demonstrated in the examples above.
 
-Presently, the Colossal-LLaMA-2-7B-base model already exhibits some capabilities in sentiment analysis, logical reasoning, information extraction, role-play, classification, and rewriting. These capabilities are poised for further improvement in the future as part of our ongoing enhancements.
+Presently, the Colossal-LLaMA-2-7B-base model already exhibits some capabilities in sentiment analysis, logical reasoning, information extraction, role-play, classification, and rewriting. These capabilities are poised for further improvement in the future as part of our ongoing enhancements.
Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 hostname1
-hostname2
+hostname2

applications/Colossal-LLaMA-2/inference_example.py

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
     model.to(device)
 
     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side='left')
+        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
     except OSError:
         raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")
 
applications/Colossal-LLaMA-2/requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -12,4 +12,3 @@ flash-attn>=2.0.0,<=2.0.5
 tqdm
 sentencepiece==0.1.99
 protobuf<=3.20.0
-