[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Apr 8, 2024
1 parent 3194d11 commit 67e4754
Showing 96 changed files with 1,208 additions and 1,131 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build_on_pr.yml
@@ -201,4 +201,4 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: report
path: report/
path: report/
2 changes: 1 addition & 1 deletion LICENSE
@@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
THE SOFTWARE.
6 changes: 4 additions & 2 deletions applications/Chat/benchmarks/benchmark_opt_lora_dummy.py
@@ -76,9 +76,11 @@ def main(args):
if args.strategy == "ddp":
strategy = DDPStrategy()
elif args.strategy == "colossalai_gemini":
strategy = GeminiStrategy(placement_policy="static",initial_scale=2**5)
strategy = GeminiStrategy(placement_policy="static", initial_scale=2**5)
elif args.strategy == "colossalai_gemini_cpu":
strategy = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
strategy = GeminiStrategy(
placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
)
elif args.strategy == "colossalai_zero2":
strategy = LowLevelZeroStrategy(stage=2, placement_policy="cuda")
elif args.strategy == "colossalai_zero2_cpu":
16 changes: 13 additions & 3 deletions applications/Chat/coati/dataset/sft_dataset.py
@@ -51,11 +51,21 @@ def _preprocess(
"""Preprocess the data by tokenizing."""
sequences = [s + t + tokenizer.eos_token for s, t in zip(sources, targets)]
sequences_token = tokenizer(
sequences, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt", add_special_tokens=False
sequences,
max_length=max_length,
padding="max_length",
truncation=True,
return_tensors="pt",
add_special_tokens=False,
)

sources_token = tokenizer(
sources, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt", add_special_tokens=False
sources,
max_length=max_length,
padding="max_length",
truncation=True,
return_tensors="pt",
add_special_tokens=False,
)

assert sequences_token["attention_mask"].dim() == 2, "seq2seq model should be preprocessed differently"
@@ -66,7 +76,7 @@ def _preprocess(
if tokenizer.padding_side == "right":
# |prompt|completion|eos|pad|
labels[i][:source_len] = IGNORE_INDEX
if pad_len>0:
if pad_len > 0:
labels[i][-pad_len:] = IGNORE_INDEX
elif tokenizer.padding_side == "left":
# |pad|prompt|completion|eos|
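
Aside from the spacing fix, this hunk touches a standard SFT label-masking pattern: prompt tokens and padding positions are set to an ignore value so the loss only covers the completion. The sketch below is a toy illustration of that pattern with invented lengths and an assumed IGNORE_INDEX of -100 (the default ignore_index of torch.nn.CrossEntropyLoss); it is not code from this repository.

```python
import torch

IGNORE_INDEX = -100  # assumed value; the default ignore_index of torch.nn.CrossEntropyLoss

# Toy right-padded example: |prompt|completion|eos|pad| (all lengths are made up).
max_length = 10
source_len = 4                       # prompt tokens
seq_len = 7                          # prompt + completion + eos
pad_len = max_length - seq_len

input_ids = torch.arange(1, max_length + 1)   # stand-in token ids
labels = input_ids.clone()
labels[:source_len] = IGNORE_INDEX            # mask the prompt
if pad_len > 0:
    labels[-pad_len:] = IGNORE_INDEX          # mask the padding
print(labels)  # -> [-100, -100, -100, -100, 5, 6, 7, -100, -100, -100]
```
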
1 change: 0 additions & 1 deletion applications/Chat/coati/models/base/actor.py
@@ -30,4 +30,3 @@ def forward(
"""Returns model output."""
output = self.model(input_ids, attention_mask=attention_mask, **model_kwargs)
return output

4 changes: 3 additions & 1 deletion applications/Chat/coati/ray/utils.py
@@ -75,7 +75,9 @@ def get_strategy_from_args(strategy: str):
elif strategy == "colossalai_zero2":
strategy_ = LowLevelZeroStrategy(stage=2, placement_policy="cuda")
elif strategy == "colossalai_gemini_cpu":
strategy_ = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
strategy_ = GeminiStrategy(
placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
)
elif strategy == "colossalai_zero2_cpu":
strategy_ = LowLevelZeroStrategy(stage=2, placement_policy="cpu")
else:
3 changes: 2 additions & 1 deletion applications/Chat/coati/trainer/strategies/ddp.py
@@ -101,16 +101,17 @@ def save_pretrained(

model_path = os.path.join(path, "pytorch_model.bin")
self.save_model(model, model_path, shard=shard)

def _replace_keys(model_path: str, replace_fn: Callable):
state_dict = torch.load(model_path, map_location="cpu")
state_dict = {replace_fn(k): v for k, v in state_dict.items()}
torch.save(state_dict, model_path)

# FIXME: save_model would add "model." prefix to keys of pytorch_model.bin
# HACK: rename keys of pytorch_model.bin
if dist.get_rank() == 0:
_replace_keys(model_path, lambda k: k.replace("model.", "", 1))


def get_model_state_dict_shard(self, model: nn.Module, **config):
# TODO: implement sharding on naive strategy
model = self.unwrap_model(model)
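
The FIXME/HACK above strips a leading "model." prefix from every key of pytorch_model.bin. A minimal sketch of what that renaming does, using invented key names rather than a real checkpoint:

```python
# Illustration only; the key names and values below are invented for the example.
state_dict = {
    "model.embed_tokens.weight": "tensor-0",
    "model.layers.0.self_attn.q_proj.weight": "tensor-1",
}
renamed = {k.replace("model.", "", 1): v for k, v in state_dict.items()}
print(list(renamed))
# ['embed_tokens.weight', 'layers.0.self_attn.q_proj.weight']
```
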
@@ -24,7 +24,9 @@ def main(args):
if args.strategy == "ddp":
strategy = DDPStrategy()
elif args.strategy == "colossalai_gemini":
strategy = GeminiStrategy(placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5)
strategy = GeminiStrategy(
placement_policy="static", offload_optim_frac=1.0, offload_param_frac=1.0, initial_scale=2**5
)
elif args.strategy == "colossalai_zero2":
strategy = LowLevelZeroStrategy(stage=2, placement_policy="cpu")
else:
2 changes: 1 addition & 1 deletion applications/Chat/examples/train_sft.sh
@@ -25,4 +25,4 @@ torchrun --standalone --nproc_per_node=4 train_sft.py \
--accumulation_steps 8 \
--lr 2e-5 \
--max_datasets_size 512 \
--max_epochs 1
--max_epochs 1
@@ -8,11 +8,10 @@

import numpy as np
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM
from transformers import LlamaForCausalLM, LlamaTokenizer

from colossalai.logging import get_dist_logger


logger = get_dist_logger()


@@ -10,8 +10,8 @@
from typing import Any, Dict, Tuple, Union

import torch
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.optimizer import Optimizer

from colossalai.booster import Booster
from colossalai.cluster import DistCoordinator
@@ -1,20 +1,19 @@
from copy import deepcopy
from typing import Optional, List, Dict, Tuple, Callable, Any
from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
from torch import nn

from transformers import PreTrainedTokenizer
from transformers.utils import logging
from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList

from transformers.utils import logging

logger = logging.get_logger(__name__)


def get_prompt_template(
input_query:str,
history:List[Dict]= None,
roles:list = ["", "Human", "Assistant"],
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
) -> str:
"""
Generates a prompt template for chat models based on input and history.
@@ -32,7 +31,7 @@ def get_prompt_template(
new_history = []
else:
new_history = deepcopy(history)

new_history.append({"role": roles[1], "message": input_query.strip()})
new_history.append({"role": roles[2], "message": None})

@@ -48,22 +47,23 @@ def get_prompt_template(
prompt += f"{role}: <s>"
return prompt


@torch.inference_mode()
def streaming_chat(
model: Any,
model: Any,
tokenizer: PreTrainedTokenizer,
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
temperature: float = 0.8,
top_p: float = 0.95,
top_k: int = 50,
do_sample: bool = True,
input_query: str,
history: List[Dict] = None,
roles: list = ["", "Human", "Assistant"],
past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
temperature: float = 0.8,
top_p: float = 0.95,
top_k: int = 50,
do_sample: bool = True,
length_penalty: float = 1.2,
max_new_tokens: int = 512,
logits_processor: LogitsProcessorList = None,
return_past_key_values: bool = False,
max_new_tokens: int = 512,
logits_processor: LogitsProcessorList = None,
return_past_key_values: bool = False,
**kwargs,
):
"""
@@ -87,7 +87,7 @@ def streaming_chat(
**kwargs: Additional keyword arguments for generation.
Yields:
Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
optionally the updated past key values if `return_past_key_values` is True.
Ensures padding is on the left side for the tokenizer.
@@ -97,63 +97,68 @@ def streaming_chat(
history = []
if logits_processor is None:
logits_processor = LogitsProcessorList()

generation_kwargs = {
'temperature': temperature,
'top_p': top_p,
'top_k': top_k,
'do_sample': do_sample,
'max_new_tokens': max_new_tokens,
'length_penalty': length_penalty,
'use_cache': True,
**kwargs
"temperature": temperature,
"top_p": top_p,
"top_k": top_k,
"do_sample": do_sample,
"max_new_tokens": max_new_tokens,
"length_penalty": length_penalty,
"use_cache": True,
**kwargs,
}

prompt_str = get_prompt_template(input_query, history=history, roles=roles)

eos_token_id = [tokenizer.eos_token_id]
inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
history.append({"role": roles[1], "message": input_query.strip()})
history.append({"role": roles[2], "message": None})

for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
**generation_kwargs):
for outputs in stream_generate(
model,
**inputs,
past_key_values=past_key_values,
eos_token_id=eos_token_id,
return_past_key_values=return_past_key_values,
**generation_kwargs,
):
if return_past_key_values:
outputs, past_key_values = outputs

outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
response = tokenizer.decode(outputs)

history[-1]["message"] = response.strip()
if return_past_key_values:
yield response, history, past_key_values
else:
yield response, history


@torch.inference_mode()
def stream_generate(
model: Any,
input_ids: torch.Tensor,
model: Any,
input_ids: torch.Tensor,
generation_config: Optional[GenerationConfig] = None,
logits_processor: Optional[LogitsProcessorList] = None,
stopping_criteria: Optional[StoppingCriteriaList] = None,
prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
return_past_key_values: bool = False,
return_past_key_values: bool = False,
**kwargs,
):
"""
Generates sequences of token ids using the specified model and generation parameters.
Adapted from https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py
Args:
model (Any): The model used for generating sequences of token ids.
input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
generation_config (Optional[GenerationConfig]): The generation configuration to be used as base parametrization for the generation call.
logits_processor (Optional[LogitsProcessorList]): Custom logits processors that complement the default logits processors built from arguments
and generation config.
stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
and a generation config.
prefix_allowed_tokens_fn (Optional[Callable[[int, torch.Tensor], List[int]]]): Function to constrain token generation.
return_past_key_values (bool): Whether to return past key values for further incremental decoding, defaults to False.
@@ -169,33 +174,33 @@ def stream_generate(
generation_config = model.generation_config
generation_config = deepcopy(generation_config)
model_kwargs = generation_config.update(**kwargs)

eos_token_id = generation_config.eos_token_id
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
eos_token_id_tensor = torch.tensor(eos_token_id).to(input_ids.device) if eos_token_id is not None else None

if generation_config.max_new_tokens is not None:
generation_config.max_length = generation_config.max_new_tokens + input_ids_len

if input_ids_len >= generation_config.max_length:
input_ids_string = "decoder_input_ids" if model.config.is_encoder_decoder else "input_ids"
logger.warning(
f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

# prepare distribution pre_processing samplers
logits_processor = model._get_logits_processor(
generation_config=generation_config,
input_ids_seq_length=input_ids_len,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)
generation_config=generation_config,
input_ids_seq_length=input_ids_len,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)

# prepare stopping criteria
stopping_criteria = model._get_stopping_criteria(
@@ -205,7 +210,7 @@ def stream_generate(
logits_warper = model._get_logits_warper(generation_config)
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
scores = None

while True:
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
# forward pass to get next token
@@ -244,4 +249,4 @@ def stream_generate(
yield input_ids
# stop when each sentence is finished, or if exceed the maximum length
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
break
break
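
This hunk only reformats the internals, so a usage sketch may be easier to follow: per the signature and docstring above, streaming_chat is a generator that yields (response, history) pairs, or additionally past_key_values when return_past_key_values=True. The snippet below is an assumption built from that signature; the import path is a placeholder (the module's file name is not shown in this view), and the checkpoint name is borrowed from the example_13b.md diff further down.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from stream_chat_utils import streaming_chat  # placeholder import path, not the real module name

model_name = "hpcai-tech/Colossal-LLaMA-2-13b-base"  # checkpoint name taken from example_13b.md below
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "left"  # the docstring above requires left-side padding
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).cuda()

history = None
for response, history in streaming_chat(model, tokenizer, "What is Colossal-AI?", history=history):
    print(response, end="\r")  # each yield is the full response decoded so far
print()
```
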
6 changes: 3 additions & 3 deletions applications/Colossal-LLaMA-2/docs/example_13b.md
@@ -43,7 +43,7 @@ if __name__ == '__main__':
model.to(device)

tokenizer = AutoTokenizer.from_pretrained("hpcai-tech/Colossal-LLaMA-2-13b-base")

question = "xxx" # Your question.
prompt = (f"请对下面的问题,给出准确、可靠、安全的回答\n"
f"{question}"
@@ -111,7 +111,7 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
| 整车热管理主要包括什么内容 | 整车热管理主要包括:1、发动机冷却系统;2、散热系统;3、空调冷却系统;4、采暖系统;5、动力转向器冷却系统;6、润滑油冷却系统;7、变速箱冷却系统;8、燃油冷却系统;9、冷却系统排放液系统。 |
| 简述下汽车空调-热泵循环系统的原理 | 汽车空调系统主要有三种形式,其中热泵循环系统是其中的一种。其原理是利用发动机排放的热量,通过压缩机将冷媒压缩成高温高压的状态,然后通过冷却器释放到空气中,吸收周围的热量,再通过膨胀阀释放到蒸发器中,蒸发器在压缩机的作用下将冷媒压缩成高温高压的状态,再次释放到空气中,从而实现制冷的效果。 |

#### Finance
#### Finance
| Question | <center>Colossal-LLaMA-2-13b-base</center> |
| :------: | :----------------------- |
| 如何看待当前提前还贷热 | 提前还贷是一种常见的还贷方式,在市场中存在着许多的因素和影响,需要具体分析当前的经济形势和人们心理状况的影响。提前还贷可以帮助借款人早日摆脱债务负担,但也存在一些风险和不确定性。从经济形势来看,当前利率下探、还款压力减轻等因素可能会导致一部分借款人提前还贷。此外,一些购房者可能会受到市场不景气的影响,担心未来房产价值会下降,因此提前还贷以避免资产贬值。从借款人心理状况来看,提前还贷也可能会由于借款人对未来经济预期的不确定性而产生压力和不满。因此,提前还贷并不能一概而论,需要根据具体情况进行分析。如果提前还贷对借款人的经济情况和未来规划造成不利影响,建议谨慎考虑是否要提前还贷。|
@@ -150,4 +150,4 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value


## Conclusion
The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.
The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.

