Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
wangbluo committed Aug 17, 2024
2 parents 0a51319 + 26493b9 commit 4cf79fa
Show file tree
Hide file tree
Showing 88 changed files with 2,111 additions and 416 deletions.
1 change: 1 addition & 0 deletions .compatibility
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
2.1.0-12.1.0
2.2.2-12.1.0
2.3.0-12.1.0
2.4.0-12.4.1
4 changes: 2 additions & 2 deletions .cuda_ext.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"cuda_image": "hpcaitech/cuda-conda:12.1"
},
{
"torch_command": "pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118",
"cuda_image": "hpcaitech/cuda-conda:11.8"
"torch_command": "pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124",
"cuda_image": "hpcaitech/cuda-conda:12.4"
}
]
}
2 changes: 1 addition & 1 deletion .github/workflows/build_on_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ jobs:
- name: Install Colossal-AI
run: |
BUILD_EXT=1 pip install -v -e .
pip install -r requirements/requirements-test.txt
pip install --no-cache-dir -r requirements/requirements-test.txt
- name: Store Colossal-AI Cache
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build_on_schedule.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:
[ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ] && cp -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
BUILD_EXT=1 pip install -v -e .
cp -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/
pip install -r requirements/requirements-test.txt
pip install --no-cache-dir -r requirements/requirements-test.txt
- name: Unit Testing
if: steps.check-avai.outputs.avai == 'true'
Expand Down
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ repos:
hooks:
- id: isort
name: sort all imports (python)
args: ["--profile", "black"] # avoid conflict with black

- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.4.2
rev: 24.8.0
hooks:
- id: black
name: black formatter
Expand Down
1 change: 1 addition & 0 deletions applications/ColossalChat/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ examples/training_scripts/wandb
examples/training_scripts/output

examples/awesome-chatgpt-prompts/
examples/inference/round.txt
temp/

# ColossalChat
Expand Down
2 changes: 1 addition & 1 deletion applications/ColossalChat/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ cd $COLOSSAL_AI_ROOT
BUILD_EXT=1 pip install .

# Install ColossalChat
cd $COLOSSAL_AI_ROOT/applications/Chat
cd $COLOSSAL_AI_ROOT/applications/ColossalChat
pip install .
```

Expand Down
23 changes: 19 additions & 4 deletions applications/ColossalChat/coati/dataset/tokenization_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def tokenize_sft(

messages = data_point["messages"]
template = deepcopy(conversation_template)

if messages[0]["from"] == "system":
template.system_message = str(messages[0]["content"])
messages.pop(0)
template.messages = []
for idx, mess in enumerate(messages):
if mess["from"] != template.roles[idx % 2]:
Expand Down Expand Up @@ -148,11 +152,14 @@ def tokenize_prompt(
template = deepcopy(conversation_template)
template.messages = []

if messages[0]["from"] == "system":
template.system_message = str(messages[0]["content"])
messages.pop(0)

for idx, mess in enumerate(messages):
if mess["from"] != template.roles[idx % 2]:
raise ValueError(
f"Message should iterate between user and assistant and starts with a \
line from the user. Got the following data:\n{messages}"
f"Message should iterate between user and assistant and starts with a line from the user. Got the following data:\n{messages}"
)
template.append_message(mess["from"], mess["content"])

Expand All @@ -162,7 +169,7 @@ def tokenize_prompt(
template.messages = template.messages[:-1]

# Prepare data
prompt = template.get_prompt(length=len(template.messages) - 1, add_generation_prompt=True)
prompt = template.get_prompt(length=len(template.messages), add_generation_prompt=True)
tokenized = tokenizer([prompt], add_special_tokens=False)["input_ids"][0]

if tokenizer.bos_token_id is not None:
Expand Down Expand Up @@ -225,6 +232,10 @@ def tokenize_rlhf(
template = deepcopy(conversation_template)
template.clear()

if context[0]["from"] == "system":
template.system_message = str(context[0]["content"])
context.pop(0)

for idx, mess in enumerate(context):
if mess["from"] != template.roles[idx % 2]:
raise ValueError(
Expand Down Expand Up @@ -345,6 +356,10 @@ def tokenize_kto(
template = deepcopy(conversation_template)
template.clear()

if prompt[0]["from"] == "system":
template.system_message = str(prompt[0]["content"])
prompt.pop(0)

if prompt[0].get("from", None) != "user":
raise ValueError("conversation should start with user")
if completion.get("from", None) != "assistant":
Expand Down Expand Up @@ -377,4 +392,4 @@ def tokenize_kto(
"label": data_point["label"],
"input_id_decode": decoded_full_prompt,
"completion_decode": decoded_completion,
}
}
16 changes: 12 additions & 4 deletions applications/ColossalChat/coati/models/loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ def forward(
action_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
skip = False
ratio_ = ((log_probs - old_log_probs) * action_mask).exp()
if action_mask is None:
ratio_ = (log_probs - old_log_probs).exp()
else:
ratio_ = ((log_probs - old_log_probs) * action_mask).exp()

# note that if dropout is disabled (recommanded), ratio will always be 1.
if ratio_.mean() > self.skip_threshold:
Expand All @@ -56,7 +59,10 @@ def forward(
surr1 = ratio * advantages
surr2 = ratio.clamp(1 - self.clip_eps, 1 + self.clip_eps) * advantages
loss = -torch.min(surr1, surr2)
loss = masked_mean(loss, action_mask)
if action_mask is not None:
loss = masked_mean(loss, action_mask)
else:
loss = loss.mean(dim=1)
loss = loss.mean()
return loss, skip, ratio_.max()

Expand All @@ -81,8 +87,10 @@ def forward(
values_clipped = old_values + (values - old_values).clamp(-self.clip_eps, self.clip_eps)
surr1 = (values_clipped - returns) ** 2
surr2 = (values - returns) ** 2
loss = torch.max(surr1, surr2) / torch.sum(action_mask)
loss = torch.sum(loss * action_mask)
if action_mask is not None:
loss = torch.sum(torch.max(surr1, surr2) / torch.sum(action_mask) * action_mask)
else:
loss = torch.mean(torch.max(surr1, surr2))
return 0.5 * loss


Expand Down
7 changes: 4 additions & 3 deletions applications/ColossalChat/coati/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def disable_dropout(model: torch.nn.Module):
Returns:
None
"""
for module in model.modules():
if isinstance(module, torch.nn.Dropout):
module.p = 0.0
if model is not None:
for module in model.modules():
if isinstance(module, torch.nn.Dropout):
module.p = 0.0
11 changes: 10 additions & 1 deletion applications/ColossalChat/coati/trainer/dpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def __init__(
beta: float = 0.1,
gamma: float = 0.0,
length_normalization: bool = False,
apply_loss_mask: bool = True,
accumulation_steps: int = 1,
start_epoch: int = 0,
save_interval: int = 0,
Expand All @@ -67,6 +68,7 @@ def __init__(
self.actor_scheduler = actor_lr_scheduler
self.tokenizer = tokenizer
self.actor_loss_fn = DpoLoss(beta, gamma)
self.apply_loss_mask = apply_loss_mask
self.save_interval = save_interval
self.coordinator = coordinator
self.save_dir = save_dir
Expand Down Expand Up @@ -135,6 +137,10 @@ def _train(self, epoch: int):
batch["reject_attention_mask"],
batch["reject_loss_mask"],
)
if not self.apply_loss_mask:
chosen_loss_mask = chosen_loss_mask.fill_(1.0)
reject_loss_mask = reject_loss_mask.fill_(1.0)

batch_size = chosen_input_ids.size()[0]

actor_all_logits = self.model(
Expand Down Expand Up @@ -284,6 +290,9 @@ def _eval(self, epoch: int):
batch["reject_attention_mask"],
batch["reject_loss_mask"],
)
if not self.apply_loss_mask:
chosen_loss_mask = chosen_loss_mask.fill_(1.0)
reject_loss_mask = reject_loss_mask.fill_(1.0)

batch_size = chosen_input_ids.size()[0]

Expand Down Expand Up @@ -347,4 +356,4 @@ def _eval(self, epoch: int):
os.makedirs(self.save_dir, exist_ok=True)
with open(os.path.join(self.save_dir, f"eval_result_epoch{epoch}.txt"), "w") as f:
f.write(msg)
step_bar.close()
step_bar.close()
39 changes: 35 additions & 4 deletions applications/ColossalChat/coati/trainer/kto.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Optional

import torch
import torch.distributed
import torch.distributed as dist
from coati.models.loss import KTOLoss
from coati.models.utils import calc_masked_log_probs
from coati.trainer.utils import all_reduce_mean
Expand Down Expand Up @@ -59,6 +59,7 @@ def __init__(
beta: float = 0.1,
desirable_weight: float = 1.0,
undesirable_weight: float = 1.0,
apply_loss_mask: bool = True,
accumulation_steps: int = 1,
start_epoch: int = 0,
save_interval: int = 0,
Expand All @@ -70,6 +71,7 @@ def __init__(
self.actor_scheduler = actor_lr_scheduler
self.tokenizer = tokenizer
self.kto_loss = KTOLoss(beta=beta, desirable_weight=desirable_weight, undesirable_weight=undesirable_weight)
self.apply_loss_mask = apply_loss_mask
self.save_interval = save_interval
self.coordinator = coordinator
self.save_dir = save_dir
Expand Down Expand Up @@ -134,6 +136,10 @@ def _train(self, epoch: int):
batch["kl_attention_mask"],
batch["kl_loss_mask"],
)
if not self.apply_loss_mask:
loss_mask = loss_mask.fill_(1.0)
kl_loss_mask = kl_loss_mask.fill_(1.0)

batch_size = input_ids.size()[0]

# actor logits
Expand Down Expand Up @@ -182,8 +188,28 @@ def _train(self, epoch: int):

# sync
loss_mean = all_reduce_mean(tensor=loss)
chosen_rewards_mean = all_reduce_mean(tensor=chosen_rewards.mean())
rejected_rewards_mean = all_reduce_mean(tensor=rejected_rewards.mean())
chosen_reward_mean = chosen_rewards.mean()
chosen_rewards_list = [
torch.tensor(0, dtype=loss.dtype, device=loss.device) for _ in range(dist.get_world_size())
]
dist.all_gather(chosen_rewards_list, chosen_reward_mean)
rejected_reward_mean = rejected_rewards.mean()
rejected_rewards_list = [
torch.tensor(0, dtype=loss.dtype, device=loss.device) for _ in range(dist.get_world_size())
]
dist.all_gather(rejected_rewards_list, rejected_reward_mean)
chosen_rewards_list = [i for i in chosen_rewards_list if not i.isnan()]
rejected_rewards_list = [i for i in rejected_rewards_list if not i.isnan()]
chosen_rewards_mean = (
torch.stack(chosen_rewards_list).mean()
if len(chosen_rewards_list) > 0
else torch.tensor(torch.nan, dtype=loss.dtype, device=loss.device)
)
rejected_rewards_mean = (
torch.stack(rejected_rewards_list).mean()
if len(rejected_rewards_list) > 0
else torch.tensor(torch.nan, dtype=loss.dtype, device=loss.device)
)
self.accumulative_meter.add("chosen_rewards", chosen_rewards_mean.to(torch.float16).mean().item())
self.accumulative_meter.add("rejected_rewards", rejected_rewards_mean.to(torch.float16).mean().item())
self.accumulative_meter.add("loss", loss_mean.to(torch.float16).detach().item())
Expand Down Expand Up @@ -256,6 +282,11 @@ def _eval(self, epoch: int):
batch["kl_attention_mask"],
batch["kl_loss_mask"],
)

if not self.apply_loss_mask:
loss_mask = loss_mask.fill_(1.0)
kl_loss_mask = kl_loss_mask.fill_(1.0)

batch_size = input_ids.size()[0]

# actor logits
Expand Down Expand Up @@ -315,4 +346,4 @@ def _eval(self, epoch: int):
os.makedirs(self.save_dir, exist_ok=True)
with open(os.path.join(self.save_dir, f"eval_result_epoch{epoch}.txt"), "w") as f:
f.write(msg)
step_bar.close()
step_bar.close()
14 changes: 13 additions & 1 deletion applications/ColossalChat/coati/trainer/orpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(
tokenizer: PreTrainedTokenizerBase,
max_epochs: int = 1,
lam: float = 0.1,
apply_loss_mask: bool = True,
accumulation_steps: int = 1,
start_epoch: int = 0,
save_interval: int = 0,
Expand All @@ -67,6 +68,7 @@ def __init__(
self.save_dir = save_dir
self.num_train_step = 0
self.lam = lam
self.apply_loss_mask = apply_loss_mask
self.accumulation_steps = accumulation_steps
self.device = get_current_device()
self.accumulative_meter = AccumulativeMeanMeter()
Expand Down Expand Up @@ -130,6 +132,11 @@ def _train(self, epoch: int):
batch["reject_attention_mask"],
batch["reject_loss_mask"],
)

if not self.apply_loss_mask:
chosen_loss_mask = chosen_loss_mask.fill_(1.0)
reject_loss_mask = reject_loss_mask.fill_(1.0)

batch_size = chosen_input_ids.size()[0]
actor_out = self.model(
input_ids=torch.cat([chosen_input_ids, reject_input_ids]),
Expand Down Expand Up @@ -263,6 +270,11 @@ def _eval(self, epoch: int):
batch["reject_attention_mask"],
batch["reject_loss_mask"],
)

if not self.apply_loss_mask:
chosen_loss_mask = chosen_loss_mask.fill_(1.0)
reject_loss_mask = reject_loss_mask.fill_(1.0)

batch_size = chosen_input_ids.size()[0]
actor_out = self.model(
input_ids=torch.cat([chosen_input_ids, reject_input_ids]),
Expand Down Expand Up @@ -311,4 +323,4 @@ def _eval(self, epoch: int):
os.makedirs(self.save_dir, exist_ok=True)
with open(os.path.join(self.save_dir, f"eval_result_epoch{epoch}.txt"), "w") as f:
f.write(msg)
step_bar.close()
step_bar.close()
12 changes: 10 additions & 2 deletions applications/ColossalChat/coati/trainer/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def __init__(
sample_buffer: bool = False,
dataloader_pin_memory: bool = True,
offload_inference_models: bool = True,
apply_loss_mask: bool = True,
accumulation_steps: int = 1,
save_interval: int = 0,
save_dir: str = None,
Expand Down Expand Up @@ -140,6 +141,7 @@ def __init__(
self.actor_optim = actor_optim
self.critic_optim = critic_optim
self.save_interval = save_interval
self.apply_loss_mask = apply_loss_mask
self.coordinator = coordinator
self.actor_save_dir = os.path.join(save_dir, "actor")
self.critic_save_dir = os.path.join(save_dir, "critic")
Expand Down Expand Up @@ -229,7 +231,10 @@ def _training_step(self, experience: Experience):
action_log_probs = calc_action_log_probs(actor_logits, experience.sequences, num_actions)

actor_loss, to_skip, max_ratio = self.actor_loss_fn(
action_log_probs, experience.action_log_probs, experience.advantages, action_mask=experience.action_mask
action_log_probs,
experience.action_log_probs,
experience.advantages,
action_mask=experience.action_mask if self.apply_loss_mask else None,
)
actor_loss = (1 - self.ptx_coef) * actor_loss
if not to_skip:
Expand All @@ -249,7 +254,10 @@ def _training_step(self, experience: Experience):
input_ids=experience.sequences, attention_mask=experience.attention_mask
) # [batch size, prompt_length + response_length]
critic_loss = self.critic_loss_fn(
values[:, -num_actions:], experience.values, experience.advantages, action_mask=experience.action_mask
values[:, -num_actions:],
experience.values,
experience.advantages,
action_mask=experience.action_mask if self.apply_loss_mask else None,
)
critic_loss = critic_loss * self.vf_coef
self.critic_booster.backward(loss=critic_loss, optimizer=self.critic_optim)
Expand Down
Loading

0 comments on commit 4cf79fa

Please sign in to comment.