Commit
Merge branch 'intel' of github.com:mila-iqia/milabench into intel
pierre.delaunay committed Jun 11, 2024
2 parents 73c465e + baa6757 commit a4271d2
Showing 86 changed files with 6,532 additions and 1,895 deletions.
471 changes: 357 additions & 114 deletions .pin/constraints-cuda-torch.txt (large diff not rendered by default)
621 changes: 621 additions & 0 deletions .pin/constraints-hpu-torch.txt (large diff not rendered by default)
450 changes: 334 additions & 116 deletions .pin/constraints-rocm-torch.txt (large diff not rendered by default)
336 changes: 285 additions & 51 deletions .pin/constraints-xpu-torch.txt (large diff not rendered by default)

16 changes: 8 additions & 8 deletions benchmarks/accelerate_opt/main.py
@@ -86,6 +86,7 @@ def arguments():
get_scheduler,
)
from benchmate.observer import BenchObserver
from benchmate.monitor import milabench_sys_monitor

logger = get_logger(__name__)

@@ -124,7 +125,6 @@ class CustomInitProcessGroupKwargs(InitProcessGroupKwargs):
rank=int(os.environ["RANK"]),
world_size=int(os.environ["WORLD_SIZE"]),
)
print(init_process_group_kwargs.backend)

# Accelerator SUCK, it is impossible to make it use hccl
# We can bypass Accelerator logic by initializing the group ourselves
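The comment above describes the workaround used here: create the distributed process group yourself before constructing the Accelerator, so it attaches to the existing group rather than selecting its own backend. A minimal sketch of that idea, assuming RANK/WORLD_SIZE (plus MASTER_ADDR/MASTER_PORT) are set in the environment and an hccl backend is available; the names and backend string are illustrative, not the commit's exact code:

import os
import torch.distributed as dist
from accelerate import Accelerator

# Initialize the group ourselves so Accelerator does not choose the backend for us.
if not dist.is_initialized():
    dist.init_process_group(
        backend="hccl",  # assumed here; use "nccl" or "gloo" on other hardware
        rank=int(os.environ["RANK"]),
        world_size=int(os.environ["WORLD_SIZE"]),
    )

accelerator = Accelerator()  # reuses the already-initialized process group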
@@ -143,8 +143,8 @@ class CustomInitProcessGroupKwargs(InitProcessGroupKwargs):
# Set up logging for milabench (only in the run phase, for the main process)
monitor = None
if not is_prepare_phase and accelerator.is_main_process:
from benchmate.common import opt_voir
monitor = opt_voir()
# Set up logging for milabench (only in the run phase, for the main process)
milabench_sys_monitor()

logging.basicConfig(
level=logging.INFO,
@@ -170,13 +170,13 @@ class CustomInitProcessGroupKwargs(InitProcessGroupKwargs):
raw_datasets["validation"] = load_dataset(
dataset_name,
dataset_config_name,
split=f"train[:{validation_split_percentage}%]",
split=f"train[:{validation_split_percentage}%]",
revision=config["dataset_rev"]
)
raw_datasets["train"] = load_dataset(
dataset_name,
dataset_config_name,
split=f"train[{validation_split_percentage}%:]",
split=f"train[{validation_split_percentage}%:]",
revision=config["dataset_rev"]
)

@@ -360,9 +360,9 @@ def group_texts(examples):
starting_epoch = 0

observer = BenchObserver(
event_fn=acc.Event,
earlystop=30,
rank=int(os.environ["RANK"]),
event_fn=acc.Event,
earlystop=30,
rank=int(os.environ["RANK"]),
device=acc.fetch_device(int(os.environ["RANK"])),
stdout=True,
batch_size_fn=lambda batch: batch["labels"].shape[0]
