Skip to content

Commit

Permalink
Merge branch 'stable' of github.com:mila-iqia/milabench into intel
Browse files Browse the repository at this point in the history
  • Loading branch information
pierre.delaunay committed Jun 11, 2024
2 parents a4271d2 + 77fa0bc commit c151b98
Show file tree
Hide file tree
Showing 6 changed files with 289 additions and 2 deletions.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,13 @@ scripts/article/xpu/

dependencies/
benchmarks/gflownet/gflownet

scripts/inventory.yaml
output/
sqlite.db
.ruff_cache/

test.out
output/
workspace/
.pin/tmp-*
Empty file added .no_report
Empty file.
26 changes: 24 additions & 2 deletions milabench/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,13 @@ def _make_row(summary, compare, weights):

# Sum of all the GPU performance
# to get the overall perf of the whole machine

if "per_gpu" in summary:
acc = 0
for _, metrics in summary["per_gpu"].items():
acc += metrics[metric]
else:
acc = row["perf"]

success_ratio = 1 - row["fail"] / row["n"]
score = (acc if acc > 0 else row["perf"]) * success_ratio

Expand Down Expand Up @@ -210,6 +209,29 @@ def make_dataframe(summary, compare=None, weights=None):
for key in all_keys
}
).transpose()

return df


@error_guard({})
def make_report(
    summary,
    compare=None,
    html=None,
    compare_gpus=False,
    price=None,
    title=None,
    sources=None,
    errdata=None,
    weights=None,
):
    """Build the report dataframe for ``summary`` and order its columns.

    The frame comes from ``make_dataframe(summary, compare, weights)``; a
    missing ``weights`` is replaced by an empty dict. Columns are then sorted
    by their rank in ``columns_order``, with unknown columns ranked 0.

    ``html``, ``compare_gpus``, ``price``, ``title``, ``sources`` and
    ``errdata`` are accepted but not read by the visible body.

    NOTE(review): the reordered frame is neither returned nor used in the
    visible body -- confirm whether the remainder of the function was lost
    in the merge.
    """
    weights = dict() if weights is None else weights

    frame = make_dataframe(summary, compare, weights)

    def _rank(column):
        # Columns without an explicit rank sort as 0.
        return columns_order.get(column, 0)

    # Reorder columns
    ordered_columns = sorted(frame.columns, key=_rank)
    df = frame[ordered_columns]


@error_guard({})
Expand Down
110 changes: 110 additions & 0 deletions scripts/instructions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/bin/bash

# Turn on job control: the image pulls started in the background later in
# this script are brought back with `fg`, which needs it.
set -m

printf '%s\n' ">> Configure the benchmark" "=========================="

#
# Tweak the values to fit your system
#

# Remote user for the ssh connections; falls back to "mila" when $USER is
# unset or empty.
USERNAME="${USER:-mila}"
# Private key mounted into the container for the cuda run.
SSH_KEY_FILE="$HOME/.ssh/id_rsa"
# Selects the docker invocation further down ("cuda" or "rocm").
ARCH=cuda
# WORKER_0 is written as the manager address, WORKER_1 as the worker address.
WORKER_0=cn-d003
WORKER_1=cn-d004

# Derived
VERSION=v0.0.8
IMAGE="ghcr.io/mila-iqia/milabench:${ARCH}-${VERSION}"


# Create the config file.
# Writes overrides.yaml in the current directory. The here-document delimiter
# is unquoted, so $IMAGE, $USERNAME, $WORKER_0 and $WORKER_1 are expanded at
# the moment the file is written. Both multinode benchmarks receive the same
# settings: WORKER_0 as manager address and WORKER_1 as the only worker.
# NOTE(review): the YAML below is shown without indentation, and YAML nesting
# depends on it -- confirm the indentation against the real file.
cat >overrides.yaml <<EOL
opt-6_7b-multinode:
docker_image: "$IMAGE"
worker_user: "$USERNAME"
manager_addr: "$WORKER_0"
worker_addrs:
- "$WORKER_1"
num_machines: 2
capabilities:
nodes: 2
opt-1_3b-multinode:
docker_image: "$IMAGE"
worker_user: "$USERNAME"
manager_addr: "$WORKER_0"
worker_addrs:
- "$WORKER_1"
num_machines: 2
capabilities:
nodes: 2
EOL

echo "<< ======================="
echo ""
echo ">> Prepare docker images"
echo "========================"

# Pull the image on both machines at the same time: each ssh command is sent
# to the background with `&`.
ssh $USERNAME@$WORKER_0 "docker pull $IMAGE"&
ssh $USERNAME@$WORKER_1 "docker pull $IMAGE"&
# Bring the background jobs back to the foreground one after the other, so
# the script does not go on until they have finished. `fg` needs job control,
# which this script enables with `set -m` near the top.
# NOTE(review): the exit status of the pulls is not checked -- confirm that a
# failed pull should not stop the run.
fg
fg

echo "<< ====================="
echo ""

#
#
#

printf '%s\n' ">> Run milabench" "================"

# Pick the docker invocation matching the GPU stack; any other ARCH value
# runs nothing. The contents of overrides.yaml are passed inline through
# --override, and ./results on the host receives the run output.
# NOTE(review): only the cuda branch mounts $SSH_KEY_FILE into the container
# -- confirm that the rocm branch does not need it as well.
case "$ARCH" in
    cuda)
        docker run -it --rm --gpus all --network host --ipc=host --privileged \
            -v $SSH_KEY_FILE:/milabench/id_milabench \
            -v $(pwd)/results:/milabench/envs/runs \
            $IMAGE \
            milabench run --override "$(cat overrides.yaml)"
        ;;
    rocm)
        docker run -it --rm --network host --ipc host --privileged \
            --security-opt seccomp=unconfined --group-add video \
            -v /opt/amdgpu/share/libdrm/amdgpu.ids:/opt/amdgpu/share/libdrm/amdgpu.ids \
            -v /opt/rocm:/opt/rocm \
            -v $(pwd)/results:/milabench/envs/runs \
            $IMAGE \
            milabench run --override "$(cat overrides.yaml)"
        ;;
esac

printf '%s\n' "<< =============" ""

#
#
#

printf '%s\n' ">> Print report" "==============="

# Run the report command in a fresh container, with the host's ./results
# directory mounted at the path given to --runs.
docker run -it --rm \
    -v $(pwd)/results:/milabench/envs/runs \
    $IMAGE \
    milabench report --runs /milabench/envs/runs

printf '%s\n' "<< ============"
16 changes: 16 additions & 0 deletions scripts/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

# Submit slurm.sh as a single-GPU Slurm job and follow its log.
#
# The job's output is written to $OUTPUT in the current directory; the file
# is recreated empty first so `tail -f` has something to open before the job
# starts writing. `tail -f` never returns: stop it with Ctrl-C once the job
# is done.

OUTPUT="test.out"
# Locate slurm.sh next to this script so the submission works from any
# working directory, not only from inside scripts/.
SCRIPT_DIR=$(dirname -- "$0")

rm -rf "$OUTPUT"
touch "$OUTPUT"

# Each option is followed by a space before the line continuation so the
# arguments stay separate words however the following lines are indented.
sbatch --ntasks=1 \
    --gpus-per-task=rtx8000:1 \
    --cpus-per-task=4 \
    --time=01:30:00 \
    --ntasks-per-node=1 \
    --mem=64G \
    -o "$OUTPUT" \
    "$SCRIPT_DIR/slurm.sh" \
    -a cuda \
    -b stable_update

tail -f "$OUTPUT"
129 changes: 129 additions & 0 deletions scripts/slurm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/bin/bash

# Print the command-line help and exit with status 1.
# Called for -h as well as for every option-parsing error.
function usage() {
    echo "Usage: $0 [-h] [-a ARCH] [-b BRANCH] [-o ORIGIN] [-c CONFIG] [-e ENV] [-p PYTHON] [-l LOC] [ARGUMENT...]"
    echo " -h Display this help message."
    echo " -a arch GPU arch (default: cuda)"
    echo " -b BRANCH Branch to checkout (default: master)"
    echo " -o ORIGIN Origin to use (default: github/mila/milabench)"
    echo " -c CONFIG Configuration (default: milabench/config/standard.yaml)"
    echo " -e ENV Environment (default: ./env)"
    echo " -p PYTHON Python version (default: 3.9)"
    echo " -l LOC Working location (default: \$SLURM_TMPDIR)"
    echo " ARGUMENT Any additional argument you want to process."
    exit 1
}

# Defaults; every value below can be overridden by an option.
ARCH="cuda"
PYTHON="3.9"
BRANCH="master"
ORIGIN="https://github.com/mila-iqia/milabench.git"
LOC="$SLURM_TMPDIR"
CONFIG="$LOC/milabench/config/standard.yaml"
BASE="$LOC/base"
ENV="./env"
REMAINING_ARGS=""

# The leading ':' selects silent error reporting: a missing argument arrives
# as ':' and an unknown option as '?', both with the option letter in OPTARG.
# 'l:' was missing from this list, which made the -l branch unreachable.
while getopts ":hm:l:p:e:a:b:o:c:" opt; do
    case $opt in
        h)
            usage
            ;;
        p)
            PYTHON="$OPTARG"
            ;;
        b)
            BRANCH="$OPTARG"
            ;;
        o)
            ORIGIN="$OPTARG"
            ;;
        c)
            CONFIG="$OPTARG"
            ;;
        e)
            ENV="$OPTARG"
            ;;
        a)
            ARCH="$OPTARG"
            ;;
        m)
            # -m takes an argument but has no effect; it stays accepted so
            # existing invocations keep parsing exactly as before.
            ;;
        l)
            # CONFIG and BASE are derived from LOC, so they are recomputed
            # here. This overwrites an earlier -c: pass -c after -l.
            LOC="$OPTARG"
            CONFIG="$LOC/milabench/config/standard.yaml"
            BASE="$LOC/base"
            ;;
        :)
            echo "Option -$OPTARG requires an argument." >&2
            usage
            ;;
        \?)
            # Unknown options used to be skipped silently.
            echo "Invalid option: -$OPTARG" >&2
            usage
            ;;
    esac
done

# Everything after the options is kept as one space-joined string; it is
# expanded unquoted later on and forwarded to the milabench commands.
shift "$((OPTIND-1))"
REMAINING_ARGS="$*"

# Show the effective settings before anything is installed.
printf ' PYTHON: %s\n' "$PYTHON"
printf ' branch: %s\n' "$BRANCH"
printf ' origin: %s\n' "$ORIGIN"
printf ' config: %s\n' "$CONFIG"
printf ' env: %s\n' "$ENV"
printf ' args: %s\n' "$REMAINING_ARGS"
#
# Fix problem with conda saying it is not "init properly":
# source conda.sh from the installation that owns the `conda` found on PATH,
# so that `conda activate` works in this non-interactive shell.
#
CONDA_EXEC="$(which conda)"
CONDA_BASE=$(dirname $CONDA_EXEC)
source $CONDA_BASE/../etc/profile.d/conda.sh

# Optional per-user settings; the file is sourced only when it exists.
if [ -e $HOME/.credentials.env ]; then
source $HOME/.credentials.env
fi

# Everything below (environment, clone) happens relative to $LOC.
cd $LOC
#
# Create a new environment
#
# The environment is created only when $ENV is not an existing directory, is
# not "base", and is not a directory under $CONDA_ENVS.
# NOTE(review): CONDA_ENVS is not set anywhere in this script -- presumably
# it comes from the environment or from .credentials.env; confirm.
if [ ! -d "$ENV" ] && [ "$ENV" != "base" ] && [ ! -d "$CONDA_ENVS/$ENV" ]; then
conda create --prefix $ENV python=$PYTHON -y
fi
conda activate $ENV

# Every cache location points at the same directory under $BASE.
export HF_HOME="$BASE/cache" \
       HF_DATASETS_CACHE="$BASE/cache" \
       TORCH_HOME="$BASE/cache" \
       XDG_CACHE_HOME="$BASE/cache"

# Settings read by milabench itself.
export MILABENCH_GPU_ARCH="$ARCH" \
       MILABENCH_DASH=no \
       MILABENCH_BASE="$BASE" \
       MILABENCH_CONFIG="$CONFIG"

export PYTHONUNBUFFERED=1
#
# Fetch the repo
#
# Shallow, single-branch clone of the requested branch. The target directory
# is given explicitly so the install step below finds ./milabench even when
# ORIGIN points at a repository with a different name (a fork, for example).
git clone --single-branch --depth 1 -b "$BRANCH" "$ORIGIN" milabench
python -m pip install -e ./milabench

# Toolchain modules are loaded before the milabench commands run.
module load gcc/9.3.0
module load cuda/11.8

# Print a blank line and a title underlined with dashes of the same length,
# then run the given milabench sub-command.
# $CONFIG, $BASE and $REMAINING_ARGS are left unquoted on purpose so they
# are word-split exactly as before (extra arguments become separate words).
_run_stage() {
    local title=$1
    local subcommand=$2

    echo ""
    echo "$title"
    echo "${title//?/-}"
    milabench "$subcommand" --config $CONFIG --base $BASE $REMAINING_ARGS
}

_run_stage "Install" install
_run_stage "Prepare" prepare
_run_stage "Run" run

# SECONDS is bash's count of seconds since the shell started.
echo "----"
echo "Done after $SECONDS"
echo ""

0 comments on commit c151b98

Please sign in to comment.