Skip to content

add zero3 module_granularity_threshold to zero optimization. #855

add zero3 module_granularity_threshold to zero optimization.

add zero3 module_granularity_threshold to zero optimization. #855

Workflow file for this run

# Workflow name as referenced by the concurrency group below.
name: nv-ds-chat

on:
  # Daily run at minute 0 of hour 0 (GitHub evaluates schedule crons in UTC).
  schedule:
    - cron: '0 0 * * *'

  # Manual trigger; lets the caller choose which DeepSpeedExamples branch
  # is cloned by the job. Falls back to 'master' when left untouched.
  workflow_dispatch:
    inputs:
      dse_branch:
        description: 'DeepSpeedExamples Branch'
        type: string
        required: false
        default: 'master'

  # Pull requests trigger this workflow only when they touch the workflow
  # file itself or one of the listed runtime modules.
  pull_request:
    paths:
      - '.github/workflows/nv-ds-chat.yml'
      - 'deepspeed/runtime/zero/stage_1_and_2.py'
      - 'deepspeed/runtime/zero/stage3.py'
      - 'deepspeed/runtime/hybrid_engine.py'

# One concurrency group per workflow + ref; a newer run in the same group
# cancels the run that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Token scopes granted to the jobs in this workflow.
# NOTE(review): `issues: write` is presumably what allows the failure-reporting
# step to open or update an issue - confirm before tightening.
permissions:
  contents: read
  issues: write
jobs:
  # Installs torch, DeepSpeed (from this checkout) and DeepSpeed-Chat (from
  # DeepSpeedExamples), then runs the DeepSpeed-Chat pytest suite.
  unit-tests:
    runs-on: [self-hosted, nvidia, cu121, v100]

    steps:
      - uses: actions/checkout@v4

      # Action stored in this repository (local `./` path).
      # NOTE(review): presumably this is what provides TORCH_CACHE and
      # PYTEST_OPTS to the later steps - confirm against the action.
      - id: setup-venv
        uses: ./.github/workflows/setup-venv

      # Installs torch from the cu121 wheel index and prints the version and
      # CUDA availability. TORCH_CACHE is quoted so an empty or
      # space-containing value cannot shift pip's arguments.
      - name: Install pytorch
        run: |
          pip3 install -U --cache-dir "$TORCH_CACHE" torch --index-url https://download.pytorch.org/whl/cu121
          python -c "import torch; print('torch:', torch.__version__, torch)"
          python -c "import torch; print('CUDA available:', torch.cuda.is_available())"

      # Installs DeepSpeed from the checked-out tree with its `dev` extra.
      # The requirement is quoted because `.[dev]` is a shell glob pattern.
      - name: Install deepspeed
        run: |
          pip install transformers==4.45.2
          pip install ".[dev]"
          ds_report

      # Clones DeepSpeedExamples at the requested branch and installs
      # DeepSpeed-Chat in editable mode.
      - name: Install deepspeed-chat
        env:
          # The workflow_dispatch input is handed to the script through the
          # environment instead of being expanded into the script text, so
          # its value is never parsed as shell code. It is empty for
          # schedule and pull_request events.
          DSE_BRANCH: ${{ github.event.inputs.dse_branch }}
        run: |
          # Use 'master' when no branch was supplied (unset or empty).
          BRANCH="${DSE_BRANCH:-master}"
          echo "DeepSpeedExamples Branch: $BRANCH"
          git clone -b "$BRANCH" https://github.com/microsoft/DeepSpeedExamples.git
          cd DeepSpeedExamples/applications/DeepSpeed-Chat
          pip install -r requirements.txt
          pip install -e .

      # Records the resolved package set in the job log.
      - name: Python environment
        run: |
          pip list

      # PYTEST_OPTS is deliberately left unquoted so that it word-splits
      # into separate pytest arguments.
      - name: DS-Chat unit tests
        run: |
          cd DeepSpeedExamples/applications/DeepSpeed-Chat
          unset TORCH_CUDA_ARCH_LIST # only jit compile for current arch
          cd tests
          pytest $PYTEST_OPTS ./

      # Runs only when an earlier step failed AND the run was started by the
      # schedule trigger; manual and pull-request failures do not file issues.
      - name: Open GitHub issue if nightly CI fails
        if: ${{ failure() && (github.event_name == 'schedule') }}
        uses: JasonEtco/create-an-issue@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          filename: .github/ISSUE_TEMPLATE/ci_failure_report.md
          update_existing: true