[Zerobubble] Support LinearWithAsyncCommunication for shardformer policy #1749
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Test Example on PR

on:
  pull_request:
    # Only pull requests whose base branch matches one of these patterns
    # start the workflow.
    branches:
      - 'main'
      - 'develop'
      - 'feature/**'
    # Run when anything under examples/ changes (Markdown files excluded),
    # or when this workflow file itself is edited.
    paths:
      - 'examples/**'
      - '!examples/**.md'
      - '.github/workflows/example_check_on_pr.yml'
jobs:
  # Work out which example directories this pull request touches and publish
  # them as a job matrix (outputs.matrix) together with two flags:
  #   anyChanged              - 'true' when at least one example directory changed
  #   anyExtensionFileChanged - any_changed output of the extension-file check
  detect-changed-example:
    # Skip draft PRs and PRs that do not target hpcaitech/ColossalAI.
    if: |
      github.event.pull_request.draft == false &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.setup-matrix.outputs.matrix }}
      anyChanged: ${{ steps.setup-matrix.outputs.anyChanged }}
      anyExtensionFileChanged: ${{ steps.find-extension-change.outputs.any_changed }}
    name: Detect changed example files
    concurrency:
      group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-detect-change
      cancel-in-progress: true
    steps:
      # Full history is fetched so that `git merge-base` below can find the
      # common ancestor.
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      # NOTE(review): the merge base is always computed against origin/main,
      # although the trigger also accepts PRs into develop and feature/**.
      # Confirm that this is intended.
      - name: Locate base commit
        id: locate-base-sha
        run: |
          curBranch=$(git rev-parse --abbrev-ref HEAD)
          commonCommit=$(git merge-base origin/main $curBranch)
          echo $commonCommit
          echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT

      - name: Find the changed extension-related files
        id: find-extension-change
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}
          files: |
            op_builder/**
            colossalai/kernel/**
            setup.py

      - name: Get all changed example files
        id: changed-files
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}

      - name: setup matrix
        id: setup-matrix
        # File names come from the pull request and are therefore untrusted.
        # They are handed to the script through the environment instead of
        # being expanded into the shell source, so a crafted file name cannot
        # inject commands.
        env:
          ALL_CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }}
        run: |
          # Join the whitespace-separated file list into one ':'-separated string.
          changedFileName=""
          for file in ${ALL_CHANGED_FILES}; do
            changedFileName="${file}:${changedFileName}"
          done
          echo "$changedFileName was changed"
          # Quoted so the list reaches the script as a single argument.
          res=`python .github/workflows/scripts/example_checks/detect_changed_example.py --fileNameList "$changedFileName"`
          echo "All changed examples are $res"
          if [ "$res" == "[]" ]; then
            echo "anyChanged=false" >> $GITHUB_OUTPUT
            echo "matrix=null" >> $GITHUB_OUTPUT
          else
            dirs=$( IFS=',' ; echo "${res[*]}" )
            echo "anyChanged=true" >> $GITHUB_OUTPUT
            echo "matrix={\"directory\":$(echo "$dirs")}" >> $GITHUB_OUTPUT
          fi
# Run test_ci.sh once per changed example directory (one matrix entry each).
  check-changed-example:
    # The event_name check keeps this job from running on workflow_dispatch.
    # The anyChanged check skips the job when no example changed; without it
    # the job errors out because the matrix has no value.
    if: |
      github.event.pull_request.draft == false &&
      github.event.pull_request.base.repo.full_name == 'hpcaitech/ColossalAI' && github.event_name == 'pull_request' &&
      needs.detect-changed-example.outputs.anyChanged == 'true'
    name: Test the changed example
    needs: detect-changed-example
    runs-on: [self-hosted, gpu]
    strategy:
      fail-fast: false
      matrix: ${{fromJson(needs.detect-changed-example.outputs.matrix)}}
    container:
      image: hpcaitech/pytorch-cuda:2.2.2-12.1.0
      options: --gpus all --rm -v /data/scratch/examples-data:/data/ -v /dev/shm
    timeout-minutes: 30
    concurrency:
      group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-run-example-${{ matrix.directory }}
      cancel-in-progress: true
    steps:
      - uses: actions/checkout@v3

      - name: Restore Colossal-AI Cache
        # Reuse the cached build only when no extension-related file changed.
        # The job id must match the `needs` entry above: a non-existent id
        # evaluates to an empty value, which would make this condition always
        # true and restore the cache even after extension changes.
        if: needs.detect-changed-example.outputs.anyExtensionFileChanged != 'true'
        run: |
          if [ -d /github/home/cuda_ext_cache ] && [ ! -z "$(ls -A /github/home/cuda_ext_cache/)" ]; then
            cp -p -r /github/home/cuda_ext_cache/* /__w/ColossalAI/ColossalAI/
          fi

      - name: Install Colossal-AI
        run: |
          BUILD_EXT=1 pip install -v -e .

      - name: Store Colossal-AI Cache
        run: |
          # Make sure the cache directory exists so the build tree is stored as
          # cuda_ext_cache/build, the layout the restore step copies back.
          mkdir -p /github/home/cuda_ext_cache
          cp -p -r /__w/ColossalAI/ColossalAI/build /github/home/cuda_ext_cache/

      - name: Test the example
        # The directory name is derived from paths in the pull request, so it
        # is passed through the environment rather than expanded into the
        # shell source.
        env:
          EXAMPLE_DIR: ${{ matrix.directory }}
        run: |
          cd "${PWD}/examples/${EXAMPLE_DIR}"
          bash test_ci.sh