From 8e4f6e48db8e38f5a9fb02dc8851c33a818ebbf4 Mon Sep 17 00:00:00 2001 From: Zixu Wang <61218792+foin6@users.noreply.github.com> Date: Fri, 17 May 2024 01:46:52 +0800 Subject: [PATCH] Skip the UT cases that use unimplemented op builders. (#5372) Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com> Co-authored-by: Logan Adams --- tests/unit/elasticity/test_elastic.py | 13 +++++++------ .../ops/accelerators/test_accelerator_backward.py | 4 ++++ .../ops/accelerators/test_accelerator_forward.py | 4 ++++ tests/unit/ops/lion/test_cpu_lion.py | 7 ++++--- tests/unit/ops/lion/test_lion.py | 2 ++ .../half_precision/test_dynamic_loss_scale.py | 3 ++- tests/unit/runtime/half_precision/test_fp16.py | 14 ++++++++++---- 7 files changed, 33 insertions(+), 14 deletions(-) diff --git a/tests/unit/elasticity/test_elastic.py b/tests/unit/elasticity/test_elastic.py index 92e1520b2c7c..1f7cbbbca214 100644 --- a/tests/unit/elasticity/test_elastic.py +++ b/tests/unit/elasticity/test_elastic.py @@ -12,7 +12,7 @@ from deepspeed.ops.op_builder import FusedAdamBuilder, FusedLambBuilder if not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]: - pytest.skip("This op had not been implemented on this system.", allow_module_level=True) + pytest.skip("This op has not been implemented on this system.", allow_module_level=True) @pytest.fixture @@ -150,7 +150,8 @@ def test_proper_mbsz(ds_config): class TestNonElasticBatchParams(DistributedTest): world_size = 2 - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): config_dict = { "train_batch_size": 2, @@ -183,10 +184,9 @@ def test(self): class TestNonElasticBatchParamsWithOverride(DistributedTest): world_size = 2 - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): - if not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]: - pytest.skip("This op had not been implemented on this system.", allow_module_level=True) config_dict = { "train_batch_size": 2, "steps_per_print": 1, @@ -217,7 +217,8 @@ def test(self): class TestElasticConfigChanged(DistributedTest): world_size = 2 - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): config_dict = { "train_batch_size": 2, diff --git a/tests/unit/ops/accelerators/test_accelerator_backward.py b/tests/unit/ops/accelerators/test_accelerator_backward.py index 48e5fbbe7475..4b1b392e933a 100644 --- a/tests/unit/ops/accelerators/test_accelerator_backward.py +++ b/tests/unit/ops/accelerators/test_accelerator_backward.py @@ -9,12 +9,14 @@ import random import copy import os +import deepspeed from torch import nn from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig from deepspeed.accelerator import get_accelerator from unit.modeling import BertConfig, BertLayerNorm, BertEncoder as BertEncoderPostln from unit.modelingpreln import BertEncoder as BertEncoderPreln from unit.common import DistributedTest, 
is_rocm_pytorch +from deepspeed.ops.op_builder import TransformerBuilder if torch.half not in get_accelerator().supported_dtypes(): pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) @@ -257,6 +259,8 @@ class TestCUDABackward(DistributedTest): #This is to flush denorms in forward pass. Please refer to https://github.com/pytorch/pytorch/blob/main/docs/source/notes/numerical_accuracy.rst#reduced-precision-fp16-and-bf16-gemms-and-convolutions-on-amd-instinct-mi200-devices os.environ['ROCBLAS_INTERNAL_FP16_ALT_IMPL'] = '1' + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[TransformerBuilder.NAME], + reason="TransformerBuilder has not been implemented on this system.") def test_backward(self, is_preln, use_fp16, batch_size, hidden_size, seq_len, heads, num_layers, atol): # Only run fp16 test cases on devices with FP16 capability. if not get_accelerator().is_fp16_supported() and (use_fp16 is True or is_preln is False): diff --git a/tests/unit/ops/accelerators/test_accelerator_forward.py b/tests/unit/ops/accelerators/test_accelerator_forward.py index ee9464f63aa1..e2f4ac177f1b 100644 --- a/tests/unit/ops/accelerators/test_accelerator_forward.py +++ b/tests/unit/ops/accelerators/test_accelerator_forward.py @@ -8,12 +8,14 @@ import pytest import random import copy +import deepspeed from torch import nn from unit.modelingpreln import BertEncoder as BertEncoderPreln from unit.modeling import BertLayerNorm, BertConfig, BertEncoder as BertEncoderPostln from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig from deepspeed.accelerator import get_accelerator from unit.common import DistributedTest +from deepspeed.ops.op_builder import TransformerBuilder if torch.half not in get_accelerator().supported_dtypes(): pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) @@ -260,6 +262,8 @@ def test_forward(self, batch_size, hidden_size, seq_len, heads, num_layers, is_p class TestCUDAForwardSmallBatchSize(DistributedTest): world_size = 1 + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[TransformerBuilder.NAME], + reason="TransformerBuilder has not been implemented on this system.") def test_forward_with_small_bsz(self, batch_size, small_bsz, hidden_size, seq_len, heads, num_layers, is_preln, use_fp16): # Only run fp16 test cases on devices with FP16 capability. 
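
Note on the pattern used here and in the lion/fp16 changes that follow: the patch standardizes on two skip styles, a module-level pytest.skip(..., allow_module_level=True) when every test in a file depends on an op builder (as test_elastic.py keeps for FusedAdam), and a per-test @pytest.mark.skipif guard keyed on deepspeed.ops.__compatible_ops__ otherwise, so unrelated tests in the same module still run. A minimal sketch of both forms, assuming a DeepSpeed install; the class and test names below are hypothetical, while the deepspeed compatibility dict, the builder names, and the pytest calls are the ones used in the diff above.

# Sketch only: TestMyLambFeature and its assertions are hypothetical.
import pytest
import deepspeed
from deepspeed.ops.op_builder import FusedAdamBuilder, FusedLambBuilder

# Module-level skip: appropriate only when every test in the file needs the op,
# because the entire module is skipped.
if not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]:
    pytest.skip("FusedAdamBuilder has not been implemented on this system.",
                allow_module_level=True)


class TestMyLambFeature:

    # Per-test skip, the pattern this patch converges on: only the tests that
    # actually need FusedLamb are skipped; the rest of the module still runs.
    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
                        reason="FusedLambBuilder has not been implemented on this system.")
    def test_needs_fused_lamb(self):
        assert deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]

    def test_runs_without_compiled_ops(self):
        # No compiled op builder required, so the guard above never skips this.
        assert True
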
diff --git a/tests/unit/ops/lion/test_cpu_lion.py b/tests/unit/ops/lion/test_cpu_lion.py index 61a069af3257..7d40a98f35b9 100644 --- a/tests/unit/ops/lion/test_cpu_lion.py +++ b/tests/unit/ops/lion/test_cpu_lion.py @@ -14,9 +14,6 @@ from deepspeed.ops.op_builder import CPULionBuilder from unit.common import DistributedTest -if not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME]: - pytest.skip("cpu-lion is not compatible", allow_module_level=True) - pytest.cpu_vendor = get_cpu_info()["vendor_id_raw"].lower() @@ -62,6 +59,8 @@ class TestCPULion(DistributedTest): set_dist_env = False @pytest.mark.skipif(not get_accelerator().is_available(), reason="only supported in CUDA environments.") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], + reason="CPULionBuilder has not been implemented on this system.") def test_fused_lion_equal(self, dtype, model_size): if ("amd" in pytest.cpu_vendor) and (dtype == torch.half): pytest.skip("cpu-lion with half precision not supported on AMD CPUs") @@ -84,6 +83,8 @@ def test_fused_lion_equal(self, dtype, model_size): class TestCPULionGPUError(DistributedTest): + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], + reason="CPULionBuilder has not been implemented on this system.") def test_cpu_lion_gpu_error(self): model_size = 64 from deepspeed.ops.lion import DeepSpeedCPULion diff --git a/tests/unit/ops/lion/test_lion.py b/tests/unit/ops/lion/test_lion.py index b2c3ac2f52df..507ff72ea51a 100644 --- a/tests/unit/ops/lion/test_lion.py +++ b/tests/unit/ops/lion/test_lion.py @@ -12,6 +12,7 @@ from unit.common import DistributedTest from unit.simple_model import SimpleModel from deepspeed.accelerator import get_accelerator +from deepspeed.ops.op_builder import CPULionBuilder if torch.half not in get_accelerator().supported_dtypes(): pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) @@ -27,6 +28,7 @@ class TestLionConfigs(DistributedTest): world_size = 1 reuse_dist_env = True + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], reason="CPULionBuilder has not been implemented on this system.") def test(self, optimizer, zero_offload, diff --git a/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py b/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py index 38c539c1cc6c..4b263172261c 100644 --- a/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py +++ b/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py @@ -153,7 +153,8 @@ def test_some_overflow(self): assert optim.cur_iter == expected_iteration -@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") +@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") class TestUnfused(DistributedTest): world_size = 1 diff --git a/tests/unit/runtime/half_precision/test_fp16.py b/tests/unit/runtime/half_precision/test_fp16.py index cf7a1d8a8183..dba15a969459 100644 --- a/tests/unit/runtime/half_precision/test_fp16.py +++ b/tests/unit/runtime/half_precision/test_fp16.py @@ -26,10 +26,11 @@ pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) -@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") class TestLambFP32GradClip(DistributedTest): world_size = 2 + 
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -59,10 +60,11 @@ def test(self): model.step() -@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") class TestLambFP16(DistributedTest): world_size = 2 + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test__basic(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -90,6 +92,8 @@ def test__basic(self): model.backward(loss) model.step() + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test_empty_grad(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -236,8 +240,9 @@ def mock_unscale_and_clip_grads(grads_groups_flat, total_norm, apply_scale=True) engine.backward(loss) engine.step() - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") @pytest.mark.parametrize("fused_lamb_legacy", [(False), (True)]) + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test_lamb_gradnorm(self, monkeypatch, fused_lamb_legacy: bool): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -501,7 +506,8 @@ def test_adam_basic(self): model.backward(loss) model.step() - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system") def test_lamb_basic(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported")
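
The test_fp16.py hunks above also stack the compatibility guard with @pytest.mark.parametrize (see test_lamb_gradnorm); pytest applies a skipif mark to every parametrized case, and the decorator order between the two marks does not affect which cases are skipped, so reordering them as the patch does is purely cosmetic. A minimal sketch under that assumption; the class name and test body are hypothetical, and the guard expression is the one added in the diff above.

# Sketch only: TestLambVariants and its body are hypothetical.
import pytest
import deepspeed
from deepspeed.ops.op_builder import FusedLambBuilder


class TestLambVariants:

    @pytest.mark.parametrize("fused_lamb_legacy", [False, True])
    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
                        reason="FusedLambBuilder has not been implemented on this system.")
    def test_lamb_variant(self, fused_lamb_legacy: bool):
        # When the builder is unavailable, both parametrized cases are reported
        # as skipped; when it is available, both run.
        assert fused_lamb_legacy in (False, True)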