From 8e4f6e48db8e38f5a9fb02dc8851c33a818ebbf4 Mon Sep 17 00:00:00 2001 From: Zixu Wang <61218792+foin6@users.noreply.github.com> Date: Fri, 17 May 2024 01:46:52 +0800 Subject: [PATCH] Skip the UT cases that use unimplemented op builders. (#5372) Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com> Co-authored-by: Logan Adams --- tests/unit/elasticity/test_elastic.py | 13 +++++++------ .../ops/accelerators/test_accelerator_backward.py | 4 ++++ .../ops/accelerators/test_accelerator_forward.py | 4 ++++ tests/unit/ops/lion/test_cpu_lion.py | 7 ++++--- tests/unit/ops/lion/test_lion.py | 2 ++ .../half_precision/test_dynamic_loss_scale.py | 3 ++- tests/unit/runtime/half_precision/test_fp16.py | 14 ++++++++++---- 7 files changed, 33 insertions(+), 14 deletions(-) diff --git a/tests/unit/elasticity/test_elastic.py b/tests/unit/elasticity/test_elastic.py index 92e1520b2c7c..1f7cbbbca214 100644 --- a/tests/unit/elasticity/test_elastic.py +++ b/tests/unit/elasticity/test_elastic.py @@ -12,7 +12,7 @@ from deepspeed.ops.op_builder import FusedAdamBuilder, FusedLambBuilder if not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]: - pytest.skip("This op had not been implemented on this system.", allow_module_level=True) + pytest.skip("This op has not been implemented on this system.", allow_module_level=True) @pytest.fixture @@ -150,7 +150,8 @@ def test_proper_mbsz(ds_config): class TestNonElasticBatchParams(DistributedTest): world_size = 2 - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): config_dict = { "train_batch_size": 2, @@ -183,10 +184,9 @@ def test(self): class TestNonElasticBatchParamsWithOverride(DistributedTest): world_size = 2 - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): - if not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]: - pytest.skip("This op had not been implemented on this system.", allow_module_level=True) config_dict = { "train_batch_size": 2, "steps_per_print": 1, @@ -217,7 +217,8 @@ def test(self): class TestElasticConfigChanged(DistributedTest): world_size = 2 - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): config_dict = { "train_batch_size": 2, diff --git a/tests/unit/ops/accelerators/test_accelerator_backward.py b/tests/unit/ops/accelerators/test_accelerator_backward.py index 48e5fbbe7475..4b1b392e933a 100644 --- a/tests/unit/ops/accelerators/test_accelerator_backward.py +++ b/tests/unit/ops/accelerators/test_accelerator_backward.py @@ -9,12 +9,14 @@ import random import copy import os +import deepspeed from torch import nn from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig from deepspeed.accelerator import get_accelerator from unit.modeling import BertConfig, BertLayerNorm, BertEncoder as BertEncoderPostln from unit.modelingpreln import BertEncoder as BertEncoderPreln from unit.common import DistributedTest, 
is_rocm_pytorch +from deepspeed.ops.op_builder import TransformerBuilder if torch.half not in get_accelerator().supported_dtypes(): pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) @@ -257,6 +259,8 @@ class TestCUDABackward(DistributedTest): #This is to flush denorms in forward pass. Please refer to https://github.com/pytorch/pytorch/blob/main/docs/source/notes/numerical_accuracy.rst#reduced-precision-fp16-and-bf16-gemms-and-convolutions-on-amd-instinct-mi200-devices os.environ['ROCBLAS_INTERNAL_FP16_ALT_IMPL'] = '1' + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[TransformerBuilder.NAME], + reason="TransformerBuilder has not been implemented on this system.") def test_backward(self, is_preln, use_fp16, batch_size, hidden_size, seq_len, heads, num_layers, atol): # Only run fp16 test cases on devices with FP16 capability. if not get_accelerator().is_fp16_supported() and (use_fp16 is True or is_preln is False): diff --git a/tests/unit/ops/accelerators/test_accelerator_forward.py b/tests/unit/ops/accelerators/test_accelerator_forward.py index ee9464f63aa1..e2f4ac177f1b 100644 --- a/tests/unit/ops/accelerators/test_accelerator_forward.py +++ b/tests/unit/ops/accelerators/test_accelerator_forward.py @@ -8,12 +8,14 @@ import pytest import random import copy +import deepspeed from torch import nn from unit.modelingpreln import BertEncoder as BertEncoderPreln from unit.modeling import BertLayerNorm, BertConfig, BertEncoder as BertEncoderPostln from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig from deepspeed.accelerator import get_accelerator from unit.common import DistributedTest +from deepspeed.ops.op_builder import TransformerBuilder if torch.half not in get_accelerator().supported_dtypes(): pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) @@ -260,6 +262,8 @@ def test_forward(self, batch_size, hidden_size, seq_len, heads, num_layers, is_p class TestCUDAForwardSmallBatchSize(DistributedTest): world_size = 1 + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[TransformerBuilder.NAME], + reason="TransformerBuilder has not been implemented on this system.") def test_forward_with_small_bsz(self, batch_size, small_bsz, hidden_size, seq_len, heads, num_layers, is_preln, use_fp16): # Only run fp16 test cases on devices with FP16 capability. 
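
Note on the pattern used here and in the lion/fp16 changes that follow: the patch standardizes on two skip styles, a module-level pytest.skip(..., allow_module_level=True) when every test in a file depends on an op builder (as test_elastic.py keeps for FusedAdam), and a per-test @pytest.mark.skipif guard keyed on deepspeed.ops.__compatible_ops__ otherwise, so unrelated tests in the same module still run. A minimal sketch of both forms, assuming a DeepSpeed install; the class and test names below are hypothetical, while the deepspeed compatibility dict, the builder names, and the pytest calls are the ones used in the diff above.

# Sketch only: TestMyLambFeature and its assertions are hypothetical.
import pytest
import deepspeed
from deepspeed.ops.op_builder import FusedAdamBuilder, FusedLambBuilder

# Module-level skip: appropriate only when every test in the file needs the op,
# because the entire module is skipped.
if not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]:
    pytest.skip("FusedAdamBuilder has not been implemented on this system.",
                allow_module_level=True)


class TestMyLambFeature:

    # Per-test skip, the pattern this patch converges on: only the tests that
    # actually need FusedLamb are skipped; the rest of the module still runs.
    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
                        reason="FusedLambBuilder has not been implemented on this system.")
    def test_needs_fused_lamb(self):
        assert deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]

    def test_runs_without_compiled_ops(self):
        # No compiled op builder required, so the guard above never skips this.
        assert True
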
diff --git a/tests/unit/ops/lion/test_cpu_lion.py b/tests/unit/ops/lion/test_cpu_lion.py index 61a069af3257..7d40a98f35b9 100644 --- a/tests/unit/ops/lion/test_cpu_lion.py +++ b/tests/unit/ops/lion/test_cpu_lion.py @@ -14,9 +14,6 @@ from deepspeed.ops.op_builder import CPULionBuilder from unit.common import DistributedTest -if not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME]: - pytest.skip("cpu-lion is not compatible", allow_module_level=True) - pytest.cpu_vendor = get_cpu_info()["vendor_id_raw"].lower() @@ -62,6 +59,8 @@ class TestCPULion(DistributedTest): set_dist_env = False @pytest.mark.skipif(not get_accelerator().is_available(), reason="only supported in CUDA environments.") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], + reason="CPULionBuilder has not been implemented on this system.") def test_fused_lion_equal(self, dtype, model_size): if ("amd" in pytest.cpu_vendor) and (dtype == torch.half): pytest.skip("cpu-lion with half precision not supported on AMD CPUs") @@ -84,6 +83,8 @@ def test_fused_lion_equal(self, dtype, model_size): class TestCPULionGPUError(DistributedTest): + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], + reason="CPULionBuilder has not been implemented on this system.") def test_cpu_lion_gpu_error(self): model_size = 64 from deepspeed.ops.lion import DeepSpeedCPULion diff --git a/tests/unit/ops/lion/test_lion.py b/tests/unit/ops/lion/test_lion.py index b2c3ac2f52df..507ff72ea51a 100644 --- a/tests/unit/ops/lion/test_lion.py +++ b/tests/unit/ops/lion/test_lion.py @@ -12,6 +12,7 @@ from unit.common import DistributedTest from unit.simple_model import SimpleModel from deepspeed.accelerator import get_accelerator +from deepspeed.ops.op_builder import CPULionBuilder if torch.half not in get_accelerator().supported_dtypes(): pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) @@ -27,6 +28,7 @@ class TestLionConfigs(DistributedTest): world_size = 1 reuse_dist_env = True + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], reason="CPULionBuilder has not been implemented on this system.") def test(self, optimizer, zero_offload, diff --git a/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py b/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py index 38c539c1cc6c..4b263172261c 100644 --- a/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py +++ b/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py @@ -153,7 +153,8 @@ def test_some_overflow(self): assert optim.cur_iter == expected_iteration -@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") +@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") class TestUnfused(DistributedTest): world_size = 1 diff --git a/tests/unit/runtime/half_precision/test_fp16.py b/tests/unit/runtime/half_precision/test_fp16.py index cf7a1d8a8183..dba15a969459 100644 --- a/tests/unit/runtime/half_precision/test_fp16.py +++ b/tests/unit/runtime/half_precision/test_fp16.py @@ -26,10 +26,11 @@ pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True) -@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") class TestLambFP32GradClip(DistributedTest): world_size = 2 + 
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -59,10 +60,11 @@ def test(self): model.step() -@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") class TestLambFP16(DistributedTest): world_size = 2 + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test__basic(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -90,6 +92,8 @@ def test__basic(self): model.backward(loss) model.step() + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test_empty_grad(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -236,8 +240,9 @@ def mock_unscale_and_clip_grads(grads_groups_flat, total_norm, apply_scale=True) engine.backward(loss) engine.step() - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") @pytest.mark.parametrize("fused_lamb_legacy", [(False), (True)]) + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system.") def test_lamb_gradnorm(self, monkeypatch, fused_lamb_legacy: bool): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported") @@ -501,7 +506,8 @@ def test_adam_basic(self): model.backward(loss) model.step() - @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible") + @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], + reason="FusedLambBuilder has not been implemented on this system") def test_lamb_basic(self): if not get_accelerator().is_fp16_supported(): pytest.skip("fp16 is not supported")
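
The test_fp16.py hunks above also stack the compatibility guard with @pytest.mark.parametrize (see test_lamb_gradnorm); pytest applies a skipif mark to every parametrized case, and the decorator order between the two marks does not affect which cases are skipped, so reordering them as the patch does is purely cosmetic. A minimal sketch under that assumption; the class name and test body are hypothetical, and the guard expression is the one added in the diff above.

# Sketch only: TestLambVariants and its body are hypothetical.
import pytest
import deepspeed
from deepspeed.ops.op_builder import FusedLambBuilder


class TestLambVariants:

    @pytest.mark.parametrize("fused_lamb_legacy", [False, True])
    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
                        reason="FusedLambBuilder has not been implemented on this system.")
    def test_lamb_variant(self, fused_lamb_legacy: bool):
        # When the builder is unavailable, both parametrized cases are reported
        # as skipped; when it is available, both run.
        assert fused_lamb_legacy in (False, True)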