Skip the UT cases that use unimplemented op builders. (#5372)
Co-authored-by: Logan Adams <[email protected]>
Co-authored-by: Logan Adams <[email protected]>
3 people authored May 16, 2024
1 parent 7f55b20 commit 8e4f6e4
Showing 7 changed files with 33 additions and 14 deletions.
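The commit applies one pattern throughout: gate each affected test on deepspeed.ops.__compatible_ops__, using a module-level pytest.skip when every case in the file needs the op, and a per-test @pytest.mark.skipif decorator otherwise. A minimal sketch of both forms, using builder names that appear in the diff (the TestExample class is illustrative, not part of the change):

import pytest
import deepspeed
from deepspeed.ops.op_builder import FusedAdamBuilder, FusedLambBuilder

# Module-level skip: stop collecting the whole file when the op builder
# has not been implemented on the current system.
if not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]:
    pytest.skip("This op has not been implemented on this system.", allow_module_level=True)


class TestExample:
    # Per-test skip: only the decorated case is skipped; the rest of the
    # module still collects and runs.
    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
                        reason="FusedLambBuilder has not been implemented on this system.")
    def test(self):
        ...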
13 changes: 7 additions & 6 deletions tests/unit/elasticity/test_elastic.py
@@ -12,7 +12,7 @@
from deepspeed.ops.op_builder import FusedAdamBuilder, FusedLambBuilder

if not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
pytest.skip("This op has not been implemented on this system.", allow_module_level=True)


@pytest.fixture
@@ -150,7 +150,8 @@ def test_proper_mbsz(ds_config):
class TestNonElasticBatchParams(DistributedTest):
world_size = 2

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test(self):
config_dict = {
"train_batch_size": 2,
@@ -183,10 +184,9 @@ def test(self):
class TestNonElasticBatchParamsWithOverride(DistributedTest):
world_size = 2

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test(self):
if not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]:
pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
config_dict = {
"train_batch_size": 2,
"steps_per_print": 1,
@@ -217,7 +217,8 @@ def test(self):
class TestElasticConfigChanged(DistributedTest):
world_size = 2

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test(self):
config_dict = {
"train_batch_size": 2,
4 changes: 4 additions & 0 deletions tests/unit/ops/accelerators/test_accelerator_backward.py
@@ -9,12 +9,14 @@
import random
import copy
import os
import deepspeed
from torch import nn
from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
from deepspeed.accelerator import get_accelerator
from unit.modeling import BertConfig, BertLayerNorm, BertEncoder as BertEncoderPostln
from unit.modelingpreln import BertEncoder as BertEncoderPreln
from unit.common import DistributedTest, is_rocm_pytorch
from deepspeed.ops.op_builder import TransformerBuilder

if torch.half not in get_accelerator().supported_dtypes():
pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True)
@@ -257,6 +259,8 @@ class TestCUDABackward(DistributedTest):
#This is to flush denorms in forward pass. Please refer to https://github.com/pytorch/pytorch/blob/main/docs/source/notes/numerical_accuracy.rst#reduced-precision-fp16-and-bf16-gemms-and-convolutions-on-amd-instinct-mi200-devices
os.environ['ROCBLAS_INTERNAL_FP16_ALT_IMPL'] = '1'

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[TransformerBuilder.NAME],
reason="TransformerBuilder has not been implemented on this system.")
def test_backward(self, is_preln, use_fp16, batch_size, hidden_size, seq_len, heads, num_layers, atol):
# Only run fp16 test cases on devices with FP16 capability.
if not get_accelerator().is_fp16_supported() and (use_fp16 is True or is_preln is False):
4 changes: 4 additions & 0 deletions tests/unit/ops/accelerators/test_accelerator_forward.py
@@ -8,12 +8,14 @@
import pytest
import random
import copy
import deepspeed
from torch import nn
from unit.modelingpreln import BertEncoder as BertEncoderPreln
from unit.modeling import BertLayerNorm, BertConfig, BertEncoder as BertEncoderPostln
from deepspeed import DeepSpeedTransformerLayer, DeepSpeedTransformerConfig
from deepspeed.accelerator import get_accelerator
from unit.common import DistributedTest
from deepspeed.ops.op_builder import TransformerBuilder

if torch.half not in get_accelerator().supported_dtypes():
pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True)
@@ -260,6 +262,8 @@ def test_forward(self, batch_size, hidden_size, seq_len, heads, num_layers, is_p
class TestCUDAForwardSmallBatchSize(DistributedTest):
world_size = 1

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[TransformerBuilder.NAME],
reason="TransformerBuilder has not been implemented on this system.")
def test_forward_with_small_bsz(self, batch_size, small_bsz, hidden_size, seq_len, heads, num_layers, is_preln,
use_fp16):
# Only run fp16 test cases on devices with FP16 capability.
7 changes: 4 additions & 3 deletions tests/unit/ops/lion/test_cpu_lion.py
@@ -14,9 +14,6 @@
from deepspeed.ops.op_builder import CPULionBuilder
from unit.common import DistributedTest

if not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME]:
pytest.skip("cpu-lion is not compatible", allow_module_level=True)

pytest.cpu_vendor = get_cpu_info()["vendor_id_raw"].lower()


@@ -62,6 +59,8 @@ class TestCPULion(DistributedTest):
set_dist_env = False

@pytest.mark.skipif(not get_accelerator().is_available(), reason="only supported in CUDA environments.")
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME],
reason="CPULionBuilder has not been implemented on this system.")
def test_fused_lion_equal(self, dtype, model_size):
if ("amd" in pytest.cpu_vendor) and (dtype == torch.half):
pytest.skip("cpu-lion with half precision not supported on AMD CPUs")
@@ -84,6 +83,8 @@ def test_fused_lion_equal(self, dtype, model_size):

class TestCPULionGPUError(DistributedTest):

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME],
reason="CPULionBuilder has not been implemented on this system.")
def test_cpu_lion_gpu_error(self):
model_size = 64
from deepspeed.ops.lion import DeepSpeedCPULion
2 changes: 2 additions & 0 deletions tests/unit/ops/lion/test_lion.py
@@ -12,6 +12,7 @@
from unit.common import DistributedTest
from unit.simple_model import SimpleModel
from deepspeed.accelerator import get_accelerator
from deepspeed.ops.op_builder import CPULionBuilder

if torch.half not in get_accelerator().supported_dtypes():
pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True)
@@ -27,6 +28,7 @@ class TestLionConfigs(DistributedTest):
world_size = 1
reuse_dist_env = True

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[CPULionBuilder.NAME], reason="CPULionBuilder has not been implemented on this system.")
def test(self,
optimizer,
zero_offload,
3 changes: 2 additions & 1 deletion tests/unit/runtime/half_precision/test_dynamic_loss_scale.py
@@ -153,7 +153,8 @@ def test_some_overflow(self):
assert optim.cur_iter == expected_iteration


@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
class TestUnfused(DistributedTest):
world_size = 1

14 changes: 10 additions & 4 deletions tests/unit/runtime/half_precision/test_fp16.py
@@ -26,10 +26,11 @@
pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True)


@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
class TestLambFP32GradClip(DistributedTest):
world_size = 2

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test(self):
if not get_accelerator().is_fp16_supported():
pytest.skip("fp16 is not supported")
@@ -59,10 +60,11 @@ def test(self):
model.step()


@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
class TestLambFP16(DistributedTest):
world_size = 2

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test__basic(self):
if not get_accelerator().is_fp16_supported():
pytest.skip("fp16 is not supported")
@@ -90,6 +92,8 @@ def test__basic(self):
model.backward(loss)
model.step()

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test_empty_grad(self):
if not get_accelerator().is_fp16_supported():
pytest.skip("fp16 is not supported")
@@ -236,8 +240,9 @@ def mock_unscale_and_clip_grads(grads_groups_flat, total_norm, apply_scale=True)
engine.backward(loss)
engine.step()

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
@pytest.mark.parametrize("fused_lamb_legacy", [(False), (True)])
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system.")
def test_lamb_gradnorm(self, monkeypatch, fused_lamb_legacy: bool):
if not get_accelerator().is_fp16_supported():
pytest.skip("fp16 is not supported")
@@ -501,7 +506,8 @@ def test_adam_basic(self):
model.backward(loss)
model.step()

@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
reason="FusedLambBuilder has not been implemented on this system")
def test_lamb_basic(self):
if not get_accelerator().is_fp16_supported():
pytest.skip("fp16 is not supported")
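The same deepspeed.ops.__compatible_ops__ mapping that drives these skips can be queried directly to see which of the touched builders are available on a given machine; a small sketch using the builder names from the diff:

import deepspeed
from deepspeed.ops.op_builder import (CPULionBuilder, FusedAdamBuilder, FusedLambBuilder, TransformerBuilder)

for builder in (FusedAdamBuilder, FusedLambBuilder, TransformerBuilder, CPULionBuilder):
    status = "compatible" if deepspeed.ops.__compatible_ops__[builder.NAME] else "not implemented on this system"
    print(f"{builder.NAME}: {status}")

Running the affected test files with pytest -rs then lists each skipped case alongside the reason strings added in this commit.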
