diff --git a/tests/unit/elasticity/test_elastic.py b/tests/unit/elasticity/test_elastic.py
index 63633a51914b..92e1520b2c7c 100644
--- a/tests/unit/elasticity/test_elastic.py
+++ b/tests/unit/elasticity/test_elastic.py
@@ -150,6 +150,7 @@ def test_proper_mbsz(ds_config):
 class TestNonElasticBatchParams(DistributedTest):
     world_size = 2
 
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
     def test(self):
         config_dict = {
             "train_batch_size": 2,
@@ -182,6 +183,7 @@ def test(self):
 class TestNonElasticBatchParamsWithOverride(DistributedTest):
     world_size = 2
 
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
     def test(self):
         if not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]:
             pytest.skip("This op had not been implemented on this system.", allow_module_level=True)
@@ -215,6 +217,7 @@ def test(self):
 class TestElasticConfigChanged(DistributedTest):
     world_size = 2
 
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
     def test(self):
         config_dict = {
             "train_batch_size": 2,
diff --git a/tests/unit/ops/adam/test_cpu_adam.py b/tests/unit/ops/adam/test_cpu_adam.py
index 9a6ff6689446..785cf786acc3 100644
--- a/tests/unit/ops/adam/test_cpu_adam.py
+++ b/tests/unit/ops/adam/test_cpu_adam.py
@@ -11,7 +11,7 @@
 import deepspeed
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.adam import FusedAdam
-from deepspeed.ops.op_builder import CPUAdamBuilder
+from deepspeed.ops.op_builder import CPUAdamBuilder, FusedAdamBuilder
 from unit.common import DistributedTest
 
 if not deepspeed.ops.__compatible_ops__[CPUAdamBuilder.NAME]:
@@ -62,6 +62,8 @@ class TestCPUAdam(DistributedTest):
     set_dist_env = False
 
     @pytest.mark.skipif(not get_accelerator().is_available(), reason="only supported in CUDA environments.")
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME],
+                        reason="FusedAdam is not compatible")
     def test_fused_adam_equal(self, dtype, model_size):
         if ("amd" in pytest.cpu_vendor) and (dtype == torch.half):
             pytest.skip("cpu-adam with half precision not supported on AMD CPUs")
diff --git a/tests/unit/ops/adam/test_hybrid_adam.py b/tests/unit/ops/adam/test_hybrid_adam.py
index c7ef4890b322..9003e02588c1 100644
--- a/tests/unit/ops/adam/test_hybrid_adam.py
+++ b/tests/unit/ops/adam/test_hybrid_adam.py
@@ -12,7 +12,7 @@
 import deepspeed
 from deepspeed.accelerator import get_accelerator
 from deepspeed.ops.adam import FusedAdam, DeepSpeedCPUAdam
-from deepspeed.ops.op_builder import CPUAdamBuilder
+from deepspeed.ops.op_builder import CPUAdamBuilder, FusedAdamBuilder
 from unit.common import DistributedTest
 
 if not deepspeed.ops.__compatible_ops__[CPUAdamBuilder.NAME]:
@@ -43,6 +43,8 @@ class TestHybridAdam(DistributedTest):
     set_dist_env = False
 
     @pytest.mark.skipif(not get_accelerator().is_available(), reason="only supported in CUDA environments.")
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME],
+                        reason="FusedAdam is not compatible")
     def test_hybrid_adam_equal(self, dtype, model_size):
         if ("amd" in pytest.cpu_vendor) and (dtype == torch.half):
             pytest.skip("cpu-adam with half precision not supported on AMD CPUs")
diff --git a/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py b/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py
index f350e08e68a7..38c539c1cc6c 100644
--- a/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py
+++ b/tests/unit/runtime/half_precision/test_dynamic_loss_scale.py
@@ -10,6 +10,7 @@
 import numpy as np
 from unit.common import DistributedTest
 from unit.simple_model import SimpleModel
+from deepspeed.ops.op_builder import FusedLambBuilder
 
 
 def run_model_step(model, gradient_list):
@@ -152,6 +153,7 @@ def test_some_overflow(self):
         assert optim.cur_iter == expected_iteration
 
 
+@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
 class TestUnfused(DistributedTest):
     world_size = 1
 
diff --git a/tests/unit/runtime/half_precision/test_fp16.py b/tests/unit/runtime/half_precision/test_fp16.py
index 5b300053d2a8..cf7a1d8a8183 100644
--- a/tests/unit/runtime/half_precision/test_fp16.py
+++ b/tests/unit/runtime/half_precision/test_fp16.py
@@ -12,7 +12,7 @@
 from unit.simple_model import SimpleModel, SimpleOptimizer, random_dataloader, SimpleMoEModel, sequence_dataloader
 from deepspeed.utils.torch import required_torch_version
 from deepspeed.accelerator import get_accelerator
-from deepspeed.ops.op_builder import CPUAdamBuilder
+from deepspeed.ops.op_builder import CPUAdamBuilder, FusedLambBuilder
 from deepspeed.moe.utils import split_params_into_different_moe_groups_for_optimizer
 
 try:
@@ -22,7 +22,11 @@
     _amp_available = False
 amp_available = pytest.mark.skipif(not _amp_available, reason="apex/amp is not installed")
 
+if torch.half not in get_accelerator().supported_dtypes():
+    pytest.skip(f"fp16 not supported, valid dtype: {get_accelerator().supported_dtypes()}", allow_module_level=True)
+
 
+@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
 class TestLambFP32GradClip(DistributedTest):
     world_size = 2
 
@@ -55,6 +59,7 @@ def test(self):
         model.step()
 
 
+@pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
 class TestLambFP16(DistributedTest):
     world_size = 2
 
@@ -231,6 +236,7 @@ def mock_unscale_and_clip_grads(grads_groups_flat, total_norm, apply_scale=True)
             engine.backward(loss)
             engine.step()
 
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
     @pytest.mark.parametrize("fused_lamb_legacy", [(False), (True)])
     def test_lamb_gradnorm(self, monkeypatch, fused_lamb_legacy: bool):
         if not get_accelerator().is_fp16_supported():
@@ -495,6 +501,7 @@ def test_adam_basic(self):
         model.backward(loss)
         model.step()
 
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME], reason="lamb is not compatible")
     def test_lamb_basic(self):
         if not get_accelerator().is_fp16_supported():
             pytest.skip("fp16 is not supported")
diff --git a/tests/unit/runtime/test_ds_initialize.py b/tests/unit/runtime/test_ds_initialize.py
index 169096a6d4e5..9ff99f169f7a 100644
--- a/tests/unit/runtime/test_ds_initialize.py
+++ b/tests/unit/runtime/test_ds_initialize.py
@@ -20,6 +20,7 @@
 from deepspeed.runtime.utils import see_memory_usage
 from deepspeed.utils.torch import required_torch_version
 from deepspeed.accelerator import get_accelerator
+from deepspeed.ops.op_builder import FusedAdamBuilder
 
 
 @pytest.mark.parametrize('zero_stage', [0, 3])
@@ -67,6 +68,9 @@ def test(self, optimizer_type):
         def _optimizer_callable(params) -> Optimizer:
             return AdamW(params=params)
 
+        if (optimizer_type is None) and (not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME]):
+            pytest.skip("FusedAdam is not compatible")
+
         hidden_dim = 10
         model = SimpleModel(hidden_dim)
@@ -95,6 +99,8 @@ def _optimizer_callable(params) -> Optimizer:
 class TestConfigOptimizer(DistributedTest):
     world_size = 1
 
+    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedAdamBuilder.NAME],
+                        reason="FusedAdam is not compatible")
     def test(self, client_parameters):
         ds_config = {"train_batch_size": 1, "optimizer": {"type": "Adam", "params": {"lr": 0.001}}}
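
The changes above all apply the same compatibility-gated skip pattern: a pytest.mark.skipif decorator keyed on deepspeed.ops.__compatible_ops__[<Builder>.NAME]. Below is a minimal, self-contained sketch of that pattern (not part of the patch); the test name test_requires_fused_lamb is hypothetical and only the deepspeed.ops.__compatible_ops__ / FusedLambBuilder names come from the diff.

    import pytest
    import deepspeed
    from deepspeed.ops.op_builder import FusedLambBuilder

    # Skip at collection time when the FusedLamb op cannot be built/loaded on this system,
    # instead of failing inside the test body.
    @pytest.mark.skipif(not deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME],
                        reason="lamb is not compatible")
    def test_requires_fused_lamb():  # hypothetical example test
        # Only reached on systems where the op reports as compatible.
        assert deepspeed.ops.__compatible_ops__[FusedLambBuilder.NAME]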