Commit 4cfbf9b

add necessary tests and update the documentation
1 parent 99662c5 commit 4cfbf9b

7 files changed, +236 -2 lines changed
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
:orphan:

MUSA training (Advanced)
========================

**Audience:** Users looking to train models on MooreThreads devices using the MUSA accelerator.

.. warning:: This is an :ref:`experimental <versioning:Experimental API>` feature.

----

MUSAAccelerator Overview
------------------------

torch_musa is a Python extension package for PyTorch that enables full utilization of the computing power of
MooreThreads graphics cards. Combined with PyTorch, users can harness the power of MooreThreads graphics cards
through torch_musa.

PyTorch Lightning automatically finds shared weights and ties them after the modules are moved to the
MUSA device under the hood. It ensures that the weights among the modules are shared, not copied
independently.
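
A minimal sketch of such a tie (the module and layer names below are illustrative, not part of this commit):

.. code-block:: python

    import torch.nn as nn
    import pytorch_lightning as L


    class WeightSharingModule(L.LightningModule):
        def __init__(self):
            super().__init__()
            self.layer_1 = nn.Linear(32, 10, bias=False)
            self.layer_2 = nn.Linear(10, 32, bias=False)
            self.layer_3 = nn.Linear(32, 10, bias=False)
            # tie layer_3 to layer_1: both attributes now point at the same
            # Parameter, and Lightning keeps the tie intact after the move to MUSA
            self.layer_3.weight = self.layer_1.weight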
Example:

.. code-block:: python

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    import torch.utils.data as data
    import torchvision as tv

    import pytorch_lightning as L


    # Step 1: Define a LightningModule
    class LitAutoEncoder(L.LightningModule):
        def __init__(self):
            super().__init__()
            self.encoder = nn.Sequential(nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 3))
            self.decoder = nn.Sequential(nn.Linear(3, 128), nn.ReLU(), nn.Linear(128, 28 * 28))

        def forward(self, x):
            # in lightning, forward defines the prediction/inference actions
            embedding = self.encoder(x)
            return embedding

        def training_step(self, batch, batch_idx):
            # training_step defines the train loop. It is independent of forward
            x, _ = batch
            x = x.view(x.size(0), -1)
            z = self.encoder(x)
            x_hat = self.decoder(z)
            loss = F.mse_loss(x_hat, x)
            self.log("train_loss", loss)
            return loss

        def configure_optimizers(self):
            optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
            return optimizer


    def main():
        # -------------------
        # Step 2: Define data
        # -------------------
        dataset = tv.datasets.MNIST(".", download=True, transform=tv.transforms.ToTensor())
        train, val = data.random_split(dataset, [55000, 5000])

        # -------------------
        # Step 3: Train
        # -------------------
        autoencoder = LitAutoEncoder()
        # accelerator="auto" and accelerator="musa" are also supported
        trainer = L.Trainer(accelerator="gpu")
        trainer.fit(autoencoder, data.DataLoader(train), data.DataLoader(val))


    if __name__ == "__main__":
        main()

----

MUSA
----

MUSA is the library that interfaces PyTorch with MooreThreads graphics cards.
For more information, check out `MUSA <https://github.com/MooreThreads/torch_musa>`_.
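
A quick sanity check that the stack is wired up (a sketch, assuming torch_musa is installed; torch_musa
exposes a ``torch.musa`` namespace that mirrors ``torch.cuda``):

.. code-block:: python

    import torch
    import torch_musa  # noqa: F401  (importing registers the "musa" device type)

    print(torch.musa.is_available())
    x = torch.randn(2, 2, device="musa")  # allocate a tensor on the MooreThreads GPU
    print(x.device)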

src/lightning/fabric/utilities/testing/_runif.py

Lines changed: 10 additions & 0 deletions
@@ -23,6 +23,7 @@
 from lightning.fabric.accelerators import XLAAccelerator
 from lightning.fabric.accelerators.cuda import num_cuda_devices
 from lightning.fabric.accelerators.mps import MPSAccelerator
+from lightning.fabric.accelerators.musa import MUSAAccelerator
 from lightning.fabric.strategies.deepspeed import _DEEPSPEED_AVAILABLE
 from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_2_4
 
@@ -36,6 +37,7 @@ def _runif_reasons(
     bf16_cuda: bool = False,
     tpu: bool = False,
     mps: Optional[bool] = None,
+    musa: Optional[bool] = None,
     skip_windows: bool = False,
     standalone: bool = False,
     deepspeed: bool = False,
@@ -53,6 +55,8 @@ def _runif_reasons(
         tpu: Require that TPU is available.
         mps: If True: Require that MPS (Apple Silicon) is available,
             if False: Explicitly Require that MPS is not available
+        musa: If True: Require that MUSA (MooreThreads device) is available,
+            if False: Explicitly Require that MUSA is not available
         skip_windows: Skip for Windows platform.
         standalone: Mark the test as standalone, our CI will run it in a separate process.
             This requires that the ``PL_RUN_STANDALONE_TESTS=1`` environment variable is set.
@@ -107,6 +111,12 @@ def _runif_reasons(
             reasons.append("MPS")
         elif not mps and MPSAccelerator.is_available():
             reasons.append("not MPS")
+
+    if musa is not None:
+        if musa and not MUSAAccelerator.is_available():
+            reasons.append("MUSA")
+        elif not musa and MUSAAccelerator.is_available():
+            reasons.append("not MUSA")
 
     if standalone:
         if os.getenv("PL_RUN_STANDALONE_TESTS", "0") != "1":
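
The new ``musa`` flag is consumed through the ``RunIf`` marker the same way as the existing ``mps`` flag. A sketch of typical usage (the test bodies here are illustrative, not part of this commit):

import torch

from tests_fabric.helpers.runif import RunIf


@RunIf(musa=True)  # skipped unless a MUSA accelerator is available
def test_runs_only_on_musa():
    assert torch.ones(1, device="musa").device.type == "musa"


@RunIf(musa=False)  # skipped when a MUSA accelerator IS available
def test_runs_only_without_musa():
    assert torch.ones(1).device.type == "cpu"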

src/lightning/pytorch/utilities/testing/_runif.py

Lines changed: 4 additions & 0 deletions
@@ -32,6 +32,7 @@ def _runif_reasons(
     bf16_cuda: bool = False,
     tpu: bool = False,
     mps: Optional[bool] = None,
+    musa: Optional[bool] = None,
     skip_windows: bool = False,
     standalone: bool = False,
     deepspeed: bool = False,
@@ -56,6 +57,8 @@ def _runif_reasons(
         tpu: Require that TPU is available.
         mps: If True: Require that MPS (Apple Silicon) is available,
             if False: Explicitly Require that MPS is not available
+        musa: If True: Require that MUSA (MooreThreads device) is available,
+            if False: Explicitly Require that MUSA is not available
         skip_windows: Skip for Windows platform.
         standalone: Mark the test as standalone, our CI will run it in a separate process.
             This requires that the ``PL_RUN_STANDALONE_TESTS=1`` environment variable is set.
@@ -79,6 +82,7 @@ def _runif_reasons(
         bf16_cuda=bf16_cuda,
         tpu=tpu,
         mps=mps,
+        musa=musa,
         skip_windows=skip_windows,
         standalone=standalone,
         deepspeed=deepspeed,
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock

import pytest
import torch

from lightning.fabric.accelerators.musa import MUSAAccelerator
from tests_fabric.helpers.runif import RunIf

_MAYBE_MUSA = "musa" if MUSAAccelerator.is_available() else "cpu"


@mock.patch("lightning.fabric.accelerators.musa.num_musa_devices", return_value=2)
@RunIf(musa=True)
def test_auto_device_count(_):
    assert MUSAAccelerator.auto_device_count() == 2


@RunIf(musa=True)
def test_musa_availability():
    assert MUSAAccelerator.is_available()


def test_init_device_with_wrong_device_type():
    with pytest.raises(ValueError, match="Device should be MUSA"):
        MUSAAccelerator().setup_device(torch.device("cpu"))


@RunIf(musa=True)
@pytest.mark.parametrize(
    ("devices", "expected"),
    [
        ([], []),
        ([1], [torch.device(_MAYBE_MUSA, 1)]),
        ([3, 1], [torch.device(_MAYBE_MUSA, 3), torch.device(_MAYBE_MUSA, 1)]),
    ],
)
def test_get_parallel_devices(devices, expected):
    assert MUSAAccelerator.get_parallel_devices(devices) == expected


@mock.patch("torch.musa.set_device")
@mock.patch("torch.musa.get_device_capability", return_value=(7, 0))
def test_set_musa_device(_, set_device_mock):
    device = torch.device(_MAYBE_MUSA, 1)
    MUSAAccelerator().setup_device(device)
    set_device_mock.assert_called_once_with(device)
tests/tests_fabric/utilities/test_distributed.py

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@
 from lightning_utilities.core.imports import RequirementCache
 
 import lightning.fabric
-from lightning.fabric.accelerators import CPUAccelerator, CUDAAccelerator, MPSAccelerator
+from lightning.fabric.accelerators import CPUAccelerator, CUDAAccelerator, MPSAccelerator, MUSAAccelerator
 from lightning.fabric.plugins.environments import LightningEnvironment
 from lightning.fabric.strategies import DDPStrategy, SingleDeviceStrategy
 from lightning.fabric.strategies.launchers.multiprocessing import _MultiProcessingLauncher
@@ -40,7 +40,7 @@ def spawn_launch(fn, parallel_devices):
     """Copied from ``tests_pytorch.core.test_results.spawn_launch``"""
     # TODO: the accelerator and cluster_environment should be optional to just launch processes, but this requires lazy
     # initialization to be implemented
-    device_to_accelerator = {"cuda": CUDAAccelerator, "mps": MPSAccelerator, "cpu": CPUAccelerator}
+    device_to_accelerator = {"cuda": CUDAAccelerator, "mps": MPSAccelerator, "cpu": CPUAccelerator, "musa": MUSAAccelerator}
     accelerator_cls = device_to_accelerator[parallel_devices[0].type]
     strategy = DDPStrategy(
         accelerator=accelerator_cls(),
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest import mock

import pytest

from lightning.pytorch import Trainer
from lightning.pytorch.accelerators import MUSAAccelerator
from lightning.pytorch.demos.boring_classes import BoringModel
from tests_pytorch.helpers.runif import RunIf


@RunIf(musa=True)
def test_musa_availability():
    assert MUSAAccelerator.is_available()


def test_warning_if_musa_not_used(musa_count_1):
    with pytest.warns(UserWarning, match="GPU available but not used"):
        Trainer(accelerator="cpu")


@RunIf(musa=True)
@pytest.mark.parametrize("accelerator_value", ["musa", MUSAAccelerator()])
def test_trainer_musa_accelerator(accelerator_value):
    trainer = Trainer(accelerator=accelerator_value, devices=1)
    assert isinstance(trainer.accelerator, MUSAAccelerator)
    assert trainer.num_devices == 1


@RunIf(musa=True)
@mock.patch("torch.musa.set_device")
def test_set_musa_device(set_device_mock, tmp_path, monkeypatch):
    monkeypatch.setenv("MUSA_DEVICE_ORDER", "PCI_BUS_ID")  # or any other value as needed
    model = BoringModel()
    trainer = Trainer(
        default_root_dir=tmp_path,
        fast_dev_run=True,
        accelerator="gpu",
        devices=1,
        enable_checkpointing=False,
        enable_model_summary=False,
        enable_progress_bar=False,
    )
    trainer.fit(model)
    set_device_mock.assert_called_once()

tests/tests_pytorch/conftest.py

Lines changed: 24 additions & 0 deletions
@@ -203,6 +203,30 @@ def cuda_count_2(monkeypatch):
 def cuda_count_4(monkeypatch):
     mock_cuda_count(monkeypatch, 4)
 
+
+def mock_musa_count(monkeypatch, n: int) -> None:
+    monkeypatch.setattr(lightning.fabric.accelerators.musa, "num_musa_devices", lambda: n)
+    monkeypatch.setattr(lightning.pytorch.accelerators.musa, "num_musa_devices", lambda: n)
+
+
+@pytest.fixture
+def musa_count_0(monkeypatch):
+    mock_musa_count(monkeypatch, 0)
+
+
+@pytest.fixture
+def musa_count_1(monkeypatch):
+    mock_musa_count(monkeypatch, 1)
+
+
+@pytest.fixture
+def musa_count_2(monkeypatch):
+    mock_musa_count(monkeypatch, 2)
+
+
+@pytest.fixture
+def musa_count_4(monkeypatch):
+    mock_musa_count(monkeypatch, 4)
+
 
 def mock_mps_count(monkeypatch, n: int) -> None:
     monkeypatch.setattr(lightning.fabric.accelerators.mps, "_get_all_available_mps_gpus", lambda: [0] if n > 0 else [])
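
These fixtures let MUSA-dependent behavior be tested without MooreThreads hardware: each one monkeypatches ``num_musa_devices`` in both the fabric and pytorch packages. A hypothetical test using one of them (not part of this commit):

def test_musa_count_is_mocked(musa_count_2):
    from lightning.fabric.accelerators.musa import num_musa_devices

    # the fixture replaced num_musa_devices with `lambda: 2`
    assert num_musa_devices() == 2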
