diff --git a/mmengine/dist/dist.py b/mmengine/dist/dist.py
index 6569d9010c39058aa252dab0bc3e71ff6bee16bb..2ca21ca6030260811750d21dea6216f47cb74114 100644
--- a/mmengine/dist/dist.py
+++ b/mmengine/dist/dist.py
@@ -305,10 +305,16 @@ def sync_random_seed(group: Optional[dist.ProcessGroup] = None) -> int:
     if group is None:
         group = get_default_group()
 
+    group_backend = get_backend(group)
+    is_nccl_backend = group_backend == dist.Backend.NCCL
+    current_device = torch.device('cpu')
+    if is_nccl_backend:
+        current_device = torch.device('cuda', torch.cuda.current_device())
+
     if get_rank(group) == 0:
-        random_num = torch.tensor(seed, dtype=torch.int32)
+        random_num = torch.tensor(seed, dtype=torch.int32).to(current_device)
     else:
-        random_num = torch.tensor(0, dtype=torch.int32)
+        random_num = torch.tensor(0, dtype=torch.int32).to(current_device)
 
     dist.broadcast(random_num, src=0, group=group)
 
diff --git a/tests/test_dist/test_dist.py b/tests/test_dist/test_dist.py
index 78a55c541a6f896591e6b7490012f3324efc5828..3dccb075cc74692ce3aa002b87688a2f2e324cea 100644
--- a/tests/test_dist/test_dist.py
+++ b/tests/test_dist/test_dist.py
@@ -190,8 +190,7 @@ def _test_broadcast_dist(device):
 
 def _test_sync_random_seed_dist(device):
     with patch.object(
-            torch, 'tensor',
-            return_value=torch.tensor(1024).to(device)) as mock_tensor:
+            torch, 'tensor', return_value=torch.tensor(1024)) as mock_tensor:
         output = dist.sync_random_seed()
         assert output == 1024
     mock_tensor.assert_called()