Skip to content

Commit 092c445

Browse files
committed
不支持算子标记
1 parent a7da0c5 commit 092c445

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

vllm_ascend/ops/fused_moe/experts_selector.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,15 @@ def _select_experts_with_fusion_ops(
196196
routed_scaling_factor=1,
197197
eps=float(1e-20))
198198
if not use_grouped_topk and custom_routing_function is None and scoring_func == "softmax":
199-
topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k_softmax(
200-
x=router_logits, finished=None, k=top_k)
201-
topk_ids = topk_ids.to(torch.int32)
199+
if is_A5():
200+
# A5 MOCK
201+
new_shape = router_logits.shape[:-1] + (top_k,)
202+
topk_weights = torch.ones(new_shape, dtype=router_logits.dtype, device=router_logits.device)
203+
topk_ids = torch.zeros(topk_weights.shape, dtype=torch.int32, device=router_logits.device)
204+
else:
205+
topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k_softmax(
206+
x=router_logits, finished=None, k=top_k)
207+
topk_ids = topk_ids.to(torch.int32)
202208
topk_weights = _renormalize_topk_weights(topk_weights, renormalize)
203209

204210
return topk_weights, topk_ids

vllm_ascend/ops/rotary_embedding.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,10 @@ def forward_oot(
419419
if self.cos_sin_cache.dtype != query.dtype: # type: ignore
420420
self.cos_sin_cache = self.cos_sin_cache.to( # type: ignore
421421
query.dtype) # type: ignore
422-
422+
423+
if is_A5():  # A5 does not support the npu_mrope operator; a small-op replacement is needed here
424+
return query, key
425+
423426
query, key = torch_npu.npu_mrope(positions,
424427
query.contiguous(),
425428
key.contiguous(),

vllm_ascend/sample/sampler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _apply_top_k_top_p(
2525
p: torch.Tensor,
2626
) -> torch.Tensor:
2727
# npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P
28-
if not is_310p() and p is not None and k is not None and 1 <= int(
28+
if not is_310p() and not is_A5() and p is not None and k is not None and 1 <= int(
2929
k.max()) <= 1024:
3030
# npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
3131
return torch_npu.npu_top_k_top_p(logits, p, k)

0 commit comments

Comments
 (0)