Skip to content

Commit 092c445

Browse files
committed
不支持算子标记
1 parent a7da0c5 commit 092c445

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

vllm_ascend/ops/fused_moe/experts_selector.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,15 @@ def _select_experts_with_fusion_ops(
196196
routed_scaling_factor=1,
197197
eps=float(1e-20))
198198
if not use_grouped_topk and custom_routing_function is None and scoring_func == "softmax":
199-
topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k_softmax(
200-
x=router_logits, finished=None, k=top_k)
201-
topk_ids = topk_ids.to(torch.int32)
199+
if is_A5():
200+
# A5 MOCK
201+
new_shape = router_logits.shape[:-1] + (top_k,)
202+
topk_weights = torch.ones(new_shape, dtype=router_logits.dtype, device=router_logits.device)
203+
topk_ids = torch.zeros(topk_weights.shape, dtype=torch.int32, device=router_logits.device)
204+
else:
205+
topk_weights, topk_ids, _ = torch_npu.npu_moe_gating_top_k_softmax(
206+
x=router_logits, finished=None, k=top_k)
207+
topk_ids = topk_ids.to(torch.int32)
202208
topk_weights = _renormalize_topk_weights(topk_weights, renormalize)
203209

204210
return topk_weights, topk_ids

vllm_ascend/ops/rotary_embedding.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,10 @@ def forward_oot(
419419
if self.cos_sin_cache.dtype != query.dtype: # type: ignore
420420
self.cos_sin_cache = self.cos_sin_cache.to( # type: ignore
421421
query.dtype) # type: ignore
422-
422+
423+
if is_A5():  # A5 does not support the npu_mrope operator; a small-op replacement is needed here
424+
return query, key
425+
423426
query, key = torch_npu.npu_mrope(positions,
424427
query.contiguous(),
425428
key.contiguous(),

vllm_ascend/sample/sampler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def _apply_top_k_top_p(
2525
p: torch.Tensor,
2626
) -> torch.Tensor:
2727
# npu_top_k_top_p uses the operator aclnnApplyTopKTopP, but aclnnApplyTopKTopP currently does not support 310P
28-
if not is_310p() and p is not None and k is not None and 1 <= int(
28+
if not is_310p() and not is_A5() and p is not None and k is not None and 1 <= int(
2929
k.max()) <= 1024:
3030
# npu_top_k_top_p's parameter order is (logits, p, k), not (logits, k, p)
3131
return torch_npu.npu_top_k_top_p(logits, p, k)

0 commit comments

Comments
 (0)