
Commit 8649bce

Added fp16 vector handling to LowerFLDEXP.

1 parent 484cea4 commit 8649bce

File tree

3 files changed: +193, -1410 lines changed
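
For readers skimming the diffs: ISD::FLDEXP is the SelectionDAG form of llvm.ldexp (C ldexp), so each lane must compute x * 2^exp, which is what the AVX-512 vscalef family evaluates for integral exponents. The C++ snippet below is a minimal reference model of that per-lane contract; it is an illustration only, not code from this commit, and the sample values echo the 4xfloat test further down.

#include <cmath>
#include <cstdio>
#include <vector>

// Reference model only: what any correct FLDEXP lowering must produce per lane.
std::vector<float> ldexpVector(const std::vector<float> &X,
                               const std::vector<int> &Exp) {
  std::vector<float> R(X.size());
  for (size_t I = 0; I < X.size(); ++I)
    R[I] = std::ldexp(X[I], Exp[I]); // scale X[I] by 2^Exp[I]
  return R;
}

int main() {
  // Mirrors fmul_pow2_ldexp_4xfloat: ldexp(9.0, i) == 9.0 * 2^i per element.
  for (float V : ldexpVector({9.0f, 9.0f, 9.0f, 9.0f}, {1, 2, 3, 4}))
    std::printf("%g ", V); // prints: 18 36 72 144
  std::printf("\n");
  return 0;
}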

llvm/lib/Target/X86/X86ISelLowering.cpp
Lines changed: 78 additions & 44 deletions
@@ -1830,15 +1830,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FCANONICALIZE, VT, Custom);
     }

-    for (MVT VT : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f32,
-                   MVT::v4f64, MVT::v16f32, MVT::v8f64})
-      setOperationAction(ISD::FLDEXP, VT, Custom);
-
-    if (Subtarget.hasFP16()) {
-      for (MVT VT : {MVT::f16, MVT::v8f16, MVT::v16f16, MVT::v32f16})
-        setOperationAction(ISD::FLDEXP, VT, Custom);
-    }
-
     setOperationAction(ISD::LRINT, MVT::v16f32,
                        Subtarget.hasDQI() ? Legal : Custom);
     setOperationAction(ISD::LRINT, MVT::v8f64,

@@ -2111,6 +2102,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // These operations are handled on non-VLX by artificially widening in
     // isel patterns.

+    for (MVT VT : {MVT::f16, MVT::f32, MVT::f64, MVT::v8f16, MVT::v4f32,
+                   MVT::v2f64, MVT::v16f16, MVT::v8f32, MVT::v4f64, MVT::v32f16,
+                   MVT::v16f32, MVT::v8f64})
+      setOperationAction(ISD::FLDEXP, VT, Custom);
+
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);

@@ -19160,72 +19156,110 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
 }

 static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
-                           SelectionDAG &DAG) {
+                           SelectionDAG &DAG) {
   SDLoc DL(Op);
   SDValue X = Op.getOperand(0);
   MVT XTy = X.getSimpleValueType();
   SDValue Exp = Op.getOperand(1);
-  MVT XVT, ExpVT;

   switch (XTy.SimpleTy) {
   default:
     return SDValue();
   case MVT::f16:
-    if (Subtarget.hasFP16()) {
-      XVT = MVT::v8f16;
-      ExpVT = XVT;
-      break;
+    if (!Subtarget.hasFP16()) {
+      X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
     }
-    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
-    [[fallthrough]];
-  case MVT::f32:
-    XVT = MVT::v4f32;
-    ExpVT = MVT::v4f32;
     break;
+  case MVT::f32:
   case MVT::f64:
-    XVT = MVT::v2f64;
-    ExpVT = MVT::v2f64;
     break;
   case MVT::v4f32:
   case MVT::v2f64:
-    if (!Subtarget.hasVLX()) {
-      XVT = XTy == MVT::v4f32 ? MVT::v16f32 : MVT::v8f64;
-      ExpVT = XVT;
+    if (Subtarget.hasVLX()) {
+      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+      return DAG.getNode(X86ISD::SCALEFS, DL, XTy, X, Exp, X);
+    }
+    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
+    break;
+  case MVT::v8f16:
+    if (Subtarget.hasFP16()) {
+      if (Subtarget.hasVLX()) {
+        Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+        return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+      }
       break;
     }
-    [[fallthrough]];
+    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::v8f32, X);
+    Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v8i32, Exp);
+    break;
   case MVT::v8f32:
   case MVT::v4f64:
-    if (!Subtarget.hasVLX()) {
-      XVT = XTy == MVT::v8f32 ? MVT::v16f32 : MVT::v8f64;
-      ExpVT = XVT;
+    if (Subtarget.hasVLX()) {
+      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+      return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+    }
+    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
+    break;
+  case MVT::v16f16:
+    if (Subtarget.hasFP16()) {
+      if (Subtarget.hasVLX()) {
+        Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+        return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+      }
       break;
     }
-    [[fallthrough]];
+    X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::v16f32, X);
+    Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v16i32, Exp);
+    break;
   case MVT::v16f32:
   case MVT::v8f64:
     Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
     return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+  case MVT::v32f16:
+    if (Subtarget.hasFP16()) {
+      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
+      return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
+    }
+    SDValue Low = DAG.getExtractSubvector(DL, MVT::v16f16, X, 0);
+    SDValue High = DAG.getExtractSubvector(DL, MVT::v16f16, X, 16);
+    SDValue ExpLow = DAG.getExtractSubvector(DL, MVT::v16i16, Exp, 0);
+    SDValue ExpHigh = DAG.getExtractSubvector(DL, MVT::v16i16, Exp, 16);
+
+    SDValue OpLow = DAG.getNode(ISD::FLDEXP, DL, MVT::v16f16, Low, ExpLow);
+    SDValue OpHigh = DAG.getNode(ISD::FLDEXP, DL, MVT::v16f16, High, ExpHigh);
+    SDValue ScaledLow = LowerFLDEXP(OpLow, Subtarget, DAG);
+    SDValue ScaledHigh = LowerFLDEXP(OpHigh, Subtarget, DAG);
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v32f16, ScaledLow,
+                       ScaledHigh);
   }

-  Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
   if (XTy.isVector()) {
-    SDValue WideX =
-        DAG.getInsertSubvector(DL, DAG.getUNDEF(XVT), X, 0);
-    SDValue WideExp =
-        DAG.getInsertSubvector(DL, DAG.getUNDEF(ExpVT), Exp, 0);
-    SDValue Scalef =
-        DAG.getNode(X86ISD::SCALEF, DL, XVT, WideX, WideExp, WideX);
-    SDValue Final = DAG.getExtractSubvector(DL, XTy, Scalef, 0);
-    return Final;
+    SDValue WideX = widenSubVector(X, true, Subtarget, DAG, DL, 512);
+    SDValue WideExp = widenSubVector(Exp, true, Subtarget, DAG, DL, 512);
+    if (XTy.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
+      WideExp =
+          DAG.getNode(ISD::SINT_TO_FP, DL, WideX.getSimpleValueType(), WideExp);
+      SDValue Scalef = DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(),
+                                   WideX, WideExp, WideX);
+      MVT ExtractVT = XTy == MVT::v8f16 ? MVT::v8f32 : MVT::v16f32;
+      SDValue LowHalf = DAG.getExtractSubvector(DL, ExtractVT, Scalef, 0);
+      return DAG.getNode(ISD::FP_ROUND, DL, XTy, LowHalf,
+                         DAG.getTargetConstant(0, DL, MVT::i32));
+    }
+    SDValue Scalef = DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(),
+                                 WideX, WideExp, WideX);
+    return DAG.getExtractSubvector(DL, XTy, Scalef, 0);
   } else {
-    SDValue VX = DAG.getInsertVectorElt(DL, DAG.getUNDEF(XVT), X, 0);
-    SDValue VExp = DAG.getInsertVectorElt(DL, DAG.getUNDEF(ExpVT), Exp, 0);
-    SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, XVT, VX, VExp, VX);
+    MVT VT = MVT::getVectorVT(X.getSimpleValueType(),
+                              128 / X.getSimpleValueType().getSizeInBits());
+    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
+    SDValue VX = DAG.getInsertVectorElt(DL, DAG.getUNDEF(VT), X, 0);
+    SDValue VExp = DAG.getInsertVectorElt(DL, DAG.getUNDEF(VT), Exp, 0);
+    SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, VT, VX, VExp, VX);
     SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
     if (X.getValueType() != XTy)
       Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
-                          DAG.getIntPtrConstant(1, SDLoc(Op)));
+                          DAG.getTargetConstant(0, DL, MVT::i32));
     return Final;
   }
 }

@@ -33763,7 +33797,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case X86ISD::CVTPS2PH:   return LowerCVTPS2PH(Op, DAG);
   case ISD::PREFETCH:      return LowerPREFETCH(Op, Subtarget, DAG);
   case ISD::FLDEXP:        return LowerFLDEXP(Op, Subtarget, DAG);
-  // clang-format on
+  // clang-format on
   }
 }
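
Summarizing the rewritten LowerFLDEXP above: with AVX512FP16 (plus VLX for the 128/256-bit types) the fp16 vector cases go straight to SCALEF; without it, the halves are FP_EXTENDed to f32, the i16 exponents are sign-extended, the scaling happens in f32, and the result is rounded back to f16, with v32f16 split into two v16f16 halves that are lowered recursively. The C++ sketch below models the numeric effect of that no-FP16 path per lane. It is an illustration under the assumption of a compiler that provides _Float16, not the SelectionDAG code itself.

#include <cmath>

// Rough per-lane model of the v8f16 lowering when AVX512FP16 is unavailable:
// FP_EXTEND to f32, scale in f32, FP_ROUND back to f16.
void ldexpV8F16NoFP16(const _Float16 X[8], const short Exp[8],
                      _Float16 Out[8]) {
  for (int I = 0; I < 8; ++I) {
    float Wide = static_cast<float>(X[I]);  // ISD::FP_EXTEND f16 -> f32
    int E = Exp[I];                         // ISD::SIGN_EXTEND i16 -> i32
    float Scaled = std::ldexp(Wide, E);     // X86ISD::SCALEF on the wide type
    Out[I] = static_cast<_Float16>(Scaled); // ISD::FP_ROUND f32 -> f16
  }
}

Scaling by a power of two leaves the significand unchanged, so, barring overflow or underflow, the widen-scale-round round trip is intended to produce the same result as scaling directly in half precision.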

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
Lines changed: 10 additions & 106 deletions
@@ -125,7 +125,7 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
 ; CHECK-SKX: # %bb.0:
 ; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
 ; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
-; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
+; CHECK-SKX-NEXT: vscalefss %xmm0, %xmm1, %xmm0
 ; CHECK-SKX-NEXT: retq
   %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
   ret <4 x float> %r

@@ -576,109 +576,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
 ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
 ; CHECK-AVX2-NEXT: retq
 ;
-; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
-; CHECK-ONLY-AVX512F: # %bb.0:
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2
-; CHECK-ONLY-AVX512F-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm2, %xmm1, %xmm2
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm2, %xmm2
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm3, %xmm1, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm3, %xmm3
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-ONLY-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm4, %xmm1, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm4, %xmm4
-; CHECK-ONLY-AVX512F-NEXT: vmovd %xmm0, %eax
-; CHECK-ONLY-AVX512F-NEXT: cwtl
-; CHECK-ONLY-AVX512F-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
-; CHECK-ONLY-AVX512F-NEXT: vscalefss %xmm0, %xmm1, %xmm0
-; CHECK-ONLY-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-ONLY-AVX512F-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; CHECK-ONLY-AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; CHECK-ONLY-AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; CHECK-ONLY-AVX512F-NEXT: retq
-;
-; CHECK-SKX-LABEL: fmul_pow2_ldexp_8xhalf:
-; CHECK-SKX: # %bb.0:
-; CHECK-SKX-NEXT: vpextrw $7, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
-; CHECK-SKX-NEXT: vmovss {{.*#+}} xmm2 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
-; CHECK-SKX-NEXT: vscalefss %xmm1, %xmm2, %xmm1
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; CHECK-SKX-NEXT: vpextrw $6, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
-; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
-; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
-; CHECK-SKX-NEXT: vpextrw $5, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
-; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
-; CHECK-SKX-NEXT: vpextrw $4, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
-; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
-; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; CHECK-SKX-NEXT: vpextrw $3, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm3
-; CHECK-SKX-NEXT: vscalefss %xmm3, %xmm2, %xmm3
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm3, %xmm3
-; CHECK-SKX-NEXT: vpextrw $2, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
-; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
-; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; CHECK-SKX-NEXT: vpextrw $1, %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm4
-; CHECK-SKX-NEXT: vscalefss %xmm4, %xmm2, %xmm4
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm4, %xmm4
-; CHECK-SKX-NEXT: vmovd %xmm0, %eax
-; CHECK-SKX-NEXT: cwtl
-; CHECK-SKX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
-; CHECK-SKX-NEXT: vscalefss %xmm0, %xmm2, %xmm0
-; CHECK-SKX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
-; CHECK-SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; CHECK-SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-SKX-NEXT: retq
+; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
+; CHECK-AVX512F: # %bb.0:
+; CHECK-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
+; CHECK-AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
+; CHECK-AVX512F-NEXT: vcvtdq2ps %zmm0, %zmm0
+; CHECK-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
+; CHECK-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; CHECK-AVX512F-NEXT: vzeroupper
+; CHECK-AVX512F-NEXT: retq
   %r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
   ret <8 x half> %r
 }

@@ -1812,5 +1718,3 @@ define x86_fp80 @pr128528(i1 %cond) {
   %mul = fmul x86_fp80 %conv, 0xK4007D055555555555800
   ret x86_fp80 %mul
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-AVX512F: {{.*}}
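
One detail worth noting in the regenerated checks above: the IR constant half 0xH7000 decodes to 2^13 = 8192.0, which is why the new CHECK-AVX512F body broadcasts 8.192E+3 before vscalefps. The small standalone helper below (an illustration, not part of the test file) confirms the binary16 decoding.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decode an IEEE-754 binary16 bit pattern: 1 sign bit, 5 exponent bits
// (bias 15), 10 mantissa bits.
static double decodeHalf(uint16_t Bits) {
  int Sign = (Bits >> 15) & 0x1;
  int Exp = (Bits >> 10) & 0x1F;
  int Mant = Bits & 0x3FF;
  double Val;
  if (Exp == 0)                              // zero or subnormal
    Val = std::ldexp(Mant / 1024.0, -14);
  else if (Exp == 31)                        // infinity or NaN
    Val = Mant ? NAN : INFINITY;
  else                                       // normal: 1.mant * 2^(exp-15)
    Val = std::ldexp(1.0 + Mant / 1024.0, Exp - 15);
  return Sign ? -Val : Val;
}

int main() {
  std::printf("%g\n", decodeHalf(0x7000)); // prints 8192, i.e. 8.192E+3
  return 0;
}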
