diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt index dab8dbbe5611..c4fdab93fe2f 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt @@ -13,6 +13,7 @@ add_clang_library(TargetLowering TargetInfo.cpp TargetLoweringInfo.cpp Targets/AArch64.cpp + Targets/AMDGPU.cpp Targets/NVPTX.cpp Targets/SPIR.cpp Targets/X86.cpp diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp index d040c87282f5..6578e9df8b42 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerModule.cpp @@ -71,6 +71,8 @@ createTargetLoweringInfo(LowerModule &LM) { return createAArch64TargetLoweringInfo(LM, Kind); } + case llvm::Triple::amdgcn: + return createAMDGPUTargetLoweringInfo(LM); case llvm::Triple::x86_64: { switch (Triple.getOS()) { case llvm::Triple::Win32: diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h index a03cf711babc..3a6ba03a2aa2 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/TargetInfo.h @@ -33,6 +33,9 @@ createSPIRVTargetLoweringInfo(LowerModule &CGM); std::unique_ptr createNVPTXTargetLoweringInfo(LowerModule &CGM); +std::unique_ptr +createAMDGPUTargetLoweringInfo(LowerModule &CGM); + } // namespace cir #endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp new file mode 100644 index 000000000000..7432972889ed --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/AMDGPU.cpp @@ -0,0 +1,71 @@ +//===- AMDGPU.cpp - TargetInfo for AMDGPU ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ABIInfoImpl.h" +#include "LowerFunctionInfo.h" +#include "LowerTypes.h" +#include "TargetInfo.h" +#include "TargetLoweringInfo.h" +#include "clang/CIR/ABIArgInfo.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" + +using ABIArgInfo = cir::ABIArgInfo; +using MissingFeature = cir::MissingFeatures; + +namespace cir { + +//===----------------------------------------------------------------------===// +// AMDGPU ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AMDGPUABIInfo : public ABIInfo { +public: + AMDGPUABIInfo(LowerTypes <) : ABIInfo(lt) {} + +private: + void computeInfo(LowerFunctionInfo &fi) const override { + llvm_unreachable("NYI"); + } +}; + +class AMDGPUTargetLoweringInfo : public TargetLoweringInfo { +public: + AMDGPUTargetLoweringInfo(LowerTypes <) + : TargetLoweringInfo(std::make_unique(lt)) {} + // Taken from here: https://llvm.org/docs/AMDGPUUsage.html#address-spaces + unsigned getTargetAddrSpaceFromCIRAddrSpace( + cir::AddressSpace addrSpace) const override { + switch (addrSpace) { + case cir::AddressSpace::OffloadPrivate: + return 5; + case cir::AddressSpace::OffloadLocal: + return 3; + case cir::AddressSpace::OffloadGlobal: + return 1; + case cir::AddressSpace::OffloadConstant: + return 4; + case cir::AddressSpace::OffloadGeneric: + return 0; + default: + cir_cconv_unreachable("Unknown CIR address space for this target"); + } + } +}; + +} // namespace +std::unique_ptr +createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) { + return std::make_unique(lowerModule.getTypes()); +} + +} // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 9308a278e107..7b1c7a2ec2ca 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1749,19 +1749,31 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite( convertTypeForMemory(*getTypeConverter(), dataLayout, op.getAllocaType()); auto resultTy = getTypeConverter()->convertType(op.getType()); // Verification between the CIR alloca AS and the one from data layout. - { + auto allocaAS = [&]() { auto resPtrTy = mlir::cast(resultTy); auto dlAllocaASAttr = mlir::cast_if_present( dataLayout.getAllocaMemorySpace()); - // Absence means 0 // TODO: The query for the alloca AS should be done through CIRDataLayout // instead to reuse the logic of interpret null attr as 0. - auto dlAllocaAS = dlAllocaASAttr ? dlAllocaASAttr.getInt() : 0; - if (dlAllocaAS != resPtrTy.getAddressSpace()) { - return op.emitError() << "alloca address space doesn't match the one " - "from the target data layout: " - << dlAllocaAS; - } + if (!dlAllocaASAttr) + return 0u; + return static_cast(dlAllocaASAttr.getValue().getZExtValue()); + }(); + + auto resPtrTy = + mlir::LLVM::LLVMPointerType::get(elementTy.getContext(), allocaAS); + + auto llvmAlloca = rewriter.create( + op.getLoc(), resPtrTy, elementTy, size, op.getAlignmentAttr().getInt()); + + auto expectedPtrTy = mlir::cast( + getTypeConverter()->convertType(op.getResult().getType())); + + mlir::Value finalPtr = llvmAlloca.getResult(); + + if (expectedPtrTy.getAddressSpace() != allocaAS) { + finalPtr = rewriter.create( + op.getLoc(), expectedPtrTy, finalPtr); } // If there are annotations available, copy them out before we destroy the @@ -1770,11 +1782,10 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite( if (op.getAnnotations()) annotations = op.getAnnotationsAttr(); - auto llvmAlloca = rewriter.replaceOpWithNewOp( - op, resultTy, elementTy, size, op.getAlignmentAttr().getInt()); - if (annotations && !annotations.empty()) buildAllocaAnnotations(llvmAlloca, adaptor, rewriter, annotations); + rewriter.replaceOp(op, finalPtr); + return mlir::success(); } diff --git a/clang/test/CIR/CodeGen/HIP/address-spaces.cpp b/clang/test/CIR/CodeGen/HIP/address-spaces.cpp new file mode 100644 index 000000000000..3ac0c30e1fe1 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/address-spaces.cpp @@ -0,0 +1,19 @@ +#include "cuda.h" + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -fhip-new-launch-api \ +// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.ll +// RUN: FileCheck --check-prefix=CIR --input-file=%t.ll %s + +__global__ void fn() { + int i = 0; + __shared__ int j; + j = i; +} + +// CIR: cir.global "private" internal dso_local addrspace(offload_local) @_ZZ2fnvE1j : !s32i +// CIR: cir.func dso_local @_Z2fnv +// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr, ["i", init] +// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr +// CIR: [[Tmp:%[0-9]+]] = cir.load {{.*}} [[Local]] : !cir.ptr, !s32i +// CIR: cir.store{{.*}} [[Tmp]], [[Shared]] : !s32i, !cir.ptr diff --git a/clang/test/CIR/CodeGen/HIP/addrspace-lowering.cpp b/clang/test/CIR/CodeGen/HIP/addrspace-lowering.cpp new file mode 100644 index 000000000000..e45810e39bf1 --- /dev/null +++ b/clang/test/CIR/CodeGen/HIP/addrspace-lowering.cpp @@ -0,0 +1,25 @@ +#include "cuda.h" + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -fhip-new-launch-api \ +// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -fhip-new-launch-api \ +// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s + + +__shared__ int a; +// LLVM-DEVICE: @a = addrspace(3) global i32 undef, align 4 +// OGCG-DEVICE: @a = addrspace(3) global i32 undef, align 4 + +__device__ int b; +// LLVM-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4 +// OGCG-DEVICE: @b = addrspace(1) externally_initialized global i32 0, align 4 + +__constant__ int c; +// LLVM-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4 +// OGCG-DEVICE: @c = addrspace(4) externally_initialized constant i32 0, align 4 + diff --git a/clang/test/CIR/CodeGen/HIP/simple.cpp b/clang/test/CIR/CodeGen/HIP/simple.cpp index aaed4e31e192..1f1049856d8d 100644 --- a/clang/test/CIR/CodeGen/HIP/simple.cpp +++ b/clang/test/CIR/CodeGen/HIP/simple.cpp @@ -1,20 +1,36 @@ -#include "../Inputs/cuda.h" +#include "cuda.h" // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ // RUN: -x hip -fhip-new-launch-api \ -// RUN: -emit-cir %s -o %t.cir +// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir // RUN: FileCheck --check-prefix=CIR-HOST --input-file=%t.cir %s // RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ // RUN: -fcuda-is-device -fhip-new-launch-api \ -// RUN: -emit-cir %s -o %t.cir +// RUN: -I%S/../Inputs/ -emit-cir %s -o %t.cir // RUN: FileCheck --check-prefix=CIR-DEVICE --input-file=%t.cir %s // // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir \ // RUN: -x hip -emit-llvm -fhip-new-launch-api \ -// RUN: %s -o %t.ll +// RUN: -I%S/../Inputs/ %s -o %t.ll // RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \ +// RUN: -fcuda-is-device -fhip-new-launch-api \ +// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu \ +// RUN: -x hip -emit-llvm -fhip-new-launch-api \ +// RUN: -I%S/../Inputs/ %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-HOST --input-file=%t.ll %s + +// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip \ +// RUN: -fcuda-is-device -fhip-new-launch-api \ +// RUN: -I%S/../Inputs/ -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG-DEVICE --input-file=%t.ll %s + + // Attribute for global_fn // CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}} @@ -29,6 +45,8 @@ __device__ void device_fn(int* a, double b, float c) {} __global__ void global_fn(int a) {} // CIR-DEVICE: @_Z9global_fni +// LLVM-DEVICE: define dso_local void @_Z9global_fni +// OGCG-DEVICE: define dso_local amdgpu_kernel void @_Z9global_fni // CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]]) // CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args" @@ -43,6 +61,12 @@ __global__ void global_fn(int a) {} // LLVM-HOST: %[[#GEP2:]] = getelementptr [1 x ptr], ptr %[[#KernelArgs]], i32 0, i64 0 // LLVM-HOST: call i32 @__hipPopCallConfiguration // LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni +// +// OGCG-HOST: define dso_local void @_Z24__device_stub__global_fni +// OGCG-HOST: %kernel_args = alloca ptr, i64 1, align 16 +// OGCG-HOST: getelementptr ptr, ptr %kernel_args, i32 0 +// OGCG-HOST: call i32 @__hipPopCallConfiguration +// OGCG-HOST: %call = call noundef i32 @hipLaunchKernel(ptr noundef @_Z9global_fni int main() { @@ -78,3 +102,18 @@ int main() { // LLVM-HOST: %[[#]] = load i32 // LLVM-HOST: ret i32 +// OGCG-HOST: define dso_local noundef i32 @main +// OGCG-HOST: %agg.tmp = alloca %struct.dim3, align 4 +// OGCG-HOST: %agg.tmp1 = alloca %struct.dim3, align 4 +// OGCG-HOST: call void @_ZN4dim3C1Ejjj +// OGCG-HOST: call void @_ZN4dim3C1Ejjj +// OGCG-HOST: %call = call i32 @__hipPushCallConfiguration +// OGCG-HOST: %tobool = icmp ne i32 %call, 0 +// OGCG-HOST: br i1 %tobool, label %kcall.end, label %kcall.configok +// OGCG-HOST: kcall.configok: +// OGCG-HOST: call void @_Z24__device_stub__global_fni(i32 noundef 1) +// OGCG-HOST: br label %kcall.end +// OGCG-HOST: kcall.end: +// OGCG-HOST: %{{[0-9]+}} = load i32, ptr %retval, align 4 +// OGCG-HOST: ret i32 %8 +