Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ add_clang_library(TargetLowering
TargetInfo.cpp
TargetLoweringInfo.cpp
Targets/AArch64.cpp
Targets/AMDGPU.cpp
Targets/NVPTX.cpp
Targets/SPIR.cpp
Targets/X86.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ createTargetLoweringInfo(LowerModule &LM) {

return createAArch64TargetLoweringInfo(LM, Kind);
}
case llvm::Triple::amdgcn:
return createAMDGPUTargetLoweringInfo(LM);
case llvm::Triple::x86_64: {
switch (Triple.getOS()) {
case llvm::Triple::Win32:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ createSPIRVTargetLoweringInfo(LowerModule &CGM);
std::unique_ptr<TargetLoweringInfo>
createNVPTXTargetLoweringInfo(LowerModule &CGM);

std::unique_ptr<TargetLoweringInfo>
createAMDGPUTargetLoweringInfo(LowerModule &CGM);

} // namespace cir

#endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_TARGETINFO_H
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
//===- AMDGPU.cpp - TargetInfo for AMDGPU
//-----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ABIInfoImpl.h"
#include "LowerFunctionInfo.h"
#include "LowerTypes.h"
#include "TargetInfo.h"
#include "TargetLoweringInfo.h"
#include "clang/CIR/ABIArgInfo.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"

using ABIArgInfo = cir::ABIArgInfo;
using MissingFeature = cir::MissingFeatures;

namespace cir {

//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUABIInfo : public ABIInfo {
public:
AMDGPUABIInfo(LowerTypes &lt) : ABIInfo(lt) {}

private:
void computeInfo(LowerFunctionInfo &fi) const override {
llvm_unreachable("NYI");
}
};

class AMDGPUTargetLoweringInfo : public TargetLoweringInfo {
public:
AMDGPUTargetLoweringInfo(LowerTypes &lt)
: TargetLoweringInfo(std::make_unique<AMDGPUABIInfo>(lt)) {}
// Taken from here: https://llvm.org/docs/AMDGPUUsage.html#address-spaces
unsigned getTargetAddrSpaceFromCIRAddrSpace(
cir::AddressSpace addrSpace) const override {
switch (addrSpace) {
case cir::AddressSpace::OffloadPrivate:
return 5;
case cir::AddressSpace::OffloadLocal:
return 3;
case cir::AddressSpace::OffloadGlobal:
return 1;
case cir::AddressSpace::OffloadConstant:
return 4;
case cir::AddressSpace::OffloadGeneric:
return 0;
default:
cir_cconv_unreachable("Unknown CIR address space for this target");
}
}
};

} // namespace
std::unique_ptr<TargetLoweringInfo>
createAMDGPUTargetLoweringInfo(LowerModule &lowerModule) {
return std::make_unique<AMDGPUTargetLoweringInfo>(lowerModule.getTypes());
}

} // namespace cir
33 changes: 22 additions & 11 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1749,19 +1749,31 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
convertTypeForMemory(*getTypeConverter(), dataLayout, op.getAllocaType());
auto resultTy = getTypeConverter()->convertType(op.getType());
// Verification between the CIR alloca AS and the one from data layout.
{
auto allocaAS = [&]() {
auto resPtrTy = mlir::cast<mlir::LLVM::LLVMPointerType>(resultTy);
auto dlAllocaASAttr = mlir::cast_if_present<mlir::IntegerAttr>(
dataLayout.getAllocaMemorySpace());
// Absence means 0
// TODO: The query for the alloca AS should be done through CIRDataLayout
// instead to reuse the logic of interpret null attr as 0.
auto dlAllocaAS = dlAllocaASAttr ? dlAllocaASAttr.getInt() : 0;
if (dlAllocaAS != resPtrTy.getAddressSpace()) {
return op.emitError() << "alloca address space doesn't match the one "
"from the target data layout: "
<< dlAllocaAS;
}
if (!dlAllocaASAttr)
return 0u;
return static_cast<unsigned>(dlAllocaASAttr.getValue().getZExtValue());
}();

auto resPtrTy =
mlir::LLVM::LLVMPointerType::get(elementTy.getContext(), allocaAS);

auto llvmAlloca = rewriter.create<mlir::LLVM::AllocaOp>(
op.getLoc(), resPtrTy, elementTy, size, op.getAlignmentAttr().getInt());

auto expectedPtrTy = mlir::cast<mlir::LLVM::LLVMPointerType>(
getTypeConverter()->convertType(op.getResult().getType()));

mlir::Value finalPtr = llvmAlloca.getResult();

if (expectedPtrTy.getAddressSpace() != allocaAS) {
finalPtr = rewriter.create<mlir::LLVM::AddrSpaceCastOp>(
op.getLoc(), expectedPtrTy, finalPtr);
}

// If there are annotations available, copy them out before we destroy the
Expand All @@ -1770,11 +1782,10 @@ mlir::LogicalResult CIRToLLVMAllocaOpLowering::matchAndRewrite(
if (op.getAnnotations())
annotations = op.getAnnotationsAttr();

auto llvmAlloca = rewriter.replaceOpWithNewOp<mlir::LLVM::AllocaOp>(
op, resultTy, elementTy, size, op.getAlignmentAttr().getInt());

if (annotations && !annotations.empty())
buildAllocaAnnotations(llvmAlloca, adaptor, rewriter, annotations);
rewriter.replaceOp(op, finalPtr);

return mlir::success();
}

Expand Down
19 changes: 19 additions & 0 deletions clang/test/CIR/CodeGen/HIP/address-spaces.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "../Inputs/cuda.h"

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -emit-cir %s -o %t.ll
// RUN: FileCheck --check-prefix=CIR --input-file=%t.ll %s

__global__ void fn() {
int i = 0;
__shared__ int j;
j = i;
}

// CIR: cir.global "private" internal dso_local addrspace(offload_local) @_ZZ2fnvE1j : !s32i
// CIR: cir.func dso_local @_Z2fnv
// CIR: [[Local:%[0-9]+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init]
// CIR: [[Shared:%[0-9]+]] = cir.get_global @_ZZ2fnvE1j : !cir.ptr<!s32i, addrspace(offload_local)>
// CIR: [[Tmp:%[0-9]+]] = cir.load {{.*}} [[Local]] : !cir.ptr<!s32i>, !s32i
// CIR: cir.store{{.*}} [[Tmp]], [[Shared]] : !s32i, !cir.ptr<!s32i, addrspace(offload_local)>
19 changes: 19 additions & 0 deletions clang/test/CIR/CodeGen/HIP/addrspace-lowering.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "../Inputs/cuda.h"

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s


__shared__ int a;

// LLVM-DEVICE: @a = addrspace(3) {{.*}}

__device__ int b;

// LLVM-DEVICE: @b = addrspace(1) {{.*}}

__constant__ int c;

// LLVM-DEVICE: @c = addrspace(4) {{.*}}
7 changes: 6 additions & 1 deletion clang/test/CIR/CodeGen/HIP/simple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
// RUN: %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-HOST --input-file=%t.ll %s

// RUN: %clang_cc1 -triple=amdgcn-amd-amdhsa -x hip -fclangir \
// RUN: -fcuda-is-device -fhip-new-launch-api \
// RUN: -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM-DEVICE --input-file=%t.ll %s

// Attribute for global_fn
// CIR-HOST: [[Kernel:#[a-zA-Z_0-9]+]] = {{.*}}#cir.cu.kernel_name<_Z9global_fni>{{.*}}

Expand All @@ -29,6 +34,7 @@ __device__ void device_fn(int* a, double b, float c) {}

__global__ void global_fn(int a) {}
// CIR-DEVICE: @_Z9global_fni
// LLVM-DEVICE: define dso_local void @_Z9global_fni

// CIR-HOST: @_Z24__device_stub__global_fni{{.*}}extra([[Kernel]])
// CIR-HOST: %[[#CIRKernelArgs:]] = cir.alloca {{.*}}"kernel_args"
Expand All @@ -44,7 +50,6 @@ __global__ void global_fn(int a) {}
// LLVM-HOST: call i32 @__hipPopCallConfiguration
// LLVM-HOST: call i32 @hipLaunchKernel(ptr @_Z9global_fni


int main() {
global_fn<<<1, 1>>>(1);
}
Expand Down
Loading