Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions attach/nv_attach_impl/pass/ptxpass_core/src/core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ std::string compile_ebpf_to_ptx_from_words(
vm.register_external_function(505, "get_thread_idx", (void *)test_func);
vm.register_external_function(507, "cuda_exit", (void *)test_func);
vm.register_external_function(508, "get_grid_dim", (void *)test_func);
vm.register_external_function(509, "get_sm_id", (void *)test_func);
vm.register_external_function(510, "get_warp_id", (void *)test_func);
vm.register_external_function(511, "get_lane_id", (void *)test_func);

vm.load_code(insts, insts_count * sizeof(ebpf_inst));
bpftime::llvm_bpf_jit_context ctx(vm);
Expand Down
27 changes: 27 additions & 0 deletions attach/nv_attach_impl/trampoline/default_trampoline.cu
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,33 @@ _bpf_helper_ext_0508(uint64_t x, uint64_t y, uint64_t z, uint64_t, uint64_t)
return 0;
}

extern "C" __noinline__ __device__ uint64_t
_bpf_helper_ext_0509(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t)
{
// get sm id
uint32_t sm_id;
asm volatile("mov.u32 %0, %smid;" : "=r"(sm_id));
return (uint64_t)sm_id;
}

extern "C" __noinline__ __device__ uint64_t
_bpf_helper_ext_0510(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t)
{
// get warp id
uint32_t warp_id;
asm volatile("mov.u32 %0, %warpid;" : "=r"(warp_id));
return (uint64_t)warp_id;
}

extern "C" __noinline__ __device__ uint64_t
_bpf_helper_ext_0511(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t)
{
// get lane id
uint32_t lane_id;
asm volatile("mov.u32 %0, %laneid;" : "=r"(lane_id));
return (uint64_t)lane_id;
}

extern "C" __global__ void bpf_main(void *mem, size_t sz)
{
printf("kernel function entered, mem=%lx, memsz=%ld\n", (uintptr_t)mem,
Expand Down
57 changes: 57 additions & 0 deletions attach/nv_attach_impl/trampoline_ptx.h
Original file line number Diff line number Diff line change
Expand Up @@ -1546,5 +1546,62 @@ static const char TRAMPOLINE_PTX[] = R"(
st.param.b64 [func_retval0+0], %rd7;
ret;
// -- End function
}
// .globl _bpf_helper_ext_0509 // -- Begin function _bpf_helper_ext_0509
.visible .func (.param .b64 func_retval0) _bpf_helper_ext_0509(
.param .b64 _bpf_helper_ext_0509_param_0,
.param .b64 _bpf_helper_ext_0509_param_1,
.param .b64 _bpf_helper_ext_0509_param_2,
.param .b64 _bpf_helper_ext_0509_param_3,
.param .b64 _bpf_helper_ext_0509_param_4
) // @_bpf_helper_ext_0509
{
.reg .b32 %r<2>;
.reg .b64 %rd<2>;

// %bb.0:
mov.u32 %r1, %smid;
cvt.u64.u32 %rd1, %r1;
st.param.b64 [func_retval0+0], %rd1;
ret;
// -- End function
}
// .globl _bpf_helper_ext_0510 // -- Begin function _bpf_helper_ext_0510
.visible .func (.param .b64 func_retval0) _bpf_helper_ext_0510(
.param .b64 _bpf_helper_ext_0510_param_0,
.param .b64 _bpf_helper_ext_0510_param_1,
.param .b64 _bpf_helper_ext_0510_param_2,
.param .b64 _bpf_helper_ext_0510_param_3,
.param .b64 _bpf_helper_ext_0510_param_4
) // @_bpf_helper_ext_0510
{
.reg .b32 %r<2>;
.reg .b64 %rd<2>;

// %bb.0:
mov.u32 %r1, %warpid;
cvt.u64.u32 %rd1, %r1;
st.param.b64 [func_retval0+0], %rd1;
ret;
// -- End function
}
// .globl _bpf_helper_ext_0511 // -- Begin function _bpf_helper_ext_0511
.visible .func (.param .b64 func_retval0) _bpf_helper_ext_0511(
.param .b64 _bpf_helper_ext_0511_param_0,
.param .b64 _bpf_helper_ext_0511_param_1,
.param .b64 _bpf_helper_ext_0511_param_2,
.param .b64 _bpf_helper_ext_0511_param_3,
.param .b64 _bpf_helper_ext_0511_param_4
) // @_bpf_helper_ext_0511
{
.reg .b32 %r<2>;
.reg .b64 %rd<2>;

// %bb.0:
mov.u32 %r1, %laneid;
cvt.u64.u32 %rd1, %r1;
st.param.b64 [func_retval0+0], %rd1;
ret;
// -- End function
}
)";
6 changes: 6 additions & 0 deletions example/gpu/threadscheduling/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.output/
threadscheduling
vec_add
*.o
*.bpf.o
*.skel.h
119 changes: 119 additions & 0 deletions example/gpu/threadscheduling/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
OUTPUT := .output
CLANG ?= clang
LIBBPF_SRC := $(abspath ../../../third_party/libbpf/src)
BPFTOOL_SRC := $(abspath ../../../third_party/bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool
ARCH ?= $(shell uname -m | sed 's/x86_64/x86/' \
| sed 's/arm.*/arm/' \
| sed 's/aarch64/arm64/' \
| sed 's/ppc64le/powerpc/' \
| sed 's/mips.*/mips/' \
| sed 's/riscv64/riscv/' \
| sed 's/loongarch64/loongarch/')
VMLINUX := ../../../third_party/vmlinux/$(ARCH)/vmlinux.h
# Use our own libbpf API headers and Linux UAPI headers distributed with
# libbpf to avoid dependency on system-wide headers, which could be missing or
# outdated
INCLUDES := -I$(OUTPUT) -I../../../third_party/libbpf/include/uapi -I$(dir $(VMLINUX))
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

APPS = threadscheduling

# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')

ifeq ($(V),1)
Q =
msg =
else
Q = @
msg = @printf ' %-8s %s%s\n' \
"$(1)" \
"$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
"$(if $(3), $(3))";
MAKEFLAGS += --no-print-directory
endif

define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef

$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)

.PHONY: all
all: $(APPS) vec_add

vec_add: vec_add.cu
@if command -v nvcc >/dev/null 2>&1; then \
nvcc -cudart shared vec_add.cu -o vec_add -g; \
else \
echo "Warning: CUDA not found, skipping vec_add build"; \
fi

.PHONY: clean
clean:
$(call msg,CLEAN)
$(Q)rm -rf $(OUTPUT) $(APPS) vec_add

$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
$(call msg,MKDIR,$@)
$(Q)mkdir -p $@

# Build libbpf
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
$(call msg,LIB,$@)
$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
INCLUDEDIR= LIBDIR= UAPIDIR= \
install

# Build bpftool
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
$(call msg,BPFTOOL,$@)
$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap

# Build BPF code
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard %.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
$(call msg,BPF,$@)
$(Q)$(CLANG) -Xlinker --export-dynamic -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)

# Generate BPF skeletons
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
$(call msg,GEN-SKEL,$@)
$(Q)$(BPFTOOL) gen skeleton $< > $@

# Build user-space code
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h

$(OUTPUT)/%.o: %.c $(wildcard %.h) | $(OUTPUT)
$(call msg,CC,$@)
$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@

# Build application binary
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
$(call msg,BINARY,$@)
$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@

# delete failed targets
.DELETE_ON_ERROR:

# keep intermediate (.skel.h, .bpf.o, etc) targets
.SECONDARY:
Loading
Loading