Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions example/gpu/perthreadRuntimeDist/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/perthreadRuntimeDist
/.output
145 changes: 145 additions & 0 deletions example/gpu/perthreadRuntimeDist/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
# All generated files live under $(OUTPUT).
OUTPUT := .output
CLANG ?= clang

# Vendored libbpf/bpftool sources and the locations of their build products.
LIBBPF_SRC := $(abspath ../../../third_party/libbpf/src)
BPFTOOL_SRC := $(abspath ../../../third_party/bpftool/src)
LIBBPF_OBJ := $(abspath $(OUTPUT)/libbpf.a)
BPFTOOL_OUTPUT ?= $(abspath $(OUTPUT)/bpftool)
BPFTOOL ?= $(BPFTOOL_OUTPUT)/bootstrap/bpftool

# Translate `uname -m` into the architecture name used for the vmlinux.h
# directory and for -D__TARGET_ARCH_<arch>. The substitutions are applied in
# the listed order.
ARCH ?= $(shell uname -m | sed -e 's/x86_64/x86/' \
	-e 's/arm.*/arm/' \
	-e 's/aarch64/arm64/' \
	-e 's/ppc64le/powerpc/' \
	-e 's/mips.*/mips/' \
	-e 's/riscv64/riscv/' \
	-e 's/loongarch64/loongarch/')
VMLINUX := ../../../third_party/vmlinux/$(ARCH)/vmlinux.h

# Prefer the libbpf API headers installed into $(OUTPUT) and the Linux UAPI
# headers shipped with libbpf over system-wide headers, which may be missing
# or outdated.
INCLUDES := -I$(OUTPUT) -I../../../third_party/libbpf/include/uapi -I$(dir $(VMLINUX))
CFLAGS := -g -Wall
ALL_LDFLAGS := $(LDFLAGS) $(EXTRA_LDFLAGS)

# Applications built by this Makefile; each one is linked from
# $(OUTPUT)/<app>.o, which in turn needs <app>.c and the generated
# <app>.skel.h.
#
# Comments are kept on their own lines: whitespace between a value and a
# trailing `#` comment becomes part of the variable's value.
#
# Not built here: minimal minimal_legacy uprobe kprobe fentry usdt sockfilter
# tc ksyscall
APPS = perthreadRuntimeDist

# blazesym-based applications are only considered when cargo is available.
CARGO ?= $(shell which cargo)
ifeq ($(strip $(CARGO)),)
BZS_APPS :=
else
# No blazesym-based application is enabled (`profile` is not built here).
BZS_APPS :=
APPS += $(BZS_APPS)
# Required by libblazesym
ALL_LDFLAGS += -lrt -ldl -lpthread -lm
endif

# Get Clang's default includes on this system. We'll explicitly add these dirs
# to the includes list when compiling with `-target bpf` because otherwise some
# architecture-specific dirs will be "missing" on some architectures/distros -
# headers such as asm/types.h, asm/byteorder.h, asm/socket.h, asm/sockios.h,
# sys/cdefs.h etc. might be missing.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
#
# How it works: `$(CLANG) -v -E -` is run on empty input and its diagnostics
# are piped to sed, which prints only the lines between the
# "<...> search starts here:" and "End of search list." markers, rewriting
# each " /path" entry to "-idirafter /path".
CLANG_BPF_SYS_INCLUDES ?= $(shell $(CLANG) -v -E - </dev/null 2>&1 \
	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')

# Verbosity control. With `make V=1` both $(Q) and $(msg) are empty, so every
# recipe command is echoed. Otherwise $(Q) expands to `@` (command hidden) and
# $(msg) prints a short progress line instead.
ifeq ($(V),1)
Q =
msg =
else
Q = @
# Usage: $(call msg,ACTION,target[,extra]). A leading $(abspath $(OUTPUT))/
# prefix is stripped from the target before it is printed.
msg = @printf ' %-8s %s%s\n' \
	"$(1)" \
	"$(patsubst $(abspath $(OUTPUT))/%,%,$(2))" \
	"$(if $(3), $(3))";
MAKEFLAGS += --no-print-directory
endif

# $(call allow-override,VAR,value)
# Assigns `VAR = value` unless the origin of VAR contains "environment" or
# "command line", i.e. unless the user supplied VAR; in that case the user's
# value is left untouched.
define allow-override
$(if $(or $(findstring environment,$(origin $(1))),\
$(findstring command line,$(origin $(1)))),,\
$(eval $(1) = $(2)))
endef

# Default CC and LD to the $(CROSS_COMPILE)-prefixed tools unless overridden.
$(call allow-override,CC,$(CROSS_COMPILE)cc)
$(call allow-override,LD,$(CROSS_COMPILE)ld)

# CUDA toolchain knobs. Both can be overridden from the command line or the
# environment, e.g. `make NVCC=/usr/local/cuda/bin/nvcc CUDA_ARCH=sm_80`.
# The defaults reproduce the previously hard-coded compiler and architecture.
NVCC ?= nvcc
CUDA_ARCH ?= sm_61

.PHONY: all
all: $(APPS) vec_add

# Build the CUDA sample program. When $(NVCC) cannot be found the build is
# skipped with a warning instead of failing, so `make all` still succeeds on
# machines without CUDA (the target is then simply retried on the next run).
vec_add: vec_add.cu
	@if command -v $(NVCC) >/dev/null 2>&1; then \
		$(NVCC) -arch=$(CUDA_ARCH) -cudart shared $< -o $@ -g; \
	else \
		echo "Warning: CUDA not found, skipping vec_add build"; \
	fi

# Create output directories on demand; other rules list them as order-only
# prerequisites.
$(OUTPUT) $(OUTPUT)/libbpf $(BPFTOOL_OUTPUT):
	$(call msg,MKDIR,$@)
	$(Q)mkdir -p $@

# Remove the output tree, the application binaries and the CUDA sample.
.PHONY: clean
clean:
	$(call msg,CLEAN)
	$(Q)rm -rf $(OUTPUT) $(APPS) vec_add

# Build libbpf
# Runs the `install` goal of the vendored libbpf Makefile with
# BUILD_STATIC_ONLY=1. Objects go to $(OUTPUT)/libbpf and DESTDIR is the
# directory of $(LIBBPF_OBJ); INCLUDEDIR, LIBDIR and UAPIDIR are passed empty
# (presumably so everything is installed directly under DESTDIR - confirm
# against the libbpf Makefile). The rule re-runs when any libbpf *.c/*.h file
# or its Makefile changes.
$(LIBBPF_OBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
	$(call msg,LIB,$@)
	$(Q)$(MAKE) -C $(LIBBPF_SRC) BUILD_STATIC_ONLY=1 \
		OBJDIR=$(dir $@)/libbpf DESTDIR=$(dir $@) \
		INCLUDEDIR= LIBDIR= UAPIDIR= \
		install

# Build bpftool
# Builds the `bootstrap` goal of the vendored bpftool into $(BPFTOOL_OUTPUT).
# ARCH and CROSS_COMPILE are cleared for the sub-make (presumably so the tool
# is built for the build host - confirm). The rule has no normal
# prerequisites, so an existing binary is never rebuilt.
$(BPFTOOL): | $(BPFTOOL_OUTPUT)
	$(call msg,BPFTOOL,$@)
	$(Q)$(MAKE) ARCH= CROSS_COMPILE= OUTPUT=$(BPFTOOL_OUTPUT)/ -C $(BPFTOOL_SRC) bootstrap


# libblazesym rules.
# NOTE(review): LIBBLAZESYM_SRC, LIBBLAZESYM_OBJ and LIBBLAZESYM_HEADER are not
# assigned anywhere in this Makefile and BZS_APPS is empty, so nothing here
# appears to depend on these rules - confirm whether the variables are meant
# to be supplied externally or whether the rules are leftovers.

# Double-colon rule without prerequisites: cargo is invoked every time this
# target is considered.
$(LIBBLAZESYM_SRC)/target/release/libblazesym.a::
	$(Q)cd $(LIBBLAZESYM_SRC) && $(CARGO) build --features=cheader,dont-generate-test-files --release

# Copy the static library produced by cargo to $(LIBBLAZESYM_OBJ).
$(LIBBLAZESYM_OBJ): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
	$(call msg,LIB, $@)
	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/libblazesym.a $@

# Copy blazesym.h from the cargo release directory to $(LIBBLAZESYM_HEADER).
$(LIBBLAZESYM_HEADER): $(LIBBLAZESYM_SRC)/target/release/libblazesym.a | $(OUTPUT)
	$(call msg,LIB,$@)
	$(Q)cp $(LIBBLAZESYM_SRC)/target/release/blazesym.h $@

# Build BPF code
# Compiles <name>.bpf.c for the bpf target into $(OUTPUT)/<name>.tmp.bpf.o and
# then lets `bpftool gen object` produce the final $(OUTPUT)/<name>.bpf.o.
#
# Local headers are listed with $(wildcard *.h). A `%` inside $(wildcard ...)
# is not a pattern stem (the function is expanded when the rule is read), so
# `$(wildcard %.h)` expands to nothing and header edits would never trigger a
# rebuild. Only the .c prerequisite is handed to the compiler.
$(OUTPUT)/%.bpf.o: %.bpf.c $(LIBBPF_OBJ) $(wildcard *.h) $(VMLINUX) | $(OUTPUT) $(BPFTOOL)
	$(call msg,BPF,$@)
	$(Q)$(CLANG) -Xlinker --export-dynamic -g -O2 -target bpf -D__TARGET_ARCH_$(ARCH) \
		$(INCLUDES) $(CLANG_BPF_SYS_INCLUDES) \
		-c $(filter %.c,$^) -o $(patsubst %.bpf.o,%.tmp.bpf.o,$@)
	$(Q)$(BPFTOOL) gen object $@ $(patsubst %.bpf.o,%.tmp.bpf.o,$@)

# Generate BPF skeletons
# $(OUTPUT)/<name>.skel.h is produced from $(OUTPUT)/<name>.bpf.o by
# `bpftool gen skeleton`, which writes the header to stdout.
$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(OUTPUT) $(BPFTOOL)
	$(call msg,GEN-SKEL,$@)
	$(Q)$(BPFTOOL) gen skeleton $< > $@

# Build user-space code
# Every application object additionally depends on its generated skeleton
# header, so the BPF side is built first.
$(patsubst %,$(OUTPUT)/%.o,$(APPS)): %.o: %.skel.h

# Compile <name>.c into $(OUTPUT)/<name>.o.
# Local headers are listed with $(wildcard *.h): `$(wildcard %.h)` expands to
# nothing because `%` is not a pattern stem inside $(wildcard ...), which
# would leave header edits untracked. Only the .c prerequisite is compiled.
$(OUTPUT)/%.o: %.c $(wildcard *.h) | $(OUTPUT)
	$(call msg,CC,$@)
	$(Q)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@

# Objects of blazesym-based applications also need the blazesym header.
$(patsubst %,$(OUTPUT)/%.o,$(BZS_APPS)): $(LIBBLAZESYM_HEADER)

# blazesym-based applications get the static blazesym library as an extra
# prerequisite (it then becomes part of $^ in the link rule below).
$(BZS_APPS): $(LIBBLAZESYM_OBJ)

# Build application binary
# Links <app> from $(OUTPUT)/<app>.o plus the static libbpf archive; -lelf and
# -lz are appended after $(ALL_LDFLAGS).
$(APPS): %: $(OUTPUT)/%.o $(LIBBPF_OBJ) | $(OUTPUT)
	$(call msg,BINARY,$@)
	$(Q)$(CC) $(CFLAGS) $^ $(ALL_LDFLAGS) -lelf -lz -o $@

# Delete a target whose recipe failed, so a partially written file is never
# mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Keep intermediate targets (.skel.h, .bpf.o, etc.) instead of letting make
# remove them after the build.
.SECONDARY:
62 changes: 62 additions & 0 deletions example/gpu/perthreadRuntimeDist/perthreadRuntimeDist.bpf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "perthreadRuntimeDist.h"


/* Perf event array used by cuda__kernel_exit to hand struct event_t records
 * to user space. */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} events SEC(".maps");

// Records the start time: maps a thread id (u32) to the timestamp (u64)
// stored by cuda__kernel_entry; holds at most 8192 entries.
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 8192);
	__type(key, u32);
	__type(value, u64);
} start SEC(".maps");

/* License string placed in the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";

// EXT helper index — must exist and be visible.
// The helper is invoked through a function pointer whose value is the helper
// number 502. NOTE(review): presumably a runtime-specific extension helper
// that returns a GPU global timer value — confirm against the runtime's
// helper table.
static const u64 (*bpf_get_globaltimer)(void) = (void *)502;

// GPU kernel entry.
// Stores the current bpf_get_globaltimer() reading in the `start` map, keyed
// by the low 32 bits of bpf_get_current_pid_tgid(). An existing entry for the
// same key is overwritten (BPF_ANY).
SEC("kprobe/cudaLaunchKernel")
int cuda__kernel_entry(struct pt_regs *ctx)
{
	u32 key;
	u64 stamp;

	/* The u32 conversion keeps only the low half of the 64-bit id. */
	key = (u32)bpf_get_current_pid_tgid();
	stamp = bpf_get_globaltimer();

	bpf_map_update_elem(&start, &key, &stamp, BPF_ANY);
	return 0;
}

// GPU kernel exit.
// Looks up the timestamp stored by cuda__kernel_entry for the current thread
// id, emits a struct event_t holding the elapsed bpf_get_globaltimer() delta
// through the `events` perf buffer, and removes the map entry. Does nothing
// when no start timestamp is recorded for this thread id.
SEC("kretprobe/cudaLaunchKernel")
int cuda__kernel_exit(struct pt_regs *ctx)
{
	u32 tid = bpf_get_current_pid_tgid();
	u64 *start_cycles = bpf_map_lookup_elem(&start, &tid);
	struct event_t evt;

	if (!start_cycles)
		return 0;

	u64 end_cycles = bpf_get_globaltimer();

	/*
	 * struct event_t has padding between its 32-bit and 64-bit members,
	 * and a designated initializer is not required to clear it. The whole
	 * struct is copied out below, so zero every byte first: this avoids
	 * exposing uninitialized stack contents and the verifier rejection
	 * that reading uninitialized stack can cause.
	 */
	__builtin_memset(&evt, 0, sizeof(evt));
	evt.tid = tid;
	evt.cycles = end_cycles - *start_cycles;

	bpf_perf_event_output(ctx, &events,
			      BPF_F_CURRENT_CPU,
			      &evt,
			      sizeof(evt));

	bpf_map_delete_elem(&start, &tid);
	return 0;
}
61 changes: 61 additions & 0 deletions example/gpu/perthreadRuntimeDist/perthreadRuntimeDist.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <bpf/libbpf.h>
#include "perthreadRuntimeDist.skel.h"
#include "perthreadRuntimeDist.h"

/*
 * Perf buffer sample callback: prints one struct event_t record.
 *
 * ctx  - user context passed to perf_buffer__new() (unused)
 * cpu  - CPU whose ring delivered the sample
 * data - raw sample bytes, interpreted as struct event_t
 * size - number of bytes available at data
 */
static void handle_event(void *ctx, int cpu, void *data, __u32 size)
{
	const struct event_t *e = data;

	/* Never read past a sample that is too short to hold an event. */
	if (size < sizeof(*e)) {
		fprintf(stderr, "Ignoring short sample (%u bytes) on CPU %d\n",
			size, cpu);
		return;
	}

	/* __u64 is not `unsigned long long` on every ABI; cast for %llu. */
	printf("[CPU %d] tid=%u cycles=%llu ns\n",
	       cpu, e->tid, (unsigned long long)e->cycles);
}

/*
 * Perf buffer lost-sample callback: reports how many events were dropped.
 *
 * ctx  - user context passed to perf_buffer__new() (unused)
 * cpu  - CPU whose ring dropped the events
 * lost - number of dropped events
 */
static void handle_lost(void *ctx, int cpu, __u64 lost)
{
	/* __u64 is not `unsigned long long` on every ABI; cast for %llu. */
	printf("LOST %llu events on CPU %d\n", (unsigned long long)lost, cpu);
}

/* Set from the signal handler to request a clean shutdown of the poll loop. */
static volatile sig_atomic_t exiting;

/* SIGINT/SIGTERM handler: only flags the main loop to stop. */
static void sig_handler(int sig)
{
	(void)sig;
	exiting = 1;
}

/*
 * Opens, loads and attaches the perthreadRuntimeDist BPF skeleton, then polls
 * the `events` perf buffer and prints every record until SIGINT/SIGTERM is
 * received or polling fails.
 *
 * Returns 0 on a clean shutdown and 1 on any error. The perf buffer and the
 * skeleton are released on every exit path.
 */
int main(void)
{
	struct perthreadRuntimeDist_bpf *skel;
	struct perf_buffer *pb = NULL;
	int events_fd;
	int err;

	skel = perthreadRuntimeDist_bpf__open();
	if (!skel) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	err = perthreadRuntimeDist_bpf__load(skel);
	if (err) {
		fprintf(stderr, "Failed to load BPF skeleton\n");
		goto cleanup;
	}

	err = perthreadRuntimeDist_bpf__attach(skel);
	if (err) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	printf("BPF attached successfully\n");

	events_fd = bpf_map__fd(skel->maps.events);
	pb = perf_buffer__new(events_fd, 16 /*buffer pages*/,
			      handle_event, handle_lost, NULL, NULL);
	if (!pb) {
		err = 1;
		fprintf(stderr, "Failed to open perf buffer\n");
		goto cleanup;
	}

	/* Allow Ctrl-C / kill to stop the loop so resources get released. */
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	printf("Collecting data...\n");

	while (!exiting) {
		err = perf_buffer__poll(pb, 100 /*ms*/);
		if (err == -EINTR) {
			/* Interrupted by a signal: re-check the exit flag. */
			err = 0;
			continue;
		}
		if (err < 0) {
			/* Stop instead of spinning forever on the same error. */
			fprintf(stderr, "perf_buffer__poll() error %d\n", err);
			break;
		}
		/* A non-negative result is not an error. */
		err = 0;
	}

cleanup:
	/* perf_buffer__free() is called with NULL when no buffer was created. */
	perf_buffer__free(pb);
	perthreadRuntimeDist_bpf__destroy(skel);
	return err ? 1 : 0;
}
9 changes: 9 additions & 0 deletions example/gpu/perthreadRuntimeDist/perthreadRuntimeDist.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#ifndef __PERTHREAD_RUNTIME_DIST_H__
#define __PERTHREAD_RUNTIME_DIST_H__

/*
 * Record passed from the BPF program to user space through the `events` perf
 * buffer: filled in by cuda__kernel_exit and printed by handle_event().
 */
struct event_t {
	__u32 tid;	/* low 32 bits of bpf_get_current_pid_tgid() */
	__u64 cycles;	/* difference between the exit and entry bpf_get_globaltimer() readings */
};

#endif
Loading
Loading