Skip to content

Commit e8cd9e8

Browse files
committed
add linearized b+tree in lsmt
Signed-off-by: Lanzheng Liu <[email protected]>
1 parent bcd84a9 commit e8cd9e8

File tree

5 files changed

+168
-5
lines changed

5 files changed

+168
-5
lines changed

.github/workflows/cmake.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ jobs:
5151
working-directory: ${{github.workspace}}/build
5252
shell: bash
5353
run: |
54+
lscpu
5455
sudo make install
5556
sudo cp ${{github.workspace}}/src/example_config/overlaybd-registryv2.json /etc/overlaybd/overlaybd.json
5657
sudo systemctl enable /opt/overlaybd/overlaybd-tcmu.service
@@ -87,6 +88,7 @@ jobs:
8788
ls obd_mp
8889
sudo diff -r --exclude "lost+found" test_data obd_mp
8990
sudo umount obd_mp
91+
cat /var/log/overlaybd.log
9092
- name: E2E Test turboOCIv1
9193
working-directory: ${{github.workspace}}/build
9294
shell: bash

src/overlaybd/lsmt/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@ target_include_directories(lsmt_lib PUBLIC
55
${PHOTON_INCLUDE_DIR}
66
)
77

8+
# Check for AVX512F support and add appropriate compiler flags
9+
include(CheckCXXCompilerFlag)
10+
check_cxx_compiler_flag("-mavx512f" COMPILER_SUPPORTS_AVX512F)
11+
if(COMPILER_SUPPORTS_AVX512F)
12+
target_compile_options(lsmt_lib PRIVATE "-mavx512f")
13+
endif()
14+
815
if(BUILD_TESTING)
916
add_subdirectory(test)
1017
endif()

src/overlaybd/lsmt/index.cpp

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include <photon/common/alog.h>
2323
#include <photon/fs/filesystem.h>
2424
#include <photon/common/utility.h>
25+
#ifdef __AVX512F__
26+
#include <immintrin.h>
27+
#endif
2528
using namespace std;
2629

2730
namespace LSMT {
@@ -54,6 +57,123 @@ static inline size_t copy_n(IT begin, IT end, uint64_t end_offset, SegmentMappin
5457

5558
static bool verify_mapping_order(const SegmentMapping *pmappings, size_t n);
5659

60+
bool is_avx512f_supported() {
61+
__builtin_cpu_init();
62+
return __builtin_cpu_supports("avx512f") &&
63+
__builtin_cpu_supports("avx512dq") &&
64+
__builtin_cpu_supports("avx512vl") &&
65+
__builtin_cpu_supports("vpclmulqdq");
66+
}
67+
68+
const static uint32_t ORDER = 8;
69+
const static uint32_t MAX_LEVEL = 10;
70+
static constexpr uint32_t NODES_PER_LEVEL[MAX_LEVEL] = {8, 72, 648, 5832, 52488, 472392, 4251528, 38263752, 344373768, 3099363912};
71+
static constexpr uint32_t LEVEL_START_ID[MAX_LEVEL] = {0, 8, 80, 728, 6560, 59048, 531440, 4782968, 43046720, 387420488};
72+
73+
class LinearizedBptree: public Object {
74+
public:
75+
uint64_t N;
76+
uint64_t *node = nullptr;
77+
int32_t DEPTH = -1;
78+
79+
LinearizedBptree() {
80+
}
81+
82+
~LinearizedBptree() {
83+
free(node);
84+
}
85+
86+
virtual int build(const SegmentMapping* pbegin, const SegmentMapping* pend) {
87+
if (pbegin == pend) {
88+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: empty mapping");
89+
}
90+
if (pbegin->offset != 0) {
91+
// In a real file system, mapping offset starts from 0. skip for some ut.
92+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: invalid start offset");
93+
}
94+
size_t mapping_size = pend - pbegin;
95+
for (uint32_t i = 0; i < MAX_LEVEL; i++)
96+
if (NODES_PER_LEVEL[i] >= mapping_size) {
97+
DEPTH = i+1;
98+
break;
99+
}
100+
if (DEPTH == -1) {
101+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: too many mappings");
102+
}
103+
104+
N = (LEVEL_START_ID[DEPTH-1] + mapping_size + ORDER - 1) / ORDER * ORDER;
105+
LOG_INFO("building Linearized B+tree ", VALUE(DEPTH), VALUE(mapping_size), VALUE(N));
106+
auto ret = posix_memalign((void**)&node, 64, N*sizeof(uint64_t));
107+
if (ret != 0) {
108+
LOG_ERRNO_RETURN(ENOBUFS, -1, "linearized bptree not used: failed to alloc memory");
109+
}
110+
111+
uint32_t leaf_start = LEVEL_START_ID[DEPTH - 1];
112+
uint32_t leaf_size = NODES_PER_LEVEL[DEPTH - 1];
113+
114+
uint32_t p = leaf_start;
115+
116+
for (auto mp = pbegin; mp < pend; mp++, p++) {
117+
node[p] = mp->offset;
118+
}
119+
for (; p < N; p++)
120+
node[p] = -1;
121+
122+
auto G = ORDER;
123+
for (auto level = DEPTH-1; level > 0; level--) {
124+
auto pos = LEVEL_START_ID[level - 1];
125+
for (uint32_t i = 0; i < leaf_size; i += G * (ORDER + 1)) {
126+
for (uint32_t j = 1; j <= ORDER; j++) {
127+
uint32_t lower_id = leaf_start + i + G * j;
128+
node[pos++] = (lower_id < N) ? node[lower_id] : -1;
129+
}
130+
}
131+
G *= (ORDER + 1);
132+
}
133+
LOG_INFO("building Linearized B+tree done");
134+
return 0;
135+
}
136+
137+
virtual uint32_t branchfree_inner_search(const uint64_t *base, uint64_t x) const {
138+
return std::upper_bound(base, base + 8, x) - base;
139+
}
140+
141+
__attribute__((always_inline)) uint32_t search(const uint64_t x) const {
142+
uint32_t res = 0;
143+
#pragma GCC unroll 20
144+
for (int i = DEPTH; i > 1; --i) {
145+
uint32_t c = branchfree_inner_search(node + res, x);
146+
res = (ORDER+1)*res + (c+1)*ORDER;
147+
}
148+
res += branchfree_inner_search(node + res, x);
149+
res = res - 1 - LEVEL_START_ID[DEPTH-1];
150+
return res;
151+
}
152+
};
153+
154+
class LinearizedBptreeAcc: public LinearizedBptree {
155+
public:
156+
int build(const SegmentMapping* pbegin, const SegmentMapping* pend) override {
157+
auto res = LinearizedBptree::build(pbegin, pend);
158+
if (res < 0)
159+
return res;
160+
LOG_INFO("using Accelerated Linearized B+tree");
161+
return 0;
162+
}
163+
uint32_t branchfree_inner_search(const uint64_t *base, uint64_t x) const override {
164+
#ifdef __AVX512F__
165+
__m512i vx = _mm512_set1_epi64(x);
166+
__m512i data = _mm512_load_si512(base);
167+
uint8_t mask = _mm512_cmp_epu64_mask(vx, data, _MM_CMPINT_GE);
168+
return __builtin_popcount(mask);
169+
#else
170+
return std::upper_bound(base, base + 8, x) - base;
171+
#endif
172+
173+
}
174+
};
175+
176+
57177
class Index : public IMemoryIndex {
58178
public:
59179
bool ownership = false;
@@ -62,6 +182,7 @@ class Index : public IMemoryIndex {
62182
const SegmentMapping *pend = nullptr;
63183
uint64_t alloc_blk = 0;
64184
uint64_t virtual_size = 0;
185+
LinearizedBptree *lbt = nullptr;
65186

66187
inline void get_alloc_blks() {
67188
for (auto m : mapping) {
@@ -72,6 +193,7 @@ class Index : public IMemoryIndex {
72193
if (ownership) {
73194
delete[] pbegin;
74195
}
196+
safe_delete(lbt);
75197
}
76198
Index(const SegmentMapping *pmappings = nullptr, size_t n = 0, bool ownership = true,
77199
uint64_t vsize = 0)
@@ -120,7 +242,15 @@ class Index : public IMemoryIndex {
120242
virtual size_t lookup(Segment s, /* OUT */ SegmentMapping *pm, size_t n) const override {
121243
if (s.length == 0)
122244
return 0;
123-
auto lb = std::lower_bound(pbegin, pend, s);
245+
const SegmentMapping *lb;
246+
if (lbt) {
247+
lb = pbegin + lbt->search(s.offset);
248+
if (lb->end() <= s.offset)
249+
lb++;
250+
} else {
251+
lb = std::lower_bound(pbegin, pend, s);
252+
}
253+
124254
auto m = copy_n(lb, pend, s.end(), pm, n);
125255
trim_edge_mappings(pm, m, s);
126256
return m;
@@ -155,6 +285,16 @@ class Index : public IMemoryIndex {
155285
}
156286

157287
UNIMPLEMENTED_POINTER(IMemoryIndex *make_read_only_index() const override);
288+
289+
int build_lineriazed_bptree() {
290+
LinearizedBptree *tree = is_avx512f_supported() ? new LinearizedBptreeAcc() : new LinearizedBptree();
291+
if (tree->build(pbegin, pend) < 0) {
292+
delete(tree);
293+
return -1;
294+
}
295+
lbt = tree;
296+
return 0;
297+
}
158298
};
159299

160300
class LevelIndex : public Index {
@@ -694,6 +834,8 @@ IMemoryIndex *merge_memory_indexes(const IMemoryIndex **pindexes, size_t n) {
694834
auto pi = (const Index **)pindexes;
695835
mapping.reserve(pi[0]->size());
696836
merge_indexes(0, mapping, pi, n, 0, UINT64_MAX);
697-
return new Index(std::move(mapping), pindexes[0]->vsize());
837+
auto idx = new Index(std::move(mapping), pindexes[0]->vsize());
838+
idx->build_lineriazed_bptree();
839+
return idx;
698840
}
699841
} // namespace LSMT

src/overlaybd/lsmt/index.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ IMemoryIndex -> IMemoryIndex0 -> IComboIndex -> Index0 ( set<SegmentMap> ) -> Co
2929
#include <sys/types.h>
3030

3131
namespace LSMT {
32-
struct Segment { // 48 + 18 == 64
33-
uint64_t offset : 50; // offset (0.5 PB if in sector)
34-
uint32_t length : 14; // length (8MB if in sector)
32+
struct Segment {
33+
uint64_t offset : 50;
34+
uint32_t length : 14;
3535
const static uint64_t MAX_OFFSET = (1UL << 50) - 1;
3636
const static uint32_t MAX_LENGTH = (1 << 14) - 1;
3737
const static uint64_t INVALID_OFFSET = MAX_OFFSET;

src/overlaybd/lsmt/test/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,15 @@ add_test(
1313
COMMAND ${EXECUTABLE_OUTPUT_PATH}/lsmt_test
1414
)
1515

16+
include(CheckCXXCompilerFlag)
17+
check_cxx_compiler_flag("-mavx512f" COMPILER_SUPPORTS_AVX512F)
18+
if (COMPILER_SUPPORTS_AVX512F)
19+
add_executable(lsmt_test_lbt test.cpp)
20+
target_include_directories(lsmt_test_lbt PUBLIC ${PHOTON_INCLUDE_DIR})
21+
target_link_libraries(lsmt_test_lbt gtest gtest_main gflags pthread photon_static overlaybd_lib)
22+
target_compile_options(lsmt_test_lbt PRIVATE "-mavx512f")
23+
add_test(
24+
NAME lsmt_test_lbt
25+
COMMAND ${EXECUTABLE_OUTPUT_PATH}/lsmt_test_lbt
26+
)
27+
endif()

0 commit comments

Comments
 (0)