Skip to content

Commit ec30bc9

Browse files
committed
add linearized b+tree in lsmt
1 parent bcd84a9 commit ec30bc9

File tree

3 files changed

+165
-5
lines changed

3 files changed

+165
-5
lines changed

.github/workflows/cmake.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ jobs:
5151
working-directory: ${{github.workspace}}/build
5252
shell: bash
5353
run: |
54+
lscpu
5455
sudo make install
5556
sudo cp ${{github.workspace}}/src/example_config/overlaybd-registryv2.json /etc/overlaybd/overlaybd.json
5657
sudo systemctl enable /opt/overlaybd/overlaybd-tcmu.service
@@ -87,6 +88,7 @@ jobs:
8788
ls obd_mp
8889
sudo diff -r --exclude "lost+found" test_data obd_mp
8990
sudo umount obd_mp
91+
cat /var/log/overlaybd.log
9092
- name: E2E Test turboOCIv1
9193
working-directory: ${{github.workspace}}/build
9294
shell: bash

src/overlaybd/lsmt/index.cpp

Lines changed: 160 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
#include <photon/common/alog.h>
2323
#include <photon/fs/filesystem.h>
2424
#include <photon/common/utility.h>
25+
#ifdef __x86_64__
26+
#include <immintrin.h>
27+
#endif
2528
using namespace std;
2629

2730
namespace LSMT {
@@ -54,6 +57,138 @@ static inline size_t copy_n(IT begin, IT end, uint64_t end_offset, SegmentMappin
5457

5558
static bool verify_mapping_order(const SegmentMapping *pmappings, size_t n);
5659

60+
bool is_avx512f_supported() {
61+
#if defined(__x86_64__)
62+
__builtin_cpu_init();
63+
return __builtin_cpu_supports("avx512f");
64+
#else
65+
return false;
66+
#endif
67+
}
68+
69+
const static uint32_t ORDER = 8;
70+
const static uint32_t MAX_LEVEL = 10;
71+
static constexpr uint32_t NODES_PER_LEVEL[MAX_LEVEL] = {8, 72, 648, 5832, 52488, 472392, 4251528, 38263752, 344373768, 3099363912};
72+
static constexpr uint32_t LEVEL_START_ID[MAX_LEVEL] = {0, 8, 80, 728, 6560, 59048, 531440, 4782968, 43046720, 387420488};
73+
74+
class LinearizedBptree: public Object {
75+
public:
76+
uint64_t N;
77+
uint64_t *node = nullptr;
78+
int32_t DEPTH = -1;
79+
80+
LinearizedBptree() {
81+
}
82+
83+
~LinearizedBptree() {
84+
free(node);
85+
}
86+
87+
virtual int build(const SegmentMapping* pbegin, const SegmentMapping* pend) {
88+
if (pbegin == pend) {
89+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: empty mapping");
90+
}
91+
if (pbegin->offset != 0) {
92+
// In a real file system, mapping offset starts from 0. skip for some ut.
93+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: invalid start offset");
94+
}
95+
size_t mapping_size = pend - pbegin;
96+
for (uint32_t i = 0; i < MAX_LEVEL; i++)
97+
if (NODES_PER_LEVEL[i] >= mapping_size) {
98+
DEPTH = i+1;
99+
break;
100+
}
101+
if (DEPTH == -1) {
102+
LOG_ERROR_RETURN(EINVAL, -1, "linearized bptree not used: too many mappings");
103+
}
104+
105+
N = (LEVEL_START_ID[DEPTH-1] + mapping_size + ORDER - 1) / ORDER * ORDER;
106+
LOG_INFO("building Linearized B+tree ", VALUE(DEPTH), VALUE(mapping_size), VALUE(N));
107+
auto ret = posix_memalign((void**)&node, 64, N*sizeof(uint64_t));
108+
if (ret != 0) {
109+
LOG_ERRNO_RETURN(ENOBUFS, -1, "linearized bptree not used: failed to alloc memory");
110+
}
111+
112+
uint32_t leaf_start = LEVEL_START_ID[DEPTH - 1];
113+
uint32_t leaf_size = NODES_PER_LEVEL[DEPTH - 1];
114+
115+
uint32_t p = leaf_start;
116+
117+
for (auto mp = pbegin; mp < pend; mp++, p++) {
118+
node[p] = mp->offset;
119+
}
120+
for (; p < N; p++)
121+
node[p] = -1;
122+
123+
auto G = ORDER;
124+
for (auto level = DEPTH-1; level > 0; level--) {
125+
auto pos = LEVEL_START_ID[level - 1];
126+
for (uint32_t i = 0; i < leaf_size; i += G * (ORDER + 1)) {
127+
for (uint32_t j = 1; j <= ORDER; j++) {
128+
uint32_t lower_id = leaf_start + i + G * j;
129+
node[pos++] = (lower_id < N) ? node[lower_id] : -1;
130+
}
131+
}
132+
G *= (ORDER + 1);
133+
}
134+
LOG_INFO("building Linearized B+tree done");
135+
return 0;
136+
}
137+
138+
virtual uint32_t branchfree_inner_search(const uint64_t *base, uint64_t x) const {
139+
return std::upper_bound(base, base + 8, x) - base;
140+
}
141+
142+
__attribute__((always_inline)) uint32_t search(const uint64_t x) const {
143+
uint32_t res = 0;
144+
#pragma GCC unroll 20
145+
for (int i = DEPTH; i > 1; --i) {
146+
uint32_t c = branchfree_inner_search(node + res, x);
147+
res = (ORDER+1)*res + (c+1)*ORDER;
148+
}
149+
res += branchfree_inner_search(node + res, x);
150+
res = res - 1 - LEVEL_START_ID[DEPTH-1];
151+
return res;
152+
}
153+
};
154+
155+
class LinearizedBptreeAcc: public LinearizedBptree {
156+
public:
157+
#ifdef __x86_64__
158+
#ifdef __clang__
159+
#pragma clang attribute pop
160+
#pragma clang attribute push (__attribute__((target("avx512f"))), apply_to=function)
161+
#else // __GNUC__
162+
#pragma GCC push_options
163+
#pragma GCC target ("avx512f")
164+
#endif
165+
166+
int build(const SegmentMapping* pbegin, const SegmentMapping* pend) override {
167+
auto res = LinearizedBptree::build(pbegin, pend);
168+
if (res < 0)
169+
return res;
170+
LOG_INFO("using Accelerated Linearized B+tree");
171+
return 0;
172+
}
173+
174+
uint32_t branchfree_inner_search(const uint64_t *base, uint64_t x) const override {
175+
__m512i vx = _mm512_set1_epi64(x);
176+
__m512i data = _mm512_load_si512(base);
177+
uint8_t mask = _mm512_cmp_epu64_mask(vx, data, _MM_CMPINT_GE);
178+
return __builtin_popcount(mask);
179+
}
180+
181+
#ifdef __clang__
182+
#pragma clang attribute pop
183+
#else // __GNUC__
184+
#pragma GCC pop_options
185+
#endif
186+
187+
#endif // __x86_64__
188+
189+
};
190+
191+
57192
class Index : public IMemoryIndex {
58193
public:
59194
bool ownership = false;
@@ -62,6 +197,7 @@ class Index : public IMemoryIndex {
62197
const SegmentMapping *pend = nullptr;
63198
uint64_t alloc_blk = 0;
64199
uint64_t virtual_size = 0;
200+
LinearizedBptree *lbt = nullptr;
65201

66202
inline void get_alloc_blks() {
67203
for (auto m : mapping) {
@@ -72,6 +208,7 @@ class Index : public IMemoryIndex {
72208
if (ownership) {
73209
delete[] pbegin;
74210
}
211+
safe_delete(lbt);
75212
}
76213
Index(const SegmentMapping *pmappings = nullptr, size_t n = 0, bool ownership = true,
77214
uint64_t vsize = 0)
@@ -120,7 +257,15 @@ class Index : public IMemoryIndex {
120257
virtual size_t lookup(Segment s, /* OUT */ SegmentMapping *pm, size_t n) const override {
121258
if (s.length == 0)
122259
return 0;
123-
auto lb = std::lower_bound(pbegin, pend, s);
260+
const SegmentMapping *lb;
261+
if (lbt) {
262+
lb = pbegin + lbt->search(s.offset);
263+
if (lb->end() <= s.offset)
264+
lb++;
265+
} else {
266+
lb = std::lower_bound(pbegin, pend, s);
267+
}
268+
124269
auto m = copy_n(lb, pend, s.end(), pm, n);
125270
trim_edge_mappings(pm, m, s);
126271
return m;
@@ -155,6 +300,17 @@ class Index : public IMemoryIndex {
155300
}
156301

157302
UNIMPLEMENTED_POINTER(IMemoryIndex *make_read_only_index() const override);
303+
304+
int build_lineriazed_bptree() {
305+
auto avx512 = is_avx512f_supported();
306+
LinearizedBptree *tree = avx512 ? new LinearizedBptreeAcc() : new LinearizedBptree();
307+
if (tree->build(pbegin, pend) < 0) {
308+
delete(tree);
309+
return -1;
310+
}
311+
lbt = tree;
312+
return 0;
313+
}
158314
};
159315

160316
class LevelIndex : public Index {
@@ -694,6 +850,8 @@ IMemoryIndex *merge_memory_indexes(const IMemoryIndex **pindexes, size_t n) {
694850
auto pi = (const Index **)pindexes;
695851
mapping.reserve(pi[0]->size());
696852
merge_indexes(0, mapping, pi, n, 0, UINT64_MAX);
697-
return new Index(std::move(mapping), pindexes[0]->vsize());
853+
auto idx = new Index(std::move(mapping), pindexes[0]->vsize());
854+
idx->build_lineriazed_bptree();
855+
return idx;
698856
}
699857
} // namespace LSMT

src/overlaybd/lsmt/index.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ IMemoryIndex -> IMemoryIndex0 -> IComboIndex -> Index0 ( set<SegmentMap> ) -> Co
2929
#include <sys/types.h>
3030

3131
namespace LSMT {
32-
struct Segment { // 48 + 18 == 64
33-
uint64_t offset : 50; // offset (0.5 PB if in sector)
34-
uint32_t length : 14; // length (8MB if in sector)
32+
struct Segment {
33+
uint64_t offset : 50;
34+
uint32_t length : 14;
3535
const static uint64_t MAX_OFFSET = (1UL << 50) - 1;
3636
const static uint32_t MAX_LENGTH = (1 << 14) - 1;
3737
const static uint64_t INVALID_OFFSET = MAX_OFFSET;

0 commit comments

Comments
 (0)