2222#include < photon/common/alog.h>
2323#include < photon/fs/filesystem.h>
2424#include < photon/common/utility.h>
25+ #ifdef __x86_64__
26+ #include < immintrin.h>
27+ #endif
2528using namespace std ;
2629
2730namespace LSMT {
@@ -54,6 +57,138 @@ static inline size_t copy_n(IT begin, IT end, uint64_t end_offset, SegmentMappin
5457
5558static bool verify_mapping_order (const SegmentMapping *pmappings, size_t n);
5659
60+ bool is_avx512f_supported () {
61+ #if defined(__x86_64__)
62+ __builtin_cpu_init ();
63+ return __builtin_cpu_supports (" avx512f" );
64+ #else
65+ return false ;
66+ #endif
67+ }
68+
69+ const static uint32_t ORDER = 8 ;
70+ const static uint32_t MAX_LEVEL = 10 ;
71+ static constexpr uint32_t NODES_PER_LEVEL[MAX_LEVEL] = {8 , 72 , 648 , 5832 , 52488 , 472392 , 4251528 , 38263752 , 344373768 , 3099363912 };
72+ static constexpr uint32_t LEVEL_START_ID[MAX_LEVEL] = {0 , 8 , 80 , 728 , 6560 , 59048 , 531440 , 4782968 , 43046720 , 387420488 };
73+
74+ class LinearizedBptree : public Object {
75+ public:
76+ uint64_t N;
77+ uint64_t *node = nullptr ;
78+ int32_t DEPTH = -1 ;
79+
80+ LinearizedBptree () {
81+ }
82+
83+ ~LinearizedBptree () {
84+ free (node);
85+ }
86+
87+ virtual int build (const SegmentMapping* pbegin, const SegmentMapping* pend) {
88+ if (pbegin == pend) {
89+ LOG_ERROR_RETURN (EINVAL, -1 , " linearized bptree not used: empty mapping" );
90+ }
91+ if (pbegin->offset != 0 ) {
92+ // In a real file system, mapping offset starts from 0. skip for some ut.
93+ LOG_ERROR_RETURN (EINVAL, -1 , " linearized bptree not used: invalid start offset" );
94+ }
95+ size_t mapping_size = pend - pbegin;
96+ for (uint32_t i = 0 ; i < MAX_LEVEL; i++)
97+ if (NODES_PER_LEVEL[i] >= mapping_size) {
98+ DEPTH = i+1 ;
99+ break ;
100+ }
101+ if (DEPTH == -1 ) {
102+ LOG_ERROR_RETURN (EINVAL, -1 , " linearized bptree not used: too many mappings" );
103+ }
104+
105+ N = (LEVEL_START_ID[DEPTH-1 ] + mapping_size + ORDER - 1 ) / ORDER * ORDER;
106+ LOG_INFO (" building Linearized B+tree " , VALUE (DEPTH), VALUE (mapping_size), VALUE (N));
107+ auto ret = posix_memalign ((void **)&node, 64 , N*sizeof (uint64_t ));
108+ if (ret != 0 ) {
109+ LOG_ERRNO_RETURN (ENOBUFS, -1 , " linearized bptree not used: failed to alloc memory" );
110+ }
111+
112+ uint32_t leaf_start = LEVEL_START_ID[DEPTH - 1 ];
113+ uint32_t leaf_size = NODES_PER_LEVEL[DEPTH - 1 ];
114+
115+ uint32_t p = leaf_start;
116+
117+ for (auto mp = pbegin; mp < pend; mp++, p++) {
118+ node[p] = mp->offset ;
119+ }
120+ for (; p < N; p++)
121+ node[p] = -1 ;
122+
123+ auto G = ORDER;
124+ for (auto level = DEPTH-1 ; level > 0 ; level--) {
125+ auto pos = LEVEL_START_ID[level - 1 ];
126+ for (uint32_t i = 0 ; i < leaf_size; i += G * (ORDER + 1 )) {
127+ for (uint32_t j = 1 ; j <= ORDER; j++) {
128+ uint32_t lower_id = leaf_start + i + G * j;
129+ node[pos++] = (lower_id < N) ? node[lower_id] : -1 ;
130+ }
131+ }
132+ G *= (ORDER + 1 );
133+ }
134+ LOG_INFO (" building Linearized B+tree done" );
135+ return 0 ;
136+ }
137+
138+ virtual uint32_t branchfree_inner_search (const uint64_t *base, uint64_t x) const {
139+ return std::upper_bound (base, base + 8 , x) - base;
140+ }
141+
142+ __attribute__ ((always_inline)) uint32_t search (const uint64_t x) const {
143+ uint32_t res = 0 ;
144+ #pragma GCC unroll 20
145+ for (int i = DEPTH; i > 1 ; --i) {
146+ uint32_t c = branchfree_inner_search (node + res, x);
147+ res = (ORDER+1 )*res + (c+1 )*ORDER;
148+ }
149+ res += branchfree_inner_search (node + res, x);
150+ res = res - 1 - LEVEL_START_ID[DEPTH-1 ];
151+ return res;
152+ }
153+ };
154+
155+ class LinearizedBptreeAcc : public LinearizedBptree {
156+ public:
157+ #ifdef __x86_64__
158+ #ifdef __clang__
159+ #pragma clang attribute pop
160+ #pragma clang attribute push (__attribute__((target("avx512f"))), apply_to=function)
161+ #else // __GNUC__
162+ #pragma GCC push_options
163+ #pragma GCC target ("avx512f")
164+ #endif
165+
166+ int build (const SegmentMapping* pbegin, const SegmentMapping* pend) override {
167+ auto res = LinearizedBptree::build (pbegin, pend);
168+ if (res < 0 )
169+ return res;
170+ LOG_INFO (" using Accelerated Linearized B+tree" );
171+ return 0 ;
172+ }
173+
174+ uint32_t branchfree_inner_search (const uint64_t *base, uint64_t x) const override {
175+ __m512i vx = _mm512_set1_epi64 (x);
176+ __m512i data = _mm512_load_si512 (base);
177+ uint8_t mask = _mm512_cmp_epu64_mask (vx, data, _MM_CMPINT_GE);
178+ return __builtin_popcount (mask);
179+ }
180+
181+ #ifdef __clang__
182+ #pragma clang attribute pop
183+ #else // __GNUC__
184+ #pragma GCC pop_options
185+ #endif
186+
187+ #endif // __x86_64__
188+
189+ };
190+
191+
57192class Index : public IMemoryIndex {
58193public:
59194 bool ownership = false ;
@@ -62,6 +197,7 @@ class Index : public IMemoryIndex {
62197 const SegmentMapping *pend = nullptr ;
63198 uint64_t alloc_blk = 0 ;
64199 uint64_t virtual_size = 0 ;
200+ LinearizedBptree *lbt = nullptr ;
65201
66202 inline void get_alloc_blks () {
67203 for (auto m : mapping) {
@@ -72,6 +208,7 @@ class Index : public IMemoryIndex {
72208 if (ownership) {
73209 delete[] pbegin;
74210 }
211+ safe_delete (lbt);
75212 }
76213 Index (const SegmentMapping *pmappings = nullptr , size_t n = 0 , bool ownership = true ,
77214 uint64_t vsize = 0 )
@@ -120,7 +257,15 @@ class Index : public IMemoryIndex {
120257 virtual size_t lookup (Segment s, /* OUT */ SegmentMapping *pm, size_t n) const override {
121258 if (s.length == 0 )
122259 return 0 ;
123- auto lb = std::lower_bound (pbegin, pend, s);
260+ const SegmentMapping *lb;
261+ if (lbt) {
262+ lb = pbegin + lbt->search (s.offset );
263+ if (lb->end () <= s.offset )
264+ lb++;
265+ } else {
266+ lb = std::lower_bound (pbegin, pend, s);
267+ }
268+
124269 auto m = copy_n (lb, pend, s.end (), pm, n);
125270 trim_edge_mappings (pm, m, s);
126271 return m;
@@ -155,6 +300,17 @@ class Index : public IMemoryIndex {
155300 }
156301
157302 UNIMPLEMENTED_POINTER (IMemoryIndex *make_read_only_index () const override );
303+
304+ int build_lineriazed_bptree () {
305+ auto avx512 = is_avx512f_supported ();
306+ LinearizedBptree *tree = avx512 ? new LinearizedBptreeAcc () : new LinearizedBptree ();
307+ if (tree->build (pbegin, pend) < 0 ) {
308+ delete (tree);
309+ return -1 ;
310+ }
311+ lbt = tree;
312+ return 0 ;
313+ }
158314};
159315
160316class LevelIndex : public Index {
@@ -694,6 +850,8 @@ IMemoryIndex *merge_memory_indexes(const IMemoryIndex **pindexes, size_t n) {
694850 auto pi = (const Index **)pindexes;
695851 mapping.reserve (pi[0 ]->size ());
696852 merge_indexes (0 , mapping, pi, n, 0 , UINT64_MAX);
697- return new Index (std::move (mapping), pindexes[0 ]->vsize ());
853+ auto idx = new Index (std::move (mapping), pindexes[0 ]->vsize ());
854+ idx->build_lineriazed_bptree ();
855+ return idx;
698856}
699857} // namespace LSMT
0 commit comments