2222#include < photon/common/alog.h>
2323#include < photon/fs/filesystem.h>
2424#include < photon/common/utility.h>
25+ #ifdef __AVX512F__
26+ #include < immintrin.h>
27+ #endif
2528using namespace std ;
2629
2730namespace LSMT {
@@ -54,6 +57,123 @@ static inline size_t copy_n(IT begin, IT end, uint64_t end_offset, SegmentMappin
5457
5558static bool verify_mapping_order (const SegmentMapping *pmappings, size_t n);
5659
60+ bool is_avx512f_supported () {
61+ __builtin_cpu_init ();
62+ return __builtin_cpu_supports (" avx512f" ) &&
63+ __builtin_cpu_supports (" avx512dq" ) &&
64+ __builtin_cpu_supports (" avx512vl" ) &&
65+ __builtin_cpu_supports (" vpclmulqdq" );
66+ }
67+
68+ const static uint32_t ORDER = 8 ;
69+ const static uint32_t MAX_LEVEL = 10 ;
70+ static constexpr uint32_t NODES_PER_LEVEL[MAX_LEVEL] = {8 , 72 , 648 , 5832 , 52488 , 472392 , 4251528 , 38263752 , 344373768 , 3099363912 };
71+ static constexpr uint32_t LEVEL_START_ID[MAX_LEVEL] = {0 , 8 , 80 , 728 , 6560 , 59048 , 531440 , 4782968 , 43046720 , 387420488 };
72+
73+ class LinearizedBptree : public Object {
74+ public:
75+ uint64_t N;
76+ uint64_t *node = nullptr ;
77+ int32_t DEPTH = -1 ;
78+
79+ LinearizedBptree () {
80+ }
81+
82+ ~LinearizedBptree () {
83+ free (node);
84+ }
85+
86+ virtual int build (const SegmentMapping* pbegin, const SegmentMapping* pend) {
87+ if (pbegin == pend) {
88+ LOG_ERROR_RETURN (EINVAL, -1 , " linearized bptree not used: empty mapping" );
89+ }
90+ if (pbegin->offset != 0 ) {
91+ // In a real file system, mapping offset starts from 0. skip for some ut.
92+ LOG_ERROR_RETURN (EINVAL, -1 , " linearized bptree not used: invalid start offset" );
93+ }
94+ size_t mapping_size = pend - pbegin;
95+ for (uint32_t i = 0 ; i < MAX_LEVEL; i++)
96+ if (NODES_PER_LEVEL[i] >= mapping_size) {
97+ DEPTH = i+1 ;
98+ break ;
99+ }
100+ if (DEPTH == -1 ) {
101+ LOG_ERROR_RETURN (EINVAL, -1 , " linearized bptree not used: too many mappings" );
102+ }
103+
104+ N = (LEVEL_START_ID[DEPTH-1 ] + mapping_size + ORDER - 1 ) / ORDER * ORDER;
105+ LOG_INFO (" building Linearized B+tree " , VALUE (DEPTH), VALUE (mapping_size), VALUE (N));
106+ auto ret = posix_memalign ((void **)&node, 64 , N*sizeof (uint64_t ));
107+ if (ret != 0 ) {
108+ LOG_ERRNO_RETURN (ENOBUFS, -1 , " linearized bptree not used: failed to alloc memory" );
109+ }
110+
111+ uint32_t leaf_start = LEVEL_START_ID[DEPTH - 1 ];
112+ uint32_t leaf_size = NODES_PER_LEVEL[DEPTH - 1 ];
113+
114+ uint32_t p = leaf_start;
115+
116+ for (auto mp = pbegin; mp < pend; mp++, p++) {
117+ node[p] = mp->offset ;
118+ }
119+ for (; p < N; p++)
120+ node[p] = -1 ;
121+
122+ auto G = ORDER;
123+ for (auto level = DEPTH-1 ; level > 0 ; level--) {
124+ auto pos = LEVEL_START_ID[level - 1 ];
125+ for (uint32_t i = 0 ; i < leaf_size; i += G * (ORDER + 1 )) {
126+ for (uint32_t j = 1 ; j <= ORDER; j++) {
127+ uint32_t lower_id = leaf_start + i + G * j;
128+ node[pos++] = (lower_id < N) ? node[lower_id] : -1 ;
129+ }
130+ }
131+ G *= (ORDER + 1 );
132+ }
133+ LOG_INFO (" building Linearized B+tree done" );
134+ return 0 ;
135+ }
136+
137+ virtual uint32_t branchfree_inner_search (const uint64_t *base, uint64_t x) const {
138+ return std::upper_bound (base, base + 8 , x) - base;
139+ }
140+
141+ __attribute__ ((always_inline)) uint32_t search (const uint64_t x) const {
142+ uint32_t res = 0 ;
143+ #pragma GCC unroll 20
144+ for (int i = DEPTH; i > 1 ; --i) {
145+ uint32_t c = branchfree_inner_search (node + res, x);
146+ res = (ORDER+1 )*res + (c+1 )*ORDER;
147+ }
148+ res += branchfree_inner_search (node + res, x);
149+ res = res - 1 - LEVEL_START_ID[DEPTH-1 ];
150+ return res;
151+ }
152+ };
153+
154+ class LinearizedBptreeAcc : public LinearizedBptree {
155+ public:
156+ int build (const SegmentMapping* pbegin, const SegmentMapping* pend) override {
157+ auto res = LinearizedBptree::build (pbegin, pend);
158+ if (res < 0 )
159+ return res;
160+ LOG_INFO (" using Accelerated Linearized B+tree" );
161+ return 0 ;
162+ }
163+ uint32_t branchfree_inner_search (const uint64_t *base, uint64_t x) const override {
164+ #ifdef __AVX512F__
165+ __m512i vx = _mm512_set1_epi64 (x);
166+ __m512i data = _mm512_load_si512 (base);
167+ uint8_t mask = _mm512_cmp_epu64_mask (vx, data, _MM_CMPINT_GE);
168+ return __builtin_popcount (mask);
169+ #else
170+ return std::upper_bound (base, base + 8 , x) - base;
171+ #endif
172+
173+ }
174+ };
175+
176+
57177class Index : public IMemoryIndex {
58178public:
59179 bool ownership = false ;
@@ -62,6 +182,7 @@ class Index : public IMemoryIndex {
62182 const SegmentMapping *pend = nullptr ;
63183 uint64_t alloc_blk = 0 ;
64184 uint64_t virtual_size = 0 ;
185+ LinearizedBptree *lbt = nullptr ;
65186
66187 inline void get_alloc_blks () {
67188 for (auto m : mapping) {
@@ -72,6 +193,7 @@ class Index : public IMemoryIndex {
72193 if (ownership) {
73194 delete[] pbegin;
74195 }
196+ safe_delete (lbt);
75197 }
76198 Index (const SegmentMapping *pmappings = nullptr , size_t n = 0 , bool ownership = true ,
77199 uint64_t vsize = 0 )
@@ -120,7 +242,15 @@ class Index : public IMemoryIndex {
120242 virtual size_t lookup (Segment s, /* OUT */ SegmentMapping *pm, size_t n) const override {
121243 if (s.length == 0 )
122244 return 0 ;
123- auto lb = std::lower_bound (pbegin, pend, s);
245+ const SegmentMapping *lb;
246+ if (lbt) {
247+ lb = pbegin + lbt->search (s.offset );
248+ if (lb->end () <= s.offset )
249+ lb++;
250+ } else {
251+ lb = std::lower_bound (pbegin, pend, s);
252+ }
253+
124254 auto m = copy_n (lb, pend, s.end (), pm, n);
125255 trim_edge_mappings (pm, m, s);
126256 return m;
@@ -155,6 +285,16 @@ class Index : public IMemoryIndex {
155285 }
156286
157287 UNIMPLEMENTED_POINTER (IMemoryIndex *make_read_only_index () const override );
288+
289+ int build_lineriazed_bptree () {
290+ LinearizedBptree *tree = is_avx512f_supported () ? new LinearizedBptreeAcc () : new LinearizedBptree ();
291+ if (tree->build (pbegin, pend) < 0 ) {
292+ delete (tree);
293+ return -1 ;
294+ }
295+ lbt = tree;
296+ return 0 ;
297+ }
158298};
159299
160300class LevelIndex : public Index {
@@ -694,6 +834,8 @@ IMemoryIndex *merge_memory_indexes(const IMemoryIndex **pindexes, size_t n) {
694834 auto pi = (const Index **)pindexes;
695835 mapping.reserve (pi[0 ]->size ());
696836 merge_indexes (0 , mapping, pi, n, 0 , UINT64_MAX);
697- return new Index (std::move (mapping), pindexes[0 ]->vsize ());
837+ auto idx = new Index (std::move (mapping), pindexes[0 ]->vsize ());
838+ idx->build_lineriazed_bptree ();
839+ return idx;
698840}
699841} // namespace LSMT
0 commit comments