Skip to content

Commit ba9695a

Browse files
committed
Fix a bug with reading/writing the last chunk
1 parent c32cc21 commit ba9695a

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

hnswlib/hnswalg.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ class HierarchicalNSW : public AlgorithmInterface<dist_t> {
181181
free(getLinkListPtr(i));
182182
}
183183
}
184-
data_level0_memory_.clear();
185184
linkLists_.clear();
186185
cur_element_count = 0;
187186
visited_list_pool_.reset(nullptr);

hnswlib/hnswlib.h

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,7 @@ MallocUniqueCharArrayPtr makeUniqueCharArray(size_t n_bytes) {
383383
// Manages a large, array-like data structure by allocating memory in smaller,
384384
// fixed-size blocks called "chunks." This class provides a flat, array-like
385385
// view over a large collection of elements without needing a single, massive
386-
// contiguous memory allocation, which helps avoid memory fragmentation.
386+
// contiguous memory allocation.
387387
//
388388
// It provides random access via `operator[]`, which internally maps an index
389389
// to the correct chunk and the element's offset within it. The size of the
@@ -481,33 +481,34 @@ class ChunkedArray {
481481

482482
chunks_.resize(new_chunk_count);
483483
for (size_t i = chunk_count; i < new_chunk_count; i++) {
484-
chunks_[i] = internal::makeUniqueCharArray(
484+
chunks_[i] = ::hnswlib::internal::makeUniqueCharArray(
485485
getSizePerChunk() + chunk_padding_bytes_);
486486
}
487487

488488
element_count_ = new_element_count;
489489
}
490490

491491
void writeToStream(std::ostream& output, size_t num_elements_to_write) {
492+
assert(num_elements_to_write <= element_count_);
492493
size_t num_chunks_to_write = getChunkCount(num_elements_to_write);
493-
size_t last_chunk_bytes =
494-
element_byte_size_ * (num_elements_to_write % elements_per_chunk_);
494+
size_t last_chunk_bytes = getLastChunkBytes(num_elements_to_write);
495495
for (size_t i = 0; i < num_chunks_to_write; ++i) {
496496
output.write(
497497
chunks_[i].get(),
498-
i + 1 == num_chunks_to_write ? last_chunk_bytes : getSizePerChunk());
498+
i + 1 == num_chunks_to_write ? last_chunk_bytes
499+
: getSizePerChunk());
499500
}
500501
}
501502

502503
void readFromStream(std::istream& input, size_t num_elements_to_read) {
503504
assert(num_elements_to_read <= element_count_);
504505
size_t num_chunks_to_read = getChunkCount(num_elements_to_read);
505-
size_t last_chunk_bytes =
506-
element_byte_size_ * (num_elements_to_read % elements_per_chunk_);
506+
size_t last_chunk_bytes = getLastChunkBytes(num_elements_to_read);
507507
for (size_t i = 0; i < num_chunks_to_read; ++i) {
508508
input.read(
509509
chunks_[i].get(),
510-
i + 1 == num_chunks_to_read ? last_chunk_bytes : getSizePerChunk());
510+
i + 1 == num_chunks_to_read ? last_chunk_bytes
511+
: getSizePerChunk());
511512
}
512513
}
513514

@@ -538,6 +539,17 @@ class ChunkedArray {
538539
return (element_count + elements_per_chunk_ - 1) / elements_per_chunk_;
539540
}
540541

542+
// Returns the byte size of the last chunk, as if the array held exactly
543+
// the given number of elements.
544+
size_t getLastChunkBytes(size_t element_count) {
545+
size_t last_chunk_num_elements = element_count % elements_per_chunk_;
546+
if (last_chunk_num_elements == 0) {
547+
// Last chunk is whole.
548+
last_chunk_num_elements = elements_per_chunk_;
549+
}
550+
return last_chunk_num_elements * element_byte_size_;
551+
}
552+
541553
size_t element_byte_size_;
542554
size_t elements_per_chunk_;
543555
size_t element_count_;

0 commit comments

Comments
 (0)