-
Notifications
You must be signed in to change notification settings - Fork 732
[Executorch] Introduce caching cpu memory allocator #15611
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: gh/kimishpatel/203/base
Are you sure you want to change the base?
Changes from 2 commits
ea16e15
08ab552
f9ce984
0c23c32
79bb135
7939d44
1d02fb8
7c2efa1
1bdcf8a
02ef641
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,48 @@ | ||||||
| # Copyright (c) Meta Platforms, Inc. and affiliates. | ||||||
| # All rights reserved. | ||||||
| # | ||||||
| # This source code is licensed under the BSD-style license found in the | ||||||
| # LICENSE file in the root directory of this source tree. | ||||||
|
|
||||||
| # Please keep this file formatted by running: | ||||||
| # ~~~ | ||||||
| # cmake-format -i CMakeLists.txt | ||||||
| # ~~~ | ||||||
|
|
||||||
| cmake_minimum_required(VERSION 3.19) | ||||||
|
|
||||||
| # Source root directory for executorch. | ||||||
| if(NOT EXECUTORCH_ROOT) | ||||||
| set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) | ||||||
| endif() | ||||||
|
|
||||||
| list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/") | ||||||
|
||||||
| list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/") | |
| list(TRANSFORM _extension_memory_allocator__srcs PREPEND "${EXECUTORCH_ROOT}/") |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,88 @@ | ||||||||||||||||||||||
| #include <cstdlib> | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| #include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h> | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| namespace executorch::extension { | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| namespace { | ||||||||||||||||||||||
| // Returns `size` rounded up to a multiple of the effective alignment, where | ||||||||||||||||||||||
| // the effective alignment is the larger of `alignment` and kDefaultAlignment. | ||||||||||||||||||||||
| // aligned_alloc needs the requested size to be such a multiple. The bit mask | ||||||||||||||||||||||
| // below is only a valid round-up for power-of-two alignments; allocate() | ||||||||||||||||||||||
| // rejects any other alignment before calling this helper. | ||||||||||||||||||||||
| size_t get_alignment_adjusted_size(size_t size, size_t alignment) { | ||||||||||||||||||||||
| const size_t effective_alignment = std::max(alignment, kDefaultAlignment); | ||||||||||||||||||||||
| const bool already_multiple = (size % effective_alignment) == 0; | ||||||||||||||||||||||
| if (already_multiple) { | ||||||||||||||||||||||
| return size; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| // Not a multiple: step past the next boundary, then clear the low bits. | ||||||||||||||||||||||
| return (size + effective_alignment) & ~(effective_alignment - 1); | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } // namespace | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Constructs an allocator with no backing buffer of its own: the base | ||||||||||||||||||||||
| // MemoryAllocator is initialised with size 0 and a null pointer. | ||||||||||||||||||||||
| // `max_size` is the threshold that allocate() compares against | ||||||||||||||||||||||
| // current_size_ + size to decide when to call free_cached(). | ||||||||||||||||||||||
| CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size) | ||||||||||||||||||||||
| : MemoryAllocator(0, nullptr) { | ||||||||||||||||||||||
| // Nothing has been allocated yet. | ||||||||||||||||||||||
| current_size_ = 0; | ||||||||||||||||||||||
| max_size_ = max_size; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| void* CPUCachingAllocator::allocate(size_t size, size_t alignment) { | ||||||||||||||||||||||
| EXECUTORCH_TRACK_ALLOCATION(prof_id(), size); | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| if (!isPowerOf2(alignment)) { | ||||||||||||||||||||||
| ET_LOG(Error, "Alignment %zu is not a power of 2", alignment); | ||||||||||||||||||||||
| return nullptr; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| size = get_alignment_adjusted_size(size, alignment); | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| std::lock_guard<std::mutex> guard(mutex_); | ||||||||||||||||||||||
| const auto& it = available_map_.find(size); | ||||||||||||||||||||||
| if (it == available_map_.end() || it->second.empty()) { | ||||||||||||||||||||||
| if (current_size_ + size > max_size_) { | ||||||||||||||||||||||
| // Freeing while holding the lock will cause performance issues. | ||||||||||||||||||||||
| // We should probably log how often this happens so that the | ||||||||||||||||||||||
| // call site can adjust the max_size_ parameter. | ||||||||||||||||||||||
| free_cached(); | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| void* ptr = std::aligned_alloc(alignment, size); | ||||||||||||||||||||||
| current_size_ += size; | ||||||||||||||||||||||
| if (ptr == nullptr) { | ||||||||||||||||||||||
| ET_LOG(Error, "Failed to allocate memory"); | ||||||||||||||||||||||
| return nullptr; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
Comment on lines
+45
to
+49
|
||||||||||||||||||||||
| current_size_ += size; | |
| if (ptr == nullptr) { | |
| ET_LOG(Error, "Failed to allocate memory"); | |
| return nullptr; | |
| } | |
| if (ptr == nullptr) { | |
| ET_LOG(Error, "Failed to allocate memory"); | |
| return nullptr; | |
| } | |
| current_size_ += size; |
Copilot
AI
Nov 17, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo in comment: "dont" should be "don't".
| // We dont lock mutex_ here because it will cause deadlock otherwise | |
| // We don't lock mutex_ here because it will cause deadlock otherwise |
Copilot
AI
Nov 17, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a potential race condition in the destructor. While the comment states "destructor must be called in thread safe manner", the destructor doesn't lock the mutex before calling reset() and free_cached(). If another thread is still executing methods on this object when the destructor is called, this could lead to undefined behavior. Consider adding a lock guard at the start of the destructor, or document that the caller must ensure no concurrent access during destruction.
| // destructor must be called in thread safe manner | |
| // destructor must be called in thread safe manner | |
| std::lock_guard<std::mutex> guard(mutex_); |
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,81 @@ | ||||||||||
| #pragma once | ||||||||||
|
|
||||||||||
| #include <cstddef> | ||||||||||
| #include <mutex> | ||||||||||
|
|
||||||||||
| #include <executorch/runtime/core/memory_allocator.h> | ||||||||||
|
|
||||||||||
| #ifdef USE_C10_SMALL_VECTOR | ||||||||||
| #include <c10/util/SmallVector.h> | ||||||||||
| #else | ||||||||||
| #include <vector> | ||||||||||
| #endif | ||||||||||
|
|
||||||||||
| #ifdef USE_C10_FLAT_HASH_MAP | ||||||||||
| #include <c10/util/flat_hash_map.h> | ||||||||||
| #else | ||||||||||
| #include <unordered_map> | ||||||||||
| #endif | ||||||||||
|
|
||||||||||
| /* | ||||||||||
| * CPUCachingAllocator: | ||||||||||
| * This file is copied over from c10/mobile/CPUCachingAllocator.h | ||||||||||
| * It is a thread safe caching allocator. | ||||||||||
| */ | ||||||||||
|
|
||||||||||
| namespace executorch::extension { | ||||||||||
|
|
||||||||||
| // SmallVector<T, N>: resolves to c10::SmallVector<T, N> when | ||||||||||
| // USE_C10_SMALL_VECTOR is defined. Otherwise it falls back to std::vector<T>; | ||||||||||
| // the second template argument is accepted but unused in that case. | ||||||||||
| #if defined(USE_C10_SMALL_VECTOR) | ||||||||||
| template <typename ValueT, unsigned Size> | ||||||||||
| using SmallVector = c10::SmallVector<ValueT, Size>; | ||||||||||
| #else | ||||||||||
| template <typename ValueT, unsigned Size> | ||||||||||
| using SmallVector = std::vector<ValueT>; | ||||||||||
| #endif | ||||||||||
|
|
||||||||||
| // FlatHashMap<K, V>: resolves to ska::flat_hash_map<K, V> when | ||||||||||
| // USE_C10_FLAT_HASH_MAP is defined (the matching header is included above | ||||||||||
| // under the same macro). Otherwise it falls back to std::unordered_map<K, V>. | ||||||||||
| #if defined(USE_C10_FLAT_HASH_MAP) | ||||||||||
| template <typename K, typename V> | ||||||||||
| using FlatHashMap = ska::flat_hash_map<K, V>; | ||||||||||
| #else | ||||||||||
| template <typename K, typename V> | ||||||||||
| using FlatHashMap = std::unordered_map<K, V>; | ||||||||||
| #endif | ||||||||||
|
|
||||||||||
| constexpr size_t kDefaultAlignment = 64; | ||||||||||
| class CPUCachingAllocator : public executorch::runtime::MemoryAllocator { | ||||||||||
| /* | ||||||||||
| * What it does: | ||||||||||
| * Caches all the allocations carried out by this allocator. | ||||||||||
| * Cache key is the size of the allocation. | ||||||||||
| * If requested size is found in the cache returns the cached pointer. | ||||||||||
| * What it does not do: | ||||||||||
| * No speculative allocation for any future allocations. | ||||||||||
| */ | ||||||||||
| private: | ||||||||||
| void free_cached(); | ||||||||||
|
|
||||||||||
| protected: | ||||||||||
| // Invariants. | ||||||||||
| // New invariants must be written. | ||||||||||
| FlatHashMap<size_t, SmallVector<void*, 16>> available_map_; | ||||||||||
| FlatHashMap<void*, size_t> allocation_map_; | ||||||||||
| // Since allocation_map_, which is a member of this instance, is | ||||||||||
| // mutated/read via all public APIs, we need a mutex to guard it. | ||||||||||
|
Comment on lines
+62
to
+63
|
||||||||||
| // Since allocation_map, which is a global instance, is mutated/read via | |
| // all public APIs we need a global mutex. | |
| // Since allocation_map_ and other member variables are mutated/read via | |
| // all public APIs, we need a mutex to protect concurrent access to these instance members. |
Copilot
AI
Nov 17, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The constructor parameter max_size is declared as uint32_t but the member variable max_size_ is of type size_t. This type mismatch could lead to implicit narrowing on platforms where size_t is larger than 32 bits. Consider changing the constructor parameter to size_t for consistency.
| CPUCachingAllocator(uint32_t max_size); | |
| CPUCachingAllocator(size_t max_size); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Typo in comment: "Please this file" should be "Please keep this file" or "Please ensure this file is".