Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions extension/memory_allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Please this file formatted by running:
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo in comment: "Please this file" should be "Please keep this file" or "Please ensure this file is".

Suggested change
# Please this file formatted by running:
# Please keep this file formatted by running:

Copilot uses AI. Check for mistakes.
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~

cmake_minimum_required(VERSION 3.19)

# Source root directory for executorch.
if(NOT EXECUTORCH_ROOT)
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/")
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable name _extension_module__srcs appears to be incorrect. This file is for extension_memory_allocator, so it should reference _extension_memory_allocator__srcs which is defined in the build variables.

Suggested change
list(TRANSFORM _extension_module__srcs PREPEND "${EXECUTORCH_ROOT}/")
list(TRANSFORM _extension_memory_allocator__srcs PREPEND "${EXECUTORCH_ROOT}/")

Copilot uses AI. Check for mistakes.
if(CMAKE_TOOLCHAIN_IOS
OR CMAKE_TOOLCHAIN_ANDROID
OR APPLE
)
# Building a share library on iOS requires code signing On Android we see
# duplicated registration when using shared lib
add_library(extension_memory_allocator STATIC ${_extension_memory_allocator__srcs})
else()
add_library(extension_memory_allocator ${_extension_memory_allocator__srcs})
endif()
target_link_libraries(
extension_memory_allocator PRIVATE executorch_core)
target_include_directories(
extension_memory_allocator PUBLIC ${_common_include_directories}
)
target_compile_options(
extension_memory_allocator
PUBLIC $<$<CXX_COMPILER_ID:MSVC>:/wd4996>
$<$<NOT:$<CXX_COMPILER_ID:MSVC>>:-Wno-deprecated-declarations -fPIC>
)

# Install libraries
install(
TARGETS extension_memory_allocator
EXPORT ExecuTorchTargets
DESTINATION ${CMAKE_INSTALL_LIBDIR}
INCLUDES
DESTINATION ${_common_include_directories}
)
88 changes: 88 additions & 0 deletions extension/memory_allocator/cpu_caching_malloc_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#include <cstdlib>

#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>

namespace executorch::extension {

namespace {
size_t get_alignment_adjusted_size(size_t size, size_t alignment) {
alignment = std::max(alignment, kDefaultAlignment);
if (size % alignment != 0) {
// Adjust size to the next multiple of alignment
// This is needed for aligned_alloc to work
return (size + alignment) & ~(alignment - 1);
} else {
return size;
}
}
} // namespace

CPUCachingAllocator::CPUCachingAllocator(uint32_t max_size) : MemoryAllocator(0, nullptr) {
max_size_ = max_size;
current_size_ = 0;
}

void* CPUCachingAllocator::allocate(size_t size, size_t alignment) {
EXECUTORCH_TRACK_ALLOCATION(prof_id(), size);

if (!isPowerOf2(alignment)) {
ET_LOG(Error, "Alignment %zu is not a power of 2", alignment);
return nullptr;
}
size = get_alignment_adjusted_size(size, alignment);

std::lock_guard<std::mutex> guard(mutex_);
const auto& it = available_map_.find(size);
if (it == available_map_.end() || it->second.empty()) {
if (current_size_ + size > max_size_) {
// Freeing while holding the lock will cause performance issues
// we probably should log how often this happens so as to allow
// for calling site to adjust the max_size_ parameter
free_cached();
}
void* ptr = std::aligned_alloc(alignment, size);
current_size_ += size;
if (ptr == nullptr) {
ET_LOG(Error, "Failed to allocate memory");
return nullptr;
}
Comment on lines +45 to +49
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current_size_ is incremented before checking if the allocation succeeded. If std::aligned_alloc returns nullptr (line 46), the function returns nullptr but current_size_ has already been incremented by size. This leads to incorrect size tracking. The increment should happen after confirming the allocation succeeded.

Suggested change
current_size_ += size;
if (ptr == nullptr) {
ET_LOG(Error, "Failed to allocate memory");
return nullptr;
}
if (ptr == nullptr) {
ET_LOG(Error, "Failed to allocate memory");
return nullptr;
}
current_size_ += size;

Copilot uses AI. Check for mistakes.
allocation_map_[ptr] = size;
return ptr;
}
void* ptr = it->second.back();
it->second.pop_back();
allocation_map_[ptr] = size;
return ptr;
}

void CPUCachingAllocator::free_cached() {
// We dont lock mutex_ here because it will cause deadlock otherwise
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo in comment: "dont" should be "don't".

Suggested change
// We dont lock mutex_ here because it will cause deadlock otherwise
// We don't lock mutex_ here because it will cause deadlock otherwise

Copilot uses AI. Check for mistakes.
// we could use recursive_mutex but we just design this differently since
// free_cache is not a public API anyways
for (const auto& it : available_map_) {
for (const auto ptr : it.second) {
std::free(ptr);
}
}
available_map_.clear();
}

void CPUCachingAllocator::reset() {
std::lock_guard<std::mutex> guard(mutex_);
for (auto& it : allocation_map_) {
void* ptr = it.first;
size_t alloc_size = it.second;
// Cache the memory
available_map_[alloc_size].push_back(ptr);
current_size_ -= alloc_size;
}
allocation_map_.clear();
}

CPUCachingAllocator::~CPUCachingAllocator() {
// destructor must be called in thread safe manner
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a potential race condition in the destructor. While the comment states "destructor must be called in thread safe manner", the destructor doesn't lock the mutex before calling reset() and free_cached(). If another thread is still executing methods on this object when the destructor is called, this could lead to undefined behavior. Consider adding a lock guard at the start of the destructor, or document that the caller must ensure no concurrent access during destruction.

Suggested change
// destructor must be called in thread safe manner
// destructor must be called in thread safe manner
std::lock_guard<std::mutex> guard(mutex_);

Copilot uses AI. Check for mistakes.
reset();
free_cached();
}

} // namespace executorch::extension
81 changes: 81 additions & 0 deletions extension/memory_allocator/cpu_caching_malloc_allocator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#pragma once

#include <cstddef>
#include <mutex>

#include <executorch/runtime/core/memory_allocator.h>

#ifdef USE_C10_SMALL_VECTOR
#include <c10/util/SmallVector.h>
#else
#include <vector>
#endif

#ifdef USE_C10_FLAT_HASH_MAP
#include <c10/util/flat_hash_map.h>
#else
#include <unordered_map>
#endif

/*
* CPUCachingAllocator:
* This file is copied over from c10/mobile/CPUCachingAllocator.h
* It is a thread safe caching allocator.
*/

namespace executorch::extension {

#ifdef USE_C10_SMALL_VECTOR
template <typename T, unsigned N>
using SmallVector = c10::SmallVector<T, N>;
#else
template <typename T, unsigned N>
using SmallVector = std::vector<T>;
#endif

#ifdef USE_C10_FLAT_HASH_MAP
template<typename KeyType, typename ValueType>
using FlatHashMap = ska::flat_hash_map<KeyType, ValueType>;
#else
template<typename KeyType, typename ValueType>
using FlatHashMap = std::unordered_map<KeyType, ValueType>;
#endif

constexpr size_t kDefaultAlignment = 64;
class CPUCachingAllocator : public executorch::runtime::MemoryAllocator {
/*
* What it does:
* Caches all the allocations carried out by this allocator.
* Cache key is the size of the allocation.
* If requested size is found in the cache returns the cached pointer.
* What it does not do:
* No speculative allocation for any future allocations.
*/
private:
void free_cached();

protected:
// Invariants.
// New invariants must be written.
FlatHashMap<size_t, SmallVector<void*, 16>> available_map_;
FlatHashMap<void*, size_t> allocation_map_;
// Since allocation_map, which is a global instance, is mutated/read via
// all public APIs we need a global mutex.
Comment on lines +62 to +63
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment mentions "allocation_map, which is a global instance" but allocation_map_ is actually an instance member variable, not a global instance. This comment is misleading and should be corrected to reflect that the mutex protects the instance's member variables.

Suggested change
// Since allocation_map, which is a global instance, is mutated/read via
// all public APIs we need a global mutex.
// Since allocation_map_ and other member variables are mutated/read via
// all public APIs, we need a mutex to protect concurrent access to these instance members.

Copilot uses AI. Check for mistakes.
std::mutex mutex_;
size_t max_size_;
size_t current_size_;

public:
/*
max_size: Maximum size of memory to cache. Never cache more than that.
*/
CPUCachingAllocator(uint32_t max_size);
Copy link

Copilot AI Nov 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The constructor parameter max_size is declared as uint32_t but the member variable max_size_ is of type size_t. This type mismatch could lead to implicit narrowing on platforms where size_t is larger than 32 bits. Consider changing the constructor parameter to size_t for consistency.

Suggested change
CPUCachingAllocator(uint32_t max_size);
CPUCachingAllocator(size_t max_size);

Copilot uses AI. Check for mistakes.
// Checks the cache to see if allocation of size bytes can be found.
// If so return cached memory, else
// allocates memory, records it for caching and returns.
void* allocate(size_t size, size_t alignment = kDefaultAlignment) override;
void reset() override;
~CPUCachingAllocator();
};

} // namespace executorch::extension
17 changes: 17 additions & 0 deletions extension/memory_allocator/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,20 @@ def define_common_targets():
"@EXECUTORCH_CLIENTS",
],
)

runtime.cxx_library(
name = "cpu_caching_allocator",
srcs = [
"cpu_caching_malloc_allocator.cpp",
],
exported_headers = [
"cpu_caching_malloc_allocator.h",
],
exported_deps = [
"//executorch/runtime/core:memory_allocator",
],
visibility = [
"//executorch/extension/memory_allocator/test/...",
"@EXECUTORCH_CLIENTS",
],
)
Loading
Loading