2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -923,6 +923,8 @@ if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
endif()

if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator/runner)
Copilot AI Nov 17, 2025

The path extension/memory_allocator/runner does not exist in the repository. The memory allocator CMakeLists.txt is located at extension/memory_allocator/CMakeLists.txt. This line should be:

add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator)
Suggested change:
-add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator/runner)
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/memory_allocator)

+list(APPEND _executorch_extensions extension_memory_allocator)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
list(APPEND _executorch_extensions extension_llm_runner)
endif()
2 changes: 1 addition & 1 deletion extension/llm/runner/CMakeLists.txt
@@ -34,7 +34,7 @@ list(TRANSFORM _extension_llm_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})

set(runner_deps executorch_core extension_module extension_tensor
-tokenizers::tokenizers
+extension_memory_allocator tokenizers::tokenizers
)

# depend on arange_utils
20 changes: 18 additions & 2 deletions extension/llm/runner/llm_runner_helper.cpp
@@ -17,6 +17,7 @@
#include <executorch/extension/llm/runner/text_llm_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
+#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/runtime.h>
#include <pytorch/tokenizers/hf_tokenizer.h>
@@ -209,11 +210,26 @@ std::unique_ptr<TextLLMRunner> create_text_llm_runner(

// Create the Module
std::unique_ptr<Module> module;
+uint32_t max_cached_memory_size_bytes_ = 1024 * 1024 * 10; // 10MB
Copilot AI Nov 17, 2025

The hardcoded value of 10MB for the caching allocator size should be documented or made configurable. According to the PR description, this improves performance by 6% on iOS for SDPA op temp allocations, but different models or use cases may benefit from different cache sizes. Consider:

  1. Adding a comment explaining why 10MB was chosen
  2. Making this value configurable through a parameter or constant (see the sketch below)
  3. Documenting the performance implications in code comments

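A minimal sketch of option 2, assuming only what the diff below shows (a CPUCachingAllocator constructed from a byte count); the kDefaultTempCacheBytes constant and make_temp_allocator() helper are illustrative names, not part of the existing create_text_llm_runner() API:

#include <executorch/extension/memory_allocator/cpu_caching_malloc_allocator.h>

#include <cstdint>
#include <memory>

// 10 MiB default; per the PR description this improves performance by ~6% on
// iOS for SDPA op temp allocations, but other models or use cases may benefit
// from a different cache size.
constexpr uint32_t kDefaultTempCacheBytes = 10 * 1024 * 1024;

// Hypothetical helper: callers can override the cache size instead of relying
// on a hardcoded local inside the runner-creation code.
std::unique_ptr<executorch::extension::CPUCachingAllocator> make_temp_allocator(
    uint32_t cache_bytes = kDefaultTempCacheBytes) {
  return std::make_unique<executorch::extension::CPUCachingAllocator>(cache_bytes);
}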
if (data_files.size() > 0) {
module = std::make_unique<Module>(
-model_path, data_files, Module::LoadMode::File);
+model_path,
+data_files,
+Module::LoadMode::File,
+nullptr,
+std::make_unique<
+executorch::extension::CPUCachingAllocator>( // temp memory
+// allocator
+max_cached_memory_size_bytes_));
} else {
-module = std::make_unique<Module>(model_path, Module::LoadMode::File);
+module = std::make_unique<Module>(
+model_path,
+Module::LoadMode::File,
+nullptr,
+std::make_unique<
+executorch::extension::CPUCachingAllocator>( // temp memory
+// allocator
+max_cached_memory_size_bytes_));
}

// Get metadata from Module
1 change: 1 addition & 0 deletions extension/llm/runner/targets.bzl
@@ -148,6 +148,7 @@ def define_common_targets():
":text_prefiller" + aten_suffix,
":text_token_generator" + aten_suffix,
"//executorch/extension/llm/runner/io_manager:io_manager" + aten_suffix,
"//executorch/extension/memory_allocator:cpu_caching_allocator",
"//pytorch/tokenizers:hf_tokenizer",
"//pytorch/tokenizers:llama2c_tokenizer",
"//pytorch/tokenizers:sentencepiece",