Implement cross-encoder reranker input length limit

edwinyyyu · edwinyyyu · commit eec97d462100 · 2025-12-09T16:02:59.000-08:00
Signed-off-by: Edwin Yu <edwinyyyu@gmail.com>
diff --git a/src/memmachine/common/reranker/cross_encoder_reranker.py b/src/memmachine/common/reranker/cross_encoder_reranker.py
@@ -5,6 +5,8 @@
 from pydantic import BaseModel, Field, InstanceOf
 from sentence_transformers import CrossEncoder
 
+from memmachine.common.utils import chunk_text, unflatten_like
+
 from .reranker import Reranker
 
 
@@ -15,6 +17,10 @@ class CrossEncoderRerankerParams(BaseModel):
         ...,
         description="The cross-encoder model to use for reranking",
     )
+    max_input_length: int | None = Field(
+        default=None,
+        description="Maximum input length for the model (in Unicode code points)",
+    )
 
 
 class CrossEncoderReranker(Reranker):
@@ -25,15 +31,38 @@ def __init__(self, params: CrossEncoderRerankerParams) -> None:
         super().__init__()
 
         self._cross_encoder = params.cross_encoder
+        self._max_input_length = params.max_input_length
 
     async def score(self, query: str, candidates: list[str]) -> list[float]:
         """Score candidates for a query using the cross-encoder."""
-        scores = [
+        limit = self._max_input_length
+        query = query[:limit] if limit else query
+
+        # An empty candidate yields no chunks; score it as-is so max() has a value.
+        candidates_chunks = [
+            (chunk_text(candidate, limit) or [candidate]) if limit else [candidate]
+            for candidate in candidates
+        ]
+
+        chunks = [
+            chunk
+            for candidate_chunks in candidates_chunks
+            for chunk in candidate_chunks
+        ]
+
+        chunk_scores = [
             float(score)
             for score in await asyncio.to_thread(
                 self._cross_encoder.predict,
-                [(query, candidate) for candidate in candidates],
+                [(query, chunk) for chunk in chunks],
                 show_progress_bar=False,
             )
         ]
-        return scores
+
+        candidates_chunk_scores = unflatten_like(chunk_scores, candidates_chunks)
+
+        # Take the maximum score among chunks for each candidate.
+        return [
+            max(candidate_chunk_scores)
+            for candidate_chunk_scores in candidates_chunk_scores
+        ]
diff --git a/src/memmachine/common/utils.py b/src/memmachine/common/utils.py
@@ -49,6 +49,29 @@ async def wrapper(*args: P.args, **kwargs: P.kwargs) -> T:
     return wrapper
 
 
+def chunk_text(text: str, max_length: int) -> list[str]:
+    """
+    Chunk text into consecutive partitions not exceeding max_length.
+
+    Args:
+        text (str): The input text to chunk.
+        max_length (int): The maximum length of each chunk. Must be positive.
+
+    Returns:
+        list[str]: The chunks in their original order; concatenating them
+            reproduces text. Empty if text is empty.
+
+    Raises:
+        ValueError: If max_length is not positive.
+
+    """
+    # A negative step would make range() empty and silently drop the text.
+    if max_length <= 0:
+        raise ValueError(f"max_length must be positive, got {max_length}")
+
+    return [text[i : i + max_length] for i in range(0, len(text), max_length)]
+
+
 def chunk_text_balanced(text: str, max_length: int) -> list[str]:
     """
     Chunk text into balanced partitions not exceeding max_length.