Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 157 additions & 39 deletions src/utils/types.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,45 @@
"""Common types for the project."""

from typing import Any, Optional
import ast
import json
import logging
import re
from typing import Any, Optional

from llama_stack_client.lib.agents.event_logger import interleaved_content_as_str
from llama_stack_client.lib.agents.tool_parser import ToolParser
from llama_stack_client.types.shared.completion_message import CompletionMessage
from llama_stack_client.types.shared.tool_call import ToolCall
from llama_stack_client.types.tool_execution_step import ToolExecutionStep
from pydantic import BaseModel
from models.responses import RAGChunk

from constants import DEFAULT_RAG_TOOL
from models.responses import RAGChunk

logger = logging.getLogger(__name__)

# RAG Response Format Patterns
# ============================
# These patterns match the format produced by llama-stack's knowledge_search tool.
# Source: llama_stack/providers/inline/tool_runtime/rag/memory.py
#
# The format consists of:
# - Header (hardcoded): "knowledge_search tool found N chunks:\nBEGIN of knowledge_search tool results.\n"
# - Chunks (configurable template, default): "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
# - Footer (hardcoded): "END of knowledge_search tool results.\n"
#
# Note: The chunk template is configurable via RAGQueryConfig.chunk_template in llama-stack.
# If customized, these patterns may not match. A warning is logged when fallback occurs.

# Pattern to match individual RAG result blocks: " Result N\nContent: ..."
# Captures result number and everything until the next result or end marker
RAG_RESULT_PATTERN = re.compile(
r"\s*Result\s+(\d+)\s*\nContent:\s*(.*?)(?=\s*Result\s+\d+\s*\n|END of knowledge_search)",
re.DOTALL,
)
Comment on lines +21 to +39
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Fix pylint line-too-long in RAG format documentation comments.

Pylint is currently failing on Lines 27–28 due to overly long comment lines. You can keep the documentation but wrap it over multiple shorter lines.

-# The format consists of:
-# - Header (hardcoded): "knowledge_search tool found N chunks:\nBEGIN of knowledge_search tool results.\n"
-# - Chunks (configurable template, default): "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
-# - Footer (hardcoded): "END of knowledge_search tool results.\n"
+# The format consists of:
+# - Header (hardcoded):
+#   "knowledge_search tool found N chunks:\n"
+#   "BEGIN of knowledge_search tool results.\n"
+# - Chunks (configurable template, default):
+#   "Result {index}\n"
+#   "Content: {chunk.content}\n"
+#   "Metadata: {metadata}\n"
+# - Footer (hardcoded):
+#   "END of knowledge_search tool results.\n"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
# RAG Response Format Patterns
# ============================
# These patterns match the format produced by llama-stack's knowledge_search tool.
# Source: llama_stack/providers/inline/tool_runtime/rag/memory.py
#
# The format consists of:
# - Header (hardcoded): "knowledge_search tool found N chunks:\nBEGIN of knowledge_search tool results.\n"
# - Chunks (configurable template, default): "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n"
# - Footer (hardcoded): "END of knowledge_search tool results.\n"
#
# Note: The chunk template is configurable via RAGQueryConfig.chunk_template in llama-stack.
# If customized, these patterns may not match. A warning is logged when fallback occurs.
# Pattern to match individual RAG result blocks: " Result N\nContent: ..."
# Captures result number and everything until the next result or end marker
RAG_RESULT_PATTERN = re.compile(
r"\s*Result\s+(\d+)\s*\nContent:\s*(.*?)(?=\s*Result\s+\d+\s*\n|END of knowledge_search)",
re.DOTALL,
)
# RAG Response Format Patterns
# ============================
# These patterns match the format produced by llama-stack's knowledge_search tool.
# Source: llama_stack/providers/inline/tool_runtime/rag/memory.py
#
# The format consists of:
# - Header (hardcoded):
# "knowledge_search tool found N chunks:\n"
# "BEGIN of knowledge_search tool results.\n"
# - Chunks (configurable template, default):
# "Result {index}\n"
# "Content: {chunk.content}\n"
# "Metadata: {metadata}\n"
# - Footer (hardcoded):
# "END of knowledge_search tool results.\n"
#
# Note: The chunk template is configurable via RAGQueryConfig.chunk_template in llama-stack.
# If customized, these patterns may not match. A warning is logged when fallback occurs.
# Pattern to match individual RAG result blocks: " Result N\nContent: ..."
# Captures result number and everything until the next result or end marker
RAG_RESULT_PATTERN = re.compile(
r"\s*Result\s+(\d+)\s*\nContent:\s*(.*?)(?=\s*Result\s+\d+\s*\n|END of knowledge_search)",
re.DOTALL,
)
🧰 Tools
🪛 GitHub Actions: Python linter

[error] 27-27: C0301: Line too long (106/100) (line-too-long). Pylint reported while running: uv run pylint src tests. Exit code 16.


[error] 28-28: C0301: Line too long (111/100) (line-too-long). Pylint reported while running: uv run pylint src tests. Exit code 16.

🤖 Prompt for AI Agents
In src/utils/types.py around lines 21 to 39, several documentation comment lines
(notably the RAG format description on lines ~27–28) exceed pylint's
line-too-long limit; break those long comment lines into multiple shorter
comment lines (each starting with '#') so no line exceeds the project's max
length (e.g., 80 chars), preserving the original wording and punctuation but
wrapping sentences across lines to satisfy pylint.


# Pattern to extract metadata dict from a result block
RAG_METADATA_PATTERN = re.compile(r"Metadata:\s*(\{[^}]+\})", re.DOTALL)


class Singleton(type):
Expand Down Expand Up @@ -117,47 +147,135 @@ def append_tool_calls_from_llama(self, tec: ToolExecutionStep) -> None:
self._extract_rag_chunks_from_response(response_content)

def _extract_rag_chunks_from_response(self, response_content: str) -> None:
"""Extract RAG chunks from tool response content."""
"""Extract RAG chunks from tool response content.

Parses RAG tool responses in multiple formats:
1. JSON format with "chunks" array or list of chunk objects
2. Formatted text with "Result N" blocks containing Content and Metadata

For formatted text responses, extracts:
- Content text for each result
- Metadata including docs_url, title, chunk_id, document_id
"""
if not response_content or not response_content.strip():
return

# Try JSON format first
if self._try_parse_json_chunks(response_content):
return

# Try formatted text with "Result N" blocks
if self._try_parse_formatted_chunks(response_content):
return

# Fallback: treat entire response as single chunk
# This may indicate the RAG response format has changed
logger.warning(
"Unable to parse individual RAG chunks from response. "
"Falling back to single-chunk extraction. "
"This may indicate a change in the RAG tool response format. "
"Response preview: %.200s...",
response_content[:200] if len(response_content) > 200 else response_content,
)
self.rag_chunks.append(
RAGChunk(
content=response_content,
source=DEFAULT_RAG_TOOL,
score=None,
)
)

def _try_parse_json_chunks(self, response_content: str) -> bool:
"""Try to parse response as JSON chunks.

Returns True if successfully parsed, False otherwise.
"""
try:
# Parse the response to get chunks
# Try JSON first
try:
data = json.loads(response_content)
if isinstance(data, dict) and "chunks" in data:
for chunk in data["chunks"]:
data = json.loads(response_content)
if isinstance(data, dict) and "chunks" in data:
for chunk in data["chunks"]:
self.rag_chunks.append(
RAGChunk(
content=chunk.get("content", ""),
source=chunk.get("source"),
score=chunk.get("score"),
)
)
return True
Comment on lines 121 to +204
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Empty chunks array returns True, may trigger unexpected behavior.

If the JSON contains {"chunks": []} (empty array), the loop executes zero times but returns True at line 183. This prevents the fallback from creating a single chunk, resulting in no RAG chunks at all.

Consider checking if chunks were actually added:

             if isinstance(data, dict) and "chunks" in data:
+                if not data["chunks"]:
+                    return False
                 for chunk in data["chunks"]:
                     self.rag_chunks.append(
                         RAGChunk(
                             content=chunk.get("content", ""),
                             source=chunk.get("source"),
                             score=chunk.get("score"),
                         )
                     )
                 return True
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
# Parse the response to get chunks
# Try JSON first
try:
data = json.loads(response_content)
if isinstance(data, dict) and "chunks" in data:
for chunk in data["chunks"]:
data = json.loads(response_content)
if isinstance(data, dict) and "chunks" in data:
for chunk in data["chunks"]:
self.rag_chunks.append(
RAGChunk(
content=chunk.get("content", ""),
source=chunk.get("source"),
score=chunk.get("score"),
)
)
return True
try:
data = json.loads(response_content)
if isinstance(data, dict) and "chunks" in data:
if not data["chunks"]:
return False
for chunk in data["chunks"]:
self.rag_chunks.append(
RAGChunk(
content=chunk.get("content", ""),
source=chunk.get("source"),
score=chunk.get("score"),
)
)
return True
🤖 Prompt for AI Agents
In src/utils/types.py around lines 172 to 183, the code returns True whenever
"chunks" exists in the parsed JSON even if it's an empty list, causing no
RAGChunk to be created; modify the logic to only return True if at least one
chunk was actually appended to self.rag_chunks (e.g., check a counter or length
before returning), and if no chunks were added fall through to the existing
fallback that creates a single chunk; also guard against non-list "chunks"
values by treating them as absent so the fallback runs.

if isinstance(data, list):
for chunk in data:
if isinstance(chunk, dict):
self.rag_chunks.append(
RAGChunk(
content=chunk.get("content", ""),
content=chunk.get("content", str(chunk)),
source=chunk.get("source"),
score=chunk.get("score"),
)
)
elif isinstance(data, list):
# Handle list of chunks
for chunk in data:
if isinstance(chunk, dict):
self.rag_chunks.append(
RAGChunk(
content=chunk.get("content", str(chunk)),
source=chunk.get("source"),
score=chunk.get("score"),
)
)
except json.JSONDecodeError:
# If not JSON, treat the entire response as a single chunk
if response_content.strip():
self.rag_chunks.append(
RAGChunk(
content=response_content,
source=DEFAULT_RAG_TOOL,
score=None,
)
)
except (KeyError, AttributeError, TypeError, ValueError):
# Treat response as single chunk on data access/structure errors
if response_content.strip():
self.rag_chunks.append(
RAGChunk(
content=response_content, source=DEFAULT_RAG_TOOL, score=None
)
)
return bool(data)
except (json.JSONDecodeError, KeyError, AttributeError, TypeError, ValueError):
pass
return False

def _try_parse_formatted_chunks(self, response_content: str) -> bool:
"""Try to parse formatted text response with 'Result N' blocks.

Parses responses in format:
knowledge_search tool found N chunks:
BEGIN of knowledge_search tool results.
Result 1
Content: <text>
Metadata: {'chunk_id': '...', 'docs_url': '...', 'title': '...', ...}
Result 2
...
END of knowledge_search tool results.

Returns True if at least one chunk was parsed, False otherwise.
"""
# Check if this looks like a formatted RAG response
if "Result" not in response_content or "Content:" not in response_content:
return False

matches = RAG_RESULT_PATTERN.findall(response_content)
if not matches:
return False

for _result_num, content_block in matches:
chunk = self._parse_single_chunk(content_block)
if chunk:
self.rag_chunks.append(chunk)

return bool(self.rag_chunks)
Comment on lines +243 to +248
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Return value check may be incorrect after partial JSON parsing failure.

Line 227 checks bool(self.rag_chunks), which includes any chunks added by a failed JSON parsing attempt. If _try_parse_json_chunks appends chunks then throws an exception, it returns False, but self.rag_chunks is non-empty. Then _try_parse_formatted_chunks returns True even if it parsed nothing from formatted text.

Track chunks added within this method instead:

     def _try_parse_formatted_chunks(self, response_content: str) -> bool:
         # ... docstring ...
         if "Result" not in response_content or "Content:" not in response_content:
             return False

         matches = RAG_RESULT_PATTERN.findall(response_content)
         if not matches:
             return False

+        chunks_added = 0
         for _result_num, content_block in matches:
             chunk = self._parse_single_chunk(content_block)
             if chunk:
                 self.rag_chunks.append(chunk)
+                chunks_added += 1

-        return bool(self.rag_chunks)
+        return chunks_added > 0
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
for _result_num, content_block in matches:
chunk = self._parse_single_chunk(content_block)
if chunk:
self.rag_chunks.append(chunk)
return bool(self.rag_chunks)
chunks_added = 0
for _result_num, content_block in matches:
chunk = self._parse_single_chunk(content_block)
if chunk:
self.rag_chunks.append(chunk)
chunks_added += 1
return chunks_added > 0


def _parse_single_chunk(self, content_block: str) -> RAGChunk | None:
"""Parse a single chunk from a content block.

Args:
content_block: Text containing content and optionally metadata

Returns:
RAGChunk if successfully parsed, None otherwise
"""
# Extract metadata if present
metadata: dict[str, Any] = {}
metadata_match = RAG_METADATA_PATTERN.search(content_block)
if metadata_match:
try:
metadata = ast.literal_eval(metadata_match.group(1))
except (ValueError, SyntaxError) as e:
logger.debug("Failed to parse chunk metadata: %s", e)

# Extract content (everything before "Metadata:" if present)
if metadata_match:
content = content_block[: metadata_match.start()].strip()
else:
content = content_block.strip()

if not content:
return None

return RAGChunk(
content=content,
source=metadata.get("docs_url") or metadata.get("source"),
score=metadata.get("score"),
)
Loading
Loading