Skip to content

Commit b559dd8

Browse files
committed
Enhancing RAG-related logging with respect to BYOK sources.
# Reverting the storing changes.
1 parent 2446528 commit b559dd8

File tree

5 files changed

+134
-12
lines changed

5 files changed

+134
-12
lines changed

ols/app/models/models.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,11 +553,19 @@ class RagChunk:
553553
text: The text used as a RAG chunk.
554554
doc_url: The URL of the doc from which the RAG chunk comes from.
555555
doc_title: The title of the doc.
556+
index_id: The ID of the index from which this chunk was retrieved.
557+
index_origin: The origin/source of the index (e.g., BYOK, default).
558+
similarity_score: The similarity score of this chunk.
559+
token_count: The number of tokens in this chunk.
556560
"""
557561

558562
text: str
559563
doc_url: str
560564
doc_title: str
565+
index_id: str = ""
566+
index_origin: str = ""
567+
similarity_score: float = 0.0
568+
token_count: int = 0
561569

562570

563571
@dataclass

ols/src/query_helpers/docs_summarizer.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,22 @@ def _prepare_prompt(
176176
# Retrieve RAG content
177177
if rag_retriever:
178178
retrieved_nodes = rag_retriever.retrieve(query)
179+
logger.info("Retrieved %d documents from indexes", len(retrieved_nodes))
180+
179181
retrieved_nodes = reranker.rerank(retrieved_nodes)
182+
logger.info("After reranking: %d documents", len(retrieved_nodes))
183+
184+
# Logging top retrieved candidates with scores
185+
for i, node in enumerate(retrieved_nodes[:5]):
186+
logger.info(
187+
"Retrieved doc #%d: title='%s', url='%s', index='%s', score=%.4f",
188+
i + 1,
189+
node.metadata.get("title", "unknown"),
190+
node.metadata.get("docs_url", "unknown"),
191+
node.metadata.get("index_origin", "unknown"),
192+
node.get_score(raise_error=False),
193+
)
194+
180195
rag_chunks, available_tokens = token_handler.truncate_rag_context(
181196
retrieved_nodes, available_tokens
182197
)

ols/src/rag_index/index_loader.py

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,17 @@ class QueryFusionRetrieverCustom(QueryFusionRetriever): # pylint: disable=W0612
5656

5757
def __init__(self, **kwargs):
5858
"""Initialize custom query fusion class."""
59+
# Extract custom parameters before passing to parent
60+
retriever_weights = kwargs.pop("retriever_weights", None)
61+
index_configs = kwargs.pop("index_configs", None)
62+
retrievers = kwargs.get("retrievers", [])
63+
5964
super().__init__(**kwargs)
6065

61-
retriever_weights = kwargs.get("retriever_weights", None)
6266
if not retriever_weights:
63-
retriever_weights = [1.0] * len(kwargs["retrievers"])
67+
retriever_weights = [1.0] * len(retrievers)
6468
self._custom_retriever_weights = retriever_weights
69+
self._index_configs = index_configs
6570

6671
def _simple_fusion(self, results):
6772
"""Override internal method and apply weighted score."""
@@ -72,16 +77,38 @@ def _simple_fusion(self, results):
7277
# Current dynamic weights marginally penalize the score.
7378
all_nodes = {}
7479
for i, nodes_with_scores in enumerate(results.values()):
80+
# Getting index metadata based on available index configs
81+
index_id = ""
82+
index_origin = ""
83+
if self._index_configs and i < len(self._index_configs):
84+
index_config = self._index_configs[i]
85+
if index_config is not None:
86+
index_id = index_config.product_docs_index_id or ""
87+
index_origin = index_config.product_docs_origin or "default"
88+
7589
for j, node_with_score in enumerate(nodes_with_scores):
90+
# Add index metadata to node
91+
node_with_score.node.metadata["index_id"] = index_id
92+
node_with_score.node.metadata["index_origin"] = index_origin
93+
7694
node_index_id = f"{i}_{j}"
7795
all_nodes[node_index_id] = node_with_score
7896
# weighted_score = node_with_score.score * self._custom_retriever_weights[i]
7997
# Uncomment above and delete below, if we decide weights to be set from config.
80-
weighted_score = node_with_score.score * (
98+
original_score = node_with_score.score
99+
weighted_score = original_score * (
81100
1 - min(i, SCORE_DILUTION_DEPTH - 1) * SCORE_DILUTION_WEIGHT
82101
)
83102
all_nodes[node_index_id].score = weighted_score
84103

104+
logger.debug(
105+
"Document from index #%d (%s): original_score=%.4f, weighted_score=%.4f",
106+
i,
107+
index_origin or index_id or "unknown",
108+
original_score,
109+
weighted_score,
110+
)
111+
85112
return sorted(
86113
all_nodes.values(), key=lambda x: x.score or 0.0, reverse=True
87114
)
@@ -95,6 +122,7 @@ def __init__(self, index_config: Optional[ReferenceContent]) -> None:
95122
load_llama_index_deps()
96123
self._indexes = None
97124
self._retriever = None
125+
self._loaded_index_configs = None
98126

99127
self._index_config = index_config
100128
logger.debug("Config used for index load: %s", str(self._index_config))
@@ -132,6 +160,7 @@ def _load_index(self) -> None:
132160
Settings.llm = resolve_llm(None)
133161

134162
indexes = []
163+
loaded_configs = []
135164
for i, index_config in enumerate(self._index_config.indexes):
136165
if index_config.product_docs_index_path is None:
137166
logger.warning("Index path is not set for index #%d, skip loading.", i)
@@ -159,13 +188,15 @@ def _load_index(self) -> None:
159188
index_id=index_config.product_docs_index_id,
160189
)
161190
indexes.append(index)
191+
loaded_configs.append(index_config)
162192
logger.info("Vector index #%d is loaded.", i)
163193
except Exception as err:
164194
logger.exception(
165195
"Error loading vector index #%d:\n%s, skipped.", i, err
166196
)
167197
if len(indexes) == 0:
168198
logger.warning("No indexes are loaded.")
199+
self._loaded_index_configs = loaded_configs
169200
return
170201
if len(indexes) < len(self._index_config.indexes):
171202
logger.warning(
@@ -175,6 +206,7 @@ def _load_index(self) -> None:
175206
else:
176207
logger.info("All indexes are loaded.")
177208
self._indexes = indexes
209+
self._loaded_index_configs = loaded_configs
178210

179211
@property
180212
def vector_indexes(self) -> Optional[list[BaseIndex]]:
@@ -199,6 +231,18 @@ def get_retriever(
199231
):
200232
return self._retriever
201233

234+
# Log index information
235+
index_info = [
236+
f"{i}: {cfg.product_docs_origin or cfg.product_docs_index_id or 'unknown'}"
237+
for i, cfg in enumerate(self._loaded_index_configs or [])
238+
]
239+
logger.info(
240+
"Creating retriever for %d indexes (similarity_top_k=%d): %s",
241+
len(self._indexes),
242+
similarity_top_k,
243+
index_info,
244+
)
245+
202246
# Note: we are using a custom retriever, based on our need
203247
retriever = QueryFusionRetrieverCustom(
204248
retrievers=[
@@ -207,6 +251,7 @@ def get_retriever(
207251
],
208252
similarity_top_k=similarity_top_k,
209253
retriever_weights=None, # Setting as None, until this gets added to config
254+
index_configs=self._loaded_index_configs,
210255
mode="simple", # Don't modify this as we are adding our own logic
211256
num_queries=1, # set this to 1 to disable query generation
212257
use_async=False,

ols/utils/token_handler.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,12 +121,24 @@ def truncate_rag_context(
121121
list of `RagChunk` objects, available tokens after context usage
122122
"""
123123
rag_chunks = []
124+
logger.info(
125+
"Processing %d retrieved nodes for RAG context", len(retrieved_nodes)
126+
)
124127

125-
for node in retrieved_nodes:
128+
for idx, node in enumerate(retrieved_nodes):
126129
score = float(node.get_score(raise_error=False))
130+
doc_title = node.metadata.get("title", "unknown")
131+
doc_url = node.metadata.get("docs_url", "unknown")
132+
index_id = node.metadata.get("index_id", "")
133+
index_origin = node.metadata.get("index_origin", "")
134+
127135
if score < RAG_SIMILARITY_CUTOFF:
128-
logger.debug(
129-
"RAG content similarity score: %f is less than threshold %f.",
136+
logger.info(
137+
"Document #%d rejected: '%s' (index: %s) - "
138+
"similarity score %.4f < threshold %.4f",
139+
idx + 1,
140+
doc_title,
141+
index_origin or index_id or "unknown",
130142
score,
131143
RAG_SIMILARITY_CUTOFF,
132144
)
@@ -140,23 +152,49 @@ def truncate_rag_context(
140152
logger.debug("RAG content tokens count: %d.", tokens_count)
141153

142154
available_tokens = min(tokens_count, max_tokens)
143-
logger.debug("Available tokens: %d.", tokens_count)
155+
logger.debug("Available tokens: %d.", available_tokens)
144156

145157
if available_tokens < MINIMUM_CONTEXT_TOKEN_LIMIT:
146-
logger.debug("%d tokens are less than threshold.", available_tokens)
158+
logger.info(
159+
"Document #%d rejected: '%s' (index: %s) - "
160+
"insufficient tokens (%d < %d minimum)",
161+
idx + 1,
162+
doc_title,
163+
index_origin or index_id or "unknown",
164+
available_tokens,
165+
MINIMUM_CONTEXT_TOKEN_LIMIT,
166+
)
147167
break
148168

169+
logger.info(
170+
"Document #%d selected: title='%s', url='%s', index='%s', "
171+
"score=%.4f, tokens=%d",
172+
idx + 1,
173+
doc_title,
174+
doc_url,
175+
index_origin or index_id or "unknown",
176+
score,
177+
available_tokens,
178+
)
179+
149180
node_text = self.tokens_to_text(tokens[:available_tokens])
150181
rag_chunks.append(
151182
RagChunk(
152183
text=node_text,
153-
doc_url=node.metadata.get("docs_url", ""),
154-
doc_title=node.metadata.get("title", ""),
184+
doc_url=doc_url,
185+
doc_title=doc_title,
186+
index_id=index_id,
187+
index_origin=index_origin,
188+
similarity_score=score,
189+
token_count=available_tokens,
155190
)
156191
)
157192

158193
max_tokens -= available_tokens
159194

195+
logger.info(
196+
"Final selection: %d documents chosen for RAG context", len(rag_chunks)
197+
)
160198
return rag_chunks, max_tokens
161199

162200
def limit_conversation_history(

tests/unit/app/endpoints/test_ols.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,8 +1047,24 @@ def test_store_transcript(transcripts_location):
10471047
"query_is_valid": query_is_valid,
10481048
"llm_response": response,
10491049
"rag_chunks": [
1050-
{"text": "text1", "doc_url": "url1", "doc_title": "title1"},
1051-
{"text": "text2", "doc_url": "url2", "doc_title": "title2"},
1050+
{
1051+
"text": "text1",
1052+
"doc_url": "url1",
1053+
"doc_title": "title1",
1054+
"index_id": "",
1055+
"index_origin": "",
1056+
"similarity_score": 0.0,
1057+
"token_count": 0,
1058+
},
1059+
{
1060+
"text": "text2",
1061+
"doc_url": "url2",
1062+
"doc_title": "title2",
1063+
"index_id": "",
1064+
"index_origin": "",
1065+
"similarity_score": 0.0,
1066+
"token_count": 0,
1067+
},
10521068
],
10531069
"truncated": truncated,
10541070
"tool_calls": [

0 commit comments

Comments
 (0)