@@ -56,12 +56,17 @@ class QueryFusionRetrieverCustom(QueryFusionRetriever): # pylint: disable=W0612
5656
5757 def __init__ (self , ** kwargs ):
5858 """Initialize custom query fusion class."""
59+ # Extract custom parameters before passing to parent
60+ retriever_weights = kwargs .pop ("retriever_weights" , None )
61+ index_configs = kwargs .pop ("index_configs" , None )
62+ retrievers = kwargs .get ("retrievers" , [])
63+
5964 super ().__init__ (** kwargs )
6065
61- retriever_weights = kwargs .get ("retriever_weights" , None )
6266 if not retriever_weights :
63- retriever_weights = [1.0 ] * len (kwargs [ " retrievers" ] )
67+ retriever_weights = [1.0 ] * len (retrievers )
6468 self ._custom_retriever_weights = retriever_weights
69+ self ._index_configs = index_configs
6570
6671 def _simple_fusion (self , results ):
6772 """Override internal method and apply weighted score."""
@@ -72,16 +77,38 @@ def _simple_fusion(self, results):
7277 # Current dynamic weights marginally penalize the score.
7378 all_nodes = {}
7479 for i , nodes_with_scores in enumerate (results .values ()):
80+ # Getting index metadata based on available index configs
81+ index_id = ""
82+ index_origin = ""
83+ if self ._index_configs and i < len (self ._index_configs ):
84+ index_config = self ._index_configs [i ]
85+ if index_config is not None :
86+ index_id = index_config .product_docs_index_id or ""
87+ index_origin = index_config .product_docs_origin or "default"
88+
7589 for j , node_with_score in enumerate (nodes_with_scores ):
90+ # Add index metadata to node
91+ node_with_score .node .metadata ["index_id" ] = index_id
92+ node_with_score .node .metadata ["index_origin" ] = index_origin
93+
7694 node_index_id = f"{ i } _{ j } "
7795 all_nodes [node_index_id ] = node_with_score
7896 # weighted_score = node_with_score.score * self._custom_retriever_weights[i]
7997 # Uncomment above and delete below, if we decide weights to be set from config.
80- weighted_score = node_with_score .score * (
98+ original_score = node_with_score .score
99+ weighted_score = original_score * (
81100 1 - min (i , SCORE_DILUTION_DEPTH - 1 ) * SCORE_DILUTION_WEIGHT
82101 )
83102 all_nodes [node_index_id ].score = weighted_score
84103
104+ logger .debug (
105+ "Document from index #%d (%s): original_score=%.4f, weighted_score=%.4f" ,
106+ i ,
107+ index_origin or index_id or "unknown" ,
108+ original_score ,
109+ weighted_score ,
110+ )
111+
85112 return sorted (
86113 all_nodes .values (), key = lambda x : x .score or 0.0 , reverse = True
87114 )
@@ -95,6 +122,7 @@ def __init__(self, index_config: Optional[ReferenceContent]) -> None:
95122 load_llama_index_deps ()
96123 self ._indexes = None
97124 self ._retriever = None
125+ self ._loaded_index_configs = None
98126
99127 self ._index_config = index_config
100128 logger .debug ("Config used for index load: %s" , str (self ._index_config ))
@@ -132,6 +160,7 @@ def _load_index(self) -> None:
132160 Settings .llm = resolve_llm (None )
133161
134162 indexes = []
163+ loaded_configs = []
135164 for i , index_config in enumerate (self ._index_config .indexes ):
136165 if index_config .product_docs_index_path is None :
137166 logger .warning ("Index path is not set for index #%d, skip loading." , i )
@@ -159,13 +188,15 @@ def _load_index(self) -> None:
159188 index_id = index_config .product_docs_index_id ,
160189 )
161190 indexes .append (index )
191+ loaded_configs .append (index_config )
162192 logger .info ("Vector index #%d is loaded." , i )
163193 except Exception as err :
164194 logger .exception (
165195 "Error loading vector index #%d:\n %s, skipped." , i , err
166196 )
167197 if len (indexes ) == 0 :
168198 logger .warning ("No indexes are loaded." )
199+ self ._loaded_index_configs = loaded_configs
169200 return
170201 if len (indexes ) < len (self ._index_config .indexes ):
171202 logger .warning (
@@ -175,6 +206,7 @@ def _load_index(self) -> None:
175206 else :
176207 logger .info ("All indexes are loaded." )
177208 self ._indexes = indexes
209+ self ._loaded_index_configs = loaded_configs
178210
179211 @property
180212 def vector_indexes (self ) -> Optional [list [BaseIndex ]]:
@@ -199,6 +231,18 @@ def get_retriever(
199231 ):
200232 return self ._retriever
201233
234+ # Log index information
235+ index_info = [
236+ f"{ i } : { cfg .product_docs_origin or cfg .product_docs_index_id or 'unknown' } "
237+ for i , cfg in enumerate (self ._loaded_index_configs or [])
238+ ]
239+ logger .info (
240+ "Creating retriever for %d indexes (similarity_top_k=%d): %s" ,
241+ len (self ._indexes ),
242+ similarity_top_k ,
243+ index_info ,
244+ )
245+
202246 # Note: we are using a custom retriever, based on our need
203247 retriever = QueryFusionRetrieverCustom (
204248 retrievers = [
@@ -207,6 +251,7 @@ def get_retriever(
207251 ],
208252 similarity_top_k = similarity_top_k ,
209253 retriever_weights = None , # Setting as None, until this gets added to config
254+ index_configs = self ._loaded_index_configs ,
210255 mode = "simple" , # Don't modify this as we are adding our own logic
211256 num_queries = 1 , # set this to 1 to disable query generation
212257 use_async = False ,
0 commit comments