Skip to content

Commit d304352

Browse files
authored
refactor: (breaking change) use RichPromptTemplate instead of the get_prompt_by_jinja2_template function (#680)
part of #664 - llamaindex already supports rendering prompt templates via the jinja2 rendering engine through `RichPromptTemplate`, so we no longer need the `get_prompt_by_jinja2_template` helper. - We no longer need placeholders like <<query_str>> to avoid escaping, and can define prompt template variable placeholders uniformly using the {{xxx}} syntax. (breaking change)
1 parent 9db9ff6 commit d304352

File tree

5 files changed

+54
-121
lines changed

5 files changed

+54
-121
lines changed

backend/app/rag/chat/chat_flow.py

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
from llama_index.core import get_response_synthesizer
1212
from llama_index.core.base.llms.types import ChatMessage, MessageRole
1313
from llama_index.core.schema import NodeWithScore
14+
from llama_index.core.prompts.rich import RichPromptTemplate
15+
1416
from sqlmodel import Session
1517
from app.core.config import settings
1618
from app.exceptions import ChatNotFound
@@ -33,7 +35,6 @@
3335
from app.rag.utils import parse_goal_response_format
3436
from app.repositories import chat_repo
3537
from app.site_settings import SiteSetting
36-
from app.utils.jinja2 import get_prompt_by_jinja2_template
3738
from app.utils.tracing import LangfuseContextManager
3839

3940
logger = logging.getLogger(__name__)
@@ -355,14 +356,13 @@ def _refine_user_question(
355356
),
356357
)
357358

359+
prompt_template = RichPromptTemplate(refined_question_prompt)
358360
refined_question = self._fast_llm.predict(
359-
get_prompt_by_jinja2_template(
360-
refined_question_prompt,
361-
graph_knowledges=knowledge_graph_context,
362-
chat_history=chat_history,
363-
question=user_question,
364-
current_date=datetime.now().strftime("%Y-%m-%d"),
365-
),
361+
prompt_template,
362+
graph_knowledges=knowledge_graph_context,
363+
chat_history=chat_history,
364+
question=user_question,
365+
current_date=datetime.now().strftime("%Y-%m-%d"),
366366
)
367367

368368
if not annotation_silent:
@@ -403,19 +403,18 @@ def _clarify_question(
403403
"knowledge_graph_context": knowledge_graph_context,
404404
},
405405
) as span:
406-
clarity_result = (
407-
self._fast_llm.predict(
408-
prompt=get_prompt_by_jinja2_template(
409-
self.engine_config.llm.clarifying_question_prompt,
410-
graph_knowledges=knowledge_graph_context,
411-
chat_history=chat_history,
412-
question=user_question,
413-
),
414-
)
415-
.strip()
416-
.strip(".\"'!")
406+
prompt_template = RichPromptTemplate(
407+
self.engine_config.llm.clarifying_question_prompt
417408
)
418409

410+
prediction = self._fast_llm.predict(
411+
prompt_template,
412+
graph_knowledges=knowledge_graph_context,
413+
chat_history=chat_history,
414+
question=user_question,
415+
)
416+
# TODO: using structured output to get the clarity result.
417+
clarity_result = prediction.strip().strip(".\"'!")
419418
need_clarify = clarity_result.lower() != "false"
420419
need_clarify_response = clarity_result if need_clarify else ""
421420

@@ -468,8 +467,10 @@ def _generate_answer(
468467
name="generate_answer", input=user_question
469468
) as span:
470469
# Initialize response synthesizer.
471-
text_qa_template = get_prompt_by_jinja2_template(
472-
self.engine_config.llm.text_qa_prompt,
470+
text_qa_template = RichPromptTemplate(
471+
template_str=self.engine_config.llm.text_qa_prompt
472+
)
473+
text_qa_template = text_qa_template.partial_format(
473474
current_date=datetime.now().strftime("%Y-%m-%d"),
474475
graph_knowledges=knowledge_graph_context,
475476
original_question=self.user_question,

backend/app/rag/chat/chat_service.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
from app.repositories.embedding_model import embedding_model_repo
4646
from app.repositories.llm import llm_repo
4747
from app.site_settings import SiteSetting
48-
from app.utils.jinja2 import get_prompt_by_jinja2_template
48+
from llama_index.core.prompts.rich import RichPromptTemplate
4949

5050
logger = logging.getLogger(__name__)
5151

@@ -283,11 +283,12 @@ def get_chat_message_recommend_questions(
283283
if questions is not None:
284284
return questions
285285

286+
prompt_template = RichPromptTemplate(
287+
chat_engine_config.llm.further_questions_prompt
288+
)
286289
recommend_questions = llm.predict(
287-
prompt=get_prompt_by_jinja2_template(
288-
chat_engine_config.llm.further_questions_prompt,
289-
chat_message_content=chat_message.content,
290-
),
290+
prompt_template,
291+
chat_message_content=chat_message.content,
291292
)
292293
recommend_question_list = recommend_questions.splitlines()
293294
recommend_question_list = [
@@ -311,10 +312,8 @@ def get_chat_message_recommend_questions(
311312
"""
312313
# with format or too long for per question, it's not a question list, generate again
313314
recommend_questions = llm.predict(
314-
prompt=get_prompt_by_jinja2_template(
315-
chat_engine_config.llm.further_questions_prompt,
316-
chat_message_content=regenerate_content,
317-
),
315+
prompt_template,
316+
chat_message_content=regenerate_content,
318317
)
319318

320319
db_session.add(

backend/app/rag/chat/retrieve/retrieve_flow.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55
from llama_index.core.instrumentation import get_dispatcher
66
from llama_index.core.llms import LLM
77
from llama_index.core.schema import NodeWithScore, QueryBundle
8+
from llama_index.core.prompts.rich import RichPromptTemplate
89
from pydantic import BaseModel
910
from sqlmodel import Session
1011

1112
from app.models import (
1213
Document as DBDocument,
1314
KnowledgeBase,
1415
)
15-
from app.utils.jinja2 import get_prompt_by_jinja2_template
1616
from app.rag.chat.config import ChatEngineConfig
1717
from app.rag.retrievers.knowledge_graph.fusion_retriever import (
1818
KnowledgeGraphFusionRetriever,
@@ -102,31 +102,34 @@ def _get_knowledge_graph_context(
102102
self, knowledge_graph: KnowledgeGraphRetrievalResult
103103
) -> str:
104104
if self.engine_config.knowledge_graph.using_intent_search:
105-
kg_context_template = get_prompt_by_jinja2_template(
106-
self.engine_config.llm.intent_graph_knowledge,
107-
# For forward compatibility considerations.
105+
kg_context_template = RichPromptTemplate(
106+
self.engine_config.llm.intent_graph_knowledge
107+
)
108+
return kg_context_template.format(
108109
sub_queries=knowledge_graph.to_subqueries_dict(),
109110
)
110-
return kg_context_template.template
111111
else:
112-
kg_context_template = get_prompt_by_jinja2_template(
113-
self.engine_config.llm.normal_graph_knowledge,
112+
kg_context_template = RichPromptTemplate(
113+
self.engine_config.llm.normal_graph_knowledge
114+
)
115+
return kg_context_template.format(
114116
entities=knowledge_graph.entities,
115117
relationships=knowledge_graph.relationships,
116118
)
117-
return kg_context_template.template
118119

119120
def _refine_user_question(
120121
self, user_question: str, knowledge_graph_context: str
121122
) -> str:
122-
return self._fast_llm.predict(
123-
get_prompt_by_jinja2_template(
124-
self.engine_config.llm.condense_question_prompt,
125-
graph_knowledges=knowledge_graph_context,
126-
question=user_question,
127-
current_date=datetime.now().strftime("%Y-%m-%d"),
128-
),
123+
prompt_template = RichPromptTemplate(
124+
self.engine_config.llm.condense_question_prompt
125+
)
126+
refined_question = self._fast_llm.predict(
127+
prompt_template,
128+
graph_knowledges=knowledge_graph_context,
129+
question=user_question,
130+
current_date=datetime.now().strftime("%Y-%m-%d"),
129131
)
132+
return refined_question.strip().strip(".\"'!")
130133

131134
def search_relevant_chunks(self, user_question: str) -> List[NodeWithScore]:
132135
retriever = ChunkFusionRetriever(

backend/app/rag/default_prompt.py

Lines changed: 6 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,15 @@
88
Sub-query: {{ sub_query }}
99
1010
- Entities:
11-
1211
{% for entity in data['entities'] %}
13-
1412
- Name: {{ entity.name }}
15-
- Description: {{ entity.description }}
16-
13+
Description: {{ entity.description }}
1714
{% endfor %}
1815
1916
- Relationships:
20-
2117
{% for relationship in data['relationships'] %}
22-
2318
- Description: {{ relationship.rag_description }}
24-
- Weight: {{ relationship.weight }}
25-
19+
Weight: {{ relationship.weight }}
2620
{% endfor %}
2721
2822
{% endfor %}
@@ -35,10 +29,8 @@
3529
Entities:
3630
3731
{% for entity in entities %}
38-
3932
- Name: {{ entity.name }}
40-
- Description: {{ entity.description }}
41-
33+
Description: {{ entity.description }}
4234
{% endfor %}
4335
4436
---------------------
@@ -192,7 +184,6 @@
192184
Refined standalone question:
193185
"""
194186

195-
196187
DEFAULT_TEXT_QA_PROMPT = """\
197188
Current Date: {{current_date}}
198189
---------------------
@@ -205,7 +196,7 @@
205196
Context information is below.
206197
---------------------
207198
208-
<<context_str>>
199+
{{context_str}}
209200
210201
---------------------
211202
@@ -242,45 +233,10 @@
242233
{{original_question}}
243234
244235
The Refined Question used to search:
245-
<<query_str>>
246-
247-
Answer:
248-
"""
249-
250-
DEFAULT_REFINE_PROMPT = """\
251-
The Original questions is:
252-
253-
{{original_question}}
254-
255-
Refined Question used to search:
256-
<<query_str>>
257-
258-
---------------------
259-
We have provided an existing answer:
260-
---------------------
261-
262-
<<existing_answer>>
263-
264-
---------------------
265-
We have the opportunity to refine the existing answer (only if needed) with some more knowledge graph and context information below.
266-
267-
---------------------
268-
Knowledge graph information is below
269-
---------------------
270236
271-
{{graph_knowledges}}
272-
273-
---------------------
274-
Context information is below.
275-
---------------------
237+
{{query_str}}
276238
277-
<<context_msg>>
278-
279-
---------------------
280-
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
281-
And the answer should use the same language with the question. If the answer has different language with the original question, please translate it to the same language with the question.
282-
283-
Refined Answer:
239+
Answer:
284240
"""
285241

286242
DEFAULT_FURTHER_QUESTIONS_PROMPT = """\

backend/app/utils/jinja2.py

Lines changed: 0 additions & 26 deletions
This file was deleted.

0 commit comments

Comments
 (0)