xynehq · kalpadhwaryu · Sep 22, 2025 · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025
@@ -1274,6 +1274,7 @@ export const agentSearchQueryPrompt = (
       a. ${QueryType.SearchWithoutFilters}
       b. ${QueryType.SearchWithFilters}  
       c. ${QueryType.GetItems}
+      d. ${QueryType.AggregatorQuery}
 
     ### CLASSIFICATION RULES - FIXED AND SOLID
 
@@ -1360,15 +1361,51 @@ export const agentSearchQueryPrompt = (
             "filterQuery": "<extracted keywords>",
           }
         }
-
+
+    4. **${QueryType.AggregatorQuery}**:
+    - The user is asking for specific information derived from items, not just a raw list.
+    - The request typically includes special keywords that imply filtering, aggregating, deduplicating, extracting fields, counting, or summarizing across items.
+    - Classify as AggregatorQuery when:
+      - Both <app> or <entity> are present (same requirement as GetItems), and
+      - The user includes specific keywords / intent such as:
+        - People-/field-focused extraction: names, emails, titles, owners, attendees, managers, requestors
+        - Aggregation/summarization: count, how many, unique, distinct, top, most, least, sum, average
+        - Set constraints: who joined, who applied, who I met, who reported, which teams, by department
+        - Return-shape hints: list the names, give me the emails, show unique titles
+      - The user intent is to compute or extract a specific field or metric (e.g., “names of people who joined”, “count of emails”), not to display the full items.
+    - Do not classify as AggregatorQuery if the user merely wants a list of items without these special keywords (that is GetItems).
+    - The result should be an extracted/aggregated answer (e.g., a list of names, a count, grouped summaries), not the raw items.
+    - Example Queries:
+      - “Can you give me the names of the people who joined Xyne in the last 6 months?”
+      - “Give me the list of people who applied for referrals to me.”
+      - “All the people who I have meetings with in the last 3 months.”
+      - “How many Google Docs were created in August?”
+      - “Show unique senders from my emails this week.”
+      - “List the teams of people who joined in the past quarter.”
+      - “Top 5 attendees I met on calendar in the last month.”
+      - “Give me the email addresses of candidates who applied via Greenhouse in July.”
+       - **JSON Structure**:
+        {
+          "type": "${QueryType.AggregatorQuery}",
+          "filters": {
+            "apps": ["<app1>", "<app2>"] or ["<single_app>"],
+            "entities": ["<entity1>", "<entity2>"] or ["<single_entity>"],
+            "count": "<number of items to list>",
+            "startTime": "<start time in ${config.llmTimeFormat}, if applicable>",
+            "endTime": "<end time in ${config.llmTimeFormat}, if applicable>"
+            "sortDirection": <boolean or null>,
+            "filterQuery": "<search keywords for content search>"
+          }
+        }
     ---
 
     #### Enum Values for Valid Inputs
 
     type (Query Types):  
     - ${QueryType.SearchWithoutFilters}  
     - ${QueryType.GetItems}    
-    - ${QueryType.SearchWithFilters}  
+    - ${QueryType.SearchWithFilters}
+    - ${QueryType.AggregatorQuery}  
 
     app (Valid Apps - can be arrays):  
     - ${Apps.GoogleDrive} 
@@ -1409,7 +1446,7 @@ export const agentSearchQueryPrompt = (
          "queryRewrite": "<string or null>",
          "temporalDirection": "next" | "prev" | null,
          "isFollowUp": "<boolean>",
-         "type": "<${QueryType.SearchWithoutFilters} | ${QueryType.SearchWithFilters}  | ${QueryType.GetItems} >",
+         "type": "<${QueryType.SearchWithoutFilters} | ${QueryType.SearchWithFilters}  | ${QueryType.GetItems} | ${QueryType.AggregatorQuery}>",
          "filterQuery": "<string or null>",
          "filters": {
            "apps": ["<app1>", "<app2>"] or ["<single_app>"] or null,

@@ -1235,6 +1235,7 @@ export const searchQueryPrompt = (
       a. ${QueryType.SearchWithoutFilters}
       b. ${QueryType.SearchWithFilters}  
       c. ${QueryType.GetItems}
+      d. ${QueryType.AggregatorQuery}
 
     ### CLASSIFICATION RULES - FIXED AND SOLID
 
@@ -1319,6 +1320,41 @@ export const searchQueryPrompt = (
           }
         }
 
+    4. **${QueryType.AggregatorQuery}**:
+    - The user is asking for specific information derived from items, not just a raw list.
+    - The request typically includes special keywords that imply filtering, aggregating, deduplicating, extracting fields, counting, or summarizing across items.
+    - Classify as AggregatorQuery when:
+      - Both <app> or <entity> are present (same requirement as GetItems), and
+      - The user includes specific keywords / intent such as:
+        - People-/field-focused extraction: names, emails, titles, owners, attendees, managers, requestors
+        - Aggregation/summarization: count, how many, unique, distinct, top, most, least, sum, average
+        - Set constraints: who joined, who applied, who I met, who reported, which teams, by department
+        - Return-shape hints: list the names, give me the emails, show unique titles
+      - The user intent is to compute or extract a specific field or metric (e.g., “names of people who joined”, “count of emails”), not to display the full items.
+    - Do not classify as AggregatorQuery if the user merely wants a list of items without these special keywords (that is GetItems).
+    - The result should be an extracted/aggregated answer (e.g., a list of names, a count, grouped summaries), not the raw items.
+    - Example Queries:
+      - “Can you give me the names of the people who joined Xyne in the last 6 months?”
+      - “Give me the list of people who applied for referrals to me.”
+      - “All the people who I have meetings with in the last 3 months.”
+      - “How many Google Docs were created in August?”
+      - “Show unique senders from my emails this week.”
+      - “List the teams of people who joined in the past quarter.”
+      - “Top 5 attendees I met on calendar in the last month.”
+      - “Give me the email addresses of candidates who applied via Greenhouse in July.”
+       - **JSON Structure**:
+        {
+          "type": "${QueryType.AggregatorQuery}",
+          "filters": {
+            "apps": ["<app1>", "<app2>"] or ["<single_app>"],
+            "entities": ["<entity1>", "<entity2>"] or ["<single_entity>"],
+            "count": "<number of items to list>",
+            "startTime": "<start time in ${config.llmTimeFormat}, if applicable>",
+            "endTime": "<end time in ${config.llmTimeFormat}, if applicable>"
+            "sortDirection": <boolean or null>,
+            "filterQuery": "<search keywords for content search>"
+          }
+        }
     ---
 
     #### Enum Values for Valid Inputs
@@ -1327,6 +1363,7 @@ export const searchQueryPrompt = (
     - ${QueryType.SearchWithoutFilters}  
     - ${QueryType.GetItems}    
     - ${QueryType.SearchWithFilters}  
+    - ${QueryType.AggregatorQuery}  
 
     app (Valid Apps - can be arrays):  
     - ${Apps.GoogleDrive} 
@@ -1419,7 +1456,7 @@ export const searchQueryPrompt = (
          "queryRewrite": "<string or null>",
          "temporalDirection": "next" | "prev" | null,
          "isFollowUp": "<boolean>",
-         "type": "<${QueryType.SearchWithoutFilters} | ${QueryType.SearchWithFilters}  | ${QueryType.GetItems} >",
+         "type": "<${QueryType.SearchWithoutFilters} | ${QueryType.SearchWithFilters}  | ${QueryType.GetItems} | ${QueryType.AggregatorQuery}>",
          "filterQuery": "<string or null>",
          "filters": {
            "apps": ["<app1>", "<app2>"] or ["<single_app>"] or null,
@@ -2435,3 +2472,97 @@ Without these connections, I can only provide general assistance and cannot acce
 Ensure that any mention of dates or times is expressed in the user's local time zone. Always respect the user's time zone.
 I'm still here to help with general questions, explanations, and tasks that don't require access to your personal workspace data. How can I assist you today?`
 
+export const aggregatorQueryPrompt = (
+  userContext: string,
+  retrievedContext: string,
+  dateForAI: string,
+) => `Your *entire* response MUST be a single, valid JSON object. Your output must start *directly* with '{' and end *directly* with '}'. Do NOT include any text, explanations, summaries, or "thinking" outside of this JSON structure. 
+
+The current date for your information is ${dateForAI}. 
+
+You are an AI assistant with access to internal workspace data. Your task is to decide whether the user's input query can be answered using ONLY the items provided in the Retrieved Context. 
+- If so, return the docIds of the specific documents from that context that would help answer the query. 
+- If not, return an empty array. 
+
+You have access to the following types of data: 
+1. Files (documents, spreadsheets, etc.) 
+2. User profiles 
+3. Emails 
+4. Calendar events 
+5. Slack messages 
+
+The context provided will be formatted with specific fields for each type: 
+## File Context Format 
+- App and Entity type 
+- Title 
+- Creation and update timestamps 
+- Owner information 
+- Mime type 
+- Permissions (informational only) 
+- Content chunks 
+- Relevance score 
+- docId 
+## User Context Format 
+- App and Entity type 
+- Addition date 
+- Name and email 
+- Gender 
+- Job title 
+- Department 
+- Location 
+- Relevance score 
+- docId 
+## Email Context Format 
+- App and Entity type 
+- Timestamp 
+- Subject 
+- From/To/Cc/Bcc 
+- Labels 
+- Content chunks 
+- Relevance score 
+- docId 
+## Event Context Format 
+- App and Entity type 
+- Event name and description 
+- Location and URLs 
+- Time information 
+- Organizer and attendees 
+- Recurrence patterns 
+- Meeting links 
+- Relevance score 
+- docId 
+## Slack Message Context Format 
+- App and Entity type 
+- Username 
+- Message 
+- teamName (User is part of Workspace) 
+- Relevance score 
+- docId 
+# Context of the user talking to you 
+${userContext} 
+This includes: 
+- User's name and email 
+- Company name and domain 
+- Current time and date 
+- Timezone 
+# Retrieved Context 
+${retrievedContext} 
+# Decision Policy (How to pick docIds) 
+1. **Answerability Test**: Determine if the user's query can be answered using ONLY the information found within the retrievedContext items. If no item could reasonably help produce a correct, grounded answer, return an empty array. 
+2. **Eligibility**: Only consider items that include a usable **docId** field. If an item lacks a docId, ignore it for selection. 
+3. **Relevance Signals**: - Semantic match between the user query and the item's title, content chunks, subject/message text, or metadata. - Relevance score (prefer higher). - Temporal fit (prefer more recent when the query implies “latest”, dates, or time sensitivity). - Source fit (e.g., if the query asks for emails, prioritize emails). 
+4. **Coverage**: Select all items that together could reasonably help answer the query. Avoid over-inclusion: do not add loosely related items that wouldn't improve the answer. 
+5. **Deduplicate**: Do not return duplicate docIds. 
+6. **Ordering**: Order docIds by descending usefulness (consider relevance score first, then recency, then content specificity). 
+7. **Privacy/Permissions**: The "Permissions" field is informational; do not filter on it. 
+8. **No Hallucinations**: Do not infer or fabricate docIds that are not explicitly present. 
+# Output Rules 
+- Return ONLY a JSON object with the following shape: { "docIds": ["<docId1>", "<docId2>", "..."] } 
+- If NO relevant items are found or the context cannot answer the query: { "docIds": [] } 
+- Do NOT include explanations, scores, citations, or any fields other than "docIds". 
+# Error Handling 
+- If the user query is ambiguous or cannot be grounded to the Retrieved Context, return an empty array. 
+# Important Notes 
+- Respect the user's timezone for any temporal reasoning. 
+- Be strict: select items only when they *clearly* help answer the query. 
+- Never include meeting/event mentions unless those items directly help answer the user's query.`
@@ -34,6 +34,7 @@ import { getErrorMessage } from "@/utils"
 import { parse } from "partial-json"
 
 import { ModelToProviderMap } from "@/ai/mappers"
+import { answerContextMap } from "@/ai/context"
 import type {
   AnswerResponse,
   ChainBreakClassifications,
@@ -86,6 +87,7 @@ import {
   deepResearchPrompt,
   webSearchSystemPrompt,
   agentWithNoIntegrationsSystemPrompt,
+  aggregatorQueryPrompt,
 } from "@/ai/prompts"
 
 import { BedrockProvider } from "@/ai/provider/bedrock"
@@ -2127,3 +2129,63 @@ export const agentWithNoIntegrationsQuestion = (
     throw error
   }
 }
+
+
+// Helper function to build structured context from raw Vespa search results
+const buildStructuredContextFromItems = (
+  items: any[],
+  userMetadata: UserMetadataType,
+): string => {
+  return items
+    .map((item, index) => {
+      // Use answerContextMap to create structured context for each item
+      const contextFromAnswerContextMap = answerContextMap(item, userMetadata)
+
+      // Add document index and docId to the context
+      const documentIndex = index + 1
+      const docId = item?.fields?.docId || ""
+
+      return `Document Index: ${documentIndex}
+      Doc ID: ${docId}
+      ${contextFromAnswerContextMap}
+      `
+    })
+    .join("\n\n")
+}
+
+export const aggregatorQueryJsonStream = (
+  userQuery: string,
+  userCtx: string,
+  userMetadata: UserMetadataType,
+  items: any[], // VespaSearchResult[]
+  params: ModelParams,
+): AsyncIterableIterator<ConverseResponse> => {
+  if (!params.modelId) {
+    params.modelId = defaultFastModel
+  }
+
+  // Build structured context from raw items and their chunks
+  const structuredContext = buildStructuredContextFromItems(items, userMetadata)
+
+  params.systemPrompt = aggregatorQueryPrompt(
+    userCtx,
+    indexToCitation(structuredContext),
+    userMetadata.dateForAI,
+  )
+
+  params.json = true
+
+  const baseMessage = {
+    role: ConversationRole.USER,
+    content: [
+      {
+        text: `${userQuery}`,
+      },
+    ],
+  }
+
+  const messages: Message[] = params.messages
+    ? [...params.messages, baseMessage]
+    : [baseMessage]
+  return getProviderByModel(params.modelId).converseStream(messages, params)
+}
@@ -268,6 +268,7 @@ export enum QueryType {
   SearchWithoutFilters = "SearchWithoutFilters",
   GetItems = "GetItems",
   SearchWithFilters = "SearchWithFilters",
+  AggregatorQuery = "AggregatorQuery",
 }
 
 export type Cost = {
@@ -417,6 +418,15 @@ export const GetItems = z
   })
   .merge(TemporalClassifierSchema)
 
+export const AggregatorQuery = z
+  .object({
+    type: z.literal(QueryType.AggregatorQuery),
+    isFollowUp: z.boolean().optional(),
+    filters: FiltersSchema,
+    filterQuery: z.string().nullable(),
+  })
+  .merge(TemporalClassifierSchema)
+
 export const SearchWithFilters = z
   .object({
     type: z.literal(QueryType.SearchWithFilters),
@@ -437,6 +447,7 @@ export const QueryRouterResponseSchema = z.discriminatedUnion("type", [
     .merge(TemporalClassifierSchema),
   SearchWithFilters,
   GetItems,
+  AggregatorQuery,
 ])
 
 export type QueryRouterLLMResponse = z.infer<typeof QueryRouterResponseSchema>