Commit 5436518

fix: Add adaptive batch retry for Ollama connection errors
Implements automatic batch splitting when embedding generation hits EOF or connection errors. Starts with larger batches (100) for throughput and recursively halves a failing batch until the requests succeed or the minimum batch size is reached. Also adds callback-based parsing in the AST splitter to handle files larger than 32KB.
1 parent 8fe50d7 commit 5436518
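
For intuition, the retry cascade described above plays out roughly like this (a hypothetical trace: the starting batch size of 100 and the floor of 5 come from this commit, but the failure pattern is invented for illustration):

// embedBatchAdaptive(100 chunks)    -> EOF, split into 50 + 50
//   embedBatchAdaptive(50 chunks)   -> EOF, split into 25 + 25
//     embedBatchAdaptive(25 chunks) -> ok
//     embedBatchAdaptive(25 chunks) -> ok
//   embedBatchAdaptive(50 chunks)   -> ok
//
// Worst case: halving from 100 down to the floor of 5 gives at most
// ceil(log2(100 / 5)) = 5 split levels before the error is re-thrown.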

4 files changed (+142 −2078 lines)

packages/core/src/context.ts

Lines changed: 64 additions & 4 deletions
@@ -1231,9 +1231,12 @@ export class Context {
         onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void
     ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
         const isHybrid = this.getIsHybrid();
+
+        // Batch size for embedding processing
         const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
+        console.log(`[Context] 🔧 Using batch size: ${EMBEDDING_BATCH_SIZE}`);
+
         const CHUNK_LIMIT = 450000;
-        console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);

         let chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }> = [];
         let processedFiles = 0;
@@ -1335,15 +1338,72 @@ export class Context {
         await this.processChunkBatch(chunks, codebasePath);
     }

+    /**
+     * Detect if error is a connection/EOF error that might be resolved by smaller batches
+     */
+    private isConnectionError(error: Error): boolean {
+        const errorMessage = error.message.toLowerCase();
+        return (
+            errorMessage.includes('eof') ||
+            errorMessage.includes('econnreset') ||
+            errorMessage.includes('socket hang up') ||
+            (errorMessage.includes('connection') && errorMessage.includes('reset')) ||
+            errorMessage.includes('fetch failed')
+        );
+    }
+
+    /**
+     * Adaptive batch embedding with automatic retry on smaller batches
+     * @param contents Array of text contents to embed
+     * @param minBatchSize Minimum batch size before giving up (default: 5)
+     * @returns Array of embedding vectors
+     */
+    private async embedBatchAdaptive(
+        contents: string[],
+        minBatchSize: number = 5
+    ): Promise<EmbeddingVector[]> {
+        try {
+            // Try with the full batch
+            return await this.embedding.embedBatch(contents);
+        } catch (error) {
+            if (!(error instanceof Error) || !this.isConnectionError(error)) {
+                // Not a connection error, re-throw
+                throw error;
+            }
+
+            // Connection error - try splitting the batch
+            if (contents.length <= minBatchSize) {
+                // Already at minimum batch size, can't split further
+                console.error(`[Context] ❌ Failed to embed batch of ${contents.length} chunks even at minimum batch size`);
+                throw error;
+            }
+
+            // Split batch in half and retry
+            const midpoint = Math.floor(contents.length / 2);
+            const firstHalf = contents.slice(0, midpoint);
+            const secondHalf = contents.slice(midpoint);
+
+            console.warn(`[Context] ⚠️ Batch embedding failed (${error.message}), splitting ${contents.length} chunks into ${firstHalf.length} + ${secondHalf.length}`);
+
+            // Recursively process both halves
+            const [firstResults, secondResults] = await Promise.all([
+                this.embedBatchAdaptive(firstHalf, minBatchSize),
+                this.embedBatchAdaptive(secondHalf, minBatchSize)
+            ]);
+
+            return [...firstResults, ...secondResults];
+        }
+    }
+
     /**
      * Get cached embeddings or generate new ones
      * @param chunkContents Array of chunk content strings
      * @returns Array of embedding vectors
      */
     private async getCachedOrGenerateEmbeddings(chunkContents: string[]): Promise<EmbeddingVector[]> {
         if (!this.embeddingCache.isAvailable()) {
-            // Cache not available, use regular embedding generation
-            return await this.embedding.embedBatch(chunkContents);
+            // Cache not available, use adaptive embedding generation
+            return await this.embedBatchAdaptive(chunkContents);
         }

         // Generate content hashes for all chunks
@@ -1372,7 +1432,7 @@ export class Context {
         // Generate embeddings for uncached chunks
         if (uncachedContents.length > 0) {
             console.log(`[Context] 🔄 Cache miss for ${uncachedContents.length}/${chunkContents.length} chunks, generating embeddings...`);
-            const newEmbeddings = await this.embedding.embedBatch(uncachedContents);
+            const newEmbeddings = await this.embedBatchAdaptive(uncachedContents);

             // Store new embeddings in cache
             const embeddingsToCache = new Map<string, number[]>();
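
To see the splitting behavior in isolation, here is a minimal standalone sketch (not part of the commit: mockEmbedder and the free-standing embedBatchAdaptive are hypothetical stand-ins for the class members above; the mock rejects any batch over 30 the way an overloaded Ollama host might drop oversized requests):

// Sketch only: a mock embedder that fails batches larger than 30 with an
// EOF-style error, so the halving logic has something to react to.
type EmbeddingVector = number[];

const mockEmbedder = {
    async embedBatch(contents: string[]): Promise<EmbeddingVector[]> {
        if (contents.length > 30) throw new Error('fetch failed: unexpected EOF');
        return contents.map(() => [0.1, 0.2, 0.3]); // dummy vectors
    }
};

// Free-standing version of the halving logic from the diff above.
async function embedBatchAdaptive(contents: string[], minBatchSize = 5): Promise<EmbeddingVector[]> {
    try {
        return await mockEmbedder.embedBatch(contents);
    } catch (error) {
        const msg = error instanceof Error ? error.message.toLowerCase() : '';
        const connectionError = msg.includes('eof') || msg.includes('fetch failed');
        if (!connectionError || contents.length <= minBatchSize) throw error;
        const midpoint = Math.floor(contents.length / 2);
        const [first, second] = await Promise.all([
            embedBatchAdaptive(contents.slice(0, midpoint), minBatchSize),
            embedBatchAdaptive(contents.slice(midpoint), minBatchSize),
        ]);
        return [...first, ...second];
    }
}

// 100 chunks fail at sizes 100 and 50, then succeed as four batches of 25.
embedBatchAdaptive(Array.from({ length: 100 }, (_, i) => `chunk ${i}`))
    .then(vectors => console.log(`embedded ${vectors.length} chunks`)); // embedded 100 chunks

Order is preserved because each level concatenates the first half's results before the second half's, even though the two halves are embedded concurrently.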

packages/core/src/splitter/ast-splitter.ts

Lines changed: 41 additions & 2 deletions
@@ -55,8 +55,47 @@ export class AstCodeSplitter implements Splitter {
        try {
            console.log(`🌳 Using AST splitter for ${language} file: ${filePath || 'unknown'}`);

-            this.parser.setLanguage(langConfig.parser);
-            const tree = this.parser.parse(code);
+            // Validate input before parsing
+            if (typeof code !== 'string') {
+                console.warn(`[ASTSplitter] ⚠️ Code is not a string (type: ${typeof code}), falling back to LangChain: ${filePath || 'unknown'}`);
+                return await this.langchainFallback.split(String(code || ''), language, filePath);
+            }
+
+            if (code.length === 0) {
+                console.log(`[ASTSplitter] Empty file, returning empty chunks: ${filePath || 'unknown'}`);
+                return [];
+            }
+
+            // Set language with explicit error handling
+            try {
+                this.parser.setLanguage(langConfig.parser);
+            } catch (langError) {
+                console.error(`[ASTSplitter] ❌ setLanguage failed for ${language}: ${langError}`);
+                throw langError;
+            }
+
+            // Parse with explicit error handling
+            // Note: tree-sitter has a 32KB (32767 bytes) limit for string inputs
+            // For larger files, we must use the callback-based API with chunked reads
+            let tree;
+            try {
+                if (code.length > 32767) {
+                    // Use callback-based parsing for files > 32KB
+                    // Return chunks of at most 16KB to stay well under the 32KB limit
+                    const CHUNK_SIZE = 16384;
+                    tree = this.parser.parse((offset) => {
+                        if (offset >= code.length) return null;
+                        const end = Math.min(offset + CHUNK_SIZE, code.length);
+                        return code.slice(offset, end);
+                    });
+                } else {
+                    // Use direct string parsing for smaller files
+                    tree = this.parser.parse(code);
+                }
+            } catch (parseError) {
+                console.error(`[ASTSplitter] ❌ parse failed for ${language} (${filePath}): ${parseError}`);
+                throw parseError;
+            }

            if (!tree.rootNode) {
                console.warn(`[ASTSplitter] ⚠️ Failed to parse AST for ${language}, falling back to LangChain: ${filePath || 'unknown'}`);
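
The callback API the splitter now uses can be exercised on its own. A minimal sketch, assuming the tree-sitter and tree-sitter-javascript npm packages (the callback shape mirrors the diff above; the input and chunk size are illustrative):

// Sketch: parse a string past the 32767-byte limit noted in the commit
// by serving it to the parser in 16KB slices instead of one string.
const Parser = require('tree-sitter');
const JavaScript = require('tree-sitter-javascript');

const parser = new Parser();
parser.setLanguage(JavaScript);

// ~52KB of trivial JavaScript, well past the string-input limit.
const code = 'const x = 1;\n'.repeat(4000);

const CHUNK_SIZE = 16384;
const tree = parser.parse((offset: number) => {
    if (offset >= code.length) return null; // null signals end of input
    return code.slice(offset, Math.min(offset + CHUNK_SIZE, code.length));
});

console.log(tree.rootNode.type); // 'program'

One caveat worth noting: code.length counts UTF-16 code units rather than bytes, so for heavily non-ASCII files the 32767 threshold in the splitter is approximate; the callback path is the safe branch either way.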

packages/mcp/src/embedding.ts

Lines changed: 1 addition & 1 deletion
@@ -54,7 +54,7 @@ export function createEmbeddingInstance(config: ContextMcpConfig): OpenAIEmbeddi
                model: config.embeddingModel,
                host: config.ollamaHost
            });
-            console.log(`[EMBEDDING] ✅ Ollama embedding instance created successfully`);
+            console.log(`[EMBEDDING] ✅ Ollama embedding instance created`);
            return ollamaEmbedding;

        default:
