@@ -1231,9 +1231,12 @@ export class Context {
         onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void
     ): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
         const isHybrid = this.getIsHybrid();
+
+        // Batch size for embedding processing
         const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
+        console.log(`[Context] 🔧 Using batch size: ${EMBEDDING_BATCH_SIZE}`);
+
         const CHUNK_LIMIT = 450000;
-        console.log(`[Context] 🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
 
         let chunkBuffer: Array<{ chunk: CodeChunk; codebasePath: string }> = [];
         let processedFiles = 0;
@@ -1335,15 +1338,72 @@ export class Context {
         await this.processChunkBatch(chunks, codebasePath);
     }
 
+    /**
+     * Detect if error is a connection/EOF error that might be resolved by smaller batches
+     */
+    private isConnectionError(error: Error): boolean {
+        const errorMessage = error.message.toLowerCase();
+        return (
+            errorMessage.includes('eof') ||
+            errorMessage.includes('econnreset') ||
+            errorMessage.includes('socket hang up') ||
+            (errorMessage.includes('connection') && errorMessage.includes('reset')) ||
+            errorMessage.includes('fetch failed')
+        );
+    }
+
+    /**
+     * Adaptive batch embedding with automatic retry on smaller batches
+     * @param contents Array of text contents to embed
+     * @param minBatchSize Minimum batch size before giving up (default: 5)
+     * @returns Array of embedding vectors
+     */
+    private async embedBatchAdaptive(
+        contents: string[],
+        minBatchSize: number = 5
+    ): Promise<EmbeddingVector[]> {
+        try {
+            // Try with the full batch
+            return await this.embedding.embedBatch(contents);
+        } catch (error) {
+            if (!(error instanceof Error) || !this.isConnectionError(error)) {
+                // Not a connection error, re-throw
+                throw error;
+            }
+
+            // Connection error - try splitting the batch
+            if (contents.length <= minBatchSize) {
+                // Already at minimum batch size, can't split further
+                console.error(`[Context] ❌ Failed to embed batch of ${contents.length} chunks even at minimum batch size`);
+                throw error;
+            }
+
+            // Split batch in half and retry
+            const midpoint = Math.floor(contents.length / 2);
+            const firstHalf = contents.slice(0, midpoint);
+            const secondHalf = contents.slice(midpoint);
+
+            console.warn(`[Context] ⚠️ Batch embedding failed (${error.message}), splitting ${contents.length} chunks into ${firstHalf.length} + ${secondHalf.length}`);
+
+            // Recursively process both halves
+            const [firstResults, secondResults] = await Promise.all([
+                this.embedBatchAdaptive(firstHalf, minBatchSize),
+                this.embedBatchAdaptive(secondHalf, minBatchSize)
+            ]);
+
+            return [...firstResults, ...secondResults];
+        }
+    }
+
     /**
      * Get cached embeddings or generate new ones
      * @param chunkContents Array of chunk content strings
      * @returns Array of embedding vectors
      */
     private async getCachedOrGenerateEmbeddings(chunkContents: string[]): Promise<EmbeddingVector[]> {
         if (!this.embeddingCache.isAvailable()) {
-            // Cache not available, use regular embedding generation
-            return await this.embedding.embedBatch(chunkContents);
+            // Cache not available, use adaptive embedding generation
+            return await this.embedBatchAdaptive(chunkContents);
         }
 
         // Generate content hashes for all chunks
@@ -1372,7 +1432,7 @@ export class Context {
         // Generate embeddings for uncached chunks
         if (uncachedContents.length > 0) {
             console.log(`[Context] 🔄 Cache miss for ${uncachedContents.length}/${chunkContents.length} chunks, generating embeddings...`);
-            const newEmbeddings = await this.embedding.embedBatch(uncachedContents);
+            const newEmbeddings = await this.embedBatchAdaptive(uncachedContents);
 
             // Store new embeddings in cache
             const embeddingsToCache = new Map<string, number[]>();
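Taken together with the EMBEDDING_BATCH_SIZE default of 100, the new embedBatchAdaptive method lets a transient connection failure degrade gracefully: a failed batch of 100 chunks is retried as 50 + 50, then 25 + 25, and so on, until a request succeeds or a sub-batch no larger than the minBatchSize default of 5 still fails, at which point the error is re-thrown. The sketch below is a minimal standalone restatement of that halving strategy, not part of the commit; embedFn and isRetryable are hypothetical stand-ins for this.embedding.embedBatch and isConnectionError.

// Minimal standalone sketch of the halving strategy used by embedBatchAdaptive above.
// embedFn and isRetryable are assumed stand-ins, not part of the commit.
async function embedWithSplitting(
    contents: string[],
    embedFn: (batch: string[]) => Promise<number[][]>,
    isRetryable: (error: Error) => boolean,
    minBatchSize: number = 5
): Promise<number[][]> {
    try {
        // Attempt the whole batch first.
        return await embedFn(contents);
    } catch (error) {
        // Give up on non-retryable errors or once the batch is already at the minimum size.
        if (!(error instanceof Error) || !isRetryable(error) || contents.length <= minBatchSize) {
            throw error;
        }
        // Otherwise split in half and recurse; results are concatenated in the original order.
        const mid = Math.floor(contents.length / 2);
        const [left, right] = await Promise.all([
            embedWithSplitting(contents.slice(0, mid), embedFn, isRetryable, minBatchSize),
            embedWithSplitting(contents.slice(mid), embedFn, isRetryable, minBatchSize)
        ]);
        return [...left, ...right];
    }
}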