@@ -57,23 +57,35 @@ func (c *Chunk) ID() string {
 }
 
 // newChunk creates a new Chunk from related tree-sitter nodes
-func newChunk(
+func (p *Parser) newChunk(
 	node *tree_sitter.Node,
 	source []byte,
 	path string,
 	usedPaths map[string]bool,
 	fileType FileType,
-	comments []*tree_sitter.Node,
+	folded []*tree_sitter.Node,
+	extractor *NamedChunkExtractor,
 ) *Chunk {
 	finalPath := resolvePath(path, usedPaths)
-	startPos, startByte, endPos, endByte := calculateChunkBounds(node, comments)
-	nodeText := node.Utf8Text(source)
+	startPos, startByte, endPos, endByte := calculateChunkBounds(node, folded)
+
+	// Determine which node to use for the summary
+	summaryNode := node
+	if extractor != nil && extractor.SummaryNodeQuery != "" {
+		// Use the existing executeQuery method to find the summary node
+		matches, err := p.executeQuery(extractor.SummaryNodeQuery, node, source)
+		if err == nil && len(matches) > 0 {
+			summaryNode = matches[0]
+		}
+	}
+
+	summaryText := summaryNode.Utf8Text(source)
 	fullText := source[startByte:endByte]
 
 	return &Chunk{
 		Path:        finalPath,
 		Type:        string(fileType),
-		Summary:     summarize(nodeText),
+		Summary:     summarize(summaryText),
 		Source:      string(fullText),
 		StartLine:   startPos.Row + 1,
 		StartColumn: startPos.Column + 1,
@@ -101,8 +113,8 @@ func resolvePath(path string, usedPaths map[string]bool) string {
 }
 
 // calculateChunkBounds determines the start and end positions for a chunk,
-// extending to include any preceding comments
-func calculateChunkBounds(node *tree_sitter.Node, comments []*tree_sitter.Node) (
+// extending to include any preceding folded nodes
+func calculateChunkBounds(node *tree_sitter.Node, folded []*tree_sitter.Node) (
 	startPos tree_sitter.Point, startByte uint,
 	endPos tree_sitter.Point, endByte uint,
 ) {
@@ -111,10 +123,10 @@ func calculateChunkBounds(node *tree_sitter.Node, comments []*tree_sitter.Node)
 	endPos = node.EndPosition()
 	endByte = node.EndByte()
 
-	if len(comments) > 0 {
-		firstComment := comments[0]
-		startPos = firstComment.StartPosition()
-		startByte = firstComment.StartByte()
+	if len(folded) > 0 {
+		firstFolded := folded[0]
+		startPos = firstFolded.StartPosition()
+		startByte = firstFolded.StartByte()
 	}
 
 	return startPos, startByte, endPos, endByte
@@ -147,15 +159,16 @@ func summarize(source string) string {
 type LanguageSpec struct {
 	NamedChunks       map[string]NamedChunkExtractor // node types that can be extracted by name
 	ExtractChildrenIn []string                       // node types whose children should be recursively processed
-	CommentTypes      []string                       // node types that represent comments
-	IgnoreTypes       []string                       // node types to completely skip
+	FoldIntoNextNode  []string                       // node types to fold into next node, e.g., comments
+	SkipTypes         []string                       // node types to completely skip
 	FileTypeRules     []FileTypeRule                 // language-specific file type classification rules
 }
 
 // NamedChunkExtractor defines tree-sitter queries for extracting named code entities
 type NamedChunkExtractor struct {
-	NameQuery       string // query to extract the entity name
-	ParentNameQuery string // optional query to extract parent entity name for hierarchical paths
+	NameQuery        string // query to extract the entity name
+	ParentNameQuery  string // optional query to extract parent entity name for hierarchical paths
+	SummaryNodeQuery string // optional query to extract a specific node for the summary instead of the main node
 }
 
 // FileTypeRule defines a pattern-based rule for classifying file types
@@ -246,7 +259,6 @@ func (p *Parser) classifyFileType(filePath string) FileType {
 }
 
 // extractChunks recursively extracts semantic chunks from an AST node.
-// Comments are collected and folded into the next non-comment chunk to improve context.
 func (p *Parser) extractChunks(
 	node *tree_sitter.Node,
 	source []byte,
@@ -255,31 +267,31 @@ func (p *Parser) extractChunks(
 ) []*Chunk {
 	var chunks []*Chunk
 	usedPaths := map[string]bool{}
-	var comments []*tree_sitter.Node
+	var folded []*tree_sitter.Node
 
 	for i := uint(0); i < node.ChildCount(); i++ {
 		child := node.Child(i)
 		kind := child.Kind()
 
-		if slices.Contains(p.spec.IgnoreTypes, kind) {
-			// Process any preceding comments as standalone chunks
-			for _, comment := range comments {
-				chunks = append(chunks, p.extractNode(comment, source, usedPaths, fileType, nil))
+		if slices.Contains(p.spec.SkipTypes, kind) {
+			// Process any remaining folded nodes as standalone chunks
+			for _, foldedNode := range folded {
+				chunks = append(chunks, p.extractNode(foldedNode, source, usedPaths, fileType, nil))
 			}
-			comments = nil
+			folded = nil
 
 			continue
 		}
 
-		if slices.Contains(p.spec.CommentTypes, kind) {
-			comments = append(comments, child)
+		if slices.Contains(p.spec.FoldIntoNextNode, kind) {
+			folded = append(folded, child)
 			continue
 		}
 
-		// Process code nodes & fold comments, if any
-		chunk, path := p.createChunkFromNode(child, source, parentPath, fileType, usedPaths, comments)
+		// Process code nodes & folded nodes, if any
+		chunk, path := p.createChunkFromNode(child, source, parentPath, fileType, usedPaths, folded)
 		chunks = append(chunks, chunk)
-		comments = nil
+		folded = nil
 
 		// Recursively process children if specified
 		if slices.Contains(p.spec.ExtractChildrenIn, kind) {
@@ -288,9 +300,9 @@ func (p *Parser) extractChunks(
 		}
 	}
 
-	// Process any remaining comments as standalone chunks
-	for _, comment := range comments {
-		chunks = append(chunks, p.extractNode(comment, source, usedPaths, fileType, nil))
+	// Process any remaining folded nodes as standalone chunks
+	for _, foldedNode := range folded {
+		chunks = append(chunks, p.extractNode(foldedNode, source, usedPaths, fileType, nil))
 	}
 
 	return chunks
@@ -303,21 +315,21 @@ func (p *Parser) createChunkFromNode(
 	parentPath string,
 	fileType FileType,
 	usedPaths map[string]bool,
-	comments []*tree_sitter.Node,
+	folded []*tree_sitter.Node,
 ) (*Chunk, string) {
 	kind := node.Kind()
 	extractor, exists := p.spec.NamedChunks[kind]
 
 	if exists {
 		chunkPath, err := p.buildChunkPath(extractor, node, source, parentPath)
 		if err == nil {
-			chunk := newChunk(node, source, chunkPath, usedPaths, fileType, comments)
+			chunk := p.newChunk(node, source, chunkPath, usedPaths, fileType, folded, &extractor)
 			return chunk, chunkPath
 		}
 	}
 
 	// No named extractor or building chunk path failed, use content-hash
-	return p.extractNode(node, source, usedPaths, fileType, comments), parentPath
+	return p.extractNode(node, source, usedPaths, fileType, folded), parentPath
 }
 
 // extractNode creates a chunk from a node using content-based hashing for the path
@@ -326,12 +338,12 @@ func (p *Parser) extractNode(
 	source []byte,
 	usedPaths map[string]bool,
 	fileType FileType,
-	comments []*tree_sitter.Node,
+	folded []*tree_sitter.Node,
 ) *Chunk {
 	nodeSource := node.Utf8Text(source)
 	hash := fmt.Sprintf("%x", xxhash.Sum64String(nodeSource))
 
-	return newChunk(node, source, hash, usedPaths, fileType, comments)
+	return p.newChunk(node, source, hash, usedPaths, fileType, folded, nil)
 }
 
 // buildChunkPath constructs a hierarchical path for a named chunk using tree-sitter queries
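A minimal sketch of how the renamed LanguageSpec fields and the new SummaryNodeQuery hook might be configured once this change lands, shown here for a hypothetical JavaScript spec. Every node kind and query below is an illustrative assumption, not a spec this repository necessarily ships:

	// Hypothetical spec: comments fold into the chunk that follows them,
	// empty statements are skipped, and exported functions take their
	// summary from the inner declaration rather than the export wrapper.
	jsSpec := LanguageSpec{
		NamedChunks: map[string]NamedChunkExtractor{
			"function_declaration": {
				NameQuery: "(function_declaration name: (identifier) @name)",
			},
			"export_statement": {
				NameQuery:        "(export_statement declaration: (function_declaration name: (identifier) @name))",
				SummaryNodeQuery: "(export_statement declaration: (function_declaration) @summary)",
			},
		},
		ExtractChildrenIn: []string{"program"},
		FoldIntoNextNode:  []string{"comment"},
		SkipTypes:         []string{"empty_statement"},
	}

When no named extractor matches and the parser falls back to content-hash paths, extractNode passes a nil extractor into newChunk, so the summary is still built from the chunk node itself, as before this change.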