From bd88f67ea8051a7de1c3adb74d20a893e70b63f8 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Tue, 27 May 2025 13:43:15 -0400 Subject: [PATCH 01/13] Lay foundation for workspace symbols again --- src/DocumentStore.zig | 33 +++ src/Server.zig | 67 +++++- src/TrigramStore.zig | 501 ++++++++++++++++++++++++++++++++++++++++++ src/zls.zig | 1 + 4 files changed, 601 insertions(+), 1 deletion(-) create mode 100644 src/TrigramStore.zig diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index ee610ccc3..a13a4e47f 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -14,6 +14,7 @@ const tracy = @import("tracy"); const translate_c = @import("translate_c.zig"); const DocumentScope = @import("DocumentScope.zig"); const DiagnosticsCollection = @import("DiagnosticsCollection.zig"); +const TrigramStore = @import("TrigramStore.zig"); const DocumentStore = @This(); @@ -26,6 +27,7 @@ thread_pool: *std.Thread.Pool, handles: Uri.ArrayHashMap(*Handle) = .empty, build_files: if (supports_build_system) Uri.ArrayHashMap(*BuildFile) else void = if (supports_build_system) .empty else {}, cimports: if (supports_build_system) std.AutoArrayHashMapUnmanaged(Hash, translate_c.Result) else void = if (supports_build_system) .empty else {}, +trigram_stores: Uri.ArrayHashMap(TrigramStore) = .empty, diagnostics_collection: *DiagnosticsCollection, builds_in_progress: std.atomic.Value(i32) = .init(0), transport: ?*lsp.Transport = null, @@ -603,6 +605,12 @@ pub fn deinit(self: *DocumentStore) void { } self.handles.deinit(self.allocator); + for (self.trigram_stores.keys(), self.trigram_stores.values()) |uri, *trigram_store| { + uri.deinit(self.allocator); + trigram_store.deinit(self.allocator); + } + self.trigram_stores.deinit(self.allocator); + if (supports_build_system) { for (self.build_files.values()) |build_file| { build_file.deinit(self.allocator); @@ -694,6 +702,31 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { }; } +pub fn trigramIndexUri( + store: *DocumentStore, + uri: Uri, + encoding: offsets.Encoding, +) error{OutOfMemory}!void { + const gop = try store.trigram_stores.getOrPut(store.allocator, uri); + + if (gop.found_existing) { + return; + } + + errdefer { + gop.key_ptr.deinit(store.allocator); + store.trigram_stores.swapRemoveAt(gop.index); + } + + gop.key_ptr.* = try uri.dupe(store.allocator); + gop.value_ptr.* = .empty; + + const file_contents = store.readFile(uri) orelse return; + defer store.allocator.free(file_contents); + + try gop.value_ptr.fill(store.allocator, file_contents, encoding); +} + /// **Thread safe** takes a shared lock /// This function does not protect against data races from modifying the BuildFile pub fn getBuildFile(self: *DocumentStore, uri: Uri) ?*BuildFile { diff --git a/src/Server.zig b/src/Server.zig index 260d1519c..a8d8ae750 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -34,6 +34,7 @@ const goto = @import("features/goto.zig"); const hover_handler = @import("features/hover.zig"); const selection_range = @import("features/selection_range.zig"); const diagnostics_gen = @import("features/diagnostics.zig"); +const TrigramStore = @import("TrigramStore.zig"); const BuildOnSave = diagnostics_gen.BuildOnSave; const BuildOnSaveSupport = build_runner_shared.BuildOnSaveSupport; @@ -563,7 +564,7 @@ fn initializeHandler(server: *Server, arena: std.mem.Allocator, request: types.I .documentRangeFormattingProvider = .{ .bool = false }, .foldingRangeProvider = .{ .bool = true }, .selectionRangeProvider = 
.{ .bool = true }, - .workspaceSymbolProvider = .{ .bool = false }, + .workspaceSymbolProvider = .{ .bool = true }, .workspace = .{ .workspaceFolders = .{ .supported = true, @@ -1563,6 +1564,67 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ return try selection_range.generateSelectionRanges(arena, handle, request.positions, server.offset_encoding); } +fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) Error!lsp.ResultType("workspace/symbol") { + if (request.query.len < 3) return null; + + for (server.workspaces.items) |workspace| { + const path = workspace.uri.toFsPath(arena) catch |err| switch (err) { + error.UnsupportedScheme => return null, // https://github.com/microsoft/language-server-protocol/issues/1264 + error.OutOfMemory => return error.OutOfMemory, + }; + var dir = std.fs.cwd().openDir(path, .{ .iterate = true }) catch return error.InternalError; + defer dir.close(); + + var walker = try dir.walk(arena); + defer walker.deinit(); + + while (walker.next() catch return error.InternalError) |entry| { + if (std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) { + const uri = Uri.fromPath( + arena, + std.fs.path.join(arena, &.{ path, entry.path }) catch return error.InternalError, + ) catch return error.InternalError; + + server.document_store.trigramIndexUri( + uri, + server.offset_encoding, + ) catch return error.InternalError; + } + } + } + + var symbols: std.ArrayListUnmanaged(types.workspace.Symbol) = .empty; + var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + + for ( + server.document_store.trigram_stores.keys(), + server.document_store.trigram_stores.values(), + ) |uri, trigram_store| { + try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); + + const slice = trigram_store.declarations.slice(); + const names = slice.items(.name); + const ranges = slice.items(.range); + + for (declaration_buffer.items) |declaration| { + const name = names[@intFromEnum(declaration)]; + const range = ranges[@intFromEnum(declaration)]; + try symbols.append(arena, .{ + .name = trigram_store.names.items[name.start..name.end], + .kind = .Variable, + .location = .{ + .location = .{ + .uri = uri.raw, + .range = range, + }, + }, + }); + } + } + + return .{ .workspace_symbols = symbols.items }; +} + const HandledRequestParams = union(enum) { initialize: types.InitializeParams, shutdown, @@ -1586,6 +1648,7 @@ const HandledRequestParams = union(enum) { @"textDocument/codeAction": types.CodeAction.Params, @"textDocument/foldingRange": types.FoldingRange.Params, @"textDocument/selectionRange": types.SelectionRange.Params, + @"workspace/symbol": types.workspace.Symbol.Params, other: lsp.MethodWithParams, }; @@ -1630,6 +1693,7 @@ fn isBlockingMessage(msg: Message) bool { .@"textDocument/codeAction", .@"textDocument/foldingRange", .@"textDocument/selectionRange", + .@"workspace/symbol", => return false, .other => return false, }, @@ -1814,6 +1878,7 @@ pub fn sendRequestSync(server: *Server, arena: std.mem.Allocator, comptime metho .@"textDocument/codeAction" => try server.codeActionHandler(arena, params), .@"textDocument/foldingRange" => try server.foldingRangeHandler(arena, params), .@"textDocument/selectionRange" => try server.selectionRangeHandler(arena, params), + .@"workspace/symbol" => try server.workspaceSymbolHandler(arena, params), .other => return null, }; } diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig new file mode 
100644 index 000000000..6421d8c3b --- /dev/null +++ b/src/TrigramStore.zig @@ -0,0 +1,501 @@ +//! Per-file trigram store. + +const std = @import("std"); +const ast = @import("ast.zig"); +const Ast = std.zig.Ast; +const builtin = @import("builtin"); +const assert = std.debug.assert; +const offsets = @import("offsets.zig"); +const log = std.log.scoped(.store); + +pub const TrigramStore = @This(); + +pub const Trigram = [3]u8; + +pub const NameSlice = struct { start: u32, end: u32 }; + +pub const Declaration = struct { + pub const Index = enum(u32) { _ }; + + name: NameSlice, + range: offsets.Range, +}; + +pub const empty: TrigramStore = .{ + .has_filter = false, + .filter_buckets = .empty, + .trigram_to_declarations = .empty, + .declarations = .empty, + .names = .empty, +}; + +has_filter: bool, +filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), +trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), +declarations: std.MultiArrayList(Declaration), +names: std.ArrayListUnmanaged(u8), + +pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { + store.filter_buckets.deinit(allocator); + for (store.trigram_to_declarations.values()) |*list| { + list.deinit(allocator); + } + store.trigram_to_declarations.deinit(allocator); + store.declarations.deinit(allocator); + store.names.deinit(allocator); + store.* = undefined; +} + +fn clearRetainingCapacity(store: *TrigramStore) void { + store.filter_buckets.clearRetainingCapacity(); + store.has_filter = false; + for (store.trigram_to_declarations.values()) |*list| { + list.clearRetainingCapacity(); + } + store.declarations.clearRetainingCapacity(); + store.names.clearRetainingCapacity(); +} + +pub fn fill( + store: *TrigramStore, + allocator: std.mem.Allocator, + source: [:0]const u8, + encoding: offsets.Encoding, +) error{OutOfMemory}!void { + store.clearRetainingCapacity(); + + var tree = try Ast.parse(allocator, source, .zig); + defer tree.deinit(allocator); + + const Context = struct { + allocator: std.mem.Allocator, + store: *TrigramStore, + in_function: bool, + encoding: offsets.Encoding, + + const Error = error{OutOfMemory}; + fn callback(context: *@This(), cb_tree: *const Ast, node: Ast.Node.Index) Error!void { + const old_in_function = context.in_function; + defer context.in_function = old_in_function; + + switch (cb_tree.nodeTag(node)) { + .fn_decl => { + if (!context.in_function) {} + + context.in_function = true; + }, + .root => unreachable, + .container_decl, + .container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + => context.in_function = false, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + if (!context.in_function) { + const token = cb_tree.fullVarDecl(node).?.ast.mut_token + 1; + const name = cb_tree.tokenSlice(token); + + if (name.len >= 3) { + try context.store.appendDeclaration( + context.allocator, + name, + offsets.tokenToRange(cb_tree, token, context.encoding), + ); + } + } + }, + + else => {}, + } + + try ast.iterateChildren(cb_tree, node, context, Error, callback); + } + }; + + var context = Context{ + .allocator = allocator, + .store = store, + .in_function = false, + .encoding = encoding, + }; + try ast.iterateChildren(&tree, .root, &context, Context.Error, 
Context.callback); + + try store.finalize(allocator); +} + +/// Caller must not submit name.len < 3. +fn appendDeclaration( + store: *TrigramStore, + allocator: std.mem.Allocator, + name: []const u8, + range: offsets.Range, +) error{OutOfMemory}!void { + assert(name.len >= 3); + + const name_slice: NameSlice = blk: { + const start = store.names.items.len; + try store.names.appendSlice(allocator, name); + break :blk .{ + .start = @intCast(start), + .end = @intCast(store.names.items.len), + }; + }; + + try store.declarations.append(allocator, .{ + .name = name_slice, + .range = range, + }); + + for (0..name.len - 2) |index| { + const trigram = name[index..][0..3].*; + const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty); + try gop.value_ptr.append(allocator, @enumFromInt(store.declarations.len - 1)); + } +} + +/// Must be called before any queries are executed. +fn finalize(store: *TrigramStore, allocator: std.mem.Allocator) error{OutOfMemory}!void { + { + const lists = store.trigram_to_declarations.values(); + var index: usize = 0; + while (index < lists.len) { + if (lists[index].items.len == 0) { + lists[index].deinit(allocator); + store.trigram_to_declarations.swapRemoveAt(index); + } else { + index += 1; + } + } + } + + const trigrams = store.trigram_to_declarations.keys(); + + if (trigrams.len > 0) { + var prng = std.Random.DefaultPrng.init(0); + + const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; + try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); + store.filter_buckets.items.len = filter_capacity; + + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + filter.reset(); + store.has_filter = true; + + for (trigrams) |trigram| { + filter.append(prng.random(), trigram) catch |err| switch (err) { + error.EvictionFailed => { + // NOTE: This should generally be quite rare. + store.has_filter = false; + break; + }, + }; + } + } +} + +pub fn declarationsForQuery( + store: *const TrigramStore, + allocator: std.mem.Allocator, + query: []const u8, + declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index), +) error{OutOfMemory}!void { + assert(query.len >= 3); + + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + + if (store.has_filter) { + for (0..query.len - 2) |index| { + const trigram = query[index..][0..3].*; + if (!filter.contains(trigram)) { + return; + } + } + } + + const first = (store.trigram_to_declarations.get(query[0..3].*) orelse { + declaration_buffer.clearRetainingCapacity(); + return; + }).items; + + declaration_buffer.clearRetainingCapacity(); + try declaration_buffer.ensureTotalCapacity(allocator, first.len * 2); + declaration_buffer.items.len = first.len * 2; + + var len = first.len; + @memcpy(declaration_buffer.items[0..len], first); + + for (0..query.len - 2) |index| { + const trigram = query[index..][0..3].*; + const old_len = len; + len = mergeIntersection( + (store.trigram_to_declarations.get(trigram[0..3].*) orelse return { + declaration_buffer.clearRetainingCapacity(); + return; + }).items, + declaration_buffer.items[0..len], + declaration_buffer.items[len..], + ); + @memcpy(declaration_buffer.items[0..len], declaration_buffer.items[old_len..][0..len]); + declaration_buffer.items.len = len * 2; + } + + declaration_buffer.items.len = declaration_buffer.items.len / 2; +} + +/// Asserts `@min(a.len, b.len) <= out.len`. 
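The query loop above repeatedly narrows its candidate set by intersecting sorted posting lists, and the helper documented by the assertion above does that with a single two-pointer walk: whichever side currently holds the smaller index advances, and only indices present in both lists are copied to `out`. A minimal, self-contained sketch of the same merge, with made-up `u32` indices standing in for `Declaration.Index`:

const std = @import("std");

// Illustrative two-pointer intersection over two sorted posting lists;
// the index values here are invented for the example.
test "intersection of two sorted posting lists" {
    const a = [_]u32{ 1, 4, 6, 9 };
    const b = [_]u32{ 4, 5, 9 };
    var out: [3]u32 = undefined; // @min(a.len, b.len) == 3

    var out_idx: usize = 0;
    var a_idx: usize = 0;
    var b_idx: usize = 0;
    while (a_idx < a.len and b_idx < b.len) {
        if (a[a_idx] == b[b_idx]) {
            out[out_idx] = a[a_idx];
            out_idx += 1;
            a_idx += 1;
            b_idx += 1;
        } else if (a[a_idx] < b[b_idx]) {
            a_idx += 1; // advance the list holding the smaller element
        } else {
            b_idx += 1;
        }
    }

    try std.testing.expectEqualSlices(u32, &.{ 4, 9 }, out[0..out_idx]);
}

The store's own `mergeIntersection` over `Declaration.Index` slices follows.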
+fn mergeIntersection( + a: []const Declaration.Index, + b: []const Declaration.Index, + out: []Declaration.Index, +) u32 { + std.debug.assert(@min(a.len, b.len) <= out.len); + + var out_idx: u32 = 0; + + var a_idx: u32 = 0; + var b_idx: u32 = 0; + + while (a_idx < a.len and b_idx < b.len) { + const a_val = a[a_idx]; + const b_val = b[b_idx]; + + if (a_val == b_val) { + out[out_idx] = a_val; + out_idx += 1; + a_idx += 1; + b_idx += 1; + } else if (@intFromEnum(a_val) < @intFromEnum(b_val)) { + a_idx += 1; + } else { + b_idx += 1; + } + } + + return out_idx; +} + +// TODO: The pow2 requirement is quite inefficient: explore ideas posted in +// https://databasearchitects.blogspot.com/2019/07/cuckoo-filters-with-arbitrarily-sized.html +// (rocksdb even-odd scheme from comments looks interesting). +pub const CuckooFilter = struct { + /// len must be a power of 2. + /// + /// ### Pathological case with buckets.len power of 2 + /// + /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_2` + /// - `BucketIndex(alias_1)` -> `bucket_1`, `BucketIndex(alias_1).alternate()` -> `bucket_2` + /// + /// Our alternate mappings hold and `contains()` will not return false negatives. + /// + /// ### Pathological case with buckets.len NOT power of 2: + /// + /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_3` + /// - `BucketIndex(alias_1)` -> `bucket_2`, `BucketIndex(alias_1).alternate()` -> `bucket_4` + /// + /// Our alternate mappings do not hold and `contains()` can return false negatives. This is not + /// acceptable as the entire point of an AMQ datastructure is the presence of false positives + /// but not false negatives. + buckets: []Bucket, + + pub const Fingerprint = enum(u8) { + none = std.math.maxInt(u8), + _, + + pub fn hash(fingerprint: Fingerprint) u32 { + return @truncate(std.hash.Murmur2_64.hash(&.{@intFromEnum(fingerprint)})); + } + }; + pub const Bucket = [4]Fingerprint; + pub const BucketIndex = enum(u32) { + _, + + pub fn alternate(index: BucketIndex, fingerprint: Fingerprint) BucketIndex { + assert(fingerprint != .none); + return @enumFromInt(@intFromEnum(index) ^ fingerprint.hash()); + } + }; + + pub const Triplet = struct { + fingerprint: Fingerprint, + index_1: BucketIndex, + index_2: BucketIndex, + + pub fn initFromTrigram(trigram: Trigram) Triplet { + const split: packed struct { + fingerprint: Fingerprint, + padding: u24, + index_1: BucketIndex, + } = @bitCast(std.hash.Murmur2_64.hash(&trigram)); + + const fingerprint: Fingerprint = if (split.fingerprint == .none) + @enumFromInt(0) + else + split.fingerprint; + + const triplet: Triplet = .{ + .fingerprint = fingerprint, + .index_1 = split.index_1, + .index_2 = split.index_1.alternate(fingerprint), + }; + assert(triplet.index_2.alternate(fingerprint) == triplet.index_1); + + return triplet; + } + }; + + pub fn reset(filter: CuckooFilter) void { + @memset(filter.buckets, [1]Fingerprint{.none} ** 4); + } + + pub fn capacityForCount(count: usize) error{Overflow}!usize { + const fill_rate = 0.95; + return try std.math.ceilPowerOfTwo(usize, @intFromFloat(@ceil(@as(f32, @floatFromInt(count)) / fill_rate))); + } + + // Use a hash (fnv) for randomness. 
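Each trigram is reduced to an 8-bit fingerprint plus a primary bucket index, both carved out of a single `Murmur2_64` hash in `Triplet.initFromTrigram` above; the `std.Random` passed to `append` below only drives the eviction path. The alternate bucket is the primary index XOR-ed with a hash of the fingerprint, and because the bucket count is a power of two, the masked bucket slots still pair up with each other, which is what the pathological-case note above describes. A small sketch of that involution, using an arbitrary example fingerprint:

const std = @import("std");

test "xor-based alternate bucket is an involution under a power-of-two mask" {
    const fingerprint: u8 = 0xA7; // arbitrary example value
    const fp_hash: u32 = @truncate(std.hash.Murmur2_64.hash(&.{fingerprint}));

    const mask: u32 = 512 - 1; // bucket count must be a power of two
    const index_1: u32 = 123; // already < 512
    const index_2: u32 = (index_1 ^ fp_hash) & mask;

    // XOR-ing with the same fingerprint hash again returns to the original
    // bucket: reducing indices into range with `& mask` commutes with XOR,
    // which a plain modulo by a non-power-of-two bucket count would not,
    // hence the power-of-two requirement documented above.
    try std.testing.expectEqual(index_1, (index_2 ^ fp_hash) & mask);
}

`append` below tries both candidate buckets before falling back to the eviction loop.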
+ pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { + const triplet: Triplet = .initFromTrigram(trigram); + + if (filter.appendToBucket(triplet.index_1, triplet.fingerprint) or + filter.appendToBucket(triplet.index_2, triplet.fingerprint)) + { + return; + } + + var fingerprint = triplet.fingerprint; + var index = if (random.boolean()) triplet.index_1 else triplet.index_2; + for (0..500) |_| { + fingerprint = filter.swapFromBucket(random, index, fingerprint); + index = index.alternate(fingerprint); + + if (filter.appendToBucket(index, fingerprint)) { + return; + } + } + + return error.EvictionFailed; + } + + fn bucketAt(filter: CuckooFilter, index: BucketIndex) *Bucket { + assert(std.math.isPowerOfTwo(filter.buckets.len)); + return &filter.buckets[@intFromEnum(index) & (filter.buckets.len - 1)]; + } + + fn appendToBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == .none) { + slot.* = fingerprint; + return true; + } + } + + return false; + } + + fn swapFromBucket( + filter: CuckooFilter, + random: std.Random, + index: BucketIndex, + fingerprint: Fingerprint, + ) Fingerprint { + assert(fingerprint != .none); + + const target = &filter.bucketAt(index)[random.int(u2)]; + + const old_fingerprint = target.*; + assert(old_fingerprint != .none); + + target.* = fingerprint; + + return old_fingerprint; + } + + pub fn contains(filter: CuckooFilter, trigram: Trigram) bool { + const triplet: Triplet = .initFromTrigram(trigram); + + return filter.containsInBucket(triplet.index_1, triplet.fingerprint) or + filter.containsInBucket(triplet.index_2, triplet.fingerprint); + } + + fn containsInBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == fingerprint) { + return true; + } + } + + return false; + } +}; + +// TODO: More extensive (different capacities) testing. 
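The 3.5% ceiling asserted at the end of the test below lines up with a back-of-the-envelope bound for this configuration: a negative lookup probes two buckets of four slots each, and every occupied slot matches a uniformly distributed 8-bit fingerprint (255 usable values, since one is reserved as the empty marker) with probability about 1/255, giving roughly 2 * 4 / 255, about 3.1%. A tiny sketch of that arithmetic, treated as a rough upper-bound estimate that ignores the 0.95 fill rate:

const std = @import("std");

test "expected false-positive bound for two 4-slot buckets and 8-bit fingerprints" {
    const buckets_probed: f32 = 2;
    const slots_per_bucket: f32 = 4;
    const usable_fingerprints: f32 = 255; // one value is reserved as `.none`

    const expected_fpr = buckets_probed * slots_per_bucket / usable_fingerprints;

    // Roughly 3.1%, comfortably below the 3.5% threshold used by the test below.
    try std.testing.expect(expected_fpr < 0.035);
}

The randomized round-trip test below then checks the empirical rate against the same ceiling.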
+test CuckooFilter { + const allocator = std.testing.allocator; + + const element_count = 486; + const filter_size = comptime CuckooFilter.capacityForCount(element_count) catch unreachable; + try std.testing.expectEqual(512, filter_size); + + var entries: std.AutoArrayHashMapUnmanaged(Trigram, void) = .empty; + defer entries.deinit(allocator); + try entries.ensureTotalCapacity(allocator, element_count); + + var buckets: [filter_size]CuckooFilter.Bucket = undefined; + var filter = CuckooFilter{ .buckets = &buckets }; + var filter_prng = std.Random.DefaultPrng.init(42); + + for (0..2_500) |gen_prng_seed| { + entries.clearRetainingCapacity(); + filter.reset(); + + var gen_prng = std.Random.DefaultPrng.init(gen_prng_seed); + for (0..element_count) |_| { + const trigram: Trigram = @bitCast(gen_prng.random().int(u24)); + try entries.put(allocator, trigram, {}); + try filter.append(filter_prng.random(), trigram); + } + + // No false negatives + for (entries.keys()) |trigram| { + try std.testing.expect(filter.contains(trigram)); + } + + // Reasonable false positive rate + const fpr_count = 2_500; + var false_positives: usize = 0; + var negative_prng = std.Random.DefaultPrng.init(~gen_prng_seed); + for (0..fpr_count) |_| { + var trigram: Trigram = @bitCast(negative_prng.random().int(u24)); + while (entries.contains(trigram)) { + trigram = @bitCast(negative_prng.random().int(u24)); + } + + false_positives += @intFromBool(filter.contains(trigram)); + } + + const fpr = @as(f32, @floatFromInt(false_positives)) / fpr_count; + std.testing.expect(fpr < 0.035) catch |err| { + std.log.err("fpr: {d}%", .{fpr * 100}); + return err; + }; + } +} diff --git a/src/zls.zig b/src/zls.zig index 864bcb37f..7f244c28d 100644 --- a/src/zls.zig +++ b/src/zls.zig @@ -18,6 +18,7 @@ pub const Server = @import("Server.zig"); pub const snippets = @import("snippets.zig"); pub const testing = @import("testing.zig"); pub const translate_c = @import("translate_c.zig"); +pub const TrigramStore = @import("TrigramStore.zig"); pub const Uri = @import("Uri.zig"); pub const code_actions = @import("features/code_actions.zig"); From 671ef96871d4632e06ce999bc068041e53ab7468 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Sat, 7 Jun 2025 19:38:58 -0400 Subject: [PATCH 02/13] Open imported documents preemptively Co-authored-by: Techatrix --- src/DocumentStore.zig | 49 +++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index a13a4e47f..c831bf3f1 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -36,6 +36,8 @@ lsp_capabilities: struct { supports_semantic_tokens_refresh: bool = false, supports_inlay_hints_refresh: bool = false, } = .{}, +currently_loading_uris: Uri.ArrayHashMap(void) = .empty, +wait_for_currently_loading_uri: std.Thread.Condition = .{}, pub const Hasher = std.crypto.auth.siphash.SipHash128(1, 3); pub const Hash = [Hasher.mac_length]u8; @@ -611,6 +613,9 @@ pub fn deinit(self: *DocumentStore) void { } self.trigram_stores.deinit(self.allocator); + std.debug.assert(self.currently_loading_uris.count() == 0); + self.currently_loading_uris.deinit(self.allocator); + if (supports_build_system) { for (self.build_files.values()) |build_file| { build_file.deinit(self.allocator); @@ -694,7 +699,34 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); - if (self.getHandle(uri)) |handle| return 
handle; + { + self.lock.lock(); + defer self.lock.unlock(); + + while (true) { + if (self.handles.get(uri)) |handle| return handle; + + const gop = self.currently_loading_uris.getOrPutValue( + self.allocator, + uri, + {}, + ) catch return null; + + if (!gop.found_existing) { + break; + } + + var mutex: std.Thread.Mutex = .{}; + + mutex.lock(); + defer mutex.unlock(); + + self.lock.unlock(); + self.wait_for_currently_loading_uri.wait(&mutex); + self.lock.lock(); + } + } + const file_contents = self.readFile(uri) orelse return null; return self.createAndStoreDocument(uri, file_contents, false) catch |err| { log.err("failed to store document '{s}': {}", .{ uri.raw, err }); @@ -1393,17 +1425,12 @@ fn createAndStoreDocument( errdefer if (!gop.found_existing) std.debug.assert(self.handles.swapRemove(uri)); if (gop.found_existing) { - if (lsp_synced) { - new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; - gop.value_ptr.*.impl.associated_build_file = .init; + new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; + gop.value_ptr.*.impl.associated_build_file = .init; - new_handle.uri = gop.key_ptr.*; - gop.value_ptr.*.deinit(); - gop.value_ptr.*.* = new_handle; - } else { - // TODO prevent concurrent `createAndStoreDocument` invocations from racing each other - new_handle.deinit(); - } + new_handle.uri = gop.key_ptr.*; + gop.value_ptr.*.deinit(); + gop.value_ptr.*.* = new_handle; } else { gop.key_ptr.* = try uri.dupe(self.allocator); errdefer gop.key_ptr.*.deinit(self.allocator); From 4d802d4c981185b2af9eb5613961838f1f048483 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Thu, 12 Jun 2025 19:15:16 -0400 Subject: [PATCH 03/13] Load all documents in workspace on start Co-authored-by: Techatrix --- src/DocumentStore.zig | 174 +++++++++++++++++++++++++++++++------ src/Server.zig | 72 ++++++++-------- src/TrigramStore.zig | 194 ++++++++++++++++++------------------------ 3 files changed, 269 insertions(+), 171 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index c831bf3f1..78dca00e3 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -198,6 +198,7 @@ pub const Handle = struct { lazy_condition: std.Thread.Condition = .{}, import_uris: ?[]Uri = null, + trigram_store: TrigramStore = undefined, document_scope: DocumentScope = undefined, zzoiir: ZirOrZoir = undefined, @@ -236,6 +237,11 @@ pub const Handle = struct { /// `false` indicates the document only exists because it is a dependency of another document /// or has been closed with `textDocument/didClose`. lsp_synced: bool = false, + /// true if a thread has acquired the permission to compute the `TrigramStore` + /// all other threads will wait until the given thread has computed the `TrigramStore` before reading it. + has_trigram_store_lock: bool = false, + /// true if `handle.impl.trigram_store` has been set + has_trigram_store: bool = false, /// true if a thread has acquired the permission to compute the `DocumentScope` /// all other threads will wait until the given thread has computed the `DocumentScope` before reading it. has_document_scope_lock: bool = false, @@ -246,7 +252,7 @@ pub const Handle = struct { /// all other threads will wait until the given thread has computed the `std.zig.Zir` or `std.zig.Zoir` before reading it. /// true if `handle.impl.zir` has been set has_zzoiir: bool = false, - _: u27 = 0, + _: u25 = 0, }; /// Takes ownership of `text` on success. 
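The `has_trigram_store_lock`/`has_trigram_store` bits added above follow the convention the neighbouring comments describe for `DocumentScope` and ZIR: one thread acquires permission to compute the value, the others wait on `lazy_condition`, and readers only touch the field once the "has" flag is set. A minimal, self-contained sketch of that kind of double-checked lazy initialization (all names here are illustrative, not the actual `Handle` API):

const std = @import("std");

const LazyU32 = struct {
    mutex: std.Thread.Mutex = .{},
    condition: std.Thread.Condition = .{},
    /// Analogous to `has_trigram_store_lock`: one thread owns the computation.
    computing: bool = false,
    /// Analogous to `has_trigram_store`: `value` may now be read.
    ready: bool = false,
    value: u32 = undefined,

    fn get(self: *LazyU32) u32 {
        self.mutex.lock();
        defer self.mutex.unlock();

        while (true) {
            if (self.ready) return self.value;
            if (!self.computing) break; // this thread gets to compute the value
            self.condition.wait(&self.mutex); // another thread is computing it
        }

        self.computing = true;
        self.mutex.unlock();
        const result: u32 = 42; // stand-in for the expensive, unlocked work
        self.mutex.lock();

        self.value = result;
        self.ready = true;
        self.condition.broadcast();
        return result;
    }
};

test LazyU32 {
    var lazy: LazyU32 = .{};
    try std.testing.expectEqual(@as(u32, 42), lazy.get());
    try std.testing.expectEqual(@as(u32, 42), lazy.get());
}

`getTrigramStore` and `getTrigramStoreCached` in the next hunk build on the same flags.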
@@ -291,6 +297,7 @@ pub const Handle = struct { .zon => self.impl.zzoiir.zon.deinit(allocator), }; if (status.has_document_scope) self.impl.document_scope.deinit(allocator); + if (status.has_trigram_store) self.impl.trigram_store.deinit(allocator); allocator.free(self.tree.source); self.tree.deinit(allocator); @@ -370,6 +377,23 @@ pub const Handle = struct { return self.impl.document_scope; } + pub fn getTrigramStore(self: *Handle) error{OutOfMemory}!TrigramStore { + if (self.getStatus().has_trigram_store) return self.impl.trigram_store; + return try self.getLazy(TrigramStore, "trigram_store", struct { + fn create(handle: *Handle, allocator: std.mem.Allocator) error{OutOfMemory}!TrigramStore { + return try .init(allocator, &handle.tree); + } + }); + } + + /// Asserts that `getTrigramStore` has been previously called on `handle`. + pub fn getTrigramStoreCached(self: *Handle) TrigramStore { + if (builtin.mode == .Debug) { + std.debug.assert(self.getStatus().has_trigram_store); + } + return self.impl.trigram_store; + } + pub fn getZir(self: *Handle) error{OutOfMemory}!std.zig.Zir { std.debug.assert(self.tree.mode == .zig); const zir_or_zoir = try self.getZirOrZoir(); @@ -727,6 +751,11 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { } } + defer { + std.debug.assert(self.currently_loading_uris.swapRemove(uri)); + self.wait_for_currently_loading_uri.broadcast(); + } + const file_contents = self.readFile(uri) orelse return null; return self.createAndStoreDocument(uri, file_contents, false) catch |err| { log.err("failed to store document '{s}': {}", .{ uri.raw, err }); @@ -734,31 +763,6 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { }; } -pub fn trigramIndexUri( - store: *DocumentStore, - uri: Uri, - encoding: offsets.Encoding, -) error{OutOfMemory}!void { - const gop = try store.trigram_stores.getOrPut(store.allocator, uri); - - if (gop.found_existing) { - return; - } - - errdefer { - gop.key_ptr.deinit(store.allocator); - store.trigram_stores.swapRemoveAt(gop.index); - } - - gop.key_ptr.* = try uri.dupe(store.allocator); - gop.value_ptr.* = .empty; - - const file_contents = store.readFile(uri) orelse return; - defer store.allocator.free(file_contents); - - try gop.value_ptr.fill(store.allocator, file_contents, encoding); -} - /// **Thread safe** takes a shared lock /// This function does not protect against data races from modifying the BuildFile pub fn getBuildFile(self: *DocumentStore, uri: Uri) ?*BuildFile { @@ -993,6 +997,9 @@ fn notifyBuildEnd(self: *DocumentStore, status: EndStatus) void { } fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + { build_file.impl.mutex.lock(); defer build_file.impl.mutex.unlock(); @@ -1073,6 +1080,52 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void } } +pub fn loadTrigramStores(store: *DocumentStore) error{OutOfMemory}![]*DocumentStore.Handle { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + var handles: std.ArrayListUnmanaged(*DocumentStore.Handle) = try .initCapacity(store.allocator, store.handles.count()); + errdefer handles.deinit(store.allocator); + + for (store.handles.values()) |handle| { + // TODO check if the handle is in a workspace folder instead + if (isInStd(handle.uri)) continue; + handles.appendAssumeCapacity(handle); + } + + if (builtin.single_threaded) { + for (handles.items) |handle| { + _ = try handle.getTrigramStore(); + } + return try 
handles.toOwnedSlice(store.allocator); + } + + const loadTrigramStore = struct { + fn loadTrigramStore( + handle: *Handle, + did_out_of_memory: *std.atomic.Value(bool), + ) void { + _ = handle.getTrigramStore() catch { + did_out_of_memory.store(true, .release); + }; + } + }.loadTrigramStore; + + var wait_group: std.Thread.WaitGroup = .{}; + var did_out_of_memory: std.atomic.Value(bool) = .init(false); + + for (handles.items) |handle| { + const status = handle.getStatus(); + if (status.has_trigram_store) continue; + store.thread_pool.spawnWg(&wait_group, loadTrigramStore, .{ handle, &did_out_of_memory }); + } + store.thread_pool.waitAndWork(&wait_group); + + if (did_out_of_memory.load(.acquire)) return error.OutOfMemory; + + return try handles.toOwnedSlice(store.allocator); +} + pub fn isBuildFile(uri: Uri) bool { return std.mem.endsWith(u8, uri.raw, "/build.zig"); } @@ -1255,6 +1308,9 @@ fn buildDotZigExists(dir_path: []const u8) bool { /// See `Handle.getAssociatedBuildFileUri`. /// Caller owns returned memory. fn collectPotentialBuildFiles(self: *DocumentStore, uri: Uri) error{OutOfMemory}![]*BuildFile { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + if (isInStd(uri)) return &.{}; var potential_build_files: std.ArrayList(*BuildFile) = .empty; @@ -1445,6 +1501,72 @@ fn createAndStoreDocument( return gop.value_ptr.*; } +pub fn loadDirectoryRecursive(store: *DocumentStore, directory_uri: Uri) !usize { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + const workspace_path = try directory_uri.toFsPath(store.allocator); + defer store.allocator.free(workspace_path); + + var workspace_dir = try std.fs.openDirAbsolute(workspace_path, .{ .iterate = true }); + defer workspace_dir.close(); + + var walker = try workspace_dir.walk(store.allocator); + defer walker.deinit(); + + var not_currently_loading_uris: std.ArrayListUnmanaged(Uri) = .empty; + defer { + for (not_currently_loading_uris.items) |uri| uri.deinit(store.allocator); + not_currently_loading_uris.deinit(store.allocator); + } + + var file_count: usize = 0; + { + while (try walker.next()) |entry| { + if (entry.kind == .directory) continue; + if (std.mem.indexOf(u8, entry.path, std.fs.path.sep_str ++ ".zig-cache" ++ std.fs.path.sep_str) != null) continue; + if (std.mem.startsWith(u8, entry.path, ".zig-cache" ++ std.fs.path.sep_str)) continue; + if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue; + + file_count += 1; + + const path = try std.fs.path.join(store.allocator, &.{ workspace_path, entry.path }); + defer store.allocator.free(path); + + try not_currently_loading_uris.ensureUnusedCapacity(store.allocator, 1); + + const uri: Uri = try .fromPath(store.allocator, path); + errdefer comptime unreachable; + + store.lock.lockShared(); + defer store.lock.unlockShared(); + + if (!store.handles.contains(uri) and + !store.currently_loading_uris.contains(uri)) + { + not_currently_loading_uris.appendAssumeCapacity(uri); + } + } + } + + errdefer comptime unreachable; + + const S = struct { + fn getOrLoadHandleVoid(s: *DocumentStore, uri: Uri) void { + _ = s.getOrLoadHandle(uri); + uri.deinit(s.allocator); + } + }; + + var wait_group: std.Thread.WaitGroup = .{}; + while (not_currently_loading_uris.pop()) |uri| { + store.thread_pool.spawnWg(&wait_group, S.getOrLoadHandleVoid, .{ store, uri }); + } + store.thread_pool.waitAndWork(&wait_group); + + return file_count; +} + pub const CImportHandle = struct { /// the `@cImport` node node: Ast.Node.Index, diff --git a/src/Server.zig 
b/src/Server.zig index a8d8ae750..18d90a777 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -856,7 +856,6 @@ const Workspace = struct { fn addWorkspace(server: *Server, uri: Uri) error{OutOfMemory}!void { try server.workspaces.ensureUnusedCapacity(server.allocator, 1); server.workspaces.appendAssumeCapacity(try Workspace.init(server, uri)); - log.info("added Workspace Folder: {s}", .{uri.raw}); if (BuildOnSaveSupport.isSupportedComptime() and // Don't initialize build on save until initialization finished. @@ -869,6 +868,16 @@ fn addWorkspace(server: *Server, uri: Uri) error{OutOfMemory}!void { .restart = false, }); } + + const file_count = server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) { + error.UnsupportedScheme => return, + else => { + log.err("failed to load files in workspace '{s}': {}", .{ uri.raw, err }); + return; + }, + }; + + log.info("added Workspace Folder: {s} ({d} files)", .{ uri.raw, file_count }); } fn removeWorkspace(server: *Server, uri: Uri) void { @@ -1567,54 +1576,47 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) Error!lsp.ResultType("workspace/symbol") { if (request.query.len < 3) return null; - for (server.workspaces.items) |workspace| { - const path = workspace.uri.toFsPath(arena) catch |err| switch (err) { - error.UnsupportedScheme => return null, // https://github.com/microsoft/language-server-protocol/issues/1264 - error.OutOfMemory => return error.OutOfMemory, - }; - var dir = std.fs.cwd().openDir(path, .{ .iterate = true }) catch return error.InternalError; - defer dir.close(); - - var walker = try dir.walk(arena); - defer walker.deinit(); - - while (walker.next() catch return error.InternalError) |entry| { - if (std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) { - const uri = Uri.fromPath( - arena, - std.fs.path.join(arena, &.{ path, entry.path }) catch return error.InternalError, - ) catch return error.InternalError; - - server.document_store.trigramIndexUri( - uri, - server.offset_encoding, - ) catch return error.InternalError; - } - } - } + const handles = try server.document_store.loadTrigramStores(); + defer server.document_store.allocator.free(handles); var symbols: std.ArrayListUnmanaged(types.workspace.Symbol) = .empty; var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + var loc_buffer: std.ArrayListUnmanaged(offsets.Loc) = .empty; + var range_buffer: std.ArrayListUnmanaged(offsets.Range) = .empty; + + for (handles) |handle| { + const trigram_store = handle.getTrigramStoreCached(); - for ( - server.document_store.trigram_stores.keys(), - server.document_store.trigram_stores.values(), - ) |uri, trigram_store| { + declaration_buffer.clearRetainingCapacity(); try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); const slice = trigram_store.declarations.slice(); const names = slice.items(.name); - const ranges = slice.items(.range); + const locs = slice.items(.loc); + + { + // Convert `offsets.Loc` to `offsets.Range` + + try loc_buffer.resize(arena, declaration_buffer.items.len); + try range_buffer.resize(arena, declaration_buffer.items.len); + + for (declaration_buffer.items, loc_buffer.items) |declaration, *loc| { + const small_loc = locs[@intFromEnum(declaration)]; + loc.* = .{ .start = small_loc.start, .end = small_loc.end }; + } + + try offsets.multiple.locToRange(arena, handle.tree.source, 
loc_buffer.items, range_buffer.items, server.offset_encoding); + } - for (declaration_buffer.items) |declaration| { + try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); + for (declaration_buffer.items, range_buffer.items) |declaration, range| { const name = names[@intFromEnum(declaration)]; - const range = ranges[@intFromEnum(declaration)]; - try symbols.append(arena, .{ + symbols.appendAssumeCapacity(.{ .name = trigram_store.names.items[name.start..name.end], .kind = .Variable, .location = .{ .location = .{ - .uri = uri.raw, + .uri = handle.uri.raw, .range = range, }, }, diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 6421d8c3b..5f1a84a43 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -3,30 +3,21 @@ const std = @import("std"); const ast = @import("ast.zig"); const Ast = std.zig.Ast; -const builtin = @import("builtin"); const assert = std.debug.assert; const offsets = @import("offsets.zig"); -const log = std.log.scoped(.store); pub const TrigramStore = @This(); pub const Trigram = [3]u8; pub const NameSlice = struct { start: u32, end: u32 }; +pub const Loc = struct { start: u32, end: u32 }; pub const Declaration = struct { pub const Index = enum(u32) { _ }; name: NameSlice, - range: offsets.Range, -}; - -pub const empty: TrigramStore = .{ - .has_filter = false, - .filter_buckets = .empty, - .trigram_to_declarations = .empty, - .declarations = .empty, - .names = .empty, + loc: Loc, }; has_filter: bool, @@ -35,43 +26,23 @@ trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnm declarations: std.MultiArrayList(Declaration), names: std.ArrayListUnmanaged(u8), -pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { - store.filter_buckets.deinit(allocator); - for (store.trigram_to_declarations.values()) |*list| { - list.deinit(allocator); - } - store.trigram_to_declarations.deinit(allocator); - store.declarations.deinit(allocator); - store.names.deinit(allocator); - store.* = undefined; -} - -fn clearRetainingCapacity(store: *TrigramStore) void { - store.filter_buckets.clearRetainingCapacity(); - store.has_filter = false; - for (store.trigram_to_declarations.values()) |*list| { - list.clearRetainingCapacity(); - } - store.declarations.clearRetainingCapacity(); - store.names.clearRetainingCapacity(); -} - -pub fn fill( - store: *TrigramStore, +pub fn init( allocator: std.mem.Allocator, - source: [:0]const u8, - encoding: offsets.Encoding, -) error{OutOfMemory}!void { - store.clearRetainingCapacity(); - - var tree = try Ast.parse(allocator, source, .zig); - defer tree.deinit(allocator); + tree: *const Ast, +) error{OutOfMemory}!TrigramStore { + var store: TrigramStore = .{ + .has_filter = false, + .filter_buckets = .empty, + .trigram_to_declarations = .empty, + .declarations = .empty, + .names = .empty, + }; + errdefer store.deinit(allocator); const Context = struct { allocator: std.mem.Allocator, store: *TrigramStore, in_function: bool, - encoding: offsets.Encoding, const Error = error{OutOfMemory}; fn callback(context: *@This(), cb_tree: *const Ast, node: Ast.Node.Index) Error!void { @@ -109,10 +80,12 @@ pub fn fill( const name = cb_tree.tokenSlice(token); if (name.len >= 3) { + const loc = offsets.tokenToLoc(cb_tree, token); + try context.store.appendDeclaration( context.allocator, name, - offsets.tokenToRange(cb_tree, token, context.encoding), + .{ .start = @intCast(loc.start), .end = @intCast(loc.end) }, ); } } @@ -125,15 +98,60 @@ pub fn fill( } }; - var context = Context{ + var context: Context = 
.{ .allocator = allocator, - .store = store, + .store = &store, .in_function = false, - .encoding = encoding, }; - try ast.iterateChildren(&tree, .root, &context, Context.Error, Context.callback); + try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback); + + const lists = store.trigram_to_declarations.values(); + var index: usize = 0; + while (index < lists.len) { + if (lists[index].items.len == 0) { + lists[index].deinit(allocator); + store.trigram_to_declarations.swapRemoveAt(index); + } else { + index += 1; + } + } + + const trigrams = store.trigram_to_declarations.keys(); + + if (trigrams.len > 0) { + var prng = std.Random.DefaultPrng.init(0); + + const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; + try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); + store.filter_buckets.items.len = filter_capacity; - try store.finalize(allocator); + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + filter.reset(); + store.has_filter = true; + + for (trigrams) |trigram| { + filter.append(prng.random(), trigram) catch |err| switch (err) { + error.EvictionFailed => { + // NOTE: This should generally be quite rare. + store.has_filter = false; + break; + }, + }; + } + } + + return store; +} + +pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { + store.filter_buckets.deinit(allocator); + for (store.trigram_to_declarations.values()) |*list| { + list.deinit(allocator); + } + store.trigram_to_declarations.deinit(allocator); + store.declarations.deinit(allocator); + store.names.deinit(allocator); + store.* = undefined; } /// Caller must not submit name.len < 3. @@ -141,7 +159,7 @@ fn appendDeclaration( store: *TrigramStore, allocator: std.mem.Allocator, name: []const u8, - range: offsets.Range, + loc: Loc, ) error{OutOfMemory}!void { assert(name.len >= 3); @@ -156,7 +174,7 @@ fn appendDeclaration( try store.declarations.append(allocator, .{ .name = name_slice, - .range = range, + .loc = loc, }); for (0..name.len - 2) |index| { @@ -166,46 +184,7 @@ fn appendDeclaration( } } -/// Must be called before any queries are executed. -fn finalize(store: *TrigramStore, allocator: std.mem.Allocator) error{OutOfMemory}!void { - { - const lists = store.trigram_to_declarations.values(); - var index: usize = 0; - while (index < lists.len) { - if (lists[index].items.len == 0) { - lists[index].deinit(allocator); - store.trigram_to_declarations.swapRemoveAt(index); - } else { - index += 1; - } - } - } - - const trigrams = store.trigram_to_declarations.keys(); - - if (trigrams.len > 0) { - var prng = std.Random.DefaultPrng.init(0); - - const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; - try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); - store.filter_buckets.items.len = filter_capacity; - - const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; - filter.reset(); - store.has_filter = true; - - for (trigrams) |trigram| { - filter.append(prng.random(), trigram) catch |err| switch (err) { - error.EvictionFailed => { - // NOTE: This should generally be quite rare. - store.has_filter = false; - break; - }, - }; - } - } -} - +/// Asserts query.len >= 3. Asserts declaration_buffer.items.len == 0. 
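As the doc comment above states, a query is split into overlapping three-byte windows. `declarationsForQuery` below first consults the cuckoo filter (when one was built) so that a query containing any unseen trigram bails out before touching the posting-list map; the surviving candidates then live in the front half of `declaration_buffer`, each new intersection is written into the back half and copied forward, and the buffer is finally shrunk to the surviving half. A small sketch of just the sliding-window step, using a made-up query string:

const std = @import("std");

test "overlapping three-byte windows of a query" {
    const query: []const u8 = "parse";
    var trigrams: std.ArrayListUnmanaged([3]u8) = .empty;
    defer trigrams.deinit(std.testing.allocator);

    // Same indexing scheme as `declarationsForQuery`: query[index..][0..3].*
    for (0..query.len - 2) |index| {
        try trigrams.append(std.testing.allocator, query[index..][0..3].*);
    }

    try std.testing.expectEqual(@as(usize, 3), trigrams.items.len);
    try std.testing.expectEqualStrings("par", &trigrams.items[0]);
    try std.testing.expectEqualStrings("ars", &trigrams.items[1]);
    try std.testing.expectEqualStrings("rse", &trigrams.items[2]);
}

The full query path, including the filter pre-check and the intersection loop, follows.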
pub fn declarationsForQuery( store: *const TrigramStore, allocator: std.mem.Allocator, @@ -213,6 +192,7 @@ pub fn declarationsForQuery( declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index), ) error{OutOfMemory}!void { assert(query.len >= 3); + assert(declaration_buffer.items.len == 0); const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; @@ -225,14 +205,9 @@ pub fn declarationsForQuery( } } - const first = (store.trigram_to_declarations.get(query[0..3].*) orelse { - declaration_buffer.clearRetainingCapacity(); - return; - }).items; + const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items; - declaration_buffer.clearRetainingCapacity(); - try declaration_buffer.ensureTotalCapacity(allocator, first.len * 2); - declaration_buffer.items.len = first.len * 2; + try declaration_buffer.resize(allocator, first.len * 2); var len = first.len; @memcpy(declaration_buffer.items[0..len], first); @@ -241,7 +216,7 @@ pub fn declarationsForQuery( const trigram = query[index..][0..3].*; const old_len = len; len = mergeIntersection( - (store.trigram_to_declarations.get(trigram[0..3].*) orelse return { + (store.trigram_to_declarations.get(trigram[0..3].*) orelse { declaration_buffer.clearRetainingCapacity(); return; }).items, @@ -249,10 +224,10 @@ pub fn declarationsForQuery( declaration_buffer.items[len..], ); @memcpy(declaration_buffer.items[0..len], declaration_buffer.items[old_len..][0..len]); - declaration_buffer.items.len = len * 2; + declaration_buffer.shrinkRetainingCapacity(len * 2); } - declaration_buffer.items.len = declaration_buffer.items.len / 2; + declaration_buffer.shrinkRetainingCapacity(declaration_buffer.items.len / 2); } /// Asserts `@min(a.len, b.len) <= out.len`. @@ -261,7 +236,7 @@ fn mergeIntersection( b: []const Declaration.Index, out: []Declaration.Index, ) u32 { - std.debug.assert(@min(a.len, b.len) <= out.len); + assert(@min(a.len, b.len) <= out.len); var out_idx: u32 = 0; @@ -460,17 +435,17 @@ test CuckooFilter { try entries.ensureTotalCapacity(allocator, element_count); var buckets: [filter_size]CuckooFilter.Bucket = undefined; - var filter = CuckooFilter{ .buckets = &buckets }; - var filter_prng = std.Random.DefaultPrng.init(42); + var filter: CuckooFilter = .{ .buckets = &buckets }; + var filter_prng: std.Random.DefaultPrng = .init(42); for (0..2_500) |gen_prng_seed| { entries.clearRetainingCapacity(); filter.reset(); - var gen_prng = std.Random.DefaultPrng.init(gen_prng_seed); + var gen_prng: std.Random.DefaultPrng = .init(gen_prng_seed); for (0..element_count) |_| { const trigram: Trigram = @bitCast(gen_prng.random().int(u24)); - try entries.put(allocator, trigram, {}); + entries.putAssumeCapacity(trigram, {}); try filter.append(filter_prng.random(), trigram); } @@ -482,7 +457,7 @@ test CuckooFilter { // Reasonable false positive rate const fpr_count = 2_500; var false_positives: usize = 0; - var negative_prng = std.Random.DefaultPrng.init(~gen_prng_seed); + var negative_prng: std.Random.DefaultPrng = .init(~gen_prng_seed); for (0..fpr_count) |_| { var trigram: Trigram = @bitCast(negative_prng.random().int(u24)); while (entries.contains(trigram)) { @@ -493,9 +468,8 @@ test CuckooFilter { } const fpr = @as(f32, @floatFromInt(false_positives)) / fpr_count; - std.testing.expect(fpr < 0.035) catch |err| { - std.log.err("fpr: {d}%", .{fpr * 100}); - return err; - }; + + errdefer std.log.err("fpr: {d}%", .{fpr * 100}); + try std.testing.expect(fpr < 0.035); } } From c03e50b1004cd3105845df7611972f2b6e99f227 Mon Sep 17 
00:00:00 2001 From: Techatrix Date: Wed, 19 Nov 2025 20:34:04 +0100 Subject: [PATCH 04/13] add function declarations to workspace symbols --- src/Server.zig | 53 +--------------- src/TrigramStore.zig | 99 +++++++++++++++++------------- src/features/workspace_symbols.zig | 88 ++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 93 deletions(-) create mode 100644 src/features/workspace_symbols.zig diff --git a/src/Server.zig b/src/Server.zig index 18d90a777..4fe81fb5c 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -34,7 +34,6 @@ const goto = @import("features/goto.zig"); const hover_handler = @import("features/hover.zig"); const selection_range = @import("features/selection_range.zig"); const diagnostics_gen = @import("features/diagnostics.zig"); -const TrigramStore = @import("TrigramStore.zig"); const BuildOnSave = diagnostics_gen.BuildOnSave; const BuildOnSaveSupport = build_runner_shared.BuildOnSaveSupport; @@ -1574,57 +1573,7 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ } fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) Error!lsp.ResultType("workspace/symbol") { - if (request.query.len < 3) return null; - - const handles = try server.document_store.loadTrigramStores(); - defer server.document_store.allocator.free(handles); - - var symbols: std.ArrayListUnmanaged(types.workspace.Symbol) = .empty; - var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; - var loc_buffer: std.ArrayListUnmanaged(offsets.Loc) = .empty; - var range_buffer: std.ArrayListUnmanaged(offsets.Range) = .empty; - - for (handles) |handle| { - const trigram_store = handle.getTrigramStoreCached(); - - declaration_buffer.clearRetainingCapacity(); - try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); - - const slice = trigram_store.declarations.slice(); - const names = slice.items(.name); - const locs = slice.items(.loc); - - { - // Convert `offsets.Loc` to `offsets.Range` - - try loc_buffer.resize(arena, declaration_buffer.items.len); - try range_buffer.resize(arena, declaration_buffer.items.len); - - for (declaration_buffer.items, loc_buffer.items) |declaration, *loc| { - const small_loc = locs[@intFromEnum(declaration)]; - loc.* = .{ .start = small_loc.start, .end = small_loc.end }; - } - - try offsets.multiple.locToRange(arena, handle.tree.source, loc_buffer.items, range_buffer.items, server.offset_encoding); - } - - try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); - for (declaration_buffer.items, range_buffer.items) |declaration, range| { - const name = names[@intFromEnum(declaration)]; - symbols.appendAssumeCapacity(.{ - .name = trigram_store.names.items[name.start..name.end], - .kind = .Variable, - .location = .{ - .location = .{ - .uri = handle.uri.raw, - .range = range, - }, - }, - }); - } - } - - return .{ .workspace_symbols = symbols.items }; + return try @import("features/workspace_symbols.zig").handler(server, arena, request); } const HandledRequestParams = union(enum) { diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 5f1a84a43..425e99317 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -10,21 +10,25 @@ pub const TrigramStore = @This(); pub const Trigram = [3]u8; -pub const NameSlice = struct { start: u32, end: u32 }; -pub const Loc = struct { start: u32, end: u32 }; - pub const Declaration = struct { pub const Index = enum(u32) { _ }; - name: NameSlice, - loc: Loc, + pub const Kind 
= enum { + variable, + constant, + function, + test_function, + }; + + /// Either `.identifier` or `.string_literal`. + name: Ast.TokenIndex, + kind: Kind, }; has_filter: bool, filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), declarations: std.MultiArrayList(Declaration), -names: std.ArrayListUnmanaged(u8), pub fn init( allocator: std.mem.Allocator, @@ -35,7 +39,6 @@ pub fn init( .filter_buckets = .empty, .trigram_to_declarations = .empty, .declarations = .empty, - .names = .empty, }; errdefer store.deinit(allocator); @@ -50,10 +53,22 @@ pub fn init( defer context.in_function = old_in_function; switch (cb_tree.nodeTag(node)) { - .fn_decl => { - if (!context.in_function) {} - - context.in_function = true; + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + => |tag| skip: { + context.in_function = tag == .fn_decl; + + const fn_token = cb_tree.nodeMainToken(node); + if (cb_tree.tokenTag(fn_token + 1) != .identifier) break :skip; + + try context.store.appendDeclaration( + context.allocator, + offsets.identifierTokenToNameSlice(cb_tree, fn_token + 1), + fn_token + 1, + .function, + ); }, .root => unreachable, .container_decl, @@ -74,23 +89,35 @@ pub fn init( .local_var_decl, .simple_var_decl, .aligned_var_decl, - => { - if (!context.in_function) { - const token = cb_tree.fullVarDecl(node).?.ast.mut_token + 1; - const name = cb_tree.tokenSlice(token); - - if (name.len >= 3) { - const loc = offsets.tokenToLoc(cb_tree, token); - - try context.store.appendDeclaration( - context.allocator, - name, - .{ .start = @intCast(loc.start), .end = @intCast(loc.end) }, - ); - } - } + => skip: { + if (context.in_function) break :skip; + + const main_token = cb_tree.nodeMainToken(node); + + const kind: Declaration.Kind = switch (cb_tree.tokenTag(main_token)) { + .keyword_var => .variable, + .keyword_const => .constant, + else => unreachable, + }; + + try context.store.appendDeclaration( + context.allocator, + offsets.identifierTokenToNameSlice(cb_tree, main_token + 1), + main_token + 1, + kind, + ); }, + .test_decl => skip: { + const test_name_token, const test_name = ast.testDeclNameAndToken(cb_tree, node) orelse break :skip; + + try context.store.appendDeclaration( + context.allocator, + test_name, + test_name_token, + .test_function, + ); + }, else => {}, } @@ -150,31 +177,21 @@ pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { } store.trigram_to_declarations.deinit(allocator); store.declarations.deinit(allocator); - store.names.deinit(allocator); store.* = undefined; } -/// Caller must not submit name.len < 3. fn appendDeclaration( store: *TrigramStore, allocator: std.mem.Allocator, name: []const u8, - loc: Loc, + name_token: Ast.TokenIndex, + kind: Declaration.Kind, ) error{OutOfMemory}!void { - assert(name.len >= 3); - - const name_slice: NameSlice = blk: { - const start = store.names.items.len; - try store.names.appendSlice(allocator, name); - break :blk .{ - .start = @intCast(start), - .end = @intCast(store.names.items.len), - }; - }; + if (name.len < 3) return; try store.declarations.append(allocator, .{ - .name = name_slice, - .loc = loc, + .name = name_token, + .kind = kind, }); for (0..name.len - 2) |index| { diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig new file mode 100644 index 000000000..f204f2081 --- /dev/null +++ b/src/features/workspace_symbols.zig @@ -0,0 +1,88 @@ +//! 
Implementation of [`workspace/symbol`](https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#workspace_symbol) + +const std = @import("std"); + +const lsp = @import("lsp"); +const types = lsp.types; + +const DocumentStore = @import("../DocumentStore.zig"); +const offsets = @import("../offsets.zig"); +const Server = @import("../Server.zig"); +const TrigramStore = @import("../TrigramStore.zig"); + +pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { + if (request.query.len < 3) return null; + + const handles = try server.document_store.loadTrigramStores(); + defer server.document_store.allocator.free(handles); + + var symbols: std.ArrayListUnmanaged(types.workspace.Symbol) = .empty; + var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + + for (handles) |handle| { + const trigram_store = handle.getTrigramStoreCached(); + + declaration_buffer.clearRetainingCapacity(); + try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); + + const SortContext = struct { + names: []const std.zig.Ast.TokenIndex, + fn lessThan(ctx: @This(), lhs: TrigramStore.Declaration.Index, rhs: TrigramStore.Declaration.Index) bool { + return ctx.names[@intFromEnum(lhs)] < ctx.names[@intFromEnum(rhs)]; + } + }; + + std.mem.sortUnstable( + TrigramStore.Declaration.Index, + declaration_buffer.items, + SortContext{ .names = trigram_store.declarations.items(.name) }, + SortContext.lessThan, + ); + + const slice = trigram_store.declarations.slice(); + const names = slice.items(.name); + const kinds = slice.items(.kind); + + var last_index: usize = 0; + var last_position: offsets.Position = .{ .line = 0, .character = 0 }; + + try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); + for (declaration_buffer.items) |declaration| { + const name_token = names[@intFromEnum(declaration)]; + const kind = kinds[@intFromEnum(declaration)]; + + const loc = switch (handle.tree.tokenTag(name_token)) { + .identifier => offsets.identifierTokenToNameLoc(&handle.tree, name_token), + .string_literal => offsets.tokenToLoc(&handle.tree, name_token), + else => unreachable, + }; + const name = offsets.locToSlice(handle.tree.source, loc); + + const start_position = offsets.advancePosition(handle.tree.source, last_position, last_index, loc.start, server.offset_encoding); + const end_position = offsets.advancePosition(handle.tree.source, start_position, loc.start, loc.end, server.offset_encoding); + last_index = loc.end; + last_position = end_position; + + symbols.appendAssumeCapacity(.{ + .name = name, + .kind = switch (kind) { + .variable => .Variable, + .constant => .Constant, + .function => .Function, + .test_function => .Method, // there is no SymbolKind that represents a tests, + }, + .location = .{ + .location = .{ + .uri = handle.uri.raw, + .range = .{ + .start = start_position, + .end = end_position, + }, + }, + }, + }); + } + } + + return .{ .workspace_symbols = symbols.items }; +} From d7472e1254ae2a03bd67e2346247c78043e38b25 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Wed, 19 Nov 2025 20:32:12 +0100 Subject: [PATCH 05/13] only report workspace symbols on files inside a workspace folder --- src/DocumentStore.zig | 18 +++++++++++++++--- src/features/workspace_symbols.zig | 11 ++++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 78dca00e3..7b3bac086 
100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -1080,7 +1080,10 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void } } -pub fn loadTrigramStores(store: *DocumentStore) error{OutOfMemory}![]*DocumentStore.Handle { +pub fn loadTrigramStores( + store: *DocumentStore, + filter_paths: []const []const u8, +) error{OutOfMemory}![]*DocumentStore.Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); @@ -1088,8 +1091,17 @@ pub fn loadTrigramStores(store: *DocumentStore) error{OutOfMemory}![]*DocumentSt errdefer handles.deinit(store.allocator); for (store.handles.values()) |handle| { - // TODO check if the handle is in a workspace folder instead - if (isInStd(handle.uri)) continue; + if (handle.uri.toFsPath(store.allocator)) |path| { + defer store.allocator.free(path); + for (filter_paths) |filter_path| { + if (std.mem.startsWith(u8, path, filter_path)) break; + } else break; + } else |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + // The URI is either invalid or not a `file` scheme. Either way, we should include it. + }, + } handles.appendAssumeCapacity(handle); } diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig index f204f2081..ad4bc42d1 100644 --- a/src/features/workspace_symbols.zig +++ b/src/features/workspace_symbols.zig @@ -13,7 +13,16 @@ const TrigramStore = @import("../TrigramStore.zig"); pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { if (request.query.len < 3) return null; - const handles = try server.document_store.loadTrigramStores(); + var workspace_paths: std.ArrayList([]const u8) = try .initCapacity(arena, server.workspaces.items.len); + for (server.workspaces.items) |workspace| { + const path = workspace.uri.toFsPath(arena) catch |err| switch (err) { + error.UnsupportedScheme => return null, // https://github.com/microsoft/language-server-protocol/issues/1264 + error.OutOfMemory => return error.OutOfMemory, + }; + workspace_paths.appendAssumeCapacity(path); + } + + const handles = try server.document_store.loadTrigramStores(workspace_paths.items); defer server.document_store.allocator.free(handles); var symbols: std.ArrayListUnmanaged(types.workspace.Symbol) = .empty; From 7830237497b1461277d1244eefecefb782fd6d2f Mon Sep 17 00:00:00 2001 From: Techatrix Date: Tue, 16 Sep 2025 22:15:28 +0200 Subject: [PATCH 06/13] fix race condition in getOrLoadHandle --- src/DocumentStore.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 7b3bac086..a0dd22bca 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -752,6 +752,8 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { } defer { + self.lock.lock(); + defer self.lock.unlock(); std.debug.assert(self.currently_loading_uris.swapRemove(uri)); self.wait_for_currently_loading_uri.broadcast(); } From cf80c131759c0aca9a77039d9db49eed0b9105fe Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:38:09 -0400 Subject: [PATCH 07/13] Even/odd cuckoo --- src/TrigramStore.zig | 101 ++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 425e99317..850e53423 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -148,7 +148,7 @@ pub fn init( if (trigrams.len > 
0) { var prng = std.Random.DefaultPrng.init(0); - const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; + const filter_capacity = CuckooFilter.capacityForCount(@intCast(store.trigram_to_declarations.count())) catch unreachable; try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); store.filter_buckets.items.len = filter_capacity; @@ -279,44 +279,53 @@ fn mergeIntersection( return out_idx; } -// TODO: The pow2 requirement is quite inefficient: explore ideas posted in -// https://databasearchitects.blogspot.com/2019/07/cuckoo-filters-with-arbitrarily-sized.html -// (rocksdb even-odd scheme from comments looks interesting). +fn parity(integer: anytype) enum(u1) { even, odd } { + return @enumFromInt(integer & 1); +} + pub const CuckooFilter = struct { - /// len must be a power of 2. - /// - /// ### Pathological case with buckets.len power of 2 - /// - /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_2` - /// - `BucketIndex(alias_1)` -> `bucket_1`, `BucketIndex(alias_1).alternate()` -> `bucket_2` - /// - /// Our alternate mappings hold and `contains()` will not return false negatives. - /// - /// ### Pathological case with buckets.len NOT power of 2: - /// - /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_3` - /// - `BucketIndex(alias_1)` -> `bucket_2`, `BucketIndex(alias_1).alternate()` -> `bucket_4` - /// - /// Our alternate mappings do not hold and `contains()` can return false negatives. This is not - /// acceptable as the entire point of an AMQ datastructure is the presence of false positives - /// but not false negatives. buckets: []Bucket, pub const Fingerprint = enum(u8) { none = std.math.maxInt(u8), _, - pub fn hash(fingerprint: Fingerprint) u32 { - return @truncate(std.hash.Murmur2_64.hash(&.{@intFromEnum(fingerprint)})); + const precomputed_odd_hashes = blk: { + var table: [255]u32 = undefined; + + for (&table, 0..) 
|*h, index| { + h.* = @truncate(std.hash.Murmur2_64.hash(&.{index}) | 1); + } + + break :blk table; + }; + + pub fn oddHash(fingerprint: Fingerprint) u32 { + assert(fingerprint != .none); + return precomputed_odd_hashes[@intFromEnum(fingerprint)]; } }; + pub const Bucket = [4]Fingerprint; pub const BucketIndex = enum(u32) { _, - pub fn alternate(index: BucketIndex, fingerprint: Fingerprint) BucketIndex { + pub fn alternate(index: BucketIndex, fingerprint: Fingerprint, len: u32) BucketIndex { + assert(@intFromEnum(index) < len); assert(fingerprint != .none); - return @enumFromInt(@intFromEnum(index) ^ fingerprint.hash()); + + const signed_index: i64 = @intFromEnum(index); + const odd_hash: i64 = fingerprint.oddHash(); + + const unbounded = switch (parity(signed_index)) { + .even => signed_index + odd_hash, + .odd => signed_index - odd_hash, + }; + const bounded: u32 = @intCast(@mod(unbounded, len)); + + assert(parity(signed_index) != parity(bounded)); + + return @enumFromInt(bounded); } }; @@ -325,41 +334,46 @@ pub const CuckooFilter = struct { index_1: BucketIndex, index_2: BucketIndex, - pub fn initFromTrigram(trigram: Trigram) Triplet { + pub fn initFromTrigram(trigram: Trigram, len: u32) Triplet { const split: packed struct { fingerprint: Fingerprint, padding: u24, - index_1: BucketIndex, + index_1: u32, } = @bitCast(std.hash.Murmur2_64.hash(&trigram)); + const index_1: BucketIndex = @enumFromInt(split.index_1 % len); + const fingerprint: Fingerprint = if (split.fingerprint == .none) - @enumFromInt(0) + @enumFromInt(1) else split.fingerprint; const triplet: Triplet = .{ .fingerprint = fingerprint, - .index_1 = split.index_1, - .index_2 = split.index_1.alternate(fingerprint), + .index_1 = index_1, + .index_2 = index_1.alternate(fingerprint, len), }; - assert(triplet.index_2.alternate(fingerprint) == triplet.index_1); + assert(triplet.index_2.alternate(fingerprint, len) == index_1); return triplet; } }; + pub fn init(buckets: []Bucket) CuckooFilter { + assert(parity(buckets.len) == .even); + return .{ .buckets = buckets }; + } + pub fn reset(filter: CuckooFilter) void { - @memset(filter.buckets, [1]Fingerprint{.none} ** 4); + @memset(filter.buckets, [1]Fingerprint{.none} ** @typeInfo(Bucket).array.len); } - pub fn capacityForCount(count: usize) error{Overflow}!usize { - const fill_rate = 0.95; - return try std.math.ceilPowerOfTwo(usize, @intFromFloat(@ceil(@as(f32, @floatFromInt(count)) / fill_rate))); + pub fn capacityForCount(count: u32) error{Overflow}!u32 { + return count + (count & 1); } - // Use a hash (fnv) for randomness. 
pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { - const triplet: Triplet = .initFromTrigram(trigram); + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); if (filter.appendToBucket(triplet.index_1, triplet.fingerprint) or filter.appendToBucket(triplet.index_2, triplet.fingerprint)) @@ -371,7 +385,7 @@ pub const CuckooFilter = struct { var index = if (random.boolean()) triplet.index_1 else triplet.index_2; for (0..500) |_| { fingerprint = filter.swapFromBucket(random, index, fingerprint); - index = index.alternate(fingerprint); + index = index.alternate(fingerprint, @intCast(filter.buckets.len)); if (filter.appendToBucket(index, fingerprint)) { return; @@ -382,8 +396,7 @@ pub const CuckooFilter = struct { } fn bucketAt(filter: CuckooFilter, index: BucketIndex) *Bucket { - assert(std.math.isPowerOfTwo(filter.buckets.len)); - return &filter.buckets[@intFromEnum(index) & (filter.buckets.len - 1)]; + return &filter.buckets[@intFromEnum(index)]; } fn appendToBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { @@ -408,6 +421,7 @@ pub const CuckooFilter = struct { ) Fingerprint { assert(fingerprint != .none); + comptime assert(@typeInfo(Bucket).array.len == 4); const target = &filter.bucketAt(index)[random.int(u2)]; const old_fingerprint = target.*; @@ -419,7 +433,7 @@ pub const CuckooFilter = struct { } pub fn contains(filter: CuckooFilter, trigram: Trigram) bool { - const triplet: Triplet = .initFromTrigram(trigram); + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); return filter.containsInBucket(triplet.index_1, triplet.fingerprint) or filter.containsInBucket(triplet.index_2, triplet.fingerprint); @@ -443,16 +457,15 @@ pub const CuckooFilter = struct { test CuckooFilter { const allocator = std.testing.allocator; - const element_count = 486; + const element_count = 499; const filter_size = comptime CuckooFilter.capacityForCount(element_count) catch unreachable; - try std.testing.expectEqual(512, filter_size); var entries: std.AutoArrayHashMapUnmanaged(Trigram, void) = .empty; defer entries.deinit(allocator); try entries.ensureTotalCapacity(allocator, element_count); var buckets: [filter_size]CuckooFilter.Bucket = undefined; - var filter: CuckooFilter = .{ .buckets = &buckets }; + var filter: CuckooFilter = .init(&buckets); var filter_prng: std.Random.DefaultPrng = .init(42); for (0..2_500) |gen_prng_seed| { From 643ffcc3f92fb0661aac575a9880820605a03a69 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Tue, 28 Oct 2025 23:22:22 -0400 Subject: [PATCH 08/13] Case-sensitive trigram iterator Co-Authored-By: Techatrix --- src/TrigramStore.zig | 125 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 850e53423..8a0344634 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -30,6 +30,131 @@ filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), declarations: std.MultiArrayList(Declaration), +pub const TrigramIterator = struct { + buffer: []const u8, + index: usize, + boundary: Boundary, + + pub fn init(buffer: []const u8) TrigramIterator { + assert(buffer.len != 0); + return .{ .buffer = buffer, .index = 0, .boundary = .calculate(buffer, 0) }; + } + + pub const Boundary = struct { + end: usize, + 
next_start: ?usize, + + pub fn calculate(buffer: []const u8, index: usize) Boundary { + assert(buffer[index..].len > 0); + + if (std.ascii.isLower(buffer[index])) { + // First character lowercase + for (buffer[index + 1 ..], index + 1..) |c, i| { + if (!std.ascii.isLower(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } else { + if (index + 1 >= buffer.len) { + return .{ + .end = buffer.len, + .next_start = null, + }; + } + + if (std.ascii.isLower(buffer[index + 1])) { + // First char is uppercase, second char is lowercase + for (buffer[index + 2 ..], index + 2..) |c, i| { + if (!std.ascii.isLower(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } else { + // First and second chars are uppercase + for (buffer[index + 2 ..], index + 2..) |c, i| { + if (!std.ascii.isUpper(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } + } + + return .{ + .end = buffer.len, + .next_start = null, + }; + } + }; + + pub fn next(ti: *TrigramIterator) ?Trigram { + if (ti.index == ti.buffer.len) return null; + assert(ti.index < ti.boundary.end); + + var trigram: [3]u8 = @splat(0); + const unpadded = ti.buffer[ti.index..@min(ti.index + 3, ti.boundary.end)]; + _ = std.ascii.lowerString(&trigram, unpadded); + + if (unpadded.len < 3 or ti.index + 3 >= ti.boundary.end) { + ti.index = ti.boundary.next_start orelse { + ti.index = ti.buffer.len; + return trigram; + }; + ti.boundary = .calculate(ti.buffer, ti.index); + } else { + ti.index += 1; + } + + return trigram; + } +}; + +test "TrigramIterator.Boundary.calculate" { + var boundary: TrigramIterator.Boundary = .calculate("helloWORLD", 0); + try std.testing.expectEqual(5, boundary.end); + try std.testing.expectEqual(5, boundary.next_start.?); + + boundary = .calculate("helloWORLD", 5); + try std.testing.expectEqual(10, boundary.end); + try std.testing.expectEqual(null, boundary.next_start); +} + +test TrigramIterator { + const allocator = std.testing.allocator; + + const matrix: []const struct { []const u8, []const Trigram } = &.{ + .{ "a", &.{"a\x00\x00".*} }, + .{ "ab", &.{"ab\x00".*} }, + .{ "helloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + .{ "HelloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + .{ "HelloWorld", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + }; + + var actual: std.ArrayList(Trigram) = .empty; + defer actual.deinit(allocator); + + for (matrix) |entry| { + const input, const expected = entry; + + actual.clearRetainingCapacity(); + + var it: TrigramIterator = .init(input); + while (it.next()) |trigram| { + try actual.append(allocator, trigram); + } + + try @import("testing.zig").expectEqual(expected, actual.items); + } +} + pub fn init( allocator: std.mem.Allocator, tree: *const Ast, From 04476c2fc3f11280da3adec874ee946c3974bae8 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Sat, 22 Nov 2025 16:28:34 +0100 Subject: [PATCH 09/13] skip all directories starting with '.' instead of just zig-cache Also uses `walker.leave` to no walk through files that we would skip anyway. 
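
For reference, a minimal self-contained sketch of the traversal policy described above. Hedged: `countZigFiles` is a hypothetical stand-in for ZLS's `loadDirectoryRecursive`, the `allocator`/`dir` parameters are assumed, and it relies on the `std.fs.Dir.Walker` API with `leave()` exactly as used in the diff below, not on anything beyond it:

    const std = @import("std");

    /// Sketch only: counts indexable files the way the commit above describes.
    /// `dir` is assumed to have been opened with `.iterate = true`.
    fn countZigFiles(allocator: std.mem.Allocator, dir: std.fs.Dir) !usize {
        var walker = try dir.walk(allocator);
        defer walker.deinit();

        var file_count: usize = 0;
        while (try walker.next()) |entry| {
            if (entry.kind == .directory) {
                // Never descend into hidden directories (".git", ".zig-cache", ...)
                // or the legacy "zig-cache"; per the commit message, `leave()`
                // prunes the subtree so its files are not walked at all.
                if (std.mem.startsWith(u8, entry.basename, ".") or
                    std.mem.eql(u8, entry.basename, "zig-cache"))
                {
                    walker.leave();
                }
                continue;
            }
            if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue;
            file_count += 1;
        }
        return file_count;
    }
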
--- src/DocumentStore.zig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index a0dd22bca..767c2546c 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -1537,9 +1537,12 @@ pub fn loadDirectoryRecursive(store: *DocumentStore, directory_uri: Uri) !usize var file_count: usize = 0; { while (try walker.next()) |entry| { - if (entry.kind == .directory) continue; - if (std.mem.indexOf(u8, entry.path, std.fs.path.sep_str ++ ".zig-cache" ++ std.fs.path.sep_str) != null) continue; - if (std.mem.startsWith(u8, entry.path, ".zig-cache" ++ std.fs.path.sep_str)) continue; + if (entry.kind == .directory) { + if (std.mem.startsWith(u8, entry.basename, ".") or std.mem.eql(u8, entry.basename, "zig-cache")) { + walker.leave(); + } + continue; + } if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue; file_count += 1; From 4443b2521e3946ac6f0ca4a6eb39294d9989b513 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Sat, 22 Nov 2025 19:45:29 -0500 Subject: [PATCH 10/13] Fix new trigram iterator, misc bugs Co-Authored-By: Techatrix --- src/TrigramStore.zig | 252 +++++++++++++++-------------- src/features/workspace_symbols.zig | 2 +- 2 files changed, 131 insertions(+), 123 deletions(-) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 8a0344634..ca763104b 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -33,126 +33,96 @@ declarations: std.MultiArrayList(Declaration), pub const TrigramIterator = struct { buffer: []const u8, index: usize, - boundary: Boundary, + + trigram_buffer: Trigram, + trigram_buffer_index: u2, pub fn init(buffer: []const u8) TrigramIterator { assert(buffer.len != 0); - return .{ .buffer = buffer, .index = 0, .boundary = .calculate(buffer, 0) }; + return .{ + .buffer = buffer, + .index = 0, + .trigram_buffer = @splat(0), + .trigram_buffer_index = 0, + }; } - pub const Boundary = struct { - end: usize, - next_start: ?usize, - - pub fn calculate(buffer: []const u8, index: usize) Boundary { - assert(buffer[index..].len > 0); - - if (std.ascii.isLower(buffer[index])) { - // First character lowercase - for (buffer[index + 1 ..], index + 1..) |c, i| { - if (!std.ascii.isLower(c)) { - return .{ - .end = i, - .next_start = i, - }; - } - } - } else { - if (index + 1 >= buffer.len) { - return .{ - .end = buffer.len, - .next_start = null, - }; - } - - if (std.ascii.isLower(buffer[index + 1])) { - // First char is uppercase, second char is lowercase - for (buffer[index + 2 ..], index + 2..) |c, i| { - if (!std.ascii.isLower(c)) { - return .{ - .end = i, - .next_start = i, - }; - } - } - } else { - // First and second chars are uppercase - for (buffer[index + 2 ..], index + 2..) 
|c, i| { - if (!std.ascii.isUpper(c)) { - return .{ - .end = i, - .next_start = i, - }; - } - } - } - } - - return .{ - .end = buffer.len, - .next_start = null, - }; - } - }; - pub fn next(ti: *TrigramIterator) ?Trigram { - if (ti.index == ti.buffer.len) return null; - assert(ti.index < ti.boundary.end); - - var trigram: [3]u8 = @splat(0); - const unpadded = ti.buffer[ti.index..@min(ti.index + 3, ti.boundary.end)]; - _ = std.ascii.lowerString(&trigram, unpadded); + while (ti.index < ti.buffer.len) { + defer ti.index += 1; + const c = std.ascii.toLower(ti.buffer[ti.index]); + if (c == '_') continue; + + if (ti.trigram_buffer_index < 3) { + ti.trigram_buffer[ti.trigram_buffer_index] = c; + ti.trigram_buffer_index += 1; + continue; + } - if (unpadded.len < 3 or ti.index + 3 >= ti.boundary.end) { - ti.index = ti.boundary.next_start orelse { - ti.index = ti.buffer.len; - return trigram; - }; - ti.boundary = .calculate(ti.buffer, ti.index); + defer { + @memmove(ti.trigram_buffer[0..2], ti.trigram_buffer[1..3]); + ti.trigram_buffer[2] = c; + } + return ti.trigram_buffer; + } else if (ti.trigram_buffer_index > 0) { + ti.trigram_buffer_index = 0; + return ti.trigram_buffer; } else { - ti.index += 1; + return null; } - - return trigram; } }; -test "TrigramIterator.Boundary.calculate" { - var boundary: TrigramIterator.Boundary = .calculate("helloWORLD", 0); - try std.testing.expectEqual(5, boundary.end); - try std.testing.expectEqual(5, boundary.next_start.?); - - boundary = .calculate("helloWORLD", 5); - try std.testing.expectEqual(10, boundary.end); - try std.testing.expectEqual(null, boundary.next_start); +test TrigramIterator { + try testTrigramIterator("a", &.{"a\x00\x00".*}); + try testTrigramIterator("ab", &.{"ab\x00".*}); + try testTrigramIterator("abc", &.{"abc".*}); + + try testTrigramIterator("hello", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("HELLO", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("HellO", &.{ "hel".*, "ell".*, "llo".* }); + + try testTrigramIterator("a_", &.{"a\x00\x00".*}); + try testTrigramIterator("ab_", &.{"ab\x00".*}); + try testTrigramIterator("abc_", &.{"abc".*}); + + try testTrigramIterator("_a", &.{"a\x00\x00".*}); + try testTrigramIterator("_a_", &.{"a\x00\x00".*}); + try testTrigramIterator("_a__", &.{"a\x00\x00".*}); + + try testTrigramIterator("_", &.{}); + try testTrigramIterator("__", &.{}); + try testTrigramIterator("___", &.{}); + + try testTrigramIterator("He_ll_O", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("He__ll___O", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("__He__ll__O_", &.{ "hel".*, "ell".*, "llo".* }); + + try testTrigramIterator("HellO__World___HelloWorld", &.{ + "hel".*, "ell".*, "llo".*, + "low".*, "owo".*, "wor".*, + "orl".*, "rld".*, "ldh".*, + "dhe".*, "hel".*, "ell".*, + "llo".*, "low".*, "owo".*, + "wor".*, "orl".*, "rld".*, + }); } -test TrigramIterator { +fn testTrigramIterator( + input: []const u8, + expected: []const Trigram, +) !void { const allocator = std.testing.allocator; - const matrix: []const struct { []const u8, []const Trigram } = &.{ - .{ "a", &.{"a\x00\x00".*} }, - .{ "ab", &.{"ab\x00".*} }, - .{ "helloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, - .{ "HelloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, - .{ "HelloWorld", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, - }; - - var actual: std.ArrayList(Trigram) = .empty; - defer actual.deinit(allocator); + var actual_buffer: 
std.ArrayList(Trigram) = .empty; + defer actual_buffer.deinit(allocator); - for (matrix) |entry| { - const input, const expected = entry; - - actual.clearRetainingCapacity(); - - var it: TrigramIterator = .init(input); - while (it.next()) |trigram| { - try actual.append(allocator, trigram); - } - - try @import("testing.zig").expectEqual(expected, actual.items); + var it: TrigramIterator = .init(input); + while (it.next()) |trigram| { + try actual_buffer.append(allocator, trigram); } + + try @import("testing.zig").expectEqual(expected, actual_buffer.items); } pub fn init( @@ -190,7 +160,7 @@ pub fn init( try context.store.appendDeclaration( context.allocator, - offsets.identifierTokenToNameSlice(cb_tree, fn_token + 1), + cb_tree, fn_token + 1, .function, ); @@ -227,18 +197,18 @@ pub fn init( try context.store.appendDeclaration( context.allocator, - offsets.identifierTokenToNameSlice(cb_tree, main_token + 1), + cb_tree, main_token + 1, kind, ); }, .test_decl => skip: { - const test_name_token, const test_name = ast.testDeclNameAndToken(cb_tree, node) orelse break :skip; + const test_name_token = cb_tree.nodeData(node).opt_token_and_node[0].unwrap() orelse break :skip; try context.store.appendDeclaration( context.allocator, - test_name, + cb_tree, test_name_token, .test_function, ); @@ -273,7 +243,7 @@ pub fn init( if (trigrams.len > 0) { var prng = std.Random.DefaultPrng.init(0); - const filter_capacity = CuckooFilter.capacityForCount(@intCast(store.trigram_to_declarations.count())) catch unreachable; + const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); store.filter_buckets.items.len = filter_capacity; @@ -308,21 +278,54 @@ pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { fn appendDeclaration( store: *TrigramStore, allocator: std.mem.Allocator, - name: []const u8, + tree: *const Ast, name_token: Ast.TokenIndex, kind: Declaration.Kind, ) error{OutOfMemory}!void { - if (name.len < 3) return; + const raw_name = tree.tokenSlice(name_token); + + const strategy: enum { raw, smart }, const name = switch (tree.tokenTag(name_token)) { + .string_literal => .{ .raw, raw_name[1 .. raw_name.len - 1] }, + .identifier => if (std.mem.startsWith(u8, raw_name, "@")) + .{ .raw, raw_name[2 .. 
raw_name.len - 1] } + else + .{ .smart, raw_name }, + else => unreachable, + }; + + switch (strategy) { + .raw => { + if (name.len < 3) return; + for (0..name.len - 2) |index| { + const trigram = name[index..][0..3].*; + try store.appendOneTrigram(allocator, trigram); + } + }, + .smart => { + var it: TrigramIterator = .init(name); + while (it.next()) |trigram| { + try store.appendOneTrigram(allocator, trigram); + } + }, + } try store.declarations.append(allocator, .{ .name = name_token, .kind = kind, }); +} - for (0..name.len - 2) |index| { - const trigram = name[index..][0..3].*; - const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty); - try gop.value_ptr.append(allocator, @enumFromInt(store.declarations.len - 1)); +fn appendOneTrigram( + store: *TrigramStore, + allocator: std.mem.Allocator, + trigram: Trigram, +) error{OutOfMemory}!void { + const declaration_index: Declaration.Index = @enumFromInt(store.declarations.len); + + const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty); + + if (gop.value_ptr.getLastOrNull() != declaration_index) { + try gop.value_ptr.append(allocator, declaration_index); } } @@ -333,32 +336,33 @@ pub fn declarationsForQuery( query: []const u8, declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index), ) error{OutOfMemory}!void { - assert(query.len >= 3); + assert(query.len >= 1); assert(declaration_buffer.items.len == 0); const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; if (store.has_filter) { - for (0..query.len - 2) |index| { - const trigram = query[index..][0..3].*; + var ti: TrigramIterator = .init(query); + while (ti.next()) |trigram| { if (!filter.contains(trigram)) { return; } } } - const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items; + var ti: TrigramIterator = .init(query); + + const first = (store.trigram_to_declarations.get(ti.next() orelse return) orelse return).items; try declaration_buffer.resize(allocator, first.len * 2); var len = first.len; @memcpy(declaration_buffer.items[0..len], first); - for (0..query.len - 2) |index| { - const trigram = query[index..][0..3].*; + while (ti.next()) |trigram| { const old_len = len; len = mergeIntersection( - (store.trigram_to_declarations.get(trigram[0..3].*) orelse { + (store.trigram_to_declarations.get(trigram) orelse { declaration_buffer.clearRetainingCapacity(); return; }).items, @@ -493,8 +497,12 @@ pub const CuckooFilter = struct { @memset(filter.buckets, [1]Fingerprint{.none} ** @typeInfo(Bucket).array.len); } - pub fn capacityForCount(count: u32) error{Overflow}!u32 { - return count + (count & 1); + pub fn capacityForCount(count: usize) error{Overflow}!usize { + const overallocated_count = std.math.divCeil(usize, try std.math.mul(usize, count, 105), 100) catch |err| switch (err) { + error.DivisionByZero => unreachable, + else => |e| return e, + }; + return overallocated_count + (overallocated_count & 1); } pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig index ad4bc42d1..9f61ce656 100644 --- a/src/features/workspace_symbols.zig +++ b/src/features/workspace_symbols.zig @@ -11,7 +11,7 @@ const Server = @import("../Server.zig"); const TrigramStore = @import("../TrigramStore.zig"); pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { - if 
(request.query.len < 3) return null; + if (request.query.len == 0) return null; var workspace_paths: std.ArrayList([]const u8) = try .initCapacity(arena, server.workspaces.items.len); for (server.workspaces.items) |workspace| { From daaf7f0da8a18e2bd19b2fc3835e7140a62512c3 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Sun, 23 Nov 2025 18:31:32 -0500 Subject: [PATCH 11/13] Lazier build file loading Co-Authored-By: Techatrix --- src/DocumentStore.zig | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 767c2546c..288ffe1f9 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -759,7 +759,11 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { } const file_contents = self.readFile(uri) orelse return null; - return self.createAndStoreDocument(uri, file_contents, false) catch |err| { + return self.createAndStoreDocument( + uri, + file_contents, + .{ .lsp_synced = false, .load_build_file_behaviour = .never }, + ) catch |err| { log.err("failed to store document '{s}': {}", .{ uri.raw, err }); return null; }; @@ -825,7 +829,11 @@ pub fn openLspSyncedDocument(self: *DocumentStore, uri: Uri, text: []const u8) e } const duped_text = try self.allocator.dupeZ(u8, text); - _ = try self.createAndStoreDocument(uri, duped_text, true); + _ = try self.createAndStoreDocument( + uri, + duped_text, + .{ .lsp_synced = true, .load_build_file_behaviour = .load_but_dont_update }, + ); } /// Closes a document that has been synced over the LSP protocol (`textDocument/didClose`). @@ -862,7 +870,7 @@ pub fn refreshLspSyncedDocument(self: *DocumentStore, uri: Uri, new_text: [:0]co log.warn("Document modified without being opened: {s}", .{uri.raw}); } - _ = try self.createAndStoreDocument(uri, new_text, true); + _ = try self.createAndStoreDocument(uri, new_text, .{ .lsp_synced = true, .load_build_file_behaviour = .only_update }); } /// Refreshes a document from the file system, unless said document is synced over the LSP protocol. @@ -886,7 +894,11 @@ pub fn refreshDocumentFromFileSystem(self: *DocumentStore, uri: Uri, should_dele if (handle.isLspSynced()) return false; } const file_contents = self.readFile(uri) orelse return false; - _ = try self.createAndStoreDocument(uri, file_contents, false); + _ = try self.createAndStoreDocument( + uri, + file_contents, + .{ .lsp_synced = false, .load_build_file_behaviour = .only_update }, + ); } return true; @@ -1467,25 +1479,38 @@ fn uriInImports( return false; } +const CreateAndStoreOptions = struct { + lsp_synced: bool, + load_build_file_behaviour: enum { load_but_dont_update, only_update, never }, +}; + /// takes ownership of the `text` passed in. 
/// **Thread safe** takes an exclusive lock fn createAndStoreDocument( self: *DocumentStore, uri: Uri, text: [:0]const u8, - lsp_synced: bool, + options: CreateAndStoreOptions, ) error{OutOfMemory}!*Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); - var new_handle = Handle.init(self.allocator, uri, text, lsp_synced) catch |err| { + var new_handle = Handle.init(self.allocator, uri, text, options.lsp_synced) catch |err| { self.allocator.free(text); return err; }; errdefer new_handle.deinit(); if (supports_build_system and isBuildFile(uri) and !isInStd(uri)) { - _ = self.getOrLoadBuildFile(uri); + switch (options.load_build_file_behaviour) { + .load_but_dont_update => { + _ = self.getOrLoadBuildFile(uri); + }, + .only_update => { + self.invalidateBuildFile(uri); + }, + .never => {}, + } } self.lock.lock(); From 110876c2b921f04423b51489a6a3b6e30b2f8e54 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Sun, 23 Nov 2025 19:59:01 -0500 Subject: [PATCH 12/13] Add workspace symbols tests Co-Authored-By: Techatrix --- src/DocumentStore.zig | 18 ++-- src/Server.zig | 4 +- src/TrigramStore.zig | 10 ++- src/features/document_symbol.zig | 2 +- src/features/workspace_symbols.zig | 27 +++--- tests/context.zig | 33 +++++++- tests/lsp_features/workspace_symbols.zig | 101 +++++++++++++++++++++++ tests/tests.zig | 1 + 8 files changed, 157 insertions(+), 39 deletions(-) create mode 100644 tests/lsp_features/workspace_symbols.zig diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 288ffe1f9..e614d8282 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -1096,7 +1096,7 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void pub fn loadTrigramStores( store: *DocumentStore, - filter_paths: []const []const u8, + filter_uris: []const std.Uri, ) error{OutOfMemory}![]*DocumentStore.Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); @@ -1105,17 +1105,11 @@ pub fn loadTrigramStores( errdefer handles.deinit(store.allocator); for (store.handles.values()) |handle| { - if (handle.uri.toFsPath(store.allocator)) |path| { - defer store.allocator.free(path); - for (filter_paths) |filter_path| { - if (std.mem.startsWith(u8, path, filter_path)) break; - } else break; - } else |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - else => { - // The URI is either invalid or not a `file` scheme. Either way, we should include it. 
- }, - } + var handle_uri = std.Uri.parse(handle.uri.raw) catch unreachable; + for (filter_uris) |filter_uri| { + if (!std.ascii.eqlIgnoreCase(handle_uri.scheme, filter_uri.scheme)) continue; + if (std.mem.startsWith(u8, handle_uri.path.percent_encoded, filter_uri.path.percent_encoded)) break; + } else break; handles.appendAssumeCapacity(handle); } diff --git a/src/Server.zig b/src/Server.zig index 4fe81fb5c..a8fe4916a 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -869,7 +869,7 @@ fn addWorkspace(server: *Server, uri: Uri) error{OutOfMemory}!void { } const file_count = server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) { - error.UnsupportedScheme => return, + error.UnsupportedScheme => return, // https://github.com/microsoft/language-server-protocol/issues/1264 else => { log.err("failed to load files in workspace '{s}': {}", .{ uri.raw, err }); return; @@ -1572,7 +1572,7 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ return try selection_range.generateSelectionRanges(arena, handle, request.positions, server.offset_encoding); } -fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) Error!lsp.ResultType("workspace/symbol") { +fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) Error!?types.workspace.Symbol.Result { return try @import("features/workspace_symbols.zig").handler(server, arena, request); } diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index ca763104b..177c73f4e 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -148,13 +148,12 @@ pub fn init( defer context.in_function = old_in_function; switch (cb_tree.nodeTag(node)) { + .fn_decl => context.in_function = true, .fn_proto, .fn_proto_multi, .fn_proto_one, .fn_proto_simple, - => |tag| skip: { - context.in_function = tag == .fn_decl; - + => skip: { const fn_token = cb_tree.nodeMainToken(node); if (cb_tree.tokenTag(fn_token + 1) != .identifier) break :skip; @@ -204,6 +203,8 @@ pub fn init( }, .test_decl => skip: { + context.in_function = true; + const test_name_token = cb_tree.nodeData(node).opt_token_and_node[0].unwrap() orelse break :skip; try context.store.appendDeclaration( @@ -297,7 +298,8 @@ fn appendDeclaration( .raw => { if (name.len < 3) return; for (0..name.len - 2) |index| { - const trigram = name[index..][0..3].*; + var trigram = name[index..][0..3].*; + for (&trigram) |*char| char.* = std.ascii.toLower(char.*); try store.appendOneTrigram(allocator, trigram); } }, diff --git a/src/features/document_symbol.zig b/src/features/document_symbol.zig index d88745de4..306a67dda 100644 --- a/src/features/document_symbol.zig +++ b/src/features/document_symbol.zig @@ -27,7 +27,7 @@ const Context = struct { total_symbol_count: *usize, }; -fn tokenNameMaybeQuotes(tree: *const Ast, token: Ast.TokenIndex) []const u8 { +pub fn tokenNameMaybeQuotes(tree: *const Ast, token: Ast.TokenIndex) []const u8 { const token_slice = tree.tokenSlice(token); switch (tree.tokenTag(token)) { .identifier => return token_slice, diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig index 9f61ce656..b8edd7638 100644 --- a/src/features/workspace_symbols.zig +++ b/src/features/workspace_symbols.zig @@ -9,24 +9,23 @@ const DocumentStore = @import("../DocumentStore.zig"); const offsets = @import("../offsets.zig"); const Server = @import("../Server.zig"); const TrigramStore = @import("../TrigramStore.zig"); +const Uri = 
@import("../Uri.zig"); -pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { +pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!?types.workspace.Symbol.Result { if (request.query.len == 0) return null; - var workspace_paths: std.ArrayList([]const u8) = try .initCapacity(arena, server.workspaces.items.len); + var workspace_uris: std.ArrayList(std.Uri) = try .initCapacity(arena, server.workspaces.items.len); + defer workspace_uris.deinit(arena); + for (server.workspaces.items) |workspace| { - const path = workspace.uri.toFsPath(arena) catch |err| switch (err) { - error.UnsupportedScheme => return null, // https://github.com/microsoft/language-server-protocol/issues/1264 - error.OutOfMemory => return error.OutOfMemory, - }; - workspace_paths.appendAssumeCapacity(path); + workspace_uris.appendAssumeCapacity(std.Uri.parse(workspace.uri.raw) catch unreachable); } - const handles = try server.document_store.loadTrigramStores(workspace_paths.items); + const handles = try server.document_store.loadTrigramStores(workspace_uris.items); defer server.document_store.allocator.free(handles); - var symbols: std.ArrayListUnmanaged(types.workspace.Symbol) = .empty; - var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + var symbols: std.ArrayList(types.workspace.Symbol) = .empty; + var declaration_buffer: std.ArrayList(TrigramStore.Declaration.Index) = .empty; for (handles) |handle| { const trigram_store = handle.getTrigramStoreCached(); @@ -60,12 +59,8 @@ pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspa const name_token = names[@intFromEnum(declaration)]; const kind = kinds[@intFromEnum(declaration)]; - const loc = switch (handle.tree.tokenTag(name_token)) { - .identifier => offsets.identifierTokenToNameLoc(&handle.tree, name_token), - .string_literal => offsets.tokenToLoc(&handle.tree, name_token), - else => unreachable, - }; - const name = offsets.locToSlice(handle.tree.source, loc); + const loc = offsets.tokenToLoc(&handle.tree, name_token); + const name = @import("document_symbol.zig").tokenNameMaybeQuotes(&handle.tree, name_token); const start_position = offsets.advancePosition(handle.tree.source, last_position, last_index, loc.start, server.offset_encoding); const end_position = offsets.advancePosition(handle.tree.source, start_position, loc.start, loc.end, server.offset_encoding); diff --git a/tests/context.zig b/tests/context.zig index 14d6820f2..65c295cae 100644 --- a/tests/context.zig +++ b/tests/context.zig @@ -87,17 +87,21 @@ pub const Context = struct { use_file_scheme: bool = false, source: []const u8, mode: std.zig.Ast.Mode = .zig, + base_directory: []const u8 = "/", }) !zls.Uri { + std.debug.assert(std.mem.startsWith(u8, options.base_directory, "/")); + std.debug.assert(std.mem.endsWith(u8, options.base_directory, "/")); + const fmt = switch (builtin.os.tag) { - .windows => "file:/c:/Untitled-{d}.{t}", - else => "file:/Untitled-{d}.{t}", + .windows => "file:/c:{s}Untitled-{d}.{t}", + else => "file:{s}Untitled-{d}.{t}", }; const arena = self.arena.allocator(); const path = if (options.use_file_scheme) - try std.fmt.allocPrint(arena, fmt, .{ self.file_id, options.mode }) + try std.fmt.allocPrint(arena, fmt, .{ options.base_directory, self.file_id, options.mode }) else - try std.fmt.allocPrint(arena, "untitled:/Untitled-{d}.{t}", .{ self.file_id, options.mode 
}); + try std.fmt.allocPrint(arena, "untitled:{s}Untitled-{d}.{t}", .{ options.base_directory, self.file_id, options.mode }); const uri: zls.Uri = try .parse(arena, path); const params: types.TextDocument.DidOpenParams = .{ @@ -114,4 +118,25 @@ pub const Context = struct { self.file_id += 1; return uri; } + + pub fn addWorkspace(self: *Context, name: []const u8, base_directory: []const u8) !void { + std.debug.assert(std.mem.startsWith(u8, base_directory, "/")); + std.debug.assert(std.mem.endsWith(u8, base_directory, "/")); + + try self.server.sendNotificationSync( + self.arena.allocator(), + "workspace/didChangeWorkspaceFolders", + .{ + .event = .{ + .added = &.{ + .{ + .uri = try std.fmt.allocPrint(self.arena.allocator(), "untitled:{s}", .{base_directory}), + .name = name, + }, + }, + .removed = &.{}, + }, + }, + ); + } }; diff --git a/tests/lsp_features/workspace_symbols.zig b/tests/lsp_features/workspace_symbols.zig new file mode 100644 index 000000000..410d5192f --- /dev/null +++ b/tests/lsp_features/workspace_symbols.zig @@ -0,0 +1,101 @@ +const std = @import("std"); +const zls = @import("zls"); + +const Context = @import("../context.zig").Context; + +const types = zls.lsp.types; + +const allocator: std.mem.Allocator = std.testing.allocator; + +test "workspace symbols" { + var ctx: Context = try .init(); + defer ctx.deinit(); + + try ctx.addWorkspace("Animal Shelter", "/animal_shelter/"); + + _ = try ctx.addDocument(.{ .source = + \\const SalamanderCrab = struct { + \\ fn salamander_crab() void {} + \\}; + , .base_directory = "/animal_shelter/" }); + + _ = try ctx.addDocument(.{ .source = + \\const Dog = struct { + \\ const sheltie: Dog = .{}; + \\ var @"Mr Crabs" = @compileError("hold up"); + \\}; + \\test "walk the dog" { + \\ const dog: Dog = .sheltie; + \\ _ = dog; // nah + \\} + , .base_directory = "/animal_shelter/" }); + + _ = try ctx.addDocument(.{ .source = + \\const Lion = struct { + \\ extern fn evolveToMonke() void; + \\ fn roar() void { + \\ var lion = "cool!"; + \\ const Lion2 = struct { + \\ const lion_for_real = 0; + \\ }; + \\ } + \\}; + , .base_directory = "/animal_shelter/" }); + + _ = try ctx.addDocument(.{ .source = + \\const PotatoDoctor = struct {}; + , .base_directory = "/farm/" }); + + try testDocumentSymbol(&ctx, "Sal", + \\Constant SalamanderCrab + \\Function salamander_crab + ); + try testDocumentSymbol(&ctx, "_cr___a_b_", + \\Constant SalamanderCrab + \\Function salamander_crab + \\Variable @"Mr Crabs" + ); + try testDocumentSymbol(&ctx, "dog", + \\Constant Dog + \\Method walk the dog + ); + try testDocumentSymbol(&ctx, "potato_d", ""); + // Becomes S\x00\x00 which matches nothing + try testDocumentSymbol(&ctx, "S", ""); + try testDocumentSymbol(&ctx, "lion", + \\Constant Lion + \\Constant lion_for_real + ); + try testDocumentSymbol(&ctx, "monke", + \\Function evolveToMonke + ); +} + +fn testDocumentSymbol(ctx: *Context, query: []const u8, expected: []const u8) !void { + const response = try ctx.server.sendRequestSync( + ctx.arena.allocator(), + "workspace/symbol", + .{ .query = query }, + ) orelse { + std.debug.print("Server returned `null` as the result\n", .{}); + return error.InvalidResponse; + }; + + var actual: std.ArrayList(u8) = .empty; + defer actual.deinit(allocator); + + for (response.workspace_symbols) |workspace_symbol| { + std.debug.assert(workspace_symbol.tags == null); // unsupported for now + std.debug.assert(workspace_symbol.containerName == null); // unsupported for now + try actual.print(allocator, "{t} {s}\n", .{ + 
workspace_symbol.kind, + workspace_symbol.name, + }); + } + + if (actual.items.len != 0) { + _ = actual.pop(); // Final \n + } + + try zls.testing.expectEqualStrings(expected, actual.items); +} diff --git a/tests/tests.zig b/tests/tests.zig index ffe7f37d1..76307fd05 100644 --- a/tests/tests.zig +++ b/tests/tests.zig @@ -22,6 +22,7 @@ comptime { _ = @import("lsp_features/selection_range.zig"); _ = @import("lsp_features/semantic_tokens.zig"); _ = @import("lsp_features/signature_help.zig"); + _ = @import("lsp_features/workspace_symbols.zig"); // Language features _ = @import("language_features/cimport.zig"); From e32251442401082f6d1b843779a0d4c3d52d7827 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Sun, 23 Nov 2025 20:16:31 -0500 Subject: [PATCH 13/13] fmt fix --- tests/lsp_features/workspace_symbols.zig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/lsp_features/workspace_symbols.zig b/tests/lsp_features/workspace_symbols.zig index 410d5192f..a3ceeb1cb 100644 --- a/tests/lsp_features/workspace_symbols.zig +++ b/tests/lsp_features/workspace_symbols.zig @@ -13,13 +13,13 @@ test "workspace symbols" { try ctx.addWorkspace("Animal Shelter", "/animal_shelter/"); - _ = try ctx.addDocument(.{ .source = + _ = try ctx.addDocument(.{ .source = \\const SalamanderCrab = struct { \\ fn salamander_crab() void {} \\}; , .base_directory = "/animal_shelter/" }); - _ = try ctx.addDocument(.{ .source = + _ = try ctx.addDocument(.{ .source = \\const Dog = struct { \\ const sheltie: Dog = .{}; \\ var @"Mr Crabs" = @compileError("hold up"); @@ -30,7 +30,7 @@ test "workspace symbols" { \\} , .base_directory = "/animal_shelter/" }); - _ = try ctx.addDocument(.{ .source = + _ = try ctx.addDocument(.{ .source = \\const Lion = struct { \\ extern fn evolveToMonke() void; \\ fn roar() void { @@ -42,7 +42,7 @@ test "workspace symbols" { \\}; , .base_directory = "/animal_shelter/" }); - _ = try ctx.addDocument(.{ .source = + _ = try ctx.addDocument(.{ .source = \\const PotatoDoctor = struct {}; , .base_directory = "/farm/" });