diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index ee610ccc3..e614d8282 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -14,6 +14,7 @@ const tracy = @import("tracy"); const translate_c = @import("translate_c.zig"); const DocumentScope = @import("DocumentScope.zig"); const DiagnosticsCollection = @import("DiagnosticsCollection.zig"); +const TrigramStore = @import("TrigramStore.zig"); const DocumentStore = @This(); @@ -26,6 +27,7 @@ thread_pool: *std.Thread.Pool, handles: Uri.ArrayHashMap(*Handle) = .empty, build_files: if (supports_build_system) Uri.ArrayHashMap(*BuildFile) else void = if (supports_build_system) .empty else {}, cimports: if (supports_build_system) std.AutoArrayHashMapUnmanaged(Hash, translate_c.Result) else void = if (supports_build_system) .empty else {}, +trigram_stores: Uri.ArrayHashMap(TrigramStore) = .empty, diagnostics_collection: *DiagnosticsCollection, builds_in_progress: std.atomic.Value(i32) = .init(0), transport: ?*lsp.Transport = null, @@ -34,6 +36,8 @@ lsp_capabilities: struct { supports_semantic_tokens_refresh: bool = false, supports_inlay_hints_refresh: bool = false, } = .{}, +currently_loading_uris: Uri.ArrayHashMap(void) = .empty, +wait_for_currently_loading_uri: std.Thread.Condition = .{}, pub const Hasher = std.crypto.auth.siphash.SipHash128(1, 3); pub const Hash = [Hasher.mac_length]u8; @@ -194,6 +198,7 @@ pub const Handle = struct { lazy_condition: std.Thread.Condition = .{}, import_uris: ?[]Uri = null, + trigram_store: TrigramStore = undefined, document_scope: DocumentScope = undefined, zzoiir: ZirOrZoir = undefined, @@ -232,6 +237,11 @@ pub const Handle = struct { /// `false` indicates the document only exists because it is a dependency of another document /// or has been closed with `textDocument/didClose`. 
lsp_synced: bool = false, + /// true if a thread has acquired the permission to compute the `TrigramStore` + /// all other threads will wait until the given thread has computed the `TrigramStore` before reading it. + has_trigram_store_lock: bool = false, + /// true if `handle.impl.trigram_store` has been set + has_trigram_store: bool = false, /// true if a thread has acquired the permission to compute the `DocumentScope` /// all other threads will wait until the given thread has computed the `DocumentScope` before reading it. has_document_scope_lock: bool = false, @@ -242,7 +252,7 @@ pub const Handle = struct { /// all other threads will wait until the given thread has computed the `std.zig.Zir` or `std.zig.Zoir` before reading it. /// true if `handle.impl.zir` has been set has_zzoiir: bool = false, - _: u27 = 0, + _: u25 = 0, }; /// Takes ownership of `text` on success. @@ -287,6 +297,7 @@ pub const Handle = struct { .zon => self.impl.zzoiir.zon.deinit(allocator), }; if (status.has_document_scope) self.impl.document_scope.deinit(allocator); + if (status.has_trigram_store) self.impl.trigram_store.deinit(allocator); allocator.free(self.tree.source); self.tree.deinit(allocator); @@ -366,6 +377,23 @@ pub const Handle = struct { return self.impl.document_scope; } + pub fn getTrigramStore(self: *Handle) error{OutOfMemory}!TrigramStore { + if (self.getStatus().has_trigram_store) return self.impl.trigram_store; + return try self.getLazy(TrigramStore, "trigram_store", struct { + fn create(handle: *Handle, allocator: std.mem.Allocator) error{OutOfMemory}!TrigramStore { + return try .init(allocator, &handle.tree); + } + }); + } + + /// Asserts that `getTrigramStore` has been previously called on `handle`. 
+ pub fn getTrigramStoreCached(self: *Handle) TrigramStore { + if (builtin.mode == .Debug) { + std.debug.assert(self.getStatus().has_trigram_store); + } + return self.impl.trigram_store; + } + pub fn getZir(self: *Handle) error{OutOfMemory}!std.zig.Zir { std.debug.assert(self.tree.mode == .zig); const zir_or_zoir = try self.getZirOrZoir(); @@ -603,6 +631,15 @@ pub fn deinit(self: *DocumentStore) void { } self.handles.deinit(self.allocator); + for (self.trigram_stores.keys(), self.trigram_stores.values()) |uri, *trigram_store| { + uri.deinit(self.allocator); + trigram_store.deinit(self.allocator); + } + self.trigram_stores.deinit(self.allocator); + + std.debug.assert(self.currently_loading_uris.count() == 0); + self.currently_loading_uris.deinit(self.allocator); + if (supports_build_system) { for (self.build_files.values()) |build_file| { build_file.deinit(self.allocator); @@ -686,9 +723,47 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); - if (self.getHandle(uri)) |handle| return handle; + { + self.lock.lock(); + defer self.lock.unlock(); + + while (true) { + if (self.handles.get(uri)) |handle| return handle; + + const gop = self.currently_loading_uris.getOrPutValue( + self.allocator, + uri, + {}, + ) catch return null; + + if (!gop.found_existing) { + break; + } + + var mutex: std.Thread.Mutex = .{}; + + mutex.lock(); + defer mutex.unlock(); + + self.lock.unlock(); + self.wait_for_currently_loading_uri.timedWait(&mutex, 10 * std.time.ns_per_ms) catch {}; // bounded wait: the loader may broadcast between `self.lock.unlock()` and this call, and a missed wakeup must not block forever; a timeout just re-runs the loop check + self.lock.lock(); + } + } + + defer { + self.lock.lock(); + defer self.lock.unlock(); + std.debug.assert(self.currently_loading_uris.swapRemove(uri)); + self.wait_for_currently_loading_uri.broadcast(); + } + const file_contents = self.readFile(uri) orelse return null; - return self.createAndStoreDocument(uri, file_contents, false) catch |err| { + return self.createAndStoreDocument( + uri, + file_contents, + .{ .lsp_synced = false, .load_build_file_behaviour = .never 
}, + ) catch |err| { log.err("failed to store document '{s}': {}", .{ uri.raw, err }); return null; }; @@ -754,7 +829,11 @@ pub fn openLspSyncedDocument(self: *DocumentStore, uri: Uri, text: []const u8) e } const duped_text = try self.allocator.dupeZ(u8, text); - _ = try self.createAndStoreDocument(uri, duped_text, true); + _ = try self.createAndStoreDocument( + uri, + duped_text, + .{ .lsp_synced = true, .load_build_file_behaviour = .load_but_dont_update }, + ); } /// Closes a document that has been synced over the LSP protocol (`textDocument/didClose`). @@ -791,7 +870,7 @@ pub fn refreshLspSyncedDocument(self: *DocumentStore, uri: Uri, new_text: [:0]co log.warn("Document modified without being opened: {s}", .{uri.raw}); } - _ = try self.createAndStoreDocument(uri, new_text, true); + _ = try self.createAndStoreDocument(uri, new_text, .{ .lsp_synced = true, .load_build_file_behaviour = .only_update }); } /// Refreshes a document from the file system, unless said document is synced over the LSP protocol. 
@@ -815,7 +894,11 @@ pub fn refreshDocumentFromFileSystem(self: *DocumentStore, uri: Uri, should_dele if (handle.isLspSynced()) return false; } const file_contents = self.readFile(uri) orelse return false; - _ = try self.createAndStoreDocument(uri, file_contents, false); + _ = try self.createAndStoreDocument( + uri, + file_contents, + .{ .lsp_synced = false, .load_build_file_behaviour = .only_update }, + ); } return true; @@ -928,6 +1011,9 @@ fn notifyBuildEnd(self: *DocumentStore, status: EndStatus) void { } fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + { build_file.impl.mutex.lock(); defer build_file.impl.mutex.unlock(); @@ -1008,6 +1094,58 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void } } +pub fn loadTrigramStores( + store: *DocumentStore, + filter_uris: []const std.Uri, +) error{OutOfMemory}![]*DocumentStore.Handle { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + var handles: std.ArrayListUnmanaged(*DocumentStore.Handle) = try .initCapacity(store.allocator, store.handles.count()); + errdefer handles.deinit(store.allocator); + + for (store.handles.values()) |handle| { + const handle_uri = std.Uri.parse(handle.uri.raw) catch unreachable; + for (filter_uris) |filter_uri| { + if (!std.ascii.eqlIgnoreCase(handle_uri.scheme, filter_uri.scheme)) continue; + if (std.mem.startsWith(u8, handle_uri.path.percent_encoded, filter_uri.path.percent_encoded)) break; + } else continue; // no filter URI matched: skip only this handle and keep scanning the remaining ones + handles.appendAssumeCapacity(handle); + } + + if (builtin.single_threaded) { + for (handles.items) |handle| { + _ = try handle.getTrigramStore(); + } + return try handles.toOwnedSlice(store.allocator); + } + + const loadTrigramStore = struct { + fn loadTrigramStore( + handle: *Handle, + did_out_of_memory: *std.atomic.Value(bool), + ) void { + _ = handle.getTrigramStore() catch { + did_out_of_memory.store(true, .release); + }; + } + 
}.loadTrigramStore; + + var wait_group: std.Thread.WaitGroup = .{}; + var did_out_of_memory: std.atomic.Value(bool) = .init(false); + + for (handles.items) |handle| { + const status = handle.getStatus(); + if (status.has_trigram_store) continue; + store.thread_pool.spawnWg(&wait_group, loadTrigramStore, .{ handle, &did_out_of_memory }); + } + store.thread_pool.waitAndWork(&wait_group); + + if (did_out_of_memory.load(.acquire)) return error.OutOfMemory; + + return try handles.toOwnedSlice(store.allocator); +} + pub fn isBuildFile(uri: Uri) bool { return std.mem.endsWith(u8, uri.raw, "/build.zig"); } @@ -1190,6 +1328,9 @@ fn buildDotZigExists(dir_path: []const u8) bool { /// See `Handle.getAssociatedBuildFileUri`. /// Caller owns returned memory. fn collectPotentialBuildFiles(self: *DocumentStore, uri: Uri) error{OutOfMemory}![]*BuildFile { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + if (isInStd(uri)) return &.{}; var potential_build_files: std.ArrayList(*BuildFile) = .empty; @@ -1332,25 +1473,38 @@ fn uriInImports( return false; } +const CreateAndStoreOptions = struct { + lsp_synced: bool, + load_build_file_behaviour: enum { load_but_dont_update, only_update, never }, +}; + /// takes ownership of the `text` passed in. 
/// **Thread safe** takes an exclusive lock fn createAndStoreDocument( self: *DocumentStore, uri: Uri, text: [:0]const u8, - lsp_synced: bool, + options: CreateAndStoreOptions, ) error{OutOfMemory}!*Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); - var new_handle = Handle.init(self.allocator, uri, text, lsp_synced) catch |err| { + var new_handle = Handle.init(self.allocator, uri, text, options.lsp_synced) catch |err| { self.allocator.free(text); return err; }; errdefer new_handle.deinit(); if (supports_build_system and isBuildFile(uri) and !isInStd(uri)) { - _ = self.getOrLoadBuildFile(uri); + switch (options.load_build_file_behaviour) { + .load_but_dont_update => { + _ = self.getOrLoadBuildFile(uri); + }, + .only_update => { + self.invalidateBuildFile(uri); + }, + .never => {}, + } } self.lock.lock(); @@ -1360,17 +1514,12 @@ fn createAndStoreDocument( errdefer if (!gop.found_existing) std.debug.assert(self.handles.swapRemove(uri)); if (gop.found_existing) { - if (lsp_synced) { - new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; - gop.value_ptr.*.impl.associated_build_file = .init; + new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; + gop.value_ptr.*.impl.associated_build_file = .init; - new_handle.uri = gop.key_ptr.*; - gop.value_ptr.*.deinit(); - gop.value_ptr.*.* = new_handle; - } else { - // TODO prevent concurrent `createAndStoreDocument` invocations from racing each other - new_handle.deinit(); - } + new_handle.uri = gop.key_ptr.*; + gop.value_ptr.*.deinit(); + gop.value_ptr.*.* = new_handle; } else { gop.key_ptr.* = try uri.dupe(self.allocator); errdefer gop.key_ptr.*.deinit(self.allocator); @@ -1385,6 +1534,75 @@ fn createAndStoreDocument( return gop.value_ptr.*; } +pub fn loadDirectoryRecursive(store: *DocumentStore, directory_uri: Uri) !usize { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + const workspace_path = try 
directory_uri.toFsPath(store.allocator); + defer store.allocator.free(workspace_path); + + var workspace_dir = try std.fs.openDirAbsolute(workspace_path, .{ .iterate = true }); + defer workspace_dir.close(); + + var walker = try workspace_dir.walk(store.allocator); + defer walker.deinit(); + + var not_currently_loading_uris: std.ArrayListUnmanaged(Uri) = .empty; + defer { + for (not_currently_loading_uris.items) |uri| uri.deinit(store.allocator); + not_currently_loading_uris.deinit(store.allocator); + } + + var file_count: usize = 0; + { + while (try walker.next()) |entry| { + if (entry.kind == .directory) { + if (std.mem.startsWith(u8, entry.basename, ".") or std.mem.eql(u8, entry.basename, "zig-cache")) { + walker.leave(); + } + continue; + } + if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue; + + file_count += 1; + + const path = try std.fs.path.join(store.allocator, &.{ workspace_path, entry.path }); + defer store.allocator.free(path); + + try not_currently_loading_uris.ensureUnusedCapacity(store.allocator, 1); + + const uri: Uri = try .fromPath(store.allocator, path); + errdefer comptime unreachable; + + store.lock.lockShared(); + defer store.lock.unlockShared(); + + if (store.handles.contains(uri) or store.currently_loading_uris.contains(uri)) { + uri.deinit(store.allocator); // already loaded or being loaded: this freshly allocated URI is not handed off, so free it here + } else { + not_currently_loading_uris.appendAssumeCapacity(uri); + } + } + } + + errdefer comptime unreachable; + + const S = struct { + fn getOrLoadHandleVoid(s: *DocumentStore, uri: Uri) void { + _ = s.getOrLoadHandle(uri); + uri.deinit(s.allocator); + } + }; + + var wait_group: std.Thread.WaitGroup = .{}; + while (not_currently_loading_uris.pop()) |uri| { + store.thread_pool.spawnWg(&wait_group, S.getOrLoadHandleVoid, .{ store, uri }); + } + store.thread_pool.waitAndWork(&wait_group); + + return file_count; +} + pub const CImportHandle = struct { /// the `@cImport` node node: Ast.Node.Index, diff --git a/src/Server.zig b/src/Server.zig index 260d1519c..a8fe4916a 100644 --- 
a/src/Server.zig +++ b/src/Server.zig @@ -563,7 +563,7 @@ fn initializeHandler(server: *Server, arena: std.mem.Allocator, request: types.I .documentRangeFormattingProvider = .{ .bool = false }, .foldingRangeProvider = .{ .bool = true }, .selectionRangeProvider = .{ .bool = true }, - .workspaceSymbolProvider = .{ .bool = false }, + .workspaceSymbolProvider = .{ .bool = true }, .workspace = .{ .workspaceFolders = .{ .supported = true, @@ -855,7 +855,6 @@ const Workspace = struct { fn addWorkspace(server: *Server, uri: Uri) error{OutOfMemory}!void { try server.workspaces.ensureUnusedCapacity(server.allocator, 1); server.workspaces.appendAssumeCapacity(try Workspace.init(server, uri)); - log.info("added Workspace Folder: {s}", .{uri.raw}); if (BuildOnSaveSupport.isSupportedComptime() and // Don't initialize build on save until initialization finished. @@ -868,6 +867,16 @@ fn addWorkspace(server: *Server, uri: Uri) error{OutOfMemory}!void { .restart = false, }); } + + const file_count = server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) { + error.UnsupportedScheme => return, // https://github.com/microsoft/language-server-protocol/issues/1264 + else => { + log.err("failed to load files in workspace '{s}': {}", .{ uri.raw, err }); + return; + }, + }; + + log.info("added Workspace Folder: {s} ({d} files)", .{ uri.raw, file_count }); } fn removeWorkspace(server: *Server, uri: Uri) void { @@ -1563,6 +1572,10 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ return try selection_range.generateSelectionRanges(arena, handle, request.positions, server.offset_encoding); } +fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) Error!?types.workspace.Symbol.Result { + return try @import("features/workspace_symbols.zig").handler(server, arena, request); +} + const HandledRequestParams = union(enum) { initialize: types.InitializeParams, shutdown, @@ -1586,6 +1599,7 @@ 
const HandledRequestParams = union(enum) { @"textDocument/codeAction": types.CodeAction.Params, @"textDocument/foldingRange": types.FoldingRange.Params, @"textDocument/selectionRange": types.SelectionRange.Params, + @"workspace/symbol": types.workspace.Symbol.Params, other: lsp.MethodWithParams, }; @@ -1630,6 +1644,7 @@ fn isBlockingMessage(msg: Message) bool { .@"textDocument/codeAction", .@"textDocument/foldingRange", .@"textDocument/selectionRange", + .@"workspace/symbol", => return false, .other => return false, }, @@ -1814,6 +1829,7 @@ pub fn sendRequestSync(server: *Server, arena: std.mem.Allocator, comptime metho .@"textDocument/codeAction" => try server.codeActionHandler(arena, params), .@"textDocument/foldingRange" => try server.foldingRangeHandler(arena, params), .@"textDocument/selectionRange" => try server.selectionRangeHandler(arena, params), + .@"workspace/symbol" => try server.workspaceSymbolHandler(arena, params), .other => return null, }; } diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig new file mode 100644 index 000000000..177c73f4e --- /dev/null +++ b/src/TrigramStore.zig @@ -0,0 +1,640 @@ +//! Per-file trigram store. + +const std = @import("std"); +const ast = @import("ast.zig"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; +const offsets = @import("offsets.zig"); + +pub const TrigramStore = @This(); + +pub const Trigram = [3]u8; + +pub const Declaration = struct { + pub const Index = enum(u32) { _ }; + + pub const Kind = enum { + variable, + constant, + function, + test_function, + }; + + /// Either `.identifier` or `.string_literal`. 
+ name: Ast.TokenIndex, + kind: Kind, +}; + +has_filter: bool, +filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), +trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), +declarations: std.MultiArrayList(Declaration), + +pub const TrigramIterator = struct { + buffer: []const u8, + index: usize, + + trigram_buffer: Trigram, + trigram_buffer_index: u2, + + pub fn init(buffer: []const u8) TrigramIterator { + assert(buffer.len != 0); + return .{ + .buffer = buffer, + .index = 0, + .trigram_buffer = @splat(0), + .trigram_buffer_index = 0, + }; + } + + pub fn next(ti: *TrigramIterator) ?Trigram { + while (ti.index < ti.buffer.len) { + defer ti.index += 1; + const c = std.ascii.toLower(ti.buffer[ti.index]); + if (c == '_') continue; + + if (ti.trigram_buffer_index < 3) { + ti.trigram_buffer[ti.trigram_buffer_index] = c; + ti.trigram_buffer_index += 1; + continue; + } + + defer { + @memmove(ti.trigram_buffer[0..2], ti.trigram_buffer[1..3]); + ti.trigram_buffer[2] = c; + } + return ti.trigram_buffer; + } else if (ti.trigram_buffer_index > 0) { + ti.trigram_buffer_index = 0; + return ti.trigram_buffer; + } else { + return null; + } + } +}; + +test TrigramIterator { + try testTrigramIterator("a", &.{"a\x00\x00".*}); + try testTrigramIterator("ab", &.{"ab\x00".*}); + try testTrigramIterator("abc", &.{"abc".*}); + + try testTrigramIterator("hello", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("HELLO", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("HellO", &.{ "hel".*, "ell".*, "llo".* }); + + try testTrigramIterator("a_", &.{"a\x00\x00".*}); + try testTrigramIterator("ab_", &.{"ab\x00".*}); + try testTrigramIterator("abc_", &.{"abc".*}); + + try testTrigramIterator("_a", &.{"a\x00\x00".*}); + try testTrigramIterator("_a_", &.{"a\x00\x00".*}); + try testTrigramIterator("_a__", &.{"a\x00\x00".*}); + + try testTrigramIterator("_", &.{}); + try testTrigramIterator("__", &.{}); + try 
testTrigramIterator("___", &.{}); + + try testTrigramIterator("He_ll_O", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("He__ll___O", &.{ "hel".*, "ell".*, "llo".* }); + try testTrigramIterator("__He__ll__O_", &.{ "hel".*, "ell".*, "llo".* }); + + try testTrigramIterator("HellO__World___HelloWorld", &.{ + "hel".*, "ell".*, "llo".*, + "low".*, "owo".*, "wor".*, + "orl".*, "rld".*, "ldh".*, + "dhe".*, "hel".*, "ell".*, + "llo".*, "low".*, "owo".*, + "wor".*, "orl".*, "rld".*, + }); +} + +fn testTrigramIterator( + input: []const u8, + expected: []const Trigram, +) !void { + const allocator = std.testing.allocator; + + var actual_buffer: std.ArrayList(Trigram) = .empty; + defer actual_buffer.deinit(allocator); + + var it: TrigramIterator = .init(input); + while (it.next()) |trigram| { + try actual_buffer.append(allocator, trigram); + } + + try @import("testing.zig").expectEqual(expected, actual_buffer.items); +} + +pub fn init( + allocator: std.mem.Allocator, + tree: *const Ast, +) error{OutOfMemory}!TrigramStore { + var store: TrigramStore = .{ + .has_filter = false, + .filter_buckets = .empty, + .trigram_to_declarations = .empty, + .declarations = .empty, + }; + errdefer store.deinit(allocator); + + const Context = struct { + allocator: std.mem.Allocator, + store: *TrigramStore, + in_function: bool, + + const Error = error{OutOfMemory}; + fn callback(context: *@This(), cb_tree: *const Ast, node: Ast.Node.Index) Error!void { + const old_in_function = context.in_function; + defer context.in_function = old_in_function; + + switch (cb_tree.nodeTag(node)) { + .fn_decl => context.in_function = true, + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + => skip: { + const fn_token = cb_tree.nodeMainToken(node); + if (cb_tree.tokenTag(fn_token + 1) != .identifier) break :skip; + + try context.store.appendDeclaration( + context.allocator, + cb_tree, + fn_token + 1, + .function, + ); + }, + .root => unreachable, + .container_decl, + 
.container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + => context.in_function = false, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => skip: { + if (context.in_function) break :skip; + + const main_token = cb_tree.nodeMainToken(node); + + const kind: Declaration.Kind = switch (cb_tree.tokenTag(main_token)) { + .keyword_var => .variable, + .keyword_const => .constant, + else => unreachable, + }; + + try context.store.appendDeclaration( + context.allocator, + cb_tree, + main_token + 1, + kind, + ); + }, + + .test_decl => skip: { + context.in_function = true; + + const test_name_token = cb_tree.nodeData(node).opt_token_and_node[0].unwrap() orelse break :skip; + + try context.store.appendDeclaration( + context.allocator, + cb_tree, + test_name_token, + .test_function, + ); + }, + else => {}, + } + + try ast.iterateChildren(cb_tree, node, context, Error, callback); + } + }; + + var context: Context = .{ + .allocator = allocator, + .store = &store, + .in_function = false, + }; + try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback); + + const trigram_map = &store.trigram_to_declarations; // go through the map each time: `swapRemoveAt` shrinks it, so a cached `values()` slice would keep a stale length + var index: usize = 0; + while (index < trigram_map.count()) { + if (trigram_map.values()[index].items.len == 0) { + trigram_map.values()[index].deinit(allocator); + trigram_map.swapRemoveAt(index); + } else { + index += 1; + } + } + + const trigrams = store.trigram_to_declarations.keys(); + + if (trigrams.len > 0) { + var prng = std.Random.DefaultPrng.init(0); + + const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; + try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); + store.filter_buckets.items.len = filter_capacity; + 
+ const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + filter.reset(); + store.has_filter = true; + + for (trigrams) |trigram| { + filter.append(prng.random(), trigram) catch |err| switch (err) { + error.EvictionFailed => { + // NOTE: This should generally be quite rare. + store.has_filter = false; + break; + }, + }; + } + } + + return store; +} + +pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { + store.filter_buckets.deinit(allocator); + for (store.trigram_to_declarations.values()) |*list| { + list.deinit(allocator); + } + store.trigram_to_declarations.deinit(allocator); + store.declarations.deinit(allocator); + store.* = undefined; +} + +fn appendDeclaration( + store: *TrigramStore, + allocator: std.mem.Allocator, + tree: *const Ast, + name_token: Ast.TokenIndex, + kind: Declaration.Kind, +) error{OutOfMemory}!void { + const raw_name = tree.tokenSlice(name_token); + + const strategy: enum { raw, smart }, const name = switch (tree.tokenTag(name_token)) { + .string_literal => .{ .raw, raw_name[1 .. raw_name.len - 1] }, + .identifier => if (std.mem.startsWith(u8, raw_name, "@")) + .{ .raw, raw_name[2 .. 
raw_name.len - 1] } + else + .{ .smart, raw_name }, + else => unreachable, + }; + + switch (strategy) { + .raw => { + if (name.len < 3) return; + for (0..name.len - 2) |index| { + var trigram = name[index..][0..3].*; + for (&trigram) |*char| char.* = std.ascii.toLower(char.*); + try store.appendOneTrigram(allocator, trigram); + } + }, + .smart => { + var it: TrigramIterator = .init(name); + while (it.next()) |trigram| { + try store.appendOneTrigram(allocator, trigram); + } + }, + } + + try store.declarations.append(allocator, .{ + .name = name_token, + .kind = kind, + }); +} + +fn appendOneTrigram( + store: *TrigramStore, + allocator: std.mem.Allocator, + trigram: Trigram, +) error{OutOfMemory}!void { + const declaration_index: Declaration.Index = @enumFromInt(store.declarations.len); + + const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty); + + if (gop.value_ptr.getLastOrNull() != declaration_index) { + try gop.value_ptr.append(allocator, declaration_index); + } +} + +/// Asserts query.len >= 1. Asserts declaration_buffer.items.len == 0. 
+pub fn declarationsForQuery( + store: *const TrigramStore, + allocator: std.mem.Allocator, + query: []const u8, + declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index), +) error{OutOfMemory}!void { + assert(query.len >= 1); + assert(declaration_buffer.items.len == 0); + + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + + if (store.has_filter) { + var ti: TrigramIterator = .init(query); + while (ti.next()) |trigram| { + if (!filter.contains(trigram)) { + return; + } + } + } + + var ti: TrigramIterator = .init(query); + + const first = (store.trigram_to_declarations.get(ti.next() orelse return) orelse return).items; + + try declaration_buffer.resize(allocator, first.len * 2); + + var len = first.len; + @memcpy(declaration_buffer.items[0..len], first); + + while (ti.next()) |trigram| { + const old_len = len; + len = mergeIntersection( + (store.trigram_to_declarations.get(trigram) orelse { + declaration_buffer.clearRetainingCapacity(); + return; + }).items, + declaration_buffer.items[0..len], + declaration_buffer.items[len..], + ); + @memcpy(declaration_buffer.items[0..len], declaration_buffer.items[old_len..][0..len]); + declaration_buffer.shrinkRetainingCapacity(len * 2); + } + + declaration_buffer.shrinkRetainingCapacity(declaration_buffer.items.len / 2); +} + +/// Asserts `@min(a.len, b.len) <= out.len`. 
+fn mergeIntersection( + a: []const Declaration.Index, + b: []const Declaration.Index, + out: []Declaration.Index, +) u32 { + assert(@min(a.len, b.len) <= out.len); + + var out_idx: u32 = 0; + + var a_idx: u32 = 0; + var b_idx: u32 = 0; + + while (a_idx < a.len and b_idx < b.len) { + const a_val = a[a_idx]; + const b_val = b[b_idx]; + + if (a_val == b_val) { + out[out_idx] = a_val; + out_idx += 1; + a_idx += 1; + b_idx += 1; + } else if (@intFromEnum(a_val) < @intFromEnum(b_val)) { + a_idx += 1; + } else { + b_idx += 1; + } + } + + return out_idx; +} + +fn parity(integer: anytype) enum(u1) { even, odd } { + return @enumFromInt(integer & 1); +} + +pub const CuckooFilter = struct { + buckets: []Bucket, + + pub const Fingerprint = enum(u8) { + none = std.math.maxInt(u8), + _, + + const precomputed_odd_hashes = blk: { + var table: [255]u32 = undefined; + + for (&table, 0..) |*h, index| { + h.* = @truncate(std.hash.Murmur2_64.hash(&.{index}) | 1); + } + + break :blk table; + }; + + pub fn oddHash(fingerprint: Fingerprint) u32 { + assert(fingerprint != .none); + return precomputed_odd_hashes[@intFromEnum(fingerprint)]; + } + }; + + pub const Bucket = [4]Fingerprint; + pub const BucketIndex = enum(u32) { + _, + + pub fn alternate(index: BucketIndex, fingerprint: Fingerprint, len: u32) BucketIndex { + assert(@intFromEnum(index) < len); + assert(fingerprint != .none); + + const signed_index: i64 = @intFromEnum(index); + const odd_hash: i64 = fingerprint.oddHash(); + + const unbounded = switch (parity(signed_index)) { + .even => signed_index + odd_hash, + .odd => signed_index - odd_hash, + }; + const bounded: u32 = @intCast(@mod(unbounded, len)); + + assert(parity(signed_index) != parity(bounded)); + + return @enumFromInt(bounded); + } + }; + + pub const Triplet = struct { + fingerprint: Fingerprint, + index_1: BucketIndex, + index_2: BucketIndex, + + pub fn initFromTrigram(trigram: Trigram, len: u32) Triplet { + const split: packed struct { + fingerprint: Fingerprint, + 
padding: u24, + index_1: u32, + } = @bitCast(std.hash.Murmur2_64.hash(&trigram)); + + const index_1: BucketIndex = @enumFromInt(split.index_1 % len); + + const fingerprint: Fingerprint = if (split.fingerprint == .none) + @enumFromInt(1) + else + split.fingerprint; + + const triplet: Triplet = .{ + .fingerprint = fingerprint, + .index_1 = index_1, + .index_2 = index_1.alternate(fingerprint, len), + }; + assert(triplet.index_2.alternate(fingerprint, len) == index_1); + + return triplet; + } + }; + + pub fn init(buckets: []Bucket) CuckooFilter { + assert(parity(buckets.len) == .even); + return .{ .buckets = buckets }; + } + + pub fn reset(filter: CuckooFilter) void { + @memset(filter.buckets, [1]Fingerprint{.none} ** @typeInfo(Bucket).array.len); + } + + pub fn capacityForCount(count: usize) error{Overflow}!usize { + const overallocated_count = std.math.divCeil(usize, try std.math.mul(usize, count, 105), 100) catch |err| switch (err) { + error.DivisionByZero => unreachable, + else => |e| return e, + }; + return overallocated_count + (overallocated_count & 1); + } + + pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); + + if (filter.appendToBucket(triplet.index_1, triplet.fingerprint) or + filter.appendToBucket(triplet.index_2, triplet.fingerprint)) + { + return; + } + + var fingerprint = triplet.fingerprint; + var index = if (random.boolean()) triplet.index_1 else triplet.index_2; + for (0..500) |_| { + fingerprint = filter.swapFromBucket(random, index, fingerprint); + index = index.alternate(fingerprint, @intCast(filter.buckets.len)); + + if (filter.appendToBucket(index, fingerprint)) { + return; + } + } + + return error.EvictionFailed; + } + + fn bucketAt(filter: CuckooFilter, index: BucketIndex) *Bucket { + return &filter.buckets[@intFromEnum(index)]; + } + + fn appendToBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: 
Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == .none) { + slot.* = fingerprint; + return true; + } + } + + return false; + } + + fn swapFromBucket( + filter: CuckooFilter, + random: std.Random, + index: BucketIndex, + fingerprint: Fingerprint, + ) Fingerprint { + assert(fingerprint != .none); + + comptime assert(@typeInfo(Bucket).array.len == 4); + const target = &filter.bucketAt(index)[random.int(u2)]; + + const old_fingerprint = target.*; + assert(old_fingerprint != .none); + + target.* = fingerprint; + + return old_fingerprint; + } + + pub fn contains(filter: CuckooFilter, trigram: Trigram) bool { + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); + + return filter.containsInBucket(triplet.index_1, triplet.fingerprint) or + filter.containsInBucket(triplet.index_2, triplet.fingerprint); + } + + fn containsInBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == fingerprint) { + return true; + } + } + + return false; + } +}; + +// TODO: More extensive (different capacities) testing. 
+test CuckooFilter {
+    const gpa = std.testing.allocator;
+
+    const round_count = 2_500;
+    const sample_size = 499;
+    const probe_count = 2_500;
+    const bucket_count = comptime CuckooFilter.capacityForCount(sample_size) catch unreachable;
+
+    var inserted: std.AutoArrayHashMapUnmanaged(Trigram, void) = .empty;
+    defer inserted.deinit(gpa);
+    try inserted.ensureTotalCapacity(gpa, sample_size);
+
+    var storage: [bucket_count]CuckooFilter.Bucket = undefined;
+    var filter: CuckooFilter = .init(&storage);
+
+    // One eviction PRNG is shared by all rounds; only the trigram generators are reseeded.
+    var eviction_prng: std.Random.DefaultPrng = .init(42);
+    const eviction_random = eviction_prng.random();
+
+    var seed: usize = 0;
+    while (seed < round_count) : (seed += 1) {
+        inserted.clearRetainingCapacity();
+        filter.reset();
+
+        var positive_prng: std.Random.DefaultPrng = .init(seed);
+        const positive_random = positive_prng.random();
+        var remaining: usize = sample_size;
+        while (remaining > 0) : (remaining -= 1) {
+            const trigram: Trigram = @bitCast(positive_random.int(u24));
+            inserted.putAssumeCapacity(trigram, {});
+            try filter.append(eviction_random, trigram);
+        }
+
+        // Every inserted trigram must be found again (no false negatives).
+        for (inserted.keys()) |trigram| {
+            try std.testing.expect(filter.contains(trigram));
+        }
+
+        // Probe with trigrams that were never inserted; only a small share may be reported.
+        var negative_prng: std.Random.DefaultPrng = .init(~seed);
+        const negative_random = negative_prng.random();
+        var false_positives: usize = 0;
+        for (0..probe_count) |_| {
+            const absent: Trigram = blk: {
+                while (true) {
+                    const candidate: Trigram = @bitCast(negative_random.int(u24));
+                    if (!inserted.contains(candidate)) break :blk candidate;
+                }
+            };
+            if (filter.contains(absent)) false_positives += 1;
+        }
+
+        const fpr = @as(f32, @floatFromInt(false_positives)) / probe_count;
+
+        errdefer std.log.err("fpr: {d}%", .{fpr * 100});
+        try std.testing.expect(fpr < 0.035);
+    }
+}
diff --git a/src/features/document_symbol.zig b/src/features/document_symbol.zig
index d88745de4..306a67dda 100644
--- a/src/features/document_symbol.zig
+++ b/src/features/document_symbol.zig
@@ -27,7 +27,7 @@ const Context = struct {
     total_symbol_count: *usize,
 };
 
-fn tokenNameMaybeQuotes(tree: *const Ast, token: Ast.TokenIndex) []const u8 {
+pub fn tokenNameMaybeQuotes(tree: *const Ast, token:
Ast.TokenIndex) []const u8 {
     const token_slice = tree.tokenSlice(token);
     switch (tree.tokenTag(token)) {
         .identifier => return token_slice,
diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig
new file mode 100644
index 000000000..b8edd7638
--- /dev/null
+++ b/src/features/workspace_symbols.zig
@@ -0,0 +1,93 @@
+//! Implementation of [`workspace/symbol`](https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#workspace_symbol)
+
+const std = @import("std");
+
+const lsp = @import("lsp");
+const types = lsp.types;
+
+const DocumentStore = @import("../DocumentStore.zig");
+const document_symbol = @import("document_symbol.zig");
+const offsets = @import("../offsets.zig");
+const Server = @import("../Server.zig");
+const TrigramStore = @import("../TrigramStore.zig");
+const Uri = @import("../Uri.zig");
+
+const DeclarationIndex = TrigramStore.Declaration.Index;
+
+/// Orders declaration indices by the token index of each declaration's name.
+const NameTokenOrder = struct {
+    names: []const std.zig.Ast.TokenIndex,
+
+    fn lessThan(order: NameTokenOrder, lhs: DeclarationIndex, rhs: DeclarationIndex) bool {
+        return order.names[@intFromEnum(lhs)] < order.names[@intFromEnum(rhs)];
+    }
+};
+
+/// Handles a `workspace/symbol` request.
+///
+/// Returns `null` for an empty query. Otherwise asks the document store to `loadTrigramStores`
+/// for the server's workspace URIs and returns one symbol for every declaration that
+/// `declarationsForQuery` reports for `request.query`. The result is allocated with `arena`.
+pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.workspace.Symbol.Params) error{OutOfMemory}!?types.workspace.Symbol.Result {
+    if (request.query.len == 0) return null;
+
+    var workspace_uris: std.ArrayList(std.Uri) = try .initCapacity(arena, server.workspaces.items.len);
+    defer workspace_uris.deinit(arena);
+    for (server.workspaces.items) |workspace| {
+        workspace_uris.appendAssumeCapacity(std.Uri.parse(workspace.uri.raw) catch unreachable);
+    }
+
+    const handles = try server.document_store.loadTrigramStores(workspace_uris.items);
+    defer server.document_store.allocator.free(handles);
+
+    var symbols: std.ArrayList(types.workspace.Symbol) = .empty;
+    var matches: std.ArrayList(DeclarationIndex) = .empty;
+
+    for (handles) |handle| {
+        const trigram_store = handle.getTrigramStoreCached();
+
+        matches.clearRetainingCapacity();
+        try trigram_store.declarationsForQuery(arena, request.query, &matches);
+
+        const declarations = trigram_store.declarations.slice();
+        const names = declarations.items(.name);
+        const kinds = declarations.items(.kind);
+
+        // Sorting by name token lets each position below be advanced from the previous
+        // match instead of being recomputed from the start of the file.
+        std.mem.sortUnstable(DeclarationIndex, matches.items, NameTokenOrder{ .names = names }, NameTokenOrder.lessThan);
+
+        var cursor_index: usize = 0;
+        var cursor_position: offsets.Position = .{ .line = 0, .character = 0 };
+
+        try symbols.ensureUnusedCapacity(arena, matches.items.len);
+        for (matches.items) |declaration| {
+            const name_token = names[@intFromEnum(declaration)];
+            const loc = offsets.tokenToLoc(&handle.tree, name_token);
+
+            const start = offsets.advancePosition(handle.tree.source, cursor_position, cursor_index, loc.start, server.offset_encoding);
+            const end = offsets.advancePosition(handle.tree.source, start, loc.start, loc.end, server.offset_encoding);
+            cursor_index = loc.end;
+            cursor_position = end;
+
+            symbols.appendAssumeCapacity(.{
+                .name = document_symbol.tokenNameMaybeQuotes(&handle.tree, name_token),
+                .kind = switch (kinds[@intFromEnum(declaration)]) {
+                    .variable => .Variable,
+                    .constant => .Constant,
+                    .function => .Function,
+                    // There is no SymbolKind that represents a test.
+                    .test_function => .Method,
+                },
+                .location = .{
+                    .location = .{
+                        .uri = handle.uri.raw,
+                        .range = .{ .start = start, .end = end },
+                    },
+                },
+            });
+        }
+    }
+
+    return .{ .workspace_symbols = symbols.items };
+}
diff --git a/src/zls.zig b/src/zls.zig
index 864bcb37f..7f244c28d 100644
--- a/src/zls.zig
+++ b/src/zls.zig
@@ -18,6 +18,7 @@ pub const Server = @import("Server.zig");
 pub const snippets = @import("snippets.zig");
 pub const testing = @import("testing.zig");
 pub const translate_c = @import("translate_c.zig");
+pub const TrigramStore =
@import("TrigramStore.zig");
 pub const Uri = @import("Uri.zig");
 
 pub const code_actions = @import("features/code_actions.zig");
diff --git a/tests/context.zig b/tests/context.zig
index 14d6820f2..65c295cae 100644
--- a/tests/context.zig
+++ b/tests/context.zig
@@ -87,17 +87,21 @@ pub const Context = struct {
         use_file_scheme: bool = false,
         source: []const u8,
         mode: std.zig.Ast.Mode = .zig,
+        base_directory: []const u8 = "/",
     }) !zls.Uri {
+        std.debug.assert(std.mem.startsWith(u8, options.base_directory, "/"));
+        std.debug.assert(std.mem.endsWith(u8, options.base_directory, "/"));
+
         const fmt = switch (builtin.os.tag) {
-            .windows => "file:/c:/Untitled-{d}.{t}",
-            else => "file:/Untitled-{d}.{t}",
+            .windows => "file:/c:{s}Untitled-{d}.{t}",
+            else => "file:{s}Untitled-{d}.{t}",
         };
 
         const arena = self.arena.allocator();
         const path = if (options.use_file_scheme)
-            try std.fmt.allocPrint(arena, fmt, .{ self.file_id, options.mode })
+            try std.fmt.allocPrint(arena, fmt, .{ options.base_directory, self.file_id, options.mode })
         else
-            try std.fmt.allocPrint(arena, "untitled:/Untitled-{d}.{t}", .{ self.file_id, options.mode });
+            try std.fmt.allocPrint(arena, "untitled:{s}Untitled-{d}.{t}", .{ options.base_directory, self.file_id, options.mode });
         const uri: zls.Uri = try .parse(arena, path);
 
         const params: types.TextDocument.DidOpenParams = .{
@@ -114,4 +118,22 @@ pub const Context = struct {
         self.file_id += 1;
         return uri;
     }
+
+    /// Announces a workspace folder to the server by sending a
+    /// `workspace/didChangeWorkspaceFolders` notification that adds `untitled:{base_directory}`.
+    /// Asserts that `base_directory` starts and ends with `/`.
+    pub fn addWorkspace(self: *Context, name: []const u8, base_directory: []const u8) !void {
+        std.debug.assert(std.mem.startsWith(u8, base_directory, "/"));
+        std.debug.assert(std.mem.endsWith(u8, base_directory, "/"));
+
+        const arena = self.arena.allocator();
+        const folder_uri = try std.fmt.allocPrint(arena, "untitled:{s}", .{base_directory});
+
+        try self.server.sendNotificationSync(arena, "workspace/didChangeWorkspaceFolders", .{
+            .event = .{
+                .added = &.{.{ .uri = folder_uri, .name = name }},
+                .removed = &.{},
+            },
+        });
+    }
 };
diff --git
a/tests/lsp_features/workspace_symbols.zig b/tests/lsp_features/workspace_symbols.zig
new file mode 100644
index 000000000..a3ceeb1cb
--- /dev/null
+++ b/tests/lsp_features/workspace_symbols.zig
@@ -0,0 +1,103 @@
+const std = @import("std");
+const zls = @import("zls");
+
+const Context = @import("../context.zig").Context;
+
+const types = zls.lsp.types;
+
+const allocator: std.mem.Allocator = std.testing.allocator;
+
+test "workspace symbols" {
+    var ctx: Context = try .init();
+    defer ctx.deinit();
+
+    try ctx.addWorkspace("Animal Shelter", "/animal_shelter/");
+
+    _ = try ctx.addDocument(.{ .source =
+        \\const SalamanderCrab = struct {
+        \\ fn salamander_crab() void {}
+        \\};
+    , .base_directory = "/animal_shelter/" });
+
+    _ = try ctx.addDocument(.{ .source =
+        \\const Dog = struct {
+        \\ const sheltie: Dog = .{};
+        \\ var @"Mr Crabs" = @compileError("hold up");
+        \\};
+        \\test "walk the dog" {
+        \\ const dog: Dog = .sheltie;
+        \\ _ = dog; // nah
+        \\}
+    , .base_directory = "/animal_shelter/" });
+
+    _ = try ctx.addDocument(.{ .source =
+        \\const Lion = struct {
+        \\ extern fn evolveToMonke() void;
+        \\ fn roar() void {
+        \\ var lion = "cool!";
+        \\ const Lion2 = struct {
+        \\ const lion_for_real = 0;
+        \\ };
+        \\ }
+        \\};
+    , .base_directory = "/animal_shelter/" });
+
+    _ = try ctx.addDocument(.{ .source =
+        \\const PotatoDoctor = struct {};
+    , .base_directory = "/farm/" });
+
+    try testWorkspaceSymbol(&ctx, "Sal",
+        \\Constant SalamanderCrab
+        \\Function salamander_crab
+    );
+    try testWorkspaceSymbol(&ctx, "_cr___a_b_",
+        \\Constant SalamanderCrab
+        \\Function salamander_crab
+        \\Variable @"Mr Crabs"
+    );
+    try testWorkspaceSymbol(&ctx, "dog",
+        \\Constant Dog
+        \\Method walk the dog
+    );
+    try testWorkspaceSymbol(&ctx, "potato_d", "");
+    // Becomes S\x00\x00 which matches nothing
+    try testWorkspaceSymbol(&ctx, "S", "");
+    try testWorkspaceSymbol(&ctx, "lion",
+        \\Constant Lion
+        \\Constant lion_for_real
+    );
+    try testWorkspaceSymbol(&ctx, "monke",
+        \\Function evolveToMonke
+    );
+}
+
+/// Sends a `workspace/symbol` request for `query` and compares the response, rendered as one
+/// `<kind> <name>` line per symbol, against `expected`. A `null` response is an error.
+fn testWorkspaceSymbol(ctx: *Context, query: []const u8, expected: []const u8) !void {
+    const response = try ctx.server.sendRequestSync(
+        ctx.arena.allocator(),
+        "workspace/symbol",
+        .{ .query = query },
+    ) orelse {
+        std.debug.print("Server returned `null` as the result\n", .{});
+        return error.InvalidResponse;
+    };
+
+    var actual: std.ArrayList(u8) = .empty;
+    defer actual.deinit(allocator);
+
+    for (response.workspace_symbols) |workspace_symbol| {
+        try std.testing.expect(workspace_symbol.tags == null); // unsupported for now
+        try std.testing.expect(workspace_symbol.containerName == null); // unsupported for now
+        try actual.print(allocator, "{t} {s}\n", .{
+            workspace_symbol.kind,
+            workspace_symbol.name,
+        });
+    }
+
+    if (actual.items.len != 0) {
+        _ = actual.pop(); // Final \n
+    }
+
+    try zls.testing.expectEqualStrings(expected, actual.items);
+}
diff --git a/tests/tests.zig b/tests/tests.zig
index ffe7f37d1..76307fd05 100644
--- a/tests/tests.zig
+++ b/tests/tests.zig
@@ -22,6 +22,7 @@ comptime {
     _ = @import("lsp_features/selection_range.zig");
     _ = @import("lsp_features/semantic_tokens.zig");
     _ = @import("lsp_features/signature_help.zig");
+    _ = @import("lsp_features/workspace_symbols.zig");
 
     // Language features
     _ = @import("language_features/cimport.zig");