diff --git a/Cargo.lock b/Cargo.lock
index 0e02565c..b6bd9edf 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,18 +4,18 @@ version = 4

 [[package]]
 name = "addr2line"
-version = "0.24.2"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
+checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
 dependencies = [
  "gimli",
 ]

 [[package]]
 name = "adler2"
-version = "2.0.0"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"

 [[package]]
 name = "aead"
@@ -29,22 +29,22 @@ dependencies = [

 [[package]]
 name = "ahash"
-version = "0.8.11"
+version = "0.8.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
 dependencies = [
  "cfg-if",
- "getrandom 0.2.15",
+ "getrandom 0.3.4",
  "once_cell",
  "version_check",
- "zerocopy 0.7.35",
+ "zerocopy",
 ]

 [[package]]
 name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
 dependencies = [
  "memchr",
 ]
@@ -87,9 +87,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"

 [[package]]
 name = "anstream"
-version = "0.6.18"
+version = "0.6.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
+checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -102,44 +102,53 @@ dependencies = [

 [[package]]
 name = "anstyle"
-version = "1.0.10"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
+checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"

 [[package]]
 name = "anstyle-parse"
-version = "0.2.6"
+version = "0.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
 dependencies = [
  "utf8parse",
 ]

 [[package]]
 name = "anstyle-query"
-version = "1.1.2"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
 name = "anstyle-wincon"
-version = "3.0.7"
+version = "3.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
- "once_cell",
- "windows-sys 0.59.0",
+ "once_cell_polyfill",
+ "windows-sys 0.61.2",
 ]

 [[package]]
 name = "anyhow"
-version = "1.0.98"
+version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
+checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
+
+[[package]]
+name = "ar_archive_writer"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a"
+dependencies = [
+ "object 0.32.2",
+]

 [[package]]
 name = "arc-swap"
@@ -206,9 +215,9 @@ dependencies = [

 [[package]]
 name = "atomic"
-version = "0.6.0"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
+checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
 dependencies = [
  "bytemuck",
 ]
@@ -221,15 +230,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"

 [[package]]
 name = "autocfg"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"

 [[package]]
 name = "axum"
-version = "0.8.4"
+version = "0.8.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5"
+checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425"
 dependencies = [
  "axum-core",
  "bytes",
@@ -246,8 +255,7 @@ dependencies = [
  "mime",
  "percent-encoding",
  "pin-project-lite",
- "rustversion",
- "serde",
+ "serde_core",
  "serde_json",
  "serde_path_to_error",
  "serde_urlencoded",
@@ -261,9 +269,9 @@ dependencies = [

 [[package]]
 name = "axum-core"
-version = "0.5.2"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6"
+checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22"
 dependencies = [
  "bytes",
  "futures-core",
@@ -272,7 +280,6 @@ dependencies = [
  "http-body-util",
  "mime",
  "pin-project-lite",
- "rustversion",
  "sync_wrapper",
  "tower-layer",
  "tower-service",
 ]
@@ -281,17 +288,17 @@

 [[package]]
 name = "backtrace"
-version = "0.3.74"
+version = "0.3.76"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
 dependencies = [
  "addr2line",
  "cfg-if",
  "libc",
  "miniz_oxide",
- "object",
+ "object 0.37.3",
  "rustc-demangle",
- "windows-targets 0.52.6",
+ "windows-link",
 ]
@@ -365,9 +372,9 @@ dependencies = [

 [[package]]
 name = "bumpalo"
-version = "3.19.0"
+version = "3.19.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
 dependencies = [
  "serde",
 ]
@@ -400,9 +407,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"

 [[package]]
 name = "bytes"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
+checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
 dependencies = [
  "serde",
 ]
@@ -415,19 +422,20 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

 [[package]]
 name = "castaway"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5"
+checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a"
 dependencies = [
  "rustversion",
 ]

 [[package]]
 name = "cc"
-version = "1.2.19"
+version = "1.2.49"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e3a13707ac958681c13b39b458c073d0d9bc8a22cb1b2f4c8e55eb72c13f362"
+checksum = "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215"
 dependencies = [
+ "find-msvc-tools",
  "jobserver",
  "libc",
  "shlex",
@@ -441,9 +449,9 @@ checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"

 [[package]]
 name = "cfg-if"
-version = "1.0.0"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"

 [[package]]
 name = "cfg_aliases"
@@ -462,7 +470,7 @@ dependencies = [
  "num-traits",
  "serde",
  "wasm-bindgen",
- "windows-link 0.2.0",
+ "windows-link",
 ]

 [[package]]
@@ -538,9 +546,9 @@ dependencies = [

 [[package]]
 name = "clap_lex"
-version = "0.7.4"
+version = "0.7.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
+checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d"

 [[package]]
 name = "cliclack"
@@ -585,9 +593,9 @@ dependencies = [

 [[package]]
 name = "colorchoice"
-version = "1.0.3"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"

 [[package]]
 name = "combine"
@@ -601,9 +609,9 @@ dependencies = [

 [[package]]
 name = "comfy-table"
-version = "7.1.4"
+version = "7.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a"
+checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b"
 dependencies = [
  "crossterm",
  "unicode-segmentation",
@@ -640,15 +648,15 @@ dependencies = [

 [[package]]
 name = "console"
-version = "0.16.1"
+version = "0.16.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b430743a6eb14e9764d4260d4c0d8123087d504eeb9c48f2b2a5e810dd369df4"
+checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4"
 dependencies = [
  "encode_unicode",
  "libc",
  "once_cell",
  "unicode-width",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
 ]

 [[package]]
@@ -663,9 +671,9 @@ dependencies = [

 [[package]]
 name = "core-foundation"
-version = "0.10.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
 dependencies = [
  "core-foundation-sys",
  "libc",
@@ -699,9 +707,9 @@ dependencies = [

 [[package]]
 name = "crc32fast"
-version = "1.4.2"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
 dependencies = [
  "cfg-if",
 ]
@@ -787,14 +795,15 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"

 [[package]]
 name = "crossterm"
-version = "0.28.1"
+version = "0.29.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6"
+checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
 dependencies = [
  "bitflags",
  "crossterm_winapi",
+ "document-features",
  "parking_lot",
- "rustix 0.38.44",
+ "rustix",
  "winapi",
 ]
@@ -837,9 +846,9 @@ checksum = "85d3cef41d236720ed453e102153a53e4cc3d2fde848c0078a50cf249e8e3e5b"

 [[package]]
 name = "deranged"
-version = "0.4.1"
+version = "0.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058"
+checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
 dependencies = [
  "powerfmt",
 ]
@@ -893,7 +902,7 @@ dependencies = [
  "libc",
  "option-ext",
  "redox_users 0.5.2",
- "windows-sys 0.61.1",
+ "windows-sys 0.61.2",
 ]

 [[package]]
@@ -907,6 +916,15 @@ dependencies = [
  "syn",
 ]

+[[package]]
+name = "document-features"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
+dependencies = [
+ "litrs",
+]
+
 [[package]]
 name = "dotenvy"
 version = "0.15.7"
@@ -924,15 +942,15 @@ dependencies = [

 [[package]]
 name = "dyn-clone"
-version = "1.0.19"
+version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005"
+checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"

 [[package]]
 name = "either"
-version = "1.13.0"
+version = "1.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"

 [[package]]
 name = "encode_unicode"
@@ -969,12 +987,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"

 [[package]]
 name = "errno"
-version = "0.3.10"
+version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]

 [[package]]
@@ -1024,9 +1042,9 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"

 [[package]]
 name = "faststr"
-version = "0.2.30"
+version = "0.2.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "403ebc0cd0c6dbff1cae7098168eff6bac83fad5928b6e91f29388b8dbb61653"
+checksum = "baec6a0289d7f1fe5665586ef7340af82e3037207bef60f5785e57569776f0c8"
 dependencies = [
  "bytes",
  "rkyv",
@@ -1034,11 +1052,17 @@ dependencies = [
  "simdutf8",
 ]

+[[package]]
+name = "find-msvc-tools"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
+
 [[package]]
 name = "flate2"
-version = "1.1.2"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
+checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -1078,6 +1102,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "foreign-types" version = "0.3.2" @@ -1095,9 +1125,9 @@ checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] name = "form_urlencoded" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" dependencies = [ "percent-encoding", ] @@ -1199,23 +1229,24 @@ dependencies = [ [[package]] name = "generator" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" +checksum = "52f04ae4152da20c76fe800fa48659201d5cf627c5149ca0b707b69d7eef6cf9" dependencies = [ "cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.61.3", + "windows-link", + "windows-result 0.4.1", ] [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" dependencies = [ "serde", "typenum", @@ -1224,48 +1255,48 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", "wasm-bindgen", ] [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.13.3+wasi-0.2.2", + "r-efi", + "wasip2", "wasm-bindgen", - "windows-targets 0.52.6", ] [[package]] name = "gimli" -version = "0.31.1" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" [[package]] name = "glob" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "h2" -version = "0.4.8" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5017294ff4bb30944501348f6f8e42e6ad28f42c8bbef7a74029aff064a4e3c2" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" dependencies = [ 
"atomic-waker", "bytes", @@ -1288,7 +1319,7 @@ checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", - "zerocopy 0.8.27", + "zerocopy", ] [[package]] @@ -1315,17 +1346,28 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", "rayon", "serde", ] +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] + [[package]] name = "heck" version = "0.5.0" @@ -1419,7 +1461,7 @@ dependencies = [ "helix-db", "helix-macros", "inventory", - "rand 0.9.1", + "rand 0.9.2", "serde", "serde_json", "sonic-rs", @@ -1438,10 +1480,12 @@ dependencies = [ "bincode", "bumpalo", "bytemuck", + "byteorder", "chrono", "core_affinity", "criterion", "flume", + "hashbrown 0.16.1", "heed3", "helix-macros", "helix-metrics", @@ -1449,22 +1493,30 @@ dependencies = [ "itertools 0.14.0", "lazy_static", "loom", + "madvise", "mimalloc", + "min-max-heap", "num_cpus", + "page_size", + "papaya", "paste", "pest", "pest_derive", "polars", "proptest", - "rand 0.9.1", + "rand 0.9.2", "rayon", "reqwest", + "roaring", + "rustc-hash", "serde", + "serde_json", "sha2", "sonic-rs", "subtle", "tempfile", - "thiserror 2.0.12", + "thiserror 2.0.17", + "tinyvec", "tokio", "tokio-test", "tokio-util", @@ -1513,11 +1565,11 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -1536,12 +1588,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -1582,13 +1633,14 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "1.6.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +checksum = "2ab2d4f250c3d7b1c9fcdff1cece94ea4e2dfbec68614f7b87cb205f24ca9d11" dependencies = [ + "atomic-waker", "bytes", "futures-channel", - "futures-util", + "futures-core", "h2", "http", "http-body", @@ -1596,6 +1648,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", + "pin-utils", "smallvec", "tokio", "want", @@ -1603,11 +1656,10 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.5" +version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" +checksum = 
"e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "futures-util", "http", "hyper", "hyper-util", @@ -1618,7 +1670,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots 0.26.11", + "webpki-roots", ] [[package]] @@ -1652,9 +1704,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.16" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e" +checksum = "727805d60e7938b76b826a6ef209eb70eaa1812794f9424d4a4e2d740662df5f" dependencies = [ "base64", "bytes", @@ -1678,16 +1730,17 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", - "windows-core 0.52.0", + "windows-core 0.62.2", ] [[package]] @@ -1701,21 +1754,22 @@ dependencies = [ [[package]] name = "icu_collections" -version = "1.5.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", + "potential_utf", "yoke", "zerofrom", "zerovec", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "icu_locale_core" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -1724,104 +1778,66 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_locid_transform" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", -] - -[[package]] -name = "icu_locid_transform_data" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" - [[package]] name = "icu_normalizer" -version = "1.5.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", "icu_provider", "smallvec", - "utf16_iter", - "utf8_iter", - "write16", "zerovec", ] [[package]] name = "icu_normalizer_data" -version = "1.5.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "1.5.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" dependencies = [ - "displaydoc", "icu_collections", - "icu_locid_transform", + "icu_locale_core", "icu_properties_data", "icu_provider", - "tinystr", + "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "1.5.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" [[package]] name = "icu_provider" -version = "1.5.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", - "icu_locid", - "icu_provider_macros", - "stable_deref_trait", - "tinystr", + "icu_locale_core", "writeable", "yoke", "zerofrom", + "zerotrie", "zerovec", ] -[[package]] -name = "icu_provider_macros" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "idna" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" dependencies = [ "idna_adapter", "smallvec", @@ -1830,9 +1846,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" dependencies = [ "icu_normalizer", "icu_properties", @@ -1846,13 +1862,14 @@ checksum = "964de6e86d545b246d84badc0fef527924ace5134f30641c203ef52ba83f58d5" [[package]] name = "indexmap" -version = "2.10.0" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown 0.15.2", + "hashbrown 0.16.1", "serde", + "serde_core", ] [[package]] @@ -1874,7 +1891,7 @@ version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" dependencies = [ - "console 0.16.1", + "console 0.16.2", "portable-atomic", "unicode-width", "unit-prefix", @@ -1883,24 +1900,13 @@ dependencies = [ [[package]] name = "inventory" -version = "0.3.19" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b12ebb6799019b044deaf431eadfe23245b259bba5a2c0796acec3943a3cdb" +checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" dependencies = [ "rustversion", ] -[[package]] -name = "io-uring" -version = "0.7.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" -dependencies = [ - "bitflags", - "cfg-if", - "libc", -] - [[package]] name = "iota" version 
= "0.2.3" @@ -1915,9 +1921,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -1940,7 +1946,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -1955,9 +1961,9 @@ dependencies = [ [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -2007,18 +2013,19 @@ checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" [[package]] name = "jobserver" -version = "0.1.32" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ + "getrandom 0.3.4", "libc", ] [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8" dependencies = [ "once_cell", "wasm-bindgen", @@ -2058,9 +2065,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.178" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091" [[package]] name = "libm" @@ -2080,9 +2087,9 @@ dependencies = [ [[package]] name = "libredox" -version = "0.1.4" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638" +checksum = "df15f6eac291ed1cf25865b1ee60399f57e7c227e7f51bdbd4c5270396a9ed50" dependencies = [ "bitflags", "libc", @@ -2090,30 +2097,30 @@ dependencies = [ [[package]] name = "libz-rs-sys" -version = "0.5.1" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "172a788537a2221661b480fee8dc5f96c580eb34fa88764d3205dc356c7e4221" +checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c" dependencies = [ "zlib-rs", ] [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] -name = "linux-raw-sys" -version = "0.9.4" +name = "litemap" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" +checksum = 
"6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] -name = "litemap" -version = "0.7.4" +name = "litrs" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" [[package]] name = "lmdb-master3-sys" @@ -2128,19 +2135,18 @@ dependencies = [ [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.25" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "loom" @@ -2180,6 +2186,15 @@ dependencies = [ "libc", ] +[[package]] +name = "madvise" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e1e75c3c34c2b34cec9f127418cb35240c7ebee5de36a51437e6b382c161b86" +dependencies = [ + "libc", +] + [[package]] name = "matchers" version = "0.2.0" @@ -2197,15 +2212,15 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memmap2" -version = "0.9.5" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" dependencies = [ "libc", ] @@ -2225,6 +2240,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "min-max-heap" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2687e6cf9c00f48e9284cf9fd15f2ef341d03cc7743abf9df4c5f07fdee50b18" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -2233,38 +2254,39 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] name = "mio" -version = "1.0.3" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" dependencies = [ "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "wasi", + "windows-sys 0.61.2", ] [[package]] name = "munge" -version = "0.4.1" +version = "0.4.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "64142d38c84badf60abf06ff9bd80ad2174306a5b11bd4706535090a30a419df" +checksum = "5e17401f259eba956ca16491461b6e8f72913a0a114e39736ce404410f915a0c" dependencies = [ "munge_macro", ] [[package]] name = "munge_macro" -version = "0.4.1" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bb5c1d8184f13f7d0ccbeeca0def2f9a181bce2624302793005f5ca8aa62e5e" +checksum = "4568f25ccbd45ab5d5603dc34318c1ec56b117531781260002151b8530a9f931" dependencies = [ "proc-macro2", "quote", @@ -2277,7 +2299,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a51313c5820b0b02bd422f4b44776fbf47961755c74ce64afc73bfad10226c3" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] @@ -2333,11 +2355,11 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.50.1" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -2418,9 +2440,18 @@ dependencies = [ [[package]] name = "object" -version = "0.36.7" +version = "0.32.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" dependencies = [ "memchr", ] @@ -2467,9 +2498,15 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.20.3" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" @@ -2490,9 +2527,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.72" +version = "0.10.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" +checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" dependencies = [ "bitflags", "cfg-if", @@ -2522,9 +2559,9 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.107" +version = "0.9.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8288979acd84749c744a9014b4382d42b8f7b2592847b5afb2ed29e5d16ede07" +checksum = "82cab2d520aa75e3c58898289429321eb788c3106963d0dc886ec7a5f4adc321" dependencies = [ "cc", "libc", @@ -2540,9 +2577,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "owo-colors" -version = "4.2.2" +version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" +checksum = 
"9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" [[package]] name = "page_size" @@ -2554,11 +2591,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "papaya" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92dd0b07c53a0a0c764db2ace8c541dc47320dad97c2200c2a637ab9dd2328f" +dependencies = [ + "equivalent", + "seize", +] + [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -2566,15 +2613,15 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -2591,36 +2638,35 @@ checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" [[package]] name = "pem" -version = "3.0.5" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38af38e8470ac9dee3ce1bae1af9c1671fffc44ddfd8bd1d0a3445bf349a8ef3" +checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ "base64", - "serde", + "serde_core", ] [[package]] name = "percent-encoding" -version = "2.3.1" +version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pest" -version = "2.8.0" +version = "2.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "198db74531d58c70a361c42201efde7e2591e976d518caf7662a47dc5720e7b6" +checksum = "cbcfd20a6d4eeba40179f05735784ad32bdaef05ce8e8af05f180d45bb3e7e22" dependencies = [ "memchr", - "thiserror 2.0.12", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.8.0" +version = "2.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d725d9cfd79e87dccc9341a2ef39d1b6f6353d68c4b33c177febbe1a402c97c5" +checksum = "51f72981ade67b1ca6adc26ec221be9f463f2b5839c7508998daa17c23d94d7f" dependencies = [ "pest", "pest_generator", @@ -2628,9 +2674,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.0" +version = "2.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db7d01726be8ab66ab32f9df467ae8b1148906685bbe75c82d1e65d7f5b3f841" +checksum = "dee9efd8cdb50d719a80088b76f81aec7c41ed6d522ee750178f83883d271625" dependencies = [ "pest", "pest_meta", @@ -2641,11 +2687,10 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.8.0" +version = "2.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9f832470494906d1fca5329f8ab5791cc60beb230c74815dff541cbd2b5ca0" +checksum = "bf1d70880e76bdc13ba52eafa6239ce793d85c8e43896507e43dd8984ff05b82" dependencies = [ - "once_cell", "pest", "sha2", ] @@ -2791,7 +2836,7 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"72571dde488ecccbe799798bf99ab7308ebdb7cf5d95bcc498dbd5a132f0da4d" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "polars-arrow", "polars-core", "polars-error", @@ -2819,8 +2864,8 @@ dependencies = [ "dyn-clone", "either", "ethnum", - "getrandom 0.2.15", - "hashbrown 0.15.2", + "getrandom 0.2.16", + "hashbrown 0.15.5", "itoa", "lz4", "num-traits", @@ -2882,7 +2927,7 @@ dependencies = [ "comfy-table", "either", "hashbrown 0.14.5", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "indexmap", "itoa", "num-traits", @@ -2898,7 +2943,7 @@ dependencies = [ "rayon", "regex", "strum_macros", - "thiserror 2.0.12", + "thiserror 2.0.17", "version_check", "xxhash-rust", ] @@ -2912,7 +2957,7 @@ dependencies = [ "polars-arrow-format", "regex", "simdutf8", - "thiserror 2.0.12", + "thiserror 2.0.17", ] [[package]] @@ -2923,7 +2968,7 @@ checksum = "c8e639991a8ad4fb12880ab44bcc3cf44a5703df003142334d9caf86d77d77e7" dependencies = [ "ahash", "bitflags", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "num-traits", "once_cell", "polars-arrow", @@ -2953,7 +2998,7 @@ dependencies = [ "fast-float2", "futures", "glob", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "home", "itoa", "memchr", @@ -2987,7 +3032,7 @@ dependencies = [ "ahash", "chrono", "fallible-streaming-iterator", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "indexmap", "itoa", "num-traits", @@ -3060,7 +3105,7 @@ dependencies = [ "chrono", "chrono-tz", "either", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "hex", "indexmap", "jsonpath_lib_polars_vendor", @@ -3098,7 +3143,7 @@ dependencies = [ "ethnum", "flate2", "futures", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "lz4", "num-traits", "polars-arrow", @@ -3132,7 +3177,7 @@ dependencies = [ "crossbeam-queue", "enum_dispatch", "futures", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "num-traits", "once_cell", "polars-arrow", @@ -3162,7 +3207,7 @@ dependencies = [ "chrono", "chrono-tz", "either", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "memmap2", "num-traits", "once_cell", @@ -3293,7 +3338,7 @@ dependencies = [ "bytemuck", "bytes", "compact_str", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "indexmap", "libc", "memmap2", @@ -3314,6 +3359,15 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +[[package]] +name = "potential_utf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" +dependencies = [ + "zerovec", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -3322,18 +3376,18 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ - "zerocopy 0.7.35", + "zerocopy", ] [[package]] name = "proc-macro2" -version = "1.0.95" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] @@ -3348,7 +3402,7 @@ dependencies = [ "bit-vec", "bitflags", "num-traits", - "rand 0.9.1", + "rand 0.9.2", "rand_chacha 0.9.0", "rand_xorshift", 
"regex-syntax", @@ -3359,27 +3413,28 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" dependencies = [ + "ar_archive_writer", "cc", ] [[package]] name = "ptr_meta" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9e76f66d3f9606f44e45598d155cb13ecf09f4a28199e48daf8c8fc937ea90" +checksum = "0b9a0cf95a1196af61d4f1cbdab967179516d9a4a4312af1f31948f8f6224a79" dependencies = [ "ptr_meta_derive", ] [[package]] name = "ptr_meta_derive" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca414edb151b4c8d125c12566ab0d74dc9cdba36fb80eb7b848c15f495fd32d1" +checksum = "7347867d0a7e1208d93b46767be83e2b8f978c3dad35f775ac8d8847551d6fe1" dependencies = [ "proc-macro2", "quote", @@ -3415,7 +3470,7 @@ dependencies = [ "rustc-hash", "rustls", "socket2", - "thiserror 2.0.12", + "thiserror 2.0.17", "tokio", "tracing", "web-time", @@ -3428,15 +3483,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" dependencies = [ "bytes", - "getrandom 0.3.1", + "getrandom 0.3.4", "lru-slab", - "rand 0.9.1", + "rand 0.9.2", "ring", "rustc-hash", "rustls", "rustls-pki-types", "slab", - "thiserror 2.0.12", + "thiserror 2.0.17", "tinyvec", "tracing", "web-time", @@ -3453,23 +3508,29 @@ dependencies = [ "once_cell", "socket2", "tracing", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.40" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "rancor" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf5f7161924b9d1cea0e4cabc97c372cea92b5f927fc13c6bca67157a0ad947" +checksum = "a063ea72381527c2a0561da9c80000ef822bdd7c3241b1cc1b12100e3df081ee" dependencies = [ "ptr_meta", ] @@ -3487,12 +3548,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.1", + "rand_core 0.9.3", ] [[package]] @@ -3512,7 +3573,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.1", + "rand_core 0.9.3", ] [[package]] @@ -3521,17 +3582,16 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] name = "rand_core" -version 
= "0.9.1" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a88e0da7a2c97baa202165137c158d0a2e824ac465d13d81046727b34cb247d3" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ - "getrandom 0.3.1", - "zerocopy 0.8.27", + "getrandom 0.3.4", ] [[package]] @@ -3550,14 +3610,14 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" dependencies = [ - "rand_core 0.9.1", + "rand_core 0.9.3", ] [[package]] name = "raw-cpuid" -version = "11.5.0" +version = "11.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" dependencies = [ "bitflags", ] @@ -3604,9 +3664,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ "bitflags", ] @@ -3617,7 +3677,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "libredox", "thiserror 1.0.69", ] @@ -3628,25 +3688,25 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "libredox", - "thiserror 2.0.12", + "thiserror 2.0.17", ] [[package]] name = "ref-cast" -version = "1.0.23" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf0a6f84d5f1d581da8b41b47ec8600871962f2a528115b542b362d4b744931" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.23" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", @@ -3655,9 +3715,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.2" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -3667,9 +3727,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.9" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -3678,21 +3738,21 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +checksum 
= "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "rend" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a35e8a6bf28cd121053a66aa2e6a2e3eaffad4a60012179f0e864aa5ffeff215" +checksum = "cadadef317c2f20755a64d7fdc48f9e7178ee6b0e1f7fce33fa60f1d68a276e6" [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "3b4c14b2d9afca6a60277086b0cc6a6ae0b568f6f7916c943a8cdc79f8be240f" dependencies = [ "base64", "bytes", @@ -3733,7 +3793,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.2", + "webpki-roots", ] [[package]] @@ -3760,7 +3820,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.15", + "getrandom 0.2.16", "libc", "untrusted", "windows-sys 0.52.0", @@ -3768,12 +3828,12 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.8.10" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e147371c75553e1e2fcdb483944a8540b8438c31426279553b9a8182a9b7b65" +checksum = "35a640b26f007713818e9a9b65d34da1cf58538207b052916a83d80e43f3ffa4" dependencies = [ "bytes", - "hashbrown 0.15.2", + "hashbrown 0.15.5", "indexmap", "munge", "ptr_meta", @@ -3786,20 +3846,30 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.8.10" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "246b40ac189af6c675d124b802e8ef6d5246c53e17367ce9501f8f66a81abb7a" +checksum = "bd83f5f173ff41e00337d97f6572e416d022ef8a19f371817259ae960324c482" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "roaring" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" +dependencies = [ + "bytemuck", + "byteorder", +] + [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" [[package]] name = "rustc-hash" @@ -3809,35 +3879,22 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" [[package]] name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - -[[package]] -name = "rustix" -version = "1.0.5" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf" +checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys 0.9.4", - "windows-sys 0.59.0", + "linux-raw-sys", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.25" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822ee9188ac4ec04a2f0531e55d035fb2de73f18b41a63c70c2712503b6fb13c" +checksum = 
"533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "log", "once_cell", @@ -3850,30 +3907,31 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.2.0", + "security-framework 3.5.1", ] [[package]] name = "rustls-pki-types" -version = "1.11.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +checksum = "21e6f2ab2928ca4291b86736a8bd920a277a399bba1589409d72154ff87c1282" dependencies = [ "web-time", + "zeroize", ] [[package]] name = "rustls-webpki" -version = "0.103.1" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fef8b8769aaccf73098557a87cd1816b4f9c7c16811c9c77142aa695c16f2c03" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "ring", "rustls-pki-types", @@ -3882,9 +3940,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rusty-fork" @@ -3915,11 +3973,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3958,12 +4016,12 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.2.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ "bitflags", - "core-foundation 0.10.0", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -3971,14 +4029,24 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.14.0" +version = "2.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" +checksum = "cc1f0cbffaac4852523ce30d8bd3c5cdc873501d96ff467ca09b6767bb8cd5c0" dependencies = [ "core-foundation-sys", "libc", ] +[[package]] +name = "seize" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b55fb86dfd3a2f5f76ea78310a88f96c4ea21a3031f8d212443d56123fd0521" +dependencies = [ + "libc", + "windows-sys 0.61.2", +] + [[package]] name = "self-replace" version = "1.5.0" @@ -4011,9 +4079,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.26" +version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" +checksum = 
"d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" [[package]] name = "serde" @@ -4047,34 +4115,36 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.143" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "indexmap", "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] name = "serde_path_to_error" -version = "0.1.17" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a" +checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457" dependencies = [ "itoa", "serde", + "serde_core", ] [[package]] name = "serde_spanned" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83" +checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" dependencies = [ - "serde", + "serde_core", ] [[package]] @@ -4102,9 +4172,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.8" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures", @@ -4128,13 +4198,19 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.2" +version = "1.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +checksum = "7664a098b8e616bdfcc2dc0e9ac44eb231eedf41db4e9fe95d8d32ec728dedad" dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" + [[package]] name = "simd-json" version = "0.14.3" @@ -4142,7 +4218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa2bcf6c6e164e81bc7a5d49fc6988b3d515d9e8c07457d7b74ffb9324b9cd40" dependencies = [ "ahash", - "getrandom 0.2.15", + "getrandom 0.2.16", "halfbrown", "once_cell", "ref-cast", @@ -4166,7 +4242,7 @@ checksum = "297f631f50729c8c99b84667867963997ec0b50f32b2a7dbcab828ef0541e8bb" dependencies = [ "num-bigint", "num-traits", - "thiserror 2.0.12", + "thiserror 2.0.17", "time", ] @@ -4178,27 +4254,24 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "slab" -version = "0.4.9" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] +checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] name = "slotmap" -version = "1.0.7" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a" +checksum = "bdd58c3c93c3d278ca835519292445cb4b0d4dc59ccfdf7ceadaab3f8aeb4038" dependencies = [ "version_check", ] [[package]] name = "smallvec" -version = "1.14.0" 
+version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "smawk" @@ -4208,18 +4281,18 @@ checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" [[package]] name = "snafu" -version = "0.8.6" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320b01e011bf8d5d7a4a4a4be966d9160968935849c83b918827f6a435e7f627" +checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" dependencies = [ "snafu-derive", ] [[package]] name = "snafu-derive" -version = "0.8.6" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1961e2ef424c1424204d3a5d6975f934f56b6d50ff5732382d84ebf460e147f7" +checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" dependencies = [ "heck", "proc-macro2", @@ -4235,12 +4308,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -4254,9 +4327,9 @@ dependencies = [ [[package]] name = "sonic-rs" -version = "0.5.3" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd1adc42def3cb101f3ebef3cd2d642f9a21072bbcd4ec9423343ccaa6afa596" +checksum = "4425ea8d66ec950e0a8f2ef52c766cc3d68d661d9a0845c353c40833179fd866" dependencies = [ "ahash", "bumpalo", @@ -4270,14 +4343,14 @@ dependencies = [ "simdutf8", "sonic-number", "sonic-simd", - "thiserror 2.0.12", + "thiserror 2.0.17", ] [[package]] name = "sonic-simd" -version = "0.1.0" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940a24e82c9a97483ef66cef06b92160a8fa5cd74042c57c10b24d99d169d2fc" +checksum = "5707edbfb34a40c9f2a55fa09a49101d9fec4e0cc171ce386086bd9616f34257" dependencies = [ "cfg-if", ] @@ -4302,15 +4375,15 @@ dependencies = [ [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", "cfg-if", @@ -4373,9 +4446,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.104" +version = "2.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +checksum = "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87" dependencies = [ "proc-macro2", "quote", @@ -4402,9 +4475,9 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.13.1" +version = "0.13.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", @@ -4421,7 +4494,7 @@ dependencies = [ "libc", "memchr", "ntapi", - "windows 0.57.0", + "windows", ] [[package]] @@ -4452,10 +4525,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.4", "once_cell", - "rustix 1.0.5", - "windows-sys 0.61.1", + "rustix", + "windows-sys 0.61.2", ] [[package]] @@ -4480,11 +4553,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.12" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" dependencies = [ - "thiserror-impl 2.0.12", + "thiserror-impl 2.0.17", ] [[package]] @@ -4500,9 +4573,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.12" +version = "2.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", @@ -4520,9 +4593,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.41" +version = "0.3.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40" +checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" dependencies = [ "deranged", "itoa", @@ -4535,15 +4608,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.4" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c" +checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" [[package]] name = "time-macros" -version = "0.2.22" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49" +checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" dependencies = [ "num-conv", "time-core", @@ -4551,9 +4624,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.7.6" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -4571,9 +4644,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "022db8904dfa342efe721985167e9fcd16c29b226db4397ed752a761cfce81e8" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" dependencies = [ "tinyvec_macros", ] @@ -4586,29 +4659,26 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.47.1" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", "socket2", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", @@ -4627,9 +4697,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.2" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ "rustls", "tokio", @@ -4673,9 +4743,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.15" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -4687,12 +4757,12 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.5" +version = "0.9.9+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75129e1dc5000bfbaa9fee9d1b21f974f9fbad9daec557a521ee6e080825f6e8" +checksum = "eb5238e643fc34a1d5d7e753e1532a91912d74b63b92b3ea51fde8d1b7bc79dd" dependencies = [ "indexmap", - "serde", + "serde_core", "serde_spanned", "toml_datetime", "toml_parser", @@ -4702,27 +4772,27 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.0" +version = "0.7.4+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3" +checksum = "fe3cea6b2aa3b910092f6abd4053ea464fab5f9c170ba5e9a6aead16ec4af2b6" dependencies = [ - "serde", + "serde_core", ] [[package]] name = "toml_parser" -version = "1.0.2" +version = "1.0.5+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b551886f449aa90d4fe2bdaa9f4a2577ad2dde302c61ecf262d80b116db95c10" +checksum = "4c03bee5ce3696f31250db0bbaff18bc43301ce0e8db2ed1f07cbb2acf89984c" dependencies = [ "winnow", ] [[package]] name = "toml_writer" -version = "1.0.2" +version = "1.0.5+spec-1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64" +checksum = "a9cd6190959dce0994aa8970cd32ab116d1851ead27e866039acaf2524ce44fa" [[package]] name = "tower" @@ -4743,9 +4813,9 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ "bitflags", "bytes", @@ -4774,9 +4844,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647" dependencies = [ "log", "pin-project-lite", @@ -4786,9 +4856,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -4797,9 +4867,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c" dependencies = [ "once_cell", "valuable", @@ -4828,9 +4898,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" dependencies = [ "matchers", "nu-ansi-term", @@ -4861,26 +4931,26 @@ dependencies = [ "http", "httparse", "log", - "rand 0.9.1", + "rand 0.9.2", "sha1", - "thiserror 2.0.12", + "thiserror 2.0.17", "utf-8", ] [[package]] name = "twox-hash" -version = "2.1.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" dependencies = [ - "rand 0.8.5", + "rand 0.9.2", ] [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "ucd-trie" @@ -4896,9 +4966,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" -version = "1.0.17" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-linebreak" @@ -4908,9 +4978,9 @@ checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] @@ -4932,9 +5002,9 @@ checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" [[package]] name = "unicode-width" -version = "0.2.0" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unit-prefix" @@ -4950,9 +5020,9 @@ checksum = 
"8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.4" +version = "2.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" +checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" dependencies = [ "form_urlencoded", "idna", @@ -4972,12 +5042,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" -[[package]] -name = "utf16_iter" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -4992,15 +5056,15 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "e2e054861b4bd027cd373e18e8d8d8e6548085000e41290d95ce0c373a654b4a" dependencies = [ "atomic", - "getrandom 0.3.1", + "getrandom 0.3.4", "js-sys", - "rand 0.9.1", - "serde", + "rand 0.9.2", + "serde_core", "wasm-bindgen", ] @@ -5064,50 +5128,37 @@ dependencies = [ [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] -name = "wasi" -version = "0.13.3+wasi-0.2.2" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen-rt", + "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "836d9622d604feee9e5de25ac10e3ea5f2d65b41eac0d9ce72eb5deae707ce7c" dependencies = [ "cfg-if", "js-sys", @@ -5118,9 +5169,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3" dependencies = [ "quote", 
"wasm-bindgen-macro-support", @@ -5128,22 +5179,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4" dependencies = [ "unicode-ident", ] @@ -5163,9 +5214,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "9b32828d774c412041098d182a8b38b16ea816958e07cf40eec2bc080ae137ac" dependencies = [ "js-sys", "wasm-bindgen", @@ -5188,7 +5239,7 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00f1243ef785213e3a32fa0396093424a3a6ea566f9948497e5a2309261a4c97" dependencies = [ - "core-foundation 0.10.0", + "core-foundation 0.10.1", "jni", "log", "ndk-context", @@ -5200,18 +5251,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" -dependencies = [ - "webpki-roots 1.0.2", -] - -[[package]] -name = "webpki-roots" -version = "1.0.2" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" dependencies = [ "rustls-pki-types", ] @@ -5238,7 +5280,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.1", + "windows-sys 0.61.2", ] [[package]] @@ -5257,37 +5299,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows" -version = "0.61.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" -dependencies = [ - "windows-collections", - "windows-core 0.61.2", - "windows-future", - "windows-link 0.1.3", - "windows-numerics", -] - -[[package]] -name = "windows-collections" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" -dependencies = [ - "windows-core 0.61.2", -] - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-core" version = "0.57.0" @@ -5302,28 +5313,17 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement 0.60.2", "windows-interface 0.59.3", - "windows-link 0.1.3", - "windows-result 0.3.4", + "windows-link", + "windows-result 0.4.1", "windows-strings", ] -[[package]] -name = "windows-future" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" -dependencies = [ - "windows-core 0.61.2", - "windows-link 0.1.3", - "windows-threading", -] - [[package]] name = "windows-implement" version = "0.57.0" @@ -5370,34 +5370,18 @@ dependencies = [ [[package]] name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - -[[package]] -name = "windows-link" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" - -[[package]] -name = "windows-numerics" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" -dependencies = [ - "windows-core 0.61.2", - "windows-link 0.1.3", -] +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-registry" -version = "0.5.3" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" dependencies = [ - "windows-link 0.1.3", - "windows-result 0.3.4", + "windows-link", + "windows-result 0.4.1", "windows-strings", ] @@ -5412,20 +5396,20 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -5466,11 +5450,20 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.61.1" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f109e41dd4a3c848907eb83d5a42ea98b3769495597450cf6d153507b166f0f" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.0", + "windows-link", ] [[package]] @@ -5512,7 +5505,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", 
"windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -5520,12 +5513,20 @@ dependencies = [ ] [[package]] -name = "windows-threading" -version = "0.1.0" +name = "windows-targets" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.1.3", + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -5546,6 +5547,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -5564,6 +5571,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -5582,12 +5595,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -5606,6 +5631,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -5624,6 +5655,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -5642,6 +5679,12 @@ version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -5661,31 +5704,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] -name = "winnow" -version = "0.7.13" +name = "windows_x86_64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] -name = "wit-bindgen-rt" -version = "0.33.0" +name = "winnow" +version = "0.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" -dependencies = [ - "bitflags", -] +checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" [[package]] -name = "write16" -version = "1.0.0" +name = "wit-bindgen" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.5.5" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "xxhash-rust" @@ -5695,11 +5735,10 @@ checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "yoke" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -5707,9 +5746,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.5" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", @@ -5719,39 +5758,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3" dependencies = [ - "byteorder", - "zerocopy-derive 0.7.35", -] - -[[package]] -name = "zerocopy" -version = "0.8.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" -dependencies = [ - "zerocopy-derive 0.8.27", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a" dependencies = [ "proc-macro2", "quote", @@ -5760,18 +5778,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", @@ -5781,9 +5799,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" dependencies = [ "zeroize_derive", ] @@ -5799,11 +5817,22 @@ dependencies = [ "syn", ] +[[package]] +name = "zerotrie" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + [[package]] name = "zerovec" -version = "0.10.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -5812,9 +5841,9 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.10.3" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", @@ -5823,9 +5852,9 @@ dependencies = [ [[package]] name = "zlib-rs" -version = "0.5.1" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626bd9fa9734751fc50d6060752170984d7053f5a39061f524cda68023d4db8a" +checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235" [[package]] name = "zstd" @@ -5838,18 +5867,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.1" +version = "7.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.13+zstd.1.5.6" +version = "2.0.16+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +checksum = 
"91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748" dependencies = [ "cc", "pkg-config", diff --git a/helix-cli/src/cleanup.rs b/helix-cli/src/cleanup.rs index 7c6ef7a2..64208edc 100644 --- a/helix-cli/src/cleanup.rs +++ b/helix-cli/src/cleanup.rs @@ -71,8 +71,7 @@ impl CleanupTracker { }; // Step 1: Restore config from in-memory backup if modified - if let (Some(original_config), Some(config_path)) = - (self.original_config, self.config_path) + if let (Some(original_config), Some(config_path)) = (self.original_config, self.config_path) { match original_config.save_to_file(&config_path) { Ok(_) => { diff --git a/helix-cli/src/commands/build.rs b/helix-cli/src/commands/build.rs index e8d7ab42..0078dc79 100644 --- a/helix-cli/src/commands/build.rs +++ b/helix-cli/src/commands/build.rs @@ -5,9 +5,9 @@ use crate::metrics_sender::MetricsSender; use crate::project::{ProjectContext, get_helix_repo_cache}; use crate::prompts; use crate::utils::{ - copy_dir_recursive_excluding, diagnostic_source, - helixc_utils::{collect_hx_contents, collect_hx_files}, - print_confirm, print_error, print_status, print_success, print_warning, Spinner, + Spinner, copy_dir_recursive_excluding, diagnostic_source, helixc_utils::collect_hx_contents, + helixc_utils::collect_hx_files, print_confirm, print_error, print_status, print_success, + print_warning, }; use eyre::Result; use std::time::Instant; @@ -262,7 +262,10 @@ fn update_git_cache(repo_cache: &std::path::Path) -> Result<()> { Ok(()) } -pub(crate) async fn prepare_instance_workspace(project: &ProjectContext, instance_name: &str) -> Result<()> { +pub(crate) async fn prepare_instance_workspace( + project: &ProjectContext, + instance_name: &str, +) -> Result<()> { print_status( "PREPARE", &format!("Preparing workspace for '{instance_name}'"), @@ -292,7 +295,10 @@ pub(crate) async fn prepare_instance_workspace(project: &ProjectContext, instanc Ok(()) } -pub(crate) async fn compile_project(project: &ProjectContext, instance_name: &str) -> Result { +pub(crate) async fn compile_project( + project: &ProjectContext, + instance_name: &str, +) -> Result { print_status("COMPILE", "Compiling Helix queries..."); // Create helix-container directory in instance workspace for generated files @@ -456,7 +462,10 @@ fn read_config(instance_src_dir: &std::path::Path) -> Result { } /// Handle Rust compilation failure during Docker build - print errors and offer GitHub issue creation. 
-fn handle_docker_rust_compilation_failure(docker_output: &str, project: &ProjectContext) -> Result<()> {
+fn handle_docker_rust_compilation_failure(
+    docker_output: &str,
+    project: &ProjectContext,
+) -> Result<()> {
     print_error("Rust compilation failed during Docker build");
     println!();
     println!("This may indicate a bug in the Helix code generator.");
diff --git a/helix-cli/src/commands/check.rs b/helix-cli/src/commands/check.rs
index 97ae5f93..ac331d61 100644
--- a/helix-cli/src/commands/check.rs
+++ b/helix-cli/src/commands/check.rs
@@ -9,6 +9,7 @@ use crate::utils::helixc_utils::{
 };
 use crate::utils::{print_confirm, print_error, print_status, print_success, print_warning};
 use eyre::Result;
+use helix_db::helixc::parser::types::FieldType;
 use std::fs;
 use std::path::Path;
 use std::process::Command;
@@ -49,6 +50,10 @@ async fn check_instance(
     validate_project_syntax(project)?;
     print_success("Syntax validation passed");
 
+    // Step 1.5: Validate vector data types
+    print_status("VECTORS", "Validating vector data types...");
+    validate_vector_data_types(project)?;
+
     // Step 2: Ensure helix repo is cached (reuse from build.rs)
     build::ensure_helix_repo_cached().await?;
 
@@ -115,7 +120,12 @@ async fn check_all_instances(
 ) -> Result<()> {
     print_status("CHECK", "Checking all instances");
 
-    let instances: Vec<String> = project.config.list_instances().into_iter().map(String::from).collect();
+    let instances: Vec<String> = project
+        .config
+        .list_instances()
+        .into_iter()
+        .map(String::from)
+        .collect();
 
     if instances.is_empty() {
         return Err(eyre::eyre!(
@@ -123,6 +133,10 @@ async fn check_all_instances(
         ));
     }
 
+    // Validate vector data types once for all instances
+    print_status("VECTORS", "Validating vector data types...");
+    validate_vector_data_types(project)?;
+
     // Check each instance
     for instance_name in &instances {
         check_instance(project, instance_name, metrics_sender).await?;
@@ -132,6 +146,63 @@ async fn check_all_instances(
     Ok(())
 }
 
+/// Validate vector data types and reject deprecated F64 usage
+fn validate_vector_data_types(project: &ProjectContext) -> Result<()> {
+    // Collect all .hx files for validation
+    let hx_files = collect_hx_files(&project.root, &project.config.project.queries)?;
+
+    // Generate content and parse
+    let content = generate_content(&hx_files)?;
+    let source = parse_content(&content)?;
+
+    let mut found_f64_vectors = false;
+    let mut f64_vector_names = Vec::new();
+
+    // Check all vector schemas for F64 usage
+    for schema in source.get_schemas_in_order() {
+        for vector_schema in &schema.vector_schemas {
+            for field in &vector_schema.fields {
+                if contains_f64_type(&field.field_type) {
+                    found_f64_vectors = true;
+                    f64_vector_names.push(format!("V::{}.{}", vector_schema.name, field.name));
+                }
+            }
+        }
+    }
+
+    if found_f64_vectors {
+        print_warning("Found F64 data types in vector fields");
+        println!();
+        println!("  Vector fields using F64:");
+        for vector_name in &f64_vector_names {
+            println!("    • {}", vector_name);
+        }
+        println!();
+        println!("  ⚠️  F64 vectors are deprecated.");
+        println!(
+            "  For vectors, use [F32] instead of [F64] for better performance and compatibility."
+        );
+        println!("  F32 provides sufficient precision for most vector similarity use cases.");
+        return Err(eyre::eyre!(
+            "Vectors with F64 data types are deprecated. Use F32 instead."
+        ));
+    } else {
+        print_success("Vector data types validation passed");
+    }
+
+    Ok(())
+}
+
+/// Recursively check if a FieldType contains F64
+fn contains_f64_type(field_type: &FieldType) -> bool {
+    match field_type {
+        FieldType::F64 => true,
+        FieldType::Array(inner) => contains_f64_type(inner),
+        FieldType::Object(obj) => obj.values().any(contains_f64_type),
+        _ => false,
+    }
+}
+
 /// Validate project syntax by parsing queries and schema (similar to build.rs but without generating files)
 fn validate_project_syntax(project: &ProjectContext) -> Result<()> {
     // Collect all .hx files for validation
@@ -193,7 +264,8 @@ fn handle_cargo_check_failure(
     print_warning("You can report this issue to help improve Helix.");
     println!();
 
-    let should_create = print_confirm("Would you like to create a GitHub issue with diagnostic information?")?;
+    let should_create =
+        print_confirm("Would you like to create a GitHub issue with diagnostic information?")?;
 
     if !should_create {
         return Ok(());
@@ -217,3 +289,138 @@ fn handle_cargo_check_failure(
 
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::{ContainerRuntime, HelixConfig, ProjectConfig};
+    use std::fs;
+    use tempfile::tempdir;
+
+    #[test]
+    fn test_validate_vector_data_types_with_f64() {
+        let temp_dir = tempdir().unwrap();
+        let project_root = temp_dir.path();
+
+        // Create a helix.toml config
+        let config = HelixConfig {
+            project: ProjectConfig {
+                name: "test_project".to_string(),
+                queries: std::path::PathBuf::from("./db/"),
+                container_runtime: ContainerRuntime::Docker,
+            },
+            local: std::collections::HashMap::new(),
+            cloud: std::collections::HashMap::new(),
+        };
+
+        // Create project context
+        let project = ProjectContext {
+            root: project_root.to_path_buf(),
+            config,
+            helix_dir: project_root.join(".helix"),
+        };
+
+        // Create db directory
+        let db_dir = project_root.join("db");
+        fs::create_dir_all(&db_dir).unwrap();
+
+        // Create a .hx file with F64 vector fields
+        let schema_content = r#"
+V::Document {
+    content: String,
+    embedding: [F64],
+    scores: [F64]
+}
+
+QUERY test() =>
+    d <- V
+    RETURN d
+"#;
+
+        fs::write(db_dir.join("schema.hx"), schema_content).unwrap();
+
+        // Test validation - should detect F64 usage and fail
+        let result = validate_vector_data_types(&project);
+        assert!(
+            result.is_err(),
+            "Validation should fail when F64 vector fields are present"
+        );
+    }
+
+    #[test]
+    fn test_validate_vector_data_types_with_f32() {
+        let temp_dir = tempdir().unwrap();
+        let project_root = temp_dir.path();
+
+        // Create a helix.toml config
+        let config = HelixConfig {
+            project: ProjectConfig {
+                name: "test_project".to_string(),
+                queries: std::path::PathBuf::from("./db/"),
+                container_runtime: ContainerRuntime::Docker,
+            },
+            local: std::collections::HashMap::new(),
+            cloud: std::collections::HashMap::new(),
+        };
+
+        // Create project context
+        let project = ProjectContext {
+            root: project_root.to_path_buf(),
+            config,
+            helix_dir: project_root.join(".helix"),
+        };
+
+        // Create db directory
+        let db_dir = project_root.join("db");
+        fs::create_dir_all(&db_dir).unwrap();
+
+        // Create a .hx file with F32 vector fields (correct)
+        let schema_content = r#"
+V::Document {
+    content: String,
+    embedding: [F32],
+    scores: [F32]
+}
+
+QUERY test() =>
+    d <- V
+    RETURN d
+"#;
+
+        fs::write(db_dir.join("schema.hx"), schema_content).unwrap();
+
+        // Test validation - should pass without warnings
+        let result = validate_vector_data_types(&project);
+        assert!(result.is_ok(), "Validation should succeed with F32");
+    }
+
+    #[test]
+    fn test_contains_f64_type() {
+        use helix_db::helixc::parser::types::FieldType;
+
+        // Test direct F64
+        assert!(contains_f64_type(&FieldType::F64));
+
+        // Test F32 (should be false)
+        assert!(!contains_f64_type(&FieldType::F32));
+
+        // Test Array of F64
+        assert!(contains_f64_type(&FieldType::Array(Box::new(
+            FieldType::F64
+        ))));
+
+        // Test Array of F32 (should be false)
+        assert!(!contains_f64_type(&FieldType::Array(Box::new(
+            FieldType::F32
+        ))));
+
+        // Test nested object with F64
+        let mut obj = std::collections::HashMap::new();
+        obj.insert("score".to_string(), FieldType::F64);
+        assert!(contains_f64_type(&FieldType::Object(obj)));
+
+        // Test other types
+        assert!(!contains_f64_type(&FieldType::String));
+        assert!(!contains_f64_type(&FieldType::Boolean));
+    }
+}
diff --git a/helix-cli/src/commands/integrations/fly.rs b/helix-cli/src/commands/integrations/fly.rs
index 7a6a4bcc..7342896b 100644
--- a/helix-cli/src/commands/integrations/fly.rs
+++ b/helix-cli/src/commands/integrations/fly.rs
@@ -358,7 +358,10 @@ impl<'a> FlyManager<'a> {
     }
 
         // Check if fly.toml already exists for this instance
-        let fly_toml_path = self.project.instance_workspace(instance_name).join("fly.toml");
+        let fly_toml_path = self
+            .project
+            .instance_workspace(instance_name)
+            .join("fly.toml");
         if let Some(existing_app_name) = Self::read_app_name_from_fly_toml(&fly_toml_path)? {
             // Check if the app in fly.toml exists on Fly.io
             if self.app_exists(&existing_app_name).await? {
diff --git a/helix-cli/src/commands/metrics.rs b/helix-cli/src/commands/metrics.rs
index 51dbf68d..4413b9e9 100644
--- a/helix-cli/src/commands/metrics.rs
+++ b/helix-cli/src/commands/metrics.rs
@@ -90,7 +90,8 @@ async fn show_metrics_status() -> Result<()> {
     Ok(())
 }
 
-static EMAIL_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap());
+static EMAIL_REGEX: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$").unwrap());
 
 fn ask_for_email() -> String {
     print_line("Please enter your email address:");
diff --git a/helix-cli/src/errors.rs b/helix-cli/src/errors.rs
index 38d0aaeb..6b5879f3 100644
--- a/helix-cli/src/errors.rs
+++ b/helix-cli/src/errors.rs
@@ -92,7 +92,6 @@ impl CliError {
         self
     }
 
-
     pub fn render(&self) -> String {
         let mut output = String::new();
 
@@ -186,36 +185,30 @@ pub type CliResult = Result;
 
 // Convenience functions for common error patterns with error codes
 #[allow(unused)]
 pub fn config_error<S: Into<String>>(message: S) -> CliError {
-    CliError::new(message)
-        .with_hint("run `helix init` if you need to create a new project")
+    CliError::new(message).with_hint("run `helix init` if you need to create a new project")
 }
 
 #[allow(unused)]
 pub fn file_error<S: Into<String>>(message: S, file_path: S) -> CliError {
-    CliError::new(message)
-        .with_file_path(file_path)
+    CliError::new(message).with_file_path(file_path)
 }
 
 #[allow(unused)]
 pub fn docker_error<S: Into<String>>(message: S) -> CliError {
-    CliError::new(message)
-        .with_hint("ensure Docker is running and accessible")
+    CliError::new(message).with_hint("ensure Docker is running and accessible")
 }
 
 #[allow(unused)]
 pub fn network_error<S: Into<String>>(message: S) -> CliError {
-    CliError::new(message)
-        .with_hint("check your internet connection and try again")
+    CliError::new(message).with_hint("check your internet connection and try again")
 }
 
 #[allow(unused)]
 pub fn project_error<S: Into<String>>(message: S) -> CliError {
-    CliError::new(message)
-        .with_hint("ensure you're in a valid helix project directory")
+    CliError::new(message).with_hint("ensure you're in a valid helix project directory")
 }
 
 #[allow(unused)]
 pub fn cloud_error<S: Into<String>>(message: S) -> CliError {
-    CliError::new(message)
-        .with_hint("run `helix auth login` to authenticate with Helix Cloud")
+    CliError::new(message).with_hint("run `helix auth login` to authenticate with Helix Cloud")
 }
diff --git a/helix-cli/src/metrics_sender.rs b/helix-cli/src/metrics_sender.rs
index d184b4d2..3c4fd0f4 100644
--- a/helix-cli/src/metrics_sender.rs
+++ b/helix-cli/src/metrics_sender.rs
@@ -1,6 +1,6 @@
 use chrono::{Local, NaiveDate};
 use dirs::home_dir;
-use eyre::{eyre, OptionExt, Result};
+use eyre::{OptionExt, Result, eyre};
 use flume::{Receiver, Sender, unbounded};
 use helix_metrics::events::{
     CompileEvent, DeployCloudEvent, DeployLocalEvent, EventData, EventType, RawEvent,
diff --git a/helix-cli/src/project.rs b/helix-cli/src/project.rs
index 737b2166..e2418c07 100644
--- a/helix-cli/src/project.rs
+++ b/helix-cli/src/project.rs
@@ -93,7 +93,10 @@ fn find_project_root(start: &Path) -> Result {
         let error = crate::errors::config_error("found v1 project configuration")
             .with_file_path(v1_config_path.display().to_string())
             .with_context("This project uses the old v1 configuration format")
-            .with_hint(format!("Run 'helix migrate --path \"{}\"' to migrate this project to v2 format", current.display()));
+            .with_hint(format!(
+                "Run 'helix migrate --path \"{}\"' to migrate this project to v2 format",
+                current.display()
+            ));
         return Err(eyre!("{}", error.render()));
     }
 
@@ -105,7 +108,10 @@ fn find_project_root(start: &Path) -> Result {
 
     let error = crate::errors::config_error("project configuration not found")
         .with_file_path(start.display().to_string())
-        .with_context(format!("searched from {} up to filesystem root", start.display()));
+        .with_context(format!(
+            "searched from {} up to filesystem root",
+            start.display()
+        ));
 
     Err(eyre!("{}", error.render()))
 }
diff --git a/helix-cli/src/tests/check_tests.rs b/helix-cli/src/tests/check_tests.rs
index 71f9a502..e5585730 100644
--- a/helix-cli/src/tests/check_tests.rs
+++ b/helix-cli/src/tests/check_tests.rs
@@ -53,8 +53,7 @@ E::Likes {
     To: Post,
 }
 "#;
-    fs::write(queries_dir.join("schema.hx"), schema_content)
-        .expect("Failed to write schema.hx");
+    fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx");
 
     // Create valid queries.hx
     let queries_content = r#"
@@ -66,8 +65,7 @@ QUERY GetUserPosts(user_id: ID) =>
     posts <- N(user_id)::Out
     RETURN posts
 "#;
-    fs::write(queries_dir.join("queries.hx"), queries_content)
-        .expect("Failed to write queries.hx");
+    fs::write(queries_dir.join("queries.hx"), queries_content).expect("Failed to write queries.hx");
 
     (temp_dir, project_path)
 }
@@ -97,8 +95,7 @@ QUERY GetUser(user_id: ID) =>
     user <- N(user_id)
     RETURN user
 "#;
-    fs::write(queries_dir.join("queries.hx"), queries_content)
-        .expect("Failed to write queries.hx");
+    fs::write(queries_dir.join("queries.hx"), queries_content).expect("Failed to write queries.hx");
 
     (temp_dir, project_path)
 }
@@ -128,8 +125,7 @@ N::User {
     name: String,
 }
 "#;
-    fs::write(queries_dir.join("schema.hx"), schema_content)
-        .expect("Failed to write schema.hx");
+    fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx");
 
     // Create queries.hx with invalid syntax
     let invalid_queries = r#"
 QUERY InvalidQuery {
     this is not valid helix syntax!!!
} "#; - fs::write(queries_dir.join("queries.hx"), invalid_queries) - .expect("Failed to write queries.hx"); + fs::write(queries_dir.join("queries.hx"), invalid_queries).expect("Failed to write queries.hx"); (temp_dir, project_path) } @@ -246,7 +241,6 @@ async fn test_check_with_multiple_instances() { port: Some(6970), build_mode: crate::config::BuildMode::Debug, db_config: DbConfig::default(), - }, ); config.local.insert( @@ -255,7 +249,6 @@ async fn test_check_with_multiple_instances() { port: Some(6971), build_mode: crate::config::BuildMode::Debug, db_config: DbConfig::default(), - }, ); let config_path = project_path.join("helix.toml"); @@ -280,8 +273,7 @@ E::Follows { To: User, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); let _guard = std::env::set_current_dir(&project_path); let metrics_sender = create_test_metrics_sender(); @@ -364,8 +356,7 @@ E::Follows { To: User, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); let _guard = std::env::set_current_dir(&project_path); let metrics_sender = create_test_metrics_sender(); @@ -403,8 +394,7 @@ N::User { name: String, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); // Create additional schema in another file let more_schema = r#" @@ -464,8 +454,7 @@ N::User { name: String, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); let _guard = std::env::set_current_dir(&project_path); let metrics_sender = create_test_metrics_sender(); diff --git a/helix-cli/src/tests/compile_tests.rs b/helix-cli/src/tests/compile_tests.rs index ec414083..5186f823 100644 --- a/helix-cli/src/tests/compile_tests.rs +++ b/helix-cli/src/tests/compile_tests.rs @@ -40,8 +40,7 @@ E::Authored { To: Post, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); // Create valid queries.hx let queries_content = r#" @@ -53,8 +52,7 @@ QUERY GetUserPosts(user_id: ID) => posts <- N(user_id)::Out RETURN posts "#; - fs::write(queries_dir.join("queries.hx"), queries_content) - .expect("Failed to write queries.hx"); + fs::write(queries_dir.join("queries.hx"), queries_content).expect("Failed to write queries.hx"); (temp_dir, project_path) } @@ -115,10 +113,7 @@ async fn test_compile_with_explicit_project_path() { // Check that compiled output files were created let query_file = project_path.join("queries.rs"); - assert!( - query_file.exists(), - "Compiled queries.rs should be created" - ); + assert!(query_file.exists(), "Compiled queries.rs should be created"); } #[tokio::test] @@ -145,8 +140,7 @@ QUERY GetUser(user_id: ID) => user <- N(user_id) RETURN user "#; - fs::write(queries_dir.join("queries.hx"), queries_content) - .expect("Failed to write queries.hx"); + fs::write(queries_dir.join("queries.hx"), queries_content).expect("Failed to write queries.hx"); let _guard = std::env::set_current_dir(&project_path); @@ -184,16 +178,14 @@ N::User { 
name: String, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); // Create queries with invalid syntax let invalid_queries = r#" QUERY InvalidQuery this is not valid helix syntax!!! "#; - fs::write(queries_dir.join("queries.hx"), invalid_queries) - .expect("Failed to write queries.hx"); + fs::write(queries_dir.join("queries.hx"), invalid_queries).expect("Failed to write queries.hx"); let _guard = std::env::set_current_dir(&project_path); @@ -249,8 +241,7 @@ E::Follows { To: User, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); let _guard = std::env::set_current_dir(&project_path); @@ -294,8 +285,7 @@ N::User { name: String, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); // Create additional schema in another file let more_schema = r#" @@ -330,10 +320,7 @@ QUERY GetUser(id: ID) => // Check that compiled output files were created let query_file = project_path.join("queries.rs"); - assert!( - query_file.exists(), - "Compiled queries.rs should be created" - ); + assert!(query_file.exists(), "Compiled queries.rs should be created"); } #[tokio::test] @@ -361,8 +348,7 @@ N::User { name: String, } "#; - fs::write(queries_dir.join("schema.hx"), schema_content) - .expect("Failed to write schema.hx"); + fs::write(queries_dir.join("schema.hx"), schema_content).expect("Failed to write schema.hx"); let _guard = std::env::set_current_dir(&project_path); @@ -375,10 +361,7 @@ N::User { // Check that compiled output files were created let query_file = project_path.join("queries.rs"); - assert!( - query_file.exists(), - "Compiled queries.rs should be created" - ); + assert!(query_file.exists(), "Compiled queries.rs should be created"); } #[tokio::test] @@ -400,7 +383,9 @@ async fn test_compile_creates_all_required_files() { "Generated queries.rs should not be empty" ); assert!( - query_content.contains("pub") || query_content.contains("use") || query_content.contains("impl"), + query_content.contains("pub") + || query_content.contains("use") + || query_content.contains("impl"), "Generated queries.rs should contain Rust code" ); } diff --git a/helix-cli/src/tests/init_tests.rs b/helix-cli/src/tests/init_tests.rs index 7f0e11d1..9955e592 100644 --- a/helix-cli/src/tests/init_tests.rs +++ b/helix-cli/src/tests/init_tests.rs @@ -1,6 +1,6 @@ use crate::commands::init::run; use std::fs; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use tempfile::TempDir; /// Helper function to create a temporary test directory @@ -9,7 +9,7 @@ fn setup_test_dir() -> TempDir { } /// Helper function to check if helix.toml exists and is valid -fn assert_helix_config_exists(project_dir: &PathBuf) { +fn assert_helix_config_exists(project_dir: &Path) { let config_path = project_dir.join("helix.toml"); assert!( config_path.exists(), @@ -104,7 +104,7 @@ async fn test_init_with_default_path() { .await; assert!(result.is_ok(), "Init with default path should succeed"); - assert_helix_config_exists(&temp_dir.path().to_path_buf()); + assert_helix_config_exists(temp_dir.path()); } #[tokio::test] @@ -166,7 +166,10 @@ async fn test_init_creates_directory_if_not_exists() { let 
project_path = temp_dir.path().join("new_project_dir"); // Directory should not exist yet - assert!(!project_path.exists(), "Project directory should not exist initially"); + assert!( + !project_path.exists(), + "Project directory should not exist initially" + ); let result = run( Some(project_path.to_str().unwrap().to_string()), diff --git a/helix-cli/src/tests/mod.rs b/helix-cli/src/tests/mod.rs index c321e6c3..f7511ed9 100644 --- a/helix-cli/src/tests/mod.rs +++ b/helix-cli/src/tests/mod.rs @@ -1,10 +1,10 @@ // CLI test modules #[cfg(test)] -pub mod init_tests; -#[cfg(test)] pub mod check_tests; #[cfg(test)] pub mod compile_tests; +#[cfg(test)] +pub mod init_tests; // #[cfg(test)] // pub mod build_tests; // #[cfg(test)] diff --git a/helix-cli/src/tests/project_tests.rs b/helix-cli/src/tests/project_tests.rs index 12d2df01..58648e55 100644 --- a/helix-cli/src/tests/project_tests.rs +++ b/helix-cli/src/tests/project_tests.rs @@ -1,5 +1,5 @@ use crate::config::HelixConfig; -use crate::project::{get_helix_cache_dir, ProjectContext}; +use crate::project::{ProjectContext, get_helix_cache_dir}; use std::fs; use std::path::PathBuf; use tempfile::TempDir; @@ -180,7 +180,10 @@ fn test_project_context_ensure_instance_dirs() { assert!(!workspace.exists(), "Workspace should not exist initially"); assert!(!volume.exists(), "Volume should not exist initially"); - assert!(!container.exists(), "Container dir should not exist initially"); + assert!( + !container.exists(), + "Container dir should not exist initially" + ); let result = context.ensure_instance_dirs("test-instance"); assert!(result.is_ok(), "Should create instance directories"); @@ -242,7 +245,10 @@ fn test_project_context_with_custom_queries_path() { fs::create_dir_all(project_path.join(".helix")).expect("Failed to create .helix"); let result = ProjectContext::find_and_load(Some(&project_path)); - assert!(result.is_ok(), "Should load project with custom queries path"); + assert!( + result.is_ok(), + "Should load project with custom queries path" + ); let context = result.unwrap(); assert_eq!( @@ -297,5 +303,8 @@ fn test_find_project_root_stops_at_filesystem_root() { fs::create_dir_all(&deep_path).expect("Failed to create deep path"); let result = ProjectContext::find_and_load(Some(&deep_path)); - assert!(result.is_err(), "Should fail after reaching filesystem root"); + assert!( + result.is_err(), + "Should fail after reaching filesystem root" + ); } diff --git a/helix-cli/src/utils.rs b/helix-cli/src/utils.rs index 71861550..2fb44e49 100644 --- a/helix-cli/src/utils.rs +++ b/helix-cli/src/utils.rs @@ -2,11 +2,10 @@ use crate::errors::CliError; use color_eyre::owo_colors::OwoColorize; use eyre::{Result, eyre}; use helix_db::helixc::parser::types::HxFile; +use std::io::IsTerminal; use std::{borrow::Cow, fs, path::Path}; use tokio::sync::oneshot; use tokio::time::Duration; -use std::io::IsTerminal; - const IGNORES: [&str; 3] = ["target", ".git", ".helix"]; @@ -495,10 +494,7 @@ impl Spinner { } let frame = frames[frame_idx % frames.len()]; let msg = message.lock().unwrap().clone(); - print!( - "\r{} {frame} {msg}", - format!("[{prefix}]").blue().bold() - ); + print!("\r{} {frame} {msg}", format!("[{prefix}]").blue().bold()); std::io::Write::flush(&mut std::io::stdout()).unwrap(); frame_idx += 1; tokio::time::sleep(Duration::from_millis(100)).await; diff --git a/helix-container/src/main.rs b/helix-container/src/main.rs index a6d37d08..6ab1daec 100644 --- a/helix-container/src/main.rs +++ b/helix-container/src/main.rs @@ -11,7 +11,7 @@ use 
helix_db::helix_gateway::{ }; use std::{collections::HashMap, sync::Arc}; use tracing::info; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, Layer}; +use tracing_subscriber::{Layer, layer::SubscriberExt, util::SubscriberInitExt}; mod queries; diff --git a/helix-db/Cargo.toml b/helix-db/Cargo.toml index beb14a15..8a708d78 100644 --- a/helix-db/Cargo.toml +++ b/helix-db/Cargo.toml @@ -15,6 +15,7 @@ helix-metrics = { path = "../metrics" } # external dependencies tokio = { version = "1.44.2", features = ["full"] } serde = { version = "1.0.217", features = ["derive"] } +serde_json = "1.0.143" bincode = "1.3.3" # TODO: Figure out bincode 2 impl with current serde impl sonic-rs = "0.5.0" inventory = "0.3.16" @@ -34,7 +35,7 @@ paste = "1.0.15" rayon = "1.11.0" mimalloc = "0.1.48" bumpalo = { version = "3.19.0", features = ["collections", "boxed", "serde"] } -bytemuck = "1.24.0" +bytemuck = { version = "1.24.0", features = ["derive", "extern_crate_alloc"] } # compiler dependencies pest = { version = "2.7", optional = true } @@ -52,13 +53,20 @@ tracing = "0.1.41" core_affinity = "0.8.3" async-trait = "0.1.88" thiserror = "2.0.12" -polars = { version = "0.46.0", features = [ - "parquet", - "lazy", - "json", -], optional = true } +polars = { version = "0.46.0", features = ["parquet", "lazy", "json"], optional = true } +subtle = "2.6.1" sha2 = "0.10" -subtle = "2.5" +byteorder = "1.5.0" +roaring = "0.11.2" +tinyvec = "1.10.0" +papaya = "0.2.3" +hashbrown = "0.16.0" +min-max-heap = "1.3.0" +page_size = "0.6.0" +rustc-hash = "2.1.1" + +[target.'cfg(not(windows))'.dependencies] +madvise = "0.1.0" [dev-dependencies] rand = "0.9.0" diff --git a/helix-db/benches/bm25_benches.rs b/helix-db/benches/bm25_benches.rs index 16282416..83a6ceb6 100644 --- a/helix-db/benches/bm25_benches.rs +++ b/helix-db/benches/bm25_benches.rs @@ -51,7 +51,7 @@ mod tests { let mut rng = rand::rng(); let mut docs = vec![]; - let relevant_count = 4000 as usize; + let relevant_count = 4000_usize; let total_docs = 1_000_000; for i in tqdm::new( @@ -126,7 +126,7 @@ mod tests { let id = v6_uuid(); let doc_lower = doc.to_lowercase(); - let _ = bm25.insert_doc(&mut wtxn, id, &doc_lower).unwrap(); + bm25.insert_doc(&mut wtxn, id, &doc_lower).unwrap(); for term in &query_terms { if doc_lower.contains(term) { @@ -139,7 +139,7 @@ mod tests { for query_term in query_terms { let rtxn = bm25.graph_env.read_txn().unwrap(); - let term_count = query_term_counts.get(query_term).unwrap().clone(); + let term_count = *query_term_counts.get(query_term).unwrap(); let results = bm25.search(&rtxn, query_term, limit).unwrap(); @@ -148,7 +148,7 @@ mod tests { debug_println!("term count: {}, results len: {}", term_count, results.len()); assert!( - precision >= 0.9 && precision <= 1.0, + (0.9..=1.0).contains(&precision), "precision {} below 0.9 or above 1.0", precision ); diff --git a/helix-db/benches/capacity_optimization_benches.rs b/helix-db/benches/capacity_optimization_benches.rs index a75fa22a..00799537 100644 --- a/helix-db/benches/capacity_optimization_benches.rs +++ b/helix-db/benches/capacity_optimization_benches.rs @@ -9,8 +9,8 @@ mod tests { use heed3::RoTxn; use helix_db::{ helix_engine::{ - bm25::bm25::{HBM25Config, BM25}, - storage_core::{storage_methods::StorageMethods, HelixGraphStorage}, + bm25::bm25::{BM25, HBM25Config}, + storage_core::{HelixGraphStorage, storage_methods::StorageMethods}, traversal_core::{ config::Config, ops::{ @@ -138,10 +138,10 @@ mod tests { .filter_map(|result| match result { Ok((_, value)) => 
match HelixGraphStorage::unpack_adj_edge_data(value) { Ok((edge_id, to_node)) => { - if connected_node_ids.insert(to_node) { - if let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) { - connected_nodes.push(TraversalValue::Node(node)); - } + if connected_node_ids.insert(to_node) + && let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) + { + connected_nodes.push(TraversalValue::Node(node)); } match storage.get_edge(&rtxn, &edge_id, &arena) { Ok(edge) => Some(TraversalValue::Edge(edge)), @@ -173,10 +173,10 @@ mod tests { .filter_map(|result| match result { Ok((_, value)) => match HelixGraphStorage::unpack_adj_edge_data(value) { Ok((edge_id, to_node)) => { - if connected_node_ids.insert(to_node) { - if let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) { - connected_nodes.push(TraversalValue::Node(node)); - } + if connected_node_ids.insert(to_node) + && let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) + { + connected_nodes.push(TraversalValue::Node(node)); } match storage.get_edge(&rtxn, &edge_id, &arena) { Ok(edge) => Some(TraversalValue::Edge(edge)), @@ -206,10 +206,10 @@ mod tests { .filter_map(|result| match result { Ok((_, value)) => match HelixGraphStorage::unpack_adj_edge_data(value) { Ok((edge_id, to_node)) => { - if connected_node_ids.insert(to_node) { - if let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) { - connected_nodes.push(TraversalValue::Node(node)); - } + if connected_node_ids.insert(to_node) + && let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) + { + connected_nodes.push(TraversalValue::Node(node)); } match storage.get_edge(&rtxn, &edge_id, &arena) { Ok(edge) => Some(TraversalValue::Edge(edge)), @@ -241,10 +241,10 @@ mod tests { .filter_map(|result| match result { Ok((_, value)) => match HelixGraphStorage::unpack_adj_edge_data(value) { Ok((edge_id, to_node)) => { - if connected_node_ids.insert(to_node) { - if let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) { - connected_nodes.push(TraversalValue::Node(node)); - } + if connected_node_ids.insert(to_node) + && let Ok(node) = storage.get_node(&rtxn, &to_node, &arena) + { + connected_nodes.push(TraversalValue::Node(node)); } match storage.get_edge(&rtxn, &edge_id, &arena) { Ok(edge) => Some(TraversalValue::Edge(edge)), @@ -322,10 +322,10 @@ mod tests { let mut nodes = Vec::new(); for result in storage.nodes_db.iter(&rtxn).unwrap() { let (id, node_data) = result.unwrap(); - if let Ok(node) = Node::from_bincode_bytes(id, node_data, &arena) { - if node.label == "person" { - nodes.push(node); - } + if let Ok(node) = Node::from_bincode_bytes(id, node_data, &arena) + && node.label == "person" + { + nodes.push(node); } } times_no_capacity.push(start.elapsed().as_micros()); @@ -348,10 +348,10 @@ mod tests { let mut nodes = Vec::with_capacity(initial_capacity); for result in storage.nodes_db.iter(&rtxn).unwrap() { let (id, node_data) = result.unwrap(); - if let Ok(node) = Node::from_bincode_bytes(id, node_data, &arena) { - if node.label == "person" { - nodes.push(node); - } + if let Ok(node) = Node::from_bincode_bytes(id, node_data, &arena) + && node.label == "person" + { + nodes.push(node); } } times_with_capacity.push(start.elapsed().as_micros()); @@ -515,7 +515,7 @@ mod tests { let variance = times .iter() .map(|&t| { - let diff = if t > mean { t - mean } else { mean - t }; + let diff = t.abs_diff(mean); diff * diff }) .sum::() diff --git a/helix-db/benches/hnsw_benches.rs b/helix-db/benches/hnsw_benches.rs index 7d8f30d3..c274ffb8 100644 --- a/helix-db/benches/hnsw_benches.rs +++ 
b/helix-db/benches/hnsw_benches.rs @@ -3,20 +3,13 @@ mod tests { use heed3::{Env, EnvOpenOptions, RoTxn}; use helix_db::{ - helix_engine::vector_core::{ - hnsw::HNSW, - vector::HVector, - vector_core::{HNSWConfig, VectorCore}, - }, + helix_engine::vector_core::{HNSWConfig, HVector, VectorCore}, utils::tqdm::tqdm, }; use polars::prelude::*; - use rand::{ - prelude::SliceRandom, - Rng, - }; + use rand::{Rng, prelude::SliceRandom}; use std::{ - collections::{HashSet, HashMap}, + collections::{HashMap, HashSet}, fs::{self, File}, sync::{Arc, Mutex}, thread, @@ -60,12 +53,14 @@ mod tests { /// Returns query ids and their associated closest k vectors (by vec id) fn calc_ground_truths( base_vectors: Vec, - query_vectors: &Vec<(usize, Vec)>, + query_vectors: &Vec<(usize, Vec)>, k: usize, ) -> HashMap> { let base_vectors = Arc::new(base_vectors); let results = Arc::new(Mutex::new(HashMap::new())); let chunk_size = (query_vectors.len() + num_cpus::get() - 1) / num_cpus::get(); + let arena = bumpalo::Bump::new(); + let label = arena.alloc_str("test"); let handles: Vec<_> = query_vectors .chunks(chunk_size) @@ -78,36 +73,35 @@ mod tests { let local_results: HashMap> = chunk .into_iter() .map(|(query_id, query_vec)| { - let query_hvector = HVector::from_slice(0, query_vec); + let mut vecs = bumpalo::collections::Vec::new_in(&arena); + vecs.extend_from_slice(query_vec.as_slice()); + let query_hvector = HVector::from_vec(&label, vecs); let mut distances: Vec<(u128, f64)> = base_vectors .iter() .filter_map(|base_vec| { query_hvector .distance_to(base_vec) - .map(|dist| (base_vec.id.clone(), dist)) + .map(|dist| (base_vec.id, dist)) .ok() }) - .collect(); + .collect(); distances.sort_by(|a, b| { a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal) }); - let top_k_ids: Vec = distances - .into_iter() - .take(k) - .map(|(id, _)| id) - .collect(); + let top_k_ids: Vec = + distances.into_iter().take(k).map(|(id, _)| id).collect(); (query_id, top_k_ids) }) - .collect(); + .collect(); results.lock().unwrap().extend(local_results); }) }) - .collect(); + .collect(); for handle in handles { handle.join().unwrap(); @@ -185,109 +179,6 @@ mod tests { vectors } - /* - #[test] - fn bench_hnsw_search_short() { - //fetch_parquet_vectors().unwrap(); - let n_base = 4_000; - let dims = 950; - let vectors = gen_sim_vecs(n_base, dims, 0.8); - - let n_query = 400; - let mut rng = rand::rng(); - let mut shuffled_vectors = vectors.clone(); - shuffled_vectors.shuffle(&mut rng); - let base_vectors = &shuffled_vectors[..n_base - n_query]; - let query_vectors = &shuffled_vectors[n_base - n_query..]; - - println!("num of base vecs: {}", base_vectors.len()); - println!("num of query vecs: {}", query_vectors.len()); - - let k = 10; - - let env = setup_temp_env(); - let mut txn = env.write_txn().unwrap(); - - let mut total_insertion_time = std::time::Duration::from_secs(0); - let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None)).unwrap(); - - let mut all_vectors: Vec = Vec::new(); - let over_all_time = Instant::now(); - for (i, data) in vectors.iter().enumerate() { - let start_time = Instant::now(); - let vec = index.insert::(&mut txn, &data, None).unwrap(); - let time = start_time.elapsed(); - all_vectors.push(vec); - if i % 1000 == 0 { - println!("{} => inserting in {} ms", i, time.as_millis()); - println!("time taken so far: {:?}", over_all_time.elapsed()); - } - total_insertion_time += time; - } - txn.commit().unwrap(); - - let txn = env.read_txn().unwrap(); - println!("{:?}", index.config); - - 
println!( - "total insertion time: {:.2?} seconds", - total_insertion_time.as_secs_f64() - ); - println!( - "average insertion time per vec: {:.2?} milliseconds", - total_insertion_time.as_millis() as f64 / n_base as f64 - ); - - println!("calculating ground truths"); - let ground_truths = calc_ground_truths(all_vectors, query_vectors.to_vec(), k); - - println!("searching and comparing..."); - let test_id = format!("k = {} with {} queries", k, n_query); - - let mut total_recall = 0.0; - let mut total_precision = 0.0; - let mut total_search_time = std::time::Duration::from_secs(0); - for ((_, query), gt) in query_vectors.iter().zip(ground_truths.iter()) { - let start_time = Instant::now(); - let results = index.search::(&txn, query, k, None, false).unwrap(); - let search_duration = start_time.elapsed(); - total_search_time += search_duration; - - let result_indices: HashSet = results - .into_iter() - .map(|hvector| hvector.get_id().to_string()) - .collect(); - - let gt_indices: HashSet = gt.iter().cloned().collect(); - //println!("gt: {:?}\nresults: {:?}\n", gt_indices, result_indices); - let true_positives = result_indices.intersection(>_indices).count(); - - let recall: f64 = true_positives as f64 / gt_indices.len() as f64; - let precision: f64 = true_positives as f64 / result_indices.len() as f64; - - total_recall += recall; - total_precision += precision; - } - - println!( - "total search time: {:.2?} seconds", - total_search_time.as_secs_f64() - ); - println!( - "average search time per query: {:.2?} milliseconds", - total_search_time.as_millis() as f64 / n_query as f64 - ); - - total_recall = total_recall / n_query as f64; - total_precision = total_precision / n_query as f64; - println!( - "{}: avg. recall: {:.4?}, avg. precision: {:.4?}", - test_id, total_recall, total_precision - ); - assert!(total_recall >= 0.8, "recall not high enough!"); - } - */ - /// Test the precision of the HNSW search algorithm #[test] fn bench_hnsw_search_long() { @@ -295,6 +186,8 @@ mod tests { let n_query = 1000; // 10-20% let k = 10; let mut vectors = load_dbpedia_vectors(n_base).unwrap(); + let arena = bumpalo::Bump::new(); + let label = arena.alloc_str("test"); let mut rng = rand::rng(); vectors.shuffle(&mut rng); @@ -305,7 +198,7 @@ mod tests { .iter() .enumerate() .map(|(i, x)| (i + 1, x.clone())) - .collect::)>>(); + .collect::)>>(); println!("num of base vecs: {}", base_vectors.len()); println!("num of query vecs: {}", query_vectors.len()); @@ -319,7 +212,7 @@ mod tests { let over_all_time = Instant::now(); for (i, data) in base_vectors.iter().enumerate() { let start_time = Instant::now(); - let vec = index.insert::(&mut txn, &data, None).unwrap(); + let vec = index.insert(&mut txn, label, &data, None, &arena).unwrap(); let time = start_time.elapsed(); base_all_vectors.push(vec); //println!("{} => inserting in {} ms", i, time.as_millis()); @@ -354,7 +247,9 @@ mod tests { let mut total_search_time = std::time::Duration::from_secs(0); for (qid, query) in query_vectors.iter() { let start_time = Instant::now(); - let results = index.search::(&txn, query, k, "vector", None, false).unwrap(); + let results = index + .search(&txn, query, k, "vector", false, &arena) + .unwrap(); let search_duration = start_time.elapsed(); total_search_time += search_duration; @@ -400,4 +295,3 @@ mod tests { } // TODO: memory benchmark (only the hnsw index ofc) - diff --git a/helix-db/proptest-regressions/protocol/custom_serde/property_based_tests.txt 
b/helix-db/proptest-regressions/protocol/custom_serde/property_based_tests.txt new file mode 100644 index 00000000..671e9f48 --- /dev/null +++ b/helix-db/proptest-regressions/protocol/custom_serde/property_based_tests.txt @@ -0,0 +1,8 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 77500f2f8b3836370e7b9cac8ca607972d1c177022f9d547f49358b339b2f680 # shrinks to id = 0, label = "A" +cc 6dd62308707837f8c6c3890ad62075b4cb7db3bac79adfc33c941c0f10501116 # shrinks to id = 7, label = "_" diff --git a/helix-db/src/helix_engine/bm25/bm25.rs b/helix-db/src/helix_engine/bm25/bm25.rs index c8dee717..bea70972 100644 --- a/helix-db/src/helix_engine/bm25/bm25.rs +++ b/helix-db/src/helix_engine/bm25/bm25.rs @@ -1,10 +1,6 @@ use crate::{ debug_println, - helix_engine::{ - storage_core::HelixGraphStorage, - types::GraphError, - vector_core::{hnsw::HNSW, vector::HVector}, - }, + helix_engine::{storage_core::HelixGraphStorage, types::GraphError}, utils::properties::ImmutablePropertiesMap, }; @@ -403,7 +399,7 @@ pub trait HybridSearch { fn hybrid_search( self, query: &str, - query_vector: &[f64], + query_vector: &[f32], alpha: f32, limit: usize, ) -> impl std::future::Future, GraphError>> + Send; @@ -413,12 +409,11 @@ impl HybridSearch for HelixGraphStorage { async fn hybrid_search( self, query: &str, - query_vector: &[f64], + query_vector: &[f32], alpha: f32, limit: usize, ) -> Result, GraphError> { let query_owned = query.to_string(); - let query_vector_owned = query_vector.to_vec(); let graph_env_bm25 = self.graph_env.clone(); let graph_env_vector = self.graph_env.clone(); @@ -432,24 +427,15 @@ impl HybridSearch for HelixGraphStorage { } }); + let query_vector_owned = query_vector.to_vec(); let vector_handle = - task::spawn_blocking(move || -> Result>, GraphError> { + task::spawn_blocking(move || -> Result>, GraphError> { let txn = graph_env_vector.read_txn()?; let arena = Bump::new(); // MOVE - let query_slice = arena.alloc_slice_copy(query_vector_owned.as_slice()); - let results = self.vectors.search:: bool>( - &txn, - query_slice, - limit * 2, - "vector", - None, - false, - &arena, - )?; - let scores = results - .into_iter() - .map(|vec| (vec.id, vec.distance.unwrap_or(0.0))) - .collect::>(); + let results = + self.vectors + .search(&txn, query_vector_owned, limit * 2, "vector", &arena)?; + let scores = self.vectors.into_global_id(&txn, &results)?; Ok(Some(scores)) }); @@ -467,7 +453,7 @@ impl HybridSearch for HelixGraphStorage { // correct_score = alpha * bm25_score + (1.0 - alpha) * vector_score if let Some(vector_results) = vector_results? 
{ for (doc_id, score) in vector_results { - let similarity = (1.0 / (1.0 + score)) as f32; + let similarity = 1.0 / (1.0 + score); combined_scores .entry(doc_id) .and_modify(|existing_score| *existing_score += (1.0 - alpha) * similarity) diff --git a/helix-db/src/helix_engine/bm25/bm25_tests.rs b/helix-db/src/helix_engine/bm25/bm25_tests.rs index 4a1543d9..3362c305 100644 --- a/helix-db/src/helix_engine/bm25/bm25_tests.rs +++ b/helix-db/src/helix_engine/bm25/bm25_tests.rs @@ -7,14 +7,13 @@ mod tests { }, storage_core::{HelixGraphStorage, version_info::VersionInfo}, traversal_core::config::Config, - vector_core::{hnsw::HNSW, vector::HVector}, }, protocol::value::Value, utils::properties::ImmutablePropertiesMap, }; use bumpalo::Bump; - use heed3::{Env, EnvOpenOptions, RoTxn}; + use heed3::{Env, EnvOpenOptions}; use rand::Rng; use std::collections::HashMap; use tempfile::tempdir; @@ -50,14 +49,14 @@ mod tests { (storage, temp_dir) } - fn generate_random_vectors(n: usize, d: usize) -> Vec> { + fn generate_random_vectors(n: usize, d: usize) -> Vec> { let mut rng = rand::rng(); let mut vectors = Vec::with_capacity(n); for _ in 0..n { let mut vector = Vec::with_capacity(d); for _ in 0..d { - vector.push(rng.random::()); + vector.push(rng.random::()); } vectors.push(vector); } @@ -1458,10 +1457,9 @@ mod tests { let vectors = generate_random_vectors(800, 650); let mut arena = Bump::new(); for vec in &vectors { - let slice = arena.alloc_slice_copy(vec.as_slice()); let _ = storage .vectors - .insert:: bool>(&mut wtxn, "vector", slice, None, &arena); + .insert(&mut wtxn, "vector", vec.as_slice(), None, &arena); arena.reset(); } wtxn.commit().unwrap(); @@ -1506,7 +1504,7 @@ mod tests { let slice = arena.alloc_slice_copy(vec.as_slice()); let _ = storage .vectors - .insert:: bool>(&mut wtxn, "vector", slice, None, &arena); + .insert(&mut wtxn, "vector", slice, None, &arena); arena.reset(); } wtxn.commit().unwrap(); @@ -1552,7 +1550,7 @@ mod tests { let slice = arena.alloc_slice_copy(vec.as_slice()); let _ = storage .vectors - .insert:: bool>(&mut wtxn, "vector", slice, None, &arena); + .insert(&mut wtxn, "vector", slice, None, &arena); arena.reset(); } wtxn.commit().unwrap(); diff --git a/helix-db/src/helix_engine/bm25/mod.rs b/helix-db/src/helix_engine/bm25/mod.rs index 74a06cb0..b033aefc 100644 --- a/helix-db/src/helix_engine/bm25/mod.rs +++ b/helix-db/src/helix_engine/bm25/mod.rs @@ -1,4 +1,4 @@ pub mod bm25; #[cfg(test)] -pub mod bm25_tests; \ No newline at end of file +pub mod bm25_tests; diff --git a/helix-db/src/helix_engine/mod.rs b/helix-db/src/helix_engine/mod.rs index 516c631c..621590ef 100644 --- a/helix-db/src/helix_engine/mod.rs +++ b/helix-db/src/helix_engine/mod.rs @@ -1,8 +1,8 @@ pub mod bm25; -pub mod traversal_core; pub mod macros; pub mod reranker; pub mod storage_core; +pub mod traversal_core; pub mod types; pub mod vector_core; diff --git a/helix-db/src/helix_engine/reranker/adapters/mod.rs b/helix-db/src/helix_engine/reranker/adapters/mod.rs index 49cf5abb..fada0b5d 100644 --- a/helix-db/src/helix_engine/reranker/adapters/mod.rs +++ b/helix-db/src/helix_engine/reranker/adapters/mod.rs @@ -12,7 +12,6 @@ //! .collect_to::>() //! 
``` - use crate::helix_engine::{ reranker::reranker::Reranker, traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, @@ -25,7 +24,9 @@ pub struct RerankIterator<'arena, I: Iterator, GraphError>>> Iterator for RerankIterator<'arena, I> { +impl<'arena, I: Iterator, GraphError>>> Iterator + for RerankIterator<'arena, I> +{ type Item = Result, GraphError>; fn next(&mut self) -> Option { @@ -34,7 +35,8 @@ impl<'arena, I: Iterator, GraphError>>> Ite } /// Trait that adds reranking capability to traversal iterators. -pub trait RerankAdapter<'arena, 'db, 'txn>: Iterator, GraphError>> +pub trait RerankAdapter<'arena, 'db, 'txn>: + Iterator, GraphError>> where 'db: 'arena, 'arena: 'txn, @@ -61,7 +63,12 @@ where self, reranker: R, query: Option<&str>, - ) -> RoTraversalIterator<'db, 'arena, 'txn, impl Iterator, GraphError>>>; + ) -> RoTraversalIterator< + 'db, + 'arena, + 'txn, + impl Iterator, GraphError>>, + >; } impl<'db, 'arena, 'txn, I> RerankAdapter<'arena, 'db, 'txn> @@ -75,7 +82,12 @@ where self, reranker: R, query: Option<&str>, - ) -> RoTraversalIterator<'db, 'arena, 'txn, impl Iterator, GraphError>>> { + ) -> RoTraversalIterator< + 'db, + 'arena, + 'txn, + impl Iterator, GraphError>>, + > { // Collect all items from the iterator let items = self.inner.filter_map(|item| item.ok()); @@ -106,7 +118,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::helix_engine::{reranker::fusion::RRFReranker, vector_core::vector::HVector}; + use crate::helix_engine::{reranker::fusion::RRFReranker, vector_core::HVector}; #[test] fn test_rerank_adapter_trait() { @@ -119,11 +131,11 @@ mod tests { #[test] fn test_rerank_iterator() { let arena = bumpalo::Bump::new(); - let data1 = arena.alloc_slice_copy(&[1.0]); - let data2 = arena.alloc_slice_copy(&[2.0]); + let data1 = bumpalo::vec![in &arena; 1.0]; + let data2 = bumpalo::vec![in &arena; 2.0]; let items = vec![ - Ok(TraversalValue::Vector(HVector::from_slice("test", 0, data1))), - Ok(TraversalValue::Vector(HVector::from_slice("test", 0, data2))), + Ok(TraversalValue::Vector(HVector::from_vec("test", data1))), + Ok(TraversalValue::Vector(HVector::from_vec("test", data2))), ]; let mut iter = RerankIterator { diff --git a/helix-db/src/helix_engine/reranker/fusion/mmr.rs b/helix-db/src/helix_engine/reranker/fusion/mmr.rs index 85623443..f96e8098 100644 --- a/helix-db/src/helix_engine/reranker/fusion/mmr.rs +++ b/helix-db/src/helix_engine/reranker/fusion/mmr.rs @@ -10,14 +10,12 @@ //! - Sim2: similarity to already selected documents (diversity) //! - Ξ»: trade-off parameter (typically 0.5-0.8) -use crate::{ - helix_engine::{ - reranker::{ - errors::{RerankerError, RerankerResult}, - reranker::{extract_score, update_score, Reranker}, - }, - traversal_core::traversal_value::TraversalValue, +use crate::helix_engine::{ + reranker::{ + errors::{RerankerError, RerankerResult}, + reranker::{Reranker, extract_score, update_score}, }, + traversal_core::traversal_value::TraversalValue, }; use std::collections::HashMap; @@ -37,18 +35,18 @@ pub struct MMRReranker { /// Lambda parameter: controls relevance vs diversity trade-off /// Higher values (closer to 1.0) favor relevance /// Lower values (closer to 0.0) favor diversity - lambda: f64, + lambda: f32, /// Distance metric for similarity calculation distance_method: DistanceMethod, /// Optional query vector for relevance calculation - query_vector: Option>, + query_vector: Option>, } impl MMRReranker { /// Create a new MMR reranker with default lambda=0.7 (favoring relevance). 
- pub fn new(lambda: f64) -> RerankerResult { + pub fn new(lambda: f32) -> RerankerResult { if !(0.0..=1.0).contains(&lambda) { return Err(RerankerError::InvalidParameter( "lambda must be between 0.0 and 1.0".to_string(), @@ -63,7 +61,7 @@ impl MMRReranker { } /// Create an MMR reranker with a custom distance metric. - pub fn with_distance(lambda: f64, distance_method: DistanceMethod) -> RerankerResult { + pub fn with_distance(lambda: f32, distance_method: DistanceMethod) -> RerankerResult { if !(0.0..=1.0).contains(&lambda) { return Err(RerankerError::InvalidParameter( "lambda must be between 0.0 and 1.0".to_string(), @@ -78,24 +76,32 @@ impl MMRReranker { } /// Set the query vector for relevance calculation. - pub fn with_query_vector(mut self, query: Vec) -> Self { + pub fn with_query_vector(mut self, query: Vec) -> Self { self.query_vector = Some(query); self } /// Extract vector data from a TraversalValue. - /// Note: This requires an arena to convert VectorPrecisionData to f64 slice - fn extract_vector_data<'a>(&self, item: &'a TraversalValue<'a>, _arena: &'a bumpalo::Bump) -> RerankerResult<&'a [f64]> { + fn extract_vector_data<'a>( + &self, + item: &'a TraversalValue<'a>, + arena: &'a bumpalo::Bump, + ) -> RerankerResult> { match item { - TraversalValue::Vector(v) => Ok(v.data), + TraversalValue::Vector(v) => { + let mut bump_vec = bumpalo::collections::Vec::new_in(arena); + bump_vec.extend_from_slice(v.data_borrowed()); + Ok(bump_vec) + } _ => Err(RerankerError::TextExtractionError( - "Cannot extract vector from this item type (only Vector supported for MMR)".to_string(), + "Cannot extract vector from this item type (only Vector supported for MMR)" + .to_string(), )), } } /// Calculate similarity between two items. - fn calculate_similarity(&self, item1: &[f64], item2: &[f64]) -> RerankerResult { + fn calculate_similarity(&self, item1: &[f32], item2: &[f32]) -> RerankerResult { if item1.len() != item2.len() { return Err(RerankerError::InvalidParameter( "Vector dimensions must match".to_string(), @@ -105,9 +111,13 @@ impl MMRReranker { let distance = match self.distance_method { DistanceMethod::Cosine => { // Calculate cosine similarity (1 - cosine distance) - let dot_product: f64 = item1.iter().zip(item2.iter()).map(|(a, b)| a * b).sum(); - let norm1: f64 = item1.iter().map(|x| x * x).sum::().sqrt(); - let norm2: f64 = item2.iter().map(|x| x * x).sum::().sqrt(); + let dot_product = item1 + .iter() + .zip(item2.iter()) + .map(|(a, b)| a * b) + .sum::(); + let norm1 = item1.iter().map(|x| x * x).sum::().sqrt(); + let norm2 = item2.iter().map(|x| x * x).sum::().sqrt(); if norm1 == 0.0 || norm2 == 0.0 { 0.0 @@ -117,11 +127,11 @@ impl MMRReranker { } DistanceMethod::Euclidean => { // Convert Euclidean distance to similarity (using negative exponential) - let dist_sq: f64 = item1 + let dist_sq = item1 .iter() .zip(item2.iter()) .map(|(a, b)| (a - b).powi(2)) - .sum(); + .sum::(); (-dist_sq.sqrt()).exp() } DistanceMethod::DotProduct => { @@ -134,7 +144,10 @@ impl MMRReranker { } /// Perform MMR selection on the given items. 
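The module docs above reduce MMR to one comparison per candidate: lambda * Sim1(candidate, query) - (1 - lambda) * max Sim2(candidate, selected). Before the `mmr_select` hunk below, here is a minimal standalone sketch of a single selection step, assuming plain `Vec<f32>` vectors and cosine similarity; the names `cosine_sim` and `mmr_pick_next` are illustrative and not part of this crate, which works over arena-allocated `TraversalValue`s instead.

```rust
// Cosine similarity between two equal-length vectors.
fn cosine_sim(a: &[f32], b: &[f32]) -> f32 {
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let na = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let nb = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if na == 0.0 || nb == 0.0 { 0.0 } else { dot / (na * nb) }
}

/// One MMR step: pick the candidate maximizing
/// lambda * Sim1(d, query) - (1 - lambda) * max_s Sim2(d, s).
fn mmr_pick_next(
    candidates: &[Vec<f32>],
    selected: &[Vec<f32>],
    query: &[f32],
    lambda: f32,
) -> Option<usize> {
    let mut best: Option<(usize, f32)> = None;
    for (i, cand) in candidates.iter().enumerate() {
        let relevance = cosine_sim(cand, query);
        // Diversity term: highest similarity to anything already selected.
        let redundancy = selected
            .iter()
            .map(|s| cosine_sim(cand, s))
            .fold(0.0_f32, f32::max);
        let mmr = lambda * relevance - (1.0 - lambda) * redundancy;
        if best.map_or(true, |(_, b)| mmr > b) {
            best = Some((i, mmr));
        }
    }
    best.map(|(i, _)| i)
}

fn main() {
    let query = vec![1.0, 0.0];
    let candidates = vec![vec![0.9, 0.1], vec![0.95, 0.05], vec![0.0, 1.0]];
    let selected = vec![vec![1.0, 0.05]];
    // With lambda = 0.7, candidates that nearly duplicate the already-selected
    // vector are penalized, so a more diverse candidate can win.
    println!("{:?}", mmr_pick_next(&candidates, &selected, &query, 0.7));
}
```

Repeating this step, removing the winner from `candidates` each time, reproduces the greedy loop that `mmr_select` implements below with an added pairwise-similarity cache.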
- fn mmr_select<'arena>(&self, items: Vec>) -> RerankerResult>> { + fn mmr_select<'arena>( + &self, + items: Vec>, + ) -> RerankerResult>> { // Create a temporary arena for vector conversions let arena = bumpalo::Bump::new(); if items.is_empty() { @@ -143,7 +156,7 @@ impl MMRReranker { let n = items.len(); let mut selected: Vec> = Vec::with_capacity(n); - let mut remaining: Vec<(TraversalValue<'arena>, f64)> = Vec::with_capacity(n); + let mut remaining: Vec<(TraversalValue<'arena>, f32)> = Vec::with_capacity(n); // Extract original scores and prepare remaining items for item in items { @@ -152,7 +165,7 @@ impl MMRReranker { } // Cache for similarity calculations - let mut similarity_cache: HashMap<(usize, usize), f64> = HashMap::new(); + let mut similarity_cache: HashMap<(usize, usize), f32> = HashMap::new(); // Select first item (highest original score) remaining.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); @@ -162,20 +175,20 @@ impl MMRReranker { // Iteratively select remaining items while !remaining.is_empty() { let mut best_idx = 0; - let mut best_mmr_score = f64::NEG_INFINITY; + let mut best_mmr_score = f32::NEG_INFINITY; for (idx, (item, relevance_score)) in remaining.iter().enumerate() { let item_vec = self.extract_vector_data(item, &arena)?; // Calculate relevance term let relevance = if let Some(query) = &self.query_vector { - self.calculate_similarity(item_vec, query)? + self.calculate_similarity(&item_vec, query)? } else { *relevance_score // Use original score as relevance }; // Calculate diversity term (max similarity to selected items) - let mut max_similarity: f64 = 0.0; + let mut max_similarity: f32 = 0.0; for (sel_idx, selected_item) in selected.iter().enumerate() { // Check cache first let cache_key = (idx, sel_idx); @@ -183,7 +196,7 @@ impl MMRReranker { cached } else { let sel_vec = self.extract_vector_data(selected_item, &arena)?; - let sim = self.calculate_similarity(item_vec, sel_vec)?; + let sim = self.calculate_similarity(&item_vec, &sel_vec)?; similarity_cache.insert(cache_key, sim); sim }; @@ -211,7 +224,11 @@ impl MMRReranker { } impl Reranker for MMRReranker { - fn rerank<'arena, I>(&self, items: I, _query: Option<&str>) -> RerankerResult>> + fn rerank<'arena, I>( + &self, + items: I, + _query: Option<&str>, + ) -> RerankerResult>> where I: Iterator>, { @@ -227,12 +244,13 @@ impl Reranker for MMRReranker { #[cfg(test)] mod tests { use super::*; - use crate::helix_engine::vector_core::vector::HVector; + use crate::helix_engine::vector_core::HVector; use bumpalo::Bump; - fn alloc_vector<'a>(arena: &'a Bump, data: &[f64]) -> HVector<'a> { - let slice = arena.alloc_slice_copy(data); - HVector::from_slice("test_vector", 0, slice) + fn alloc_vector<'a>(arena: &'a Bump, data: &[f32]) -> HVector<'a> { + let mut bump_vec = bumpalo::collections::Vec::new_in(arena); + bump_vec.extend_from_slice(data); + HVector::from_vec("test_vector", bump_vec) } #[test] @@ -339,9 +357,7 @@ mod tests { fn test_mmr_with_query_vector() { let arena = Bump::new(); let query = vec![1.0, 0.0, 0.0]; - let mmr = MMRReranker::new(0.7) - .unwrap() - .with_query_vector(query); + let mmr = MMRReranker::new(0.7).unwrap().with_query_vector(query); let vectors: Vec = vec![ { @@ -640,9 +656,9 @@ mod tests { // Create 100 vectors let vectors: Vec = (0..100) .map(|i| { - let angle = (i as f64) * 0.1; + let angle = (i as f32) * 0.1; let mut v = alloc_vector(&arena, &[angle.cos(), angle.sin()]); - v.distance = Some(1.0 - i as f64 / 100.0); + v.distance = Some(1.0 - i as f32 / 
100.0); v.id = i as u128; TraversalValue::Vector(v) }) @@ -683,10 +699,10 @@ mod tests { // Verify vector data is preserved if let TraversalValue::Vector(v) = &results[0] { - assert_eq!(v.data, &[1.5, 2.5, 3.5]); + assert_eq!(v.data_borrowed(), &[1.5, 2.5, 3.5]); } if let TraversalValue::Vector(v) = &results[1] { - assert_eq!(v.data, &[4.5, 5.5, 6.5]); + assert_eq!(v.data_borrowed(), &[4.5, 5.5, 6.5]); } } @@ -697,8 +713,8 @@ mod tests { let vectors: Vec = (0..3) .map(|i| { - let mut v = alloc_vector(&arena, &[1.0 * i as f64, 0.0]); - v.distance = Some(1.0 - i as f64 * 0.1); + let mut v = alloc_vector(&arena, &[1.0 * i as f32, 0.0]); + v.distance = Some(1.0 - i as f32 * 0.1); v.id = i as u128; TraversalValue::Vector(v) }) @@ -719,9 +735,7 @@ mod tests { fn test_mmr_with_query_vector_relevance() { let arena = Bump::new(); let query = vec![1.0, 0.0]; - let mmr = MMRReranker::new(0.9) - .unwrap() - .with_query_vector(query); + let mmr = MMRReranker::new(0.9).unwrap().with_query_vector(query); let vectors: Vec = vec![ { @@ -762,9 +776,9 @@ mod tests { let vectors: Vec = (0..5) .map(|i| { - let data: Vec = (0..100).map(|j| if j == i { 1.0 } else { 0.0 }).collect(); + let data: Vec = (0..100).map(|j| if j == i { 1.0 } else { 0.0 }).collect(); let mut v = alloc_vector(&arena, &data); - v.distance = Some(1.0 - i as f64 * 0.1); + v.distance = Some(1.0 - i as f32 * 0.1); v.id = i as u128; TraversalValue::Vector(v) }) diff --git a/helix-db/src/helix_engine/reranker/fusion/mod.rs b/helix-db/src/helix_engine/reranker/fusion/mod.rs index 7e291f28..2d379ae4 100644 --- a/helix-db/src/helix_engine/reranker/fusion/mod.rs +++ b/helix-db/src/helix_engine/reranker/fusion/mod.rs @@ -9,4 +9,4 @@ pub mod score_normalizer; pub use mmr::{DistanceMethod, MMRReranker}; pub use rrf::RRFReranker; -pub use score_normalizer::{normalize_scores, NormalizationMethod}; +pub use score_normalizer::{NormalizationMethod, normalize_scores}; diff --git a/helix-db/src/helix_engine/reranker/fusion/rrf.rs b/helix-db/src/helix_engine/reranker/fusion/rrf.rs index 41932958..750d59c5 100644 --- a/helix-db/src/helix_engine/reranker/fusion/rrf.rs +++ b/helix-db/src/helix_engine/reranker/fusion/rrf.rs @@ -7,14 +7,12 @@ //! Formula: RRF_score(d) = Ξ£ 1/(k + rank_i(d)) //! where k is typically 60 (default). -use crate::{ - helix_engine::{ - reranker::{ - errors::{RerankerError, RerankerResult}, - reranker::{update_score, Reranker}, - }, - traversal_core::traversal_value::TraversalValue, +use crate::helix_engine::{ + reranker::{ + errors::{RerankerError, RerankerResult}, + reranker::{Reranker, update_score}, }, + traversal_core::traversal_value::TraversalValue, }; use std::collections::HashMap; @@ -25,7 +23,7 @@ use std::collections::HashMap; #[derive(Debug, Clone)] pub struct RRFReranker { /// The k parameter in the RRF formula (default: 60) - k: f64, + k: f32, } impl RRFReranker { @@ -38,7 +36,7 @@ impl RRFReranker { /// /// # Arguments /// * `k` - The k parameter in the RRF formula. Higher values give less weight to ranking position. 
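The doc comment above gives the fusion rule directly: RRF_score(d) = Σ 1/(k + rank_i(d)), with k defaulting to 60. A minimal sketch over plain id lists, assuming the same 0-based-rank form `1.0 / (k + rank + 1.0)` that the hunks below use (`rrf_fuse` is an illustrative name, not the crate's API):

```rust
use std::collections::HashMap;

/// Fuse several ranked lists of document ids with Reciprocal Rank Fusion:
/// score(d) = sum over lists of 1 / (k + rank + 1), with 0-based ranks.
fn rrf_fuse(lists: &[Vec<u128>], k: f32) -> Vec<(u128, f32)> {
    let mut scores: HashMap<u128, f32> = HashMap::new();
    for list in lists {
        for (rank, id) in list.iter().enumerate() {
            *scores.entry(*id).or_insert(0.0) += 1.0 / (k + rank as f32 + 1.0);
        }
    }
    // Sort descending by fused score.
    let mut out: Vec<(u128, f32)> = scores.into_iter().collect();
    out.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
    out
}

fn main() {
    // Doc 2 appears near the top of both lists, so it outranks
    // doc 1, which is first in only the BM25 list.
    let bm25_ranked = vec![1u128, 2, 3];
    let vector_ranked = vec![2u128, 4, 1];
    for (id, score) in rrf_fuse(&[bm25_ranked, vector_ranked], 60.0) {
        println!("doc {id}: {score:.5}");
    }
}
```

With k = 60 the per-list contributions are deliberately flat, so appearing in several lists outweighs topping a single one; that is the behavior the `fuse_lists` tests below assert.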
-    pub fn with_k(k: f64) -> RerankerResult<Self> {
+    pub fn with_k(k: f32) -> RerankerResult<Self> {
         if k <= 0.0 {
             return Err(RerankerError::InvalidParameter(
                 "k must be positive".to_string(),
@@ -55,7 +53,10 @@
     ///
     /// # Returns
     /// A vector of items reranked by RRF scores
-    pub fn fuse_lists<'arena, I>(lists: Vec<I>, k: f64) -> RerankerResult<Vec<TraversalValue<'arena>>>
+    pub fn fuse_lists<'arena, I>(
+        lists: Vec<I>,
+        k: f32,
+    ) -> RerankerResult<Vec<TraversalValue<'arena>>>
     where
         I: Iterator<Item = TraversalValue<'arena>>,
     {
@@ -63,7 +64,7 @@
             return Err(RerankerError::EmptyInput);
         }

-        let mut rrf_scores: HashMap<u128, f64> = HashMap::new();
+        let mut rrf_scores: HashMap<u128, f32> = HashMap::new();
         let mut items_map: HashMap<u128, TraversalValue<'arena>> = HashMap::new();

         // Process each ranked list
@@ -78,7 +79,7 @@
                 // Calculate reciprocal rank: 1 / (k + rank)
                 // rank starts at 0, so actual rank is rank + 1
-                let rr_score = 1.0 / (k + (rank as f64) + 1.0);
+                let rr_score = 1.0 / (k + (rank as f32) + 1.0);

                 // Sum reciprocal ranks across all lists
                 *rrf_scores.entry(id).or_insert(0.0) += rr_score;
@@ -89,7 +90,7 @@
         }

         // Convert to scored items and sort by RRF score (descending)
-        let mut scored_items: Vec<(u128, f64)> = rrf_scores.into_iter().collect();
+        let mut scored_items: Vec<(u128, f32)> = rrf_scores.into_iter().collect();
         scored_items.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));

         // Update scores and collect results
@@ -112,7 +113,11 @@
 }

 impl Reranker for RRFReranker {
-    fn rerank<'arena, I>(&self, items: I, _query: Option<&str>) -> RerankerResult<Vec<TraversalValue<'arena>>>
+    fn rerank<'arena, I>(
+        &self,
+        items: I,
+        _query: Option<&str>,
+    ) -> RerankerResult<Vec<TraversalValue<'arena>>>
     where
         I: Iterator<Item = TraversalValue<'arena>>,
     {
@@ -127,7 +132,7 @@
         for (rank, mut item) in items_vec.into_iter().enumerate() {
             // Calculate RRF score for this item based on its rank
-            let rrf_score = 1.0 / (self.k + (rank as f64) + 1.0);
+            let rrf_score = 1.0 / (self.k + (rank as f32) + 1.0);
             update_score(&mut item, rrf_score)?;
             results.push(item);
         }
@@ -143,15 +148,13 @@
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::{
-        helix_engine::vector_core::vector::HVector,
-        utils::items::Node,
-    };
+    use crate::{helix_engine::vector_core::HVector, utils::items::Node};
     use bumpalo::Bump;

-    fn alloc_vector<'a>(arena: &'a Bump, data: &[f64]) -> HVector<'a> {
-        let slice = arena.alloc_slice_copy(data);
-        HVector::from_slice("test_vector", 0, slice)
+    fn alloc_vector<'a>(arena: &'a Bump, data: &[f32]) -> HVector<'a> {
+        let mut bump_vec = bumpalo::collections::Vec::new_in(arena);
+        bump_vec.extend_from_slice(data);
+        HVector::from_vec("test_vector", bump_vec)
     }

     #[test]
@@ -162,7 +165,7 @@
         let vectors: Vec<TraversalValue> = (0..5)
             .map(|i| {
                 let mut v = alloc_vector(&arena, &[1.0, 2.0, 3.0]);
-                v.distance = Some((i + 1) as f64);
+                v.distance = Some((i + 1) as f32);
                 v.id = i as u128;
                 TraversalValue::Vector(v)
             })
@@ -175,7 +178,7 @@
         // Check that RRF scores are calculated correctly
         for (rank, item) in results.iter().enumerate() {
             if let TraversalValue::Vector(v) = item {
-                let expected_score = 1.0 / (60.0 + (rank as f64) + 1.0);
+                let expected_score = 1.0 / (60.0 + (rank as f32) + 1.0);
                 assert!((v.distance.unwrap() - expected_score).abs() < 1e-10);
             }
         }
@@ -242,11 +245,8 @@
             },
         ];

-        let results = RRFReranker::fuse_lists(
-            vec![list1.into_iter(), list2.into_iter()],
-            60.0,
-        )
-        .unwrap();
+        let results =
+            RRFReranker::fuse_lists(vec![list1.into_iter(), list2.into_iter()], 60.0).unwrap();

         // Items 1 and 2
appear in both lists, so should have higher scores assert_eq!(results.len(), 4); @@ -280,7 +280,8 @@ mod tests { #[test] fn test_rrf_fuse_empty_lists() { - let result = RRFReranker::fuse_lists(Vec::>::new(), 60.0); + let result = + RRFReranker::fuse_lists(Vec::>::new(), 60.0); assert!(result.is_err()); } @@ -400,17 +401,15 @@ mod tests { }, ]; - let results = RRFReranker::fuse_lists( - vec![list1.into_iter(), list2.into_iter()], - 60.0, - ) - .unwrap(); + let results = + RRFReranker::fuse_lists(vec![list1.into_iter(), list2.into_iter()], 60.0).unwrap(); // All items should be present with equal RRF scores for same ranks assert_eq!(results.len(), 4); // Items at rank 0 in their respective lists should have same score - if let (TraversalValue::Vector(v1), TraversalValue::Vector(v2)) = (&results[0], &results[1]) { + if let (TraversalValue::Vector(v1), TraversalValue::Vector(v2)) = (&results[0], &results[1]) + { let score1 = v1.distance.unwrap(); let score2 = v2.distance.unwrap(); assert!((score1 - score2).abs() < 1e-10); @@ -542,11 +541,8 @@ mod tests { }) .collect(); - let results = RRFReranker::fuse_lists( - vec![list1.into_iter(), list2.into_iter()], - 60.0, - ) - .unwrap(); + let results = + RRFReranker::fuse_lists(vec![list1.into_iter(), list2.into_iter()], 60.0).unwrap(); // Items 5, 6, 7 appear in both lists, should rank higher assert_eq!(results.len(), 10); @@ -573,7 +569,9 @@ mod tests { // Scores should be monotonically decreasing for i in 0..results.len() - 1 { - if let (TraversalValue::Vector(v1), TraversalValue::Vector(v2)) = (&results[i], &results[i + 1]) { + if let (TraversalValue::Vector(v1), TraversalValue::Vector(v2)) = + (&results[i], &results[i + 1]) + { assert!(v1.distance.unwrap() >= v2.distance.unwrap()); } } @@ -592,7 +590,7 @@ mod tests { let vectors: Vec = (0..3) .map(|i| { - let mut v = alloc_vector(&arena, &[1.0 * i as f64, 2.0 * i as f64]); + let mut v = alloc_vector(&arena, &[1.0 * i as f32, 2.0 * i as f32]); v.id = i as u128; TraversalValue::Vector(v) }) @@ -602,10 +600,10 @@ mod tests { // Verify vector data is preserved if let TraversalValue::Vector(v) = &results[0] { - assert_eq!(v.data, &[0.0, 0.0]); + assert_eq!(v.data_borrowed(), &[0.0, 0.0]); } if let TraversalValue::Vector(v) = &results[1] { - assert_eq!(v.data, &[1.0, 2.0]); + assert_eq!(v.data_borrowed(), &[1.0, 2.0]); } } } diff --git a/helix-db/src/helix_engine/reranker/fusion/score_normalizer.rs b/helix-db/src/helix_engine/reranker/fusion/score_normalizer.rs index 4c3e60cb..6dbae3da 100644 --- a/helix-db/src/helix_engine/reranker/fusion/score_normalizer.rs +++ b/helix-db/src/helix_engine/reranker/fusion/score_normalizer.rs @@ -39,12 +39,8 @@ pub fn normalize_scores(scores: &[f64], method: NormalizationMethod) -> Reranker /// Min-Max normalization: scales scores to [0, 1] range. fn normalize_minmax(scores: &[f64]) -> RerankerResult> { - let min = scores - .iter() - .fold(f64::INFINITY, |a, &b| a.min(b)); - let max = scores - .iter() - .fold(f64::NEG_INFINITY, |a, &b| a.max(b)); + let min = scores.iter().fold(f64::INFINITY, |a, &b| a.min(b)); + let max = scores.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b)); let range = max - min; @@ -53,10 +49,7 @@ fn normalize_minmax(scores: &[f64]) -> RerankerResult> { return Ok(vec![0.5; scores.len()]); } - Ok(scores - .iter() - .map(|&score| (score - min) / range) - .collect()) + Ok(scores.iter().map(|&score| (score - min) / range).collect()) } /// Z-score normalization: centers scores around mean with unit variance. 
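Both normalizers described here are small enough to sketch in full. The min-max branch mirrors `normalize_minmax` above, including the degenerate case where every score is identical and the function returns 0.5 across the board; the zero-variance fallback in the z-score variant is an assumption for the sketch, not behavior taken from the crate:

```rust
/// Min-max normalization: scale scores into [0, 1].
/// A constant input has no range, so every score maps to 0.5.
fn minmax(scores: &[f64]) -> Vec<f64> {
    let min = scores.iter().fold(f64::INFINITY, |a, &b| a.min(b));
    let max = scores.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
    let range = max - min;
    if range == 0.0 {
        return vec![0.5; scores.len()];
    }
    scores.iter().map(|&s| (s - min) / range).collect()
}

/// Z-score normalization: subtract the mean, divide by the standard deviation.
fn zscore(scores: &[f64]) -> Vec<f64> {
    let n = scores.len() as f64;
    let mean = scores.iter().sum::<f64>() / n;
    let var = scores.iter().map(|s| (s - mean).powi(2)).sum::<f64>() / n;
    let std = var.sqrt();
    if std == 0.0 {
        // Assumed fallback: constant scores center at zero.
        return vec![0.0; scores.len()];
    }
    scores.iter().map(|&s| (s - mean) / std).collect()
}

fn main() {
    let scores = [3.0, 7.0, 5.0];
    println!("{:?}", minmax(&scores)); // [0.0, 1.0, 0.5]
    println!("{:?}", zscore(&scores));
}
```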
diff --git a/helix-db/src/helix_engine/reranker/models/cross_encoder.rs b/helix-db/src/helix_engine/reranker/models/cross_encoder.rs index f39698a9..bc78b95f 100644 --- a/helix-db/src/helix_engine/reranker/models/cross_encoder.rs +++ b/helix-db/src/helix_engine/reranker/models/cross_encoder.rs @@ -88,7 +88,6 @@ impl CrossEncoderReranker { TraversalValue::Node(n) => n.properties, TraversalValue::Edge(e) => e.properties, TraversalValue::Vector(v) => v.properties, - TraversalValue::VectorNodeWithoutVectorData(v) => v.properties, TraversalValue::NodeWithScore { node, .. } => node.properties, _ => None, }; @@ -125,13 +124,17 @@ impl CrossEncoderReranker { /// /// This is a placeholder for actual model inference. /// TODO: Implement actual model loading and inference. - fn score_pair(&self, _query: &str, _document: &str) -> RerankerResult { + fn score_pair(&self, _query: &str, _document: &str) -> RerankerResult { todo!(); } } impl Reranker for CrossEncoderReranker { - fn rerank<'arena, I>(&self, items: I, query: Option<&str>) -> RerankerResult>> + fn rerank<'arena, I>( + &self, + items: I, + query: Option<&str>, + ) -> RerankerResult>> where I: Iterator>, { @@ -169,12 +172,13 @@ impl Reranker for CrossEncoderReranker { #[cfg(test)] mod tests { use super::*; - use crate::helix_engine::vector_core::vector::HVector; + use crate::helix_engine::vector_core::HVector; use bumpalo::Bump; - fn alloc_vector<'a>(arena: &'a Bump, data: &[f64]) -> HVector<'a> { - let slice = arena.alloc_slice_copy(data); - HVector::from_slice("test_vector", 0, slice) + fn alloc_vector<'a>(arena: &'a Bump, data: &[f32]) -> HVector<'a> { + let mut bump_vec = bumpalo::collections::Vec::new_in(arena); + bump_vec.extend_from_slice(data); + HVector::from_vec("test_vector", bump_vec) } #[ignore] @@ -213,7 +217,7 @@ mod tests { let result = reranker.extract_text(&item); assert!(result.is_err()); } - + #[ignore] #[test] fn test_rerank_without_query() { @@ -221,7 +225,8 @@ mod tests { let config = CrossEncoderConfig::new("test-model"); let reranker = CrossEncoderReranker::new(config); - let vectors: Vec = vec![TraversalValue::Vector(alloc_vector(&arena, &[1.0]))]; + let vectors: Vec = + vec![TraversalValue::Vector(alloc_vector(&arena, &[1.0]))]; let result = reranker.rerank(vectors.into_iter(), None); assert!(result.is_err()); diff --git a/helix-db/src/helix_engine/reranker/reranker.rs b/helix-db/src/helix_engine/reranker/reranker.rs index 2642c6f6..eb8f6024 100644 --- a/helix-db/src/helix_engine/reranker/reranker.rs +++ b/helix-db/src/helix_engine/reranker/reranker.rs @@ -3,11 +3,9 @@ //! Core Reranker trait and related types. -use crate::{ - helix_engine::{ - reranker::errors::{RerankerError, RerankerResult}, - traversal_core::traversal_value::TraversalValue, - }, +use crate::helix_engine::{ + reranker::errors::{RerankerError, RerankerResult}, + traversal_core::traversal_value::TraversalValue, }; /// Represents a scored item for reranking. @@ -41,7 +39,11 @@ pub trait Reranker: Send + Sync { /// /// # Returns /// A vector of reranked items with updated scores - fn rerank<'arena, I>(&self, items: I, query: Option<&str>) -> RerankerResult>> + fn rerank<'arena, I>( + &self, + items: I, + query: Option<&str>, + ) -> RerankerResult>> where I: Iterator>; @@ -53,7 +55,7 @@ pub trait Reranker: Send + Sync { /// /// This handles the different types (Node, Edge, Vector) and extracts /// their associated score/distance value. 
-pub fn extract_score(item: &TraversalValue) -> RerankerResult { +pub fn extract_score(item: &TraversalValue) -> RerankerResult { match item { TraversalValue::Vector(v) => Ok(v.score()), TraversalValue::NodeWithScore { score, .. } => Ok(*score), @@ -69,7 +71,7 @@ pub fn extract_score(item: &TraversalValue) -> RerankerResult { /// /// This modifies the distance/score field of the item to reflect /// the new reranked score. -pub fn update_score(item: &mut TraversalValue, new_score: f64) -> RerankerResult<()> { +pub fn update_score(item: &mut TraversalValue, new_score: f32) -> RerankerResult<()> { match item { TraversalValue::Vector(v) => { v.distance = Some(new_score); diff --git a/helix-db/src/helix_engine/storage_core/graph_visualization.rs b/helix-db/src/helix_engine/storage_core/graph_visualization.rs index 510d7f6d..7c9a0924 100644 --- a/helix-db/src/helix_engine/storage_core/graph_visualization.rs +++ b/helix-db/src/helix_engine/storage_core/graph_visualization.rs @@ -3,8 +3,8 @@ use crate::{ helix_engine::{storage_core::HelixGraphStorage, types::GraphError}, utils::items::Node, }; -use heed3::{types::*, RoIter, RoTxn}; -use sonic_rs::{json, JsonValueMutTrait, Value as JsonValue}; +use heed3::{RoIter, RoTxn, types::*}; +use sonic_rs::{JsonValueMutTrait, Value as JsonValue, json}; use std::{ cmp::Ordering, collections::{BinaryHeap, HashMap}, @@ -40,9 +40,7 @@ impl GraphVisualization for HelixGraphStorage { } if self.nodes_db.is_empty(txn)? || self.edges_db.is_empty(txn)? { - return Err(GraphError::New( - "edges or nodes db is empty!".to_string(), - )); + return Err(GraphError::New("edges or nodes db is empty!".to_string())); } let top_nodes = self.get_nodes_by_cardinality(txn, k)?; @@ -55,7 +53,7 @@ impl GraphVisualization for HelixGraphStorage { let result = json!({ "num_nodes": self.nodes_db.len(txn).unwrap_or(0), "num_edges": self.edges_db.len(txn).unwrap_or(0), - "num_vectors": self.vectors.vectors_db.len(txn).unwrap_or(0), + "num_vectors": self.vectors.num_inserted_vectors(), }); debug_println!("db stats json: {:?}", result); @@ -133,11 +131,7 @@ impl HelixGraphStorage { BinaryHeap::with_capacity(node_count as usize); // out edges - iterate through nodes by getting each unique node ID from out_edges_db - let out_node_key_iter = out_db - .out_edges_db - .lazily_decode_data() - .iter(txn) - .unwrap(); + let out_node_key_iter = out_db.out_edges_db.lazily_decode_data().iter(txn).unwrap(); for data in out_node_key_iter { match data { Ok((key, _)) => { @@ -260,18 +254,17 @@ impl HelixGraphStorage { if let Some(node_data) = self.nodes_db.get(txn, id)? { let node = Node::from_bincode_bytes(*id, node_data, &arena)?; if let Some(props) = node.properties - && let Some(prop_value) = props.get(prop) { - json_node - .as_object_mut() - .ok_or_else(|| { - GraphError::New("invalid JSON object".to_string()) - })? - .insert( - "label", - sonic_rs::to_value(&prop_value.inner_stringify()) - .unwrap_or_else(|_| sonic_rs::Value::from("")), - ); - } + && let Some(prop_value) = props.get(prop) + { + json_node + .as_object_mut() + .ok_or_else(|| GraphError::New("invalid JSON object".to_string()))? 
+ .insert( + "label", + sonic_rs::to_value(&prop_value.inner_stringify()) + .unwrap_or_else(|_| sonic_rs::Value::from("")), + ); + } } } diff --git a/helix-db/src/helix_engine/storage_core/mod.rs b/helix-db/src/helix_engine/storage_core/mod.rs index 7666fd40..6096dd00 100644 --- a/helix-db/src/helix_engine/storage_core/mod.rs +++ b/helix-db/src/helix_engine/storage_core/mod.rs @@ -1,11 +1,8 @@ pub mod graph_visualization; pub mod metadata; pub mod storage_methods; -pub mod storage_migration; pub mod version_info; -#[cfg(test)] -mod storage_migration_tests; #[cfg(test)] mod storage_concurrent_tests; @@ -18,10 +15,7 @@ use crate::{ }, traversal_core::config::Config, types::GraphError, - vector_core::{ - hnsw::HNSW, - vector_core::{HNSWConfig, VectorCore}, - }, + vector_core::{HNSWConfig, VectorCore}, }, utils::{ items::{Edge, Node}, @@ -156,7 +150,9 @@ impl HelixGraphStorage { } } let vector_config = config.get_vector_config(); - let vectors = VectorCore::new( + + // Initialize vector core with automatic migration support + let vectors = VectorCore::new_with_migration( &graph_env, &mut wtxn, HNSWConfig::new( @@ -179,7 +175,7 @@ impl HelixGraphStorage { wtxn.commit()?; - let mut storage = Self { + let storage = Self { graph_env, nodes_db, edges_db, @@ -193,8 +189,6 @@ impl HelixGraphStorage { version_info, }; - storage_migration::migrate(&mut storage)?; - Ok(storage) } @@ -470,7 +464,6 @@ impl StorageMethods for HelixGraphStorage { } fn drop_vector(&self, txn: &mut RwTxn, id: &u128) -> Result<(), GraphError> { - let arena = bumpalo::Bump::new(); let mut edges = HashSet::new(); let mut out_edges = HashSet::new(); let mut in_edges = HashSet::new(); @@ -507,9 +500,6 @@ impl StorageMethods for HelixGraphStorage { other_out_edges.push((from_node_id, label, edge_id)); } - // println!("In edges: {}", in_edges.len()); - - // println!("Deleting edges: {}", ); // Delete all related data for edge in edges { self.edges_db.delete(txn, Self::edge_key(&edge))?; @@ -539,8 +529,89 @@ impl StorageMethods for HelixGraphStorage { } // Delete vector data - self.vectors.delete(txn, *id, &arena)?; + self.vectors.delete(txn, *id)?; Ok(()) } } + +#[cfg(test)] +mod startup_migration_tests { + use super::*; + use crate::helix_engine::storage_core::version_info::VersionInfo; + use crate::helix_engine::traversal_core::config::Config; + use tempfile::tempdir; + + #[test] + fn test_startup_with_migration() { + let temp_dir = tempdir().unwrap(); + let db_path = temp_dir.path().to_str().unwrap(); + + // Create a basic config + let config = Config::default(); + let version_info = VersionInfo::default(); + + // First startup - should work without migration + let storage = HelixGraphStorage::new(db_path, config.clone(), version_info.clone()); + assert!(storage.is_ok()); + + let storage = storage.unwrap(); + assert_eq!(storage.vectors.num_inserted_vectors(), 0); + } + + #[test] + fn test_startup_with_old_vector_database() { + let temp_dir = tempdir().unwrap(); + let db_path = temp_dir.path().to_str().unwrap(); + + // Create database directory + std::fs::create_dir_all(db_path).unwrap(); + + // Create environment and old database structure + let env = unsafe { + EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) + .max_dbs(200) + .open(Path::new(db_path)) + .unwrap() + }; + + let mut wtxn = env.write_txn().unwrap(); + + // Create old vector database structure (empty but with correct names) + let _old_vectors = env + .database_options() + .types::<Bytes, Bytes>() + .name("vectors") + .create(&mut wtxn) + .unwrap(); + + let _old_data = env
.database_options() + .types::<U128<BE>, Bytes>() + .name("vector_data") + .create(&mut wtxn) + .unwrap(); + + let _old_edges = env + .database_options() + .types::<Bytes, Bytes>() + .name("hnsw_out_nodes") + .create(&mut wtxn) + .unwrap(); + + wtxn.commit().unwrap(); + drop(env); // Close environment + + // Now try to create HelixGraphStorage - should detect and migrate + let config = Config::default(); + let version_info = VersionInfo::default(); + + let storage = HelixGraphStorage::new(db_path, config, version_info); + assert!(storage.is_ok()); + + let storage = storage.unwrap(); + // After migration, vector count should be 0 (empty old database) + assert_eq!(storage.vectors.num_inserted_vectors(), 0); + } +} diff --git a/helix-db/src/helix_engine/storage_core/storage_concurrent_tests.rs b/helix-db/src/helix_engine/storage_core/storage_concurrent_tests.rs index 3035028f..fb4ea50a 100644 --- a/helix-db/src/helix_engine/storage_core/storage_concurrent_tests.rs +++ b/helix-db/src/helix_engine/storage_core/storage_concurrent_tests.rs @@ -11,15 +11,14 @@ /// - Drop operations are multi-step (not atomic) - could leave orphans /// - LMDB provides single-writer guarantee but needs validation /// - MVCC snapshot isolation needs verification - use std::sync::{Arc, Barrier}; use std::thread; use tempfile::TempDir; use crate::helix_engine::storage_core::HelixGraphStorage; -use crate::helix_engine::traversal_core::config::Config; use crate::helix_engine::storage_core::version_info::VersionInfo; -use crate::utils::items::{Node, Edge}; +use crate::helix_engine::traversal_core::config::Config; +use crate::utils::items::{Edge, Node}; use bumpalo::Bump; use uuid::Uuid; @@ -70,7 +69,10 @@ fn test_concurrent_node_creation() { properties: None, }; - storage.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); wtxn.commit().unwrap(); } }) @@ -114,7 +116,10 @@ fn test_concurrent_edge_creation() { version: 1, properties: None, }; - storage.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); } wtxn.commit().unwrap(); } @@ -122,7 +127,8 @@ // Get node IDs let node_ids: Vec<u128> = { let rtxn = storage.graph_env.read_txn().unwrap(); - storage.nodes_db + storage + .nodes_db .iter(&rtxn) .unwrap() .map(|result| { @@ -164,7 +170,10 @@ fn test_concurrent_edge_creation() { properties: None, }; - storage.edges_db.put(&mut wtxn, &edge.id, &edge.to_bincode_bytes().unwrap()).unwrap(); + storage + .edges_db + .put(&mut wtxn, &edge.id, &edge.to_bincode_bytes().unwrap()) + .unwrap(); wtxn.commit().unwrap(); } }) @@ -209,7 +218,10 @@ version: 1, properties: None, }; - storage.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); } wtxn.commit().unwrap(); } @@ -269,7 +281,10 @@ version: 1, properties: None, }; - storage.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); wtxn.commit().unwrap(); thread::sleep(std::time::Duration::from_millis(2)); @@ -314,7 +329,10 @@ fn test_transaction_isolation_storage() { version: 1, properties: None, }; - 
storage.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); } wtxn.commit().unwrap(); } @@ -338,7 +356,10 @@ fn test_transaction_isolation_storage() { version: 1, properties: None, }; - storage_clone.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage_clone + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); wtxn.commit().unwrap(); } }); @@ -394,7 +415,10 @@ fn test_write_transaction_serialization() { properties: None, }; - storage.nodes_db.put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()).unwrap(); + storage + .nodes_db + .put(&mut wtxn, &node.id, &node.to_bincode_bytes().unwrap()) + .unwrap(); // Simulate some work during transaction thread::sleep(std::time::Duration::from_micros(100)); diff --git a/helix-db/src/helix_engine/storage_core/storage_methods.rs b/helix-db/src/helix_engine/storage_core/storage_methods.rs index 1d009ca6..5f10e1c7 100644 --- a/helix-db/src/helix_engine/storage_core/storage_methods.rs +++ b/helix-db/src/helix_engine/storage_core/storage_methods.rs @@ -34,12 +34,12 @@ pub trait StorageMethods { fn drop_node(&self, txn: &mut RwTxn, id: &u128) -> Result<(), GraphError>; /// Removes the following from the storage engine: - /// - The given edge + /// - The given edge /// - All incoming and outgoing mappings for that edge fn drop_edge(&self, txn: &mut RwTxn, id: &u128) -> Result<(), GraphError>; /// Sets the `deleted` field of a vector to true - /// - /// NOTE: The vector is not ACTUALLY deleted and is still present in the db. + /// + /// NOTE: The vector is not ACTUALLY deleted and is still present in the db. fn drop_vector(&self, txn: &mut RwTxn, id: &u128) -> Result<(), GraphError>; } diff --git a/helix-db/src/helix_engine/storage_core/storage_migration.rs b/helix-db/src/helix_engine/storage_core/storage_migration.rs deleted file mode 100644 index eb5f3da7..00000000 --- a/helix-db/src/helix_engine/storage_core/storage_migration.rs +++ /dev/null @@ -1,473 +0,0 @@ -use crate::{ - helix_engine::{ - storage_core::HelixGraphStorage, - types::GraphError, - vector_core::{vector::HVector, vector_core}, - }, - protocol::value::Value, - utils::properties::ImmutablePropertiesMap, -}; -use bincode::Options; -use itertools::Itertools; -use std::{collections::HashMap, ops::Bound}; - -use super::metadata::{NATIVE_VECTOR_ENDIANNESS, StorageMetadata, VectorEndianness}; - -pub fn migrate(storage: &mut HelixGraphStorage) -> Result<(), GraphError> { - let mut metadata = { - let txn = storage.graph_env.read_txn()?; - StorageMetadata::read(&txn, &storage.metadata_db)? - }; - - loop { - metadata = match metadata { - StorageMetadata::PreMetadata => { - migrate_pre_metadata_to_native_vector_endianness(storage)? - } - StorageMetadata::VectorNativeEndianness { - vector_endianness: NATIVE_VECTOR_ENDIANNESS, - } => { - // If the vectors are in the native vector endianness, we're done migrating them - break; - } - StorageMetadata::VectorNativeEndianness { - vector_endianness: currently_stored_vector_endianness, - } => convert_vectors_to_native_endianness(currently_stored_vector_endianness, storage)?, - }; - } - - verify_vectors_and_repair(storage)?; - remove_orphaned_vector_edges(storage)?; - - Ok(()) -} - -pub(crate) fn migrate_pre_metadata_to_native_vector_endianness( - storage: &mut HelixGraphStorage, -) -> Result { - // In PreMetadata, all vectors are stored as big endian. 
- // If we are on a big endian machine, all we need to do is store the metadata. - // Otherwise, we need to convert all the vectors and then store the metadata. - - let metadata = StorageMetadata::VectorNativeEndianness { - vector_endianness: NATIVE_VECTOR_ENDIANNESS, - }; - - #[cfg(target_endian = "little")] - { - // On little-endian machines, we need to convert from big-endian to little-endian - convert_all_vectors(VectorEndianness::BigEndian, storage)?; - } - - convert_all_vector_properties(storage)?; - - // Save the metadata - let mut txn = storage.graph_env.write_txn()?; - metadata.save(&mut txn, &storage.metadata_db)?; - txn.commit()?; - - Ok(metadata) -} - -pub(crate) fn convert_vectors_to_native_endianness( - currently_stored_vector_endianness: VectorEndianness, - storage: &mut HelixGraphStorage, -) -> Result { - // Convert all vectors from currently_stored_vector_endianness to native endianness - convert_all_vectors(currently_stored_vector_endianness, storage)?; - - let metadata = StorageMetadata::VectorNativeEndianness { - vector_endianness: NATIVE_VECTOR_ENDIANNESS, - }; - - // Save the updated metadata - let mut txn = storage.graph_env.write_txn()?; - metadata.save(&mut txn, &storage.metadata_db)?; - txn.commit()?; - - Ok(metadata) -} - -pub(crate) fn convert_all_vectors( - source_endianness: VectorEndianness, - storage: &mut HelixGraphStorage, -) -> Result<(), GraphError> { - const BATCH_SIZE: usize = 1024; - - let key_arena = bumpalo::Bump::new(); - let batch_bounds = { - let mut keys = vec![]; - - let txn = storage.graph_env.read_txn()?; - - for (i, kv) in storage - .vectors - .vectors_db - .lazily_decode_data() - .iter(&txn)? - .enumerate() - { - let (key, _) = kv?; - - if i % BATCH_SIZE == 0 { - let key: &[u8] = key_arena.alloc_slice_copy(key); - keys.push(key); - } - } - - let mut ranges = vec![]; - for (start, end) in keys.iter().copied().tuple_windows() { - ranges.push((Bound::Included(start), Bound::Excluded(end))); - } - ranges.extend( - keys.last() - .copied() - .map(|last_batch_end| (Bound::Included(last_batch_end), Bound::Unbounded)), - ); - - ranges - }; - - for bounds in batch_bounds { - let arena = bumpalo::Bump::new(); - - let mut txn = storage.graph_env.write_txn()?; - let mut cursor = storage.vectors.vectors_db.range_mut(&mut txn, &bounds)?; - - while let Some((key, value)) = cursor.next().transpose()? { - if key == vector_core::ENTRY_POINT_KEY { - continue; - } - - let value = convert_vector_endianness(value, source_endianness, &arena)?; - - let success = unsafe { cursor.put_current(key, value)? }; - if !success { - return Err(GraphError::New("failed to update value in LMDB".into())); - } - } - drop(cursor); - - txn.commit()?; - } - - Ok(()) -} - -/// Converts a single vector's endianness by reading f64 values in source endianness -/// and writing them in native endianness. Uses arena for allocations. 
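The deleted helper below performs this conversion zero-copy, writing the re-encoded floats straight into a bumpalo arena through raw pointers. As a reference point, the same transformation can be written safely at the cost of one heap allocation; this is a minimal sketch under that assumption (the function name and error type are illustrative, not from this codebase):

```rust
// Safe sketch: re-encode a packed f64 buffer from a known source endianness
// into native endianness, returning a fresh Vec instead of an arena slice.
fn convert_f64_buffer(bytes: &[u8], source_is_big_endian: bool) -> Result<Vec<u8>, String> {
    if bytes.len() % std::mem::size_of::<f64>() != 0 {
        return Err("vector data length is not a multiple of f64 size".into());
    }
    let mut out = Vec::with_capacity(bytes.len());
    for chunk in bytes.chunks_exact(8) {
        let raw: [u8; 8] = chunk.try_into().expect("chunks_exact always yields 8 bytes");
        let value = if source_is_big_endian {
            f64::from_be_bytes(raw)
        } else {
            f64::from_le_bytes(raw)
        };
        // to_ne_bytes re-serializes in native byte order, which is what the
        // HNSW code reads back after migration.
        out.extend_from_slice(&value.to_ne_bytes());
    }
    Ok(out)
}
```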
-pub(crate) fn convert_vector_endianness<'arena>( - bytes: &[u8], - source_endianness: VectorEndianness, - arena: &'arena bumpalo::Bump, -) -> Result<&'arena [u8], GraphError> { - use std::{alloc, mem, ptr, slice}; - - if bytes.is_empty() { - // We use unsafe stuff below so best not to risk allocating a layout of size zero etc - return Ok(&[]); - } - - if !bytes.len().is_multiple_of(mem::size_of::<f64>()) { - return Err(GraphError::New( - "Vector data length is not a multiple of f64 size".to_string(), - )); - } - - let num_floats = bytes.len() / mem::size_of::<f64>(); - - // Allocate space for the converted f64 array in the arena - let layout = alloc::Layout::array::<f64>(num_floats) - .map_err(|_| GraphError::New("Failed to create array layout".to_string()))?; - - let data_ptr: ptr::NonNull<u8> = arena.alloc_layout(layout); - - let converted_floats: &'arena [f64] = unsafe { - let float_ptr: ptr::NonNull<f64> = data_ptr.cast(); - let float_slice = slice::from_raw_parts_mut(float_ptr.as_ptr(), num_floats); - - // Read each f64 in the source endianness and write in native endianness - for (i, float) in float_slice.iter_mut().enumerate() { - let start = i * mem::size_of::<f64>(); - let end = start + mem::size_of::<f64>(); - let float_bytes: [u8; 8] = bytes[start..end] - .try_into() - .map_err(|_| GraphError::New("Failed to extract f64 bytes".to_string()))?; - - let value = match source_endianness { - VectorEndianness::BigEndian => f64::from_be_bytes(float_bytes), - VectorEndianness::LittleEndian => f64::from_le_bytes(float_bytes), - }; - - *float = value; - } - - slice::from_raw_parts(float_ptr.as_ptr(), num_floats) - }; - - // Convert to bytes using bytemuck - let result_bytes: &[u8] = bytemuck::cast_slice(converted_floats); - - Ok(result_bytes) -} - -pub(crate) fn convert_all_vector_properties( - storage: &mut HelixGraphStorage, -) -> Result<(), GraphError> { - const BATCH_SIZE: usize = 1024; - - let batch_bounds = { - let txn = storage.graph_env.read_txn()?; - let mut keys = vec![]; - - for (i, kv) in storage - .vectors - .vector_properties_db - .lazily_decode_data() - .iter(&txn)? - .enumerate() - { - let (key, _) = kv?; - - if i % BATCH_SIZE == 0 { - keys.push(key); - } - } - - let mut ranges = vec![]; - for (start, end) in keys.iter().copied().tuple_windows() { - ranges.push((Bound::Included(start), Bound::Excluded(end))); - } - ranges.extend( - keys.last() - .copied() - .map(|last_batch_end| (Bound::Included(last_batch_end), Bound::Unbounded)), - ); - - ranges - }; - - for bounds in batch_bounds { - let arena = bumpalo::Bump::new(); - - let mut txn = storage.graph_env.write_txn()?; - let mut cursor = storage - .vectors - .vector_properties_db - .range_mut(&mut txn, &bounds)?; - - while let Some((key, value)) = cursor.next().transpose()? { - let value = convert_old_vector_properties_to_new_format(value, &arena)?; - - let success = unsafe { cursor.put_current(&key, &value)?
}; - if !success { - return Err(GraphError::New("failed to update value in LMDB".into())); - } - } - drop(cursor); - - txn.commit()?; - } - - Ok(()) -} - -pub(crate) fn convert_old_vector_properties_to_new_format( - property_bytes: &[u8], - arena: &bumpalo::Bump, -) -> Result<Vec<u8>, GraphError> { - let mut old_properties: HashMap<String, Value> = bincode::DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes() - .deserialize(property_bytes)?; - - let label = old_properties - .remove("label") - .expect("all old vectors should have label"); - let is_deleted = old_properties - .remove("is_deleted") - .expect("all old vectors should have deleted"); - - let new_properties = ImmutablePropertiesMap::new( - old_properties.len(), - old_properties.iter().map(|(k, v)| (k.as_str(), v.clone())), - arena, - ); - - let new_vector = HVector { - id: 0u128, - label: &label.inner_stringify(), - version: 0, - deleted: is_deleted == true, - level: 0, - distance: None, - data: &[], - properties: Some(new_properties), - }; - - new_vector.to_bincode_bytes().map_err(GraphError::from) -} - -fn verify_vectors_and_repair(storage: &HelixGraphStorage) -> Result<(), GraphError> { - // Verify that all vectors at level > 0 also exist at level 0 and collect ones that need repair - println!("\nVerifying vector integrity after migration..."); - let vectors_to_repair: Vec<(u128, usize)> = { - let txn = storage.graph_env.read_txn()?; - let mut missing = Vec::new(); - - for kv in storage.vectors.vectors_db.iter(&txn)? { - let (key, _) = kv?; - if key.starts_with(b"v:") && key.len() >= 26 { - let id = u128::from_be_bytes(key[2..18].try_into().unwrap()); - let level = usize::from_be_bytes(key[18..26].try_into().unwrap()); - - if level > 0 { - // Check if level 0 exists - let level_0_key = vector_core::VectorCore::vector_key(id, 0); - if storage - .vectors - .vectors_db - .get(&txn, &level_0_key)? - .is_none() - { - println!( - "ERROR: Vector {} exists at level {} but NOT at level 0!", - uuid::Uuid::from_u128(id), - level - ); - missing.push((id, level)); - } - } - } - } - missing - }; - - if !vectors_to_repair.is_empty() { - println!( - "Found {} vectors at level > 0 missing their level 0 counterparts!", - vectors_to_repair.len() - ); - println!("Repairing missing level 0 vectors..."); - - const REPAIR_BATCH_SIZE: usize = 128; - - // Process repairs in batches - for batch in vectors_to_repair.chunks(REPAIR_BATCH_SIZE) { - let mut txn = storage.graph_env.write_txn()?; - - let key_arena = bumpalo::Bump::new(); - - for &(id, source_level) in batch { - // Read vector data from source level - let source_key = vector_core::VectorCore::vector_key(id, source_level); - let vector_data: &[u8] = { - let key = storage - .vectors - .vectors_db - .get(&txn, &source_key)? - .ok_or_else(|| { - GraphError::New(format!( - "Could not read vector {} at level {source_level} for repair", - uuid::Uuid::from_u128(id) - )) - })?; - key_arena.alloc_slice_copy(key) - }; - - // Write to level 0 - let level_0_key = vector_core::VectorCore::vector_key(id, 0); - storage - .vectors - .vectors_db - .put(&mut txn, &level_0_key, vector_data)?; - println!( - " Repaired: Copied vector {} from level {} to level 0", - uuid::Uuid::from_u128(id), - source_level - ); - } - - txn.commit()?; - } - - println!( - "Repair complete!
Repaired {} vectors.", - vectors_to_repair.len() - ); - } else { - println!("All vectors verified successfully!"); - } - - Ok(()) -} - -fn remove_orphaned_vector_edges(storage: &HelixGraphStorage) -> Result<(), GraphError> { - let txn = storage.graph_env.read_txn()?; - let mut orphaned_edges = Vec::new(); - - for kv in storage.vectors.edges_db.iter(&txn)? { - let (key, _) = kv?; - - // Edge key format: [source_id (16 bytes), level (8 bytes), sink_id (16 bytes)] - // Total: 40 bytes - if key.len() != 40 { - println!( - "WARNING: Vector edge key has unexpected length: {} bytes", - key.len() - ); - continue; - } - - // Extract source_id - let source_id = u128::from_be_bytes(key[0..16].try_into().unwrap()); - - // Extract level - let level = usize::from_be_bytes(key[16..24].try_into().unwrap()); - - // Extract sink_id - let sink_id = u128::from_be_bytes(key[24..40].try_into().unwrap()); - - // Check if source vector exists at level 0 - let source_key = vector_core::VectorCore::vector_key(source_id, 0); - let source_exists = storage.vectors.vectors_db.get(&txn, &source_key)?.is_some(); - - // Check if sink vector exists at level 0 - let sink_key = vector_core::VectorCore::vector_key(sink_id, 0); - let sink_exists = storage.vectors.vectors_db.get(&txn, &sink_key)?.is_some(); - - if !source_exists || !sink_exists { - orphaned_edges.push(( - uuid::Uuid::from_u128(source_id), - level, - uuid::Uuid::from_u128(sink_id), - )); - } - } - - for chunk in orphaned_edges.into_iter().chunks(64).into_iter() { - let mut txn = storage.graph_env.write_txn()?; - - for (source_id, level, sink_id) in chunk { - let edge_key = vector_core::VectorCore::out_edges_key( - source_id.as_u128(), - level, - Some(sink_id.as_u128()), - ); - - storage - .vectors - .edges_db - .get(&txn, &edge_key)? - .ok_or_else(|| { - GraphError::New("edge key doesnt exist when removing orphan".into()) - })?; - - storage.vectors.edges_db.delete(&mut txn, &edge_key)?; - } - - txn.commit()?; - } - - Ok(()) -} diff --git a/helix-db/src/helix_engine/storage_core/storage_migration_tests.rs b/helix-db/src/helix_engine/storage_core/storage_migration_tests.rs deleted file mode 100644 index 3b7e3461..00000000 --- a/helix-db/src/helix_engine/storage_core/storage_migration_tests.rs +++ /dev/null @@ -1,1037 +0,0 @@ -//! Comprehensive test suite for storage_migration.rs -//! -//! This test module covers: -//! - Unit tests for endianness conversion functions -//! - Unit tests for property conversion functions -//! - Integration tests for full migration scenarios -//! - Property-based tests for correctness validation -//! - Error handling tests for failure modes -//! 
- Performance tests for large datasets - -use super::{ - metadata::{StorageMetadata, VectorEndianness, NATIVE_VECTOR_ENDIANNESS}, - storage_migration::{ - convert_all_vector_properties, convert_old_vector_properties_to_new_format, - convert_vector_endianness, migrate, - }, - HelixGraphStorage, -}; -use crate::{ - helix_engine::{ - storage_core::version_info::VersionInfo, traversal_core::config::Config, - types::GraphError, - }, - protocol::value::Value, -}; -use std::collections::HashMap; -use tempfile::TempDir; - -// ============================================================================ -// Test Utilities and Fixtures -// ============================================================================ - -/// Helper function to create a test storage instance -fn setup_test_storage() -> (HelixGraphStorage, TempDir) { - let temp_dir = TempDir::new().unwrap(); - let config = Config::default(); - let version_info = VersionInfo::default(); - - let storage = - HelixGraphStorage::new(temp_dir.path().to_str().unwrap(), config, version_info).unwrap(); - - (storage, temp_dir) -} - -/// Create test vector data in a specific endianness -fn create_test_vector_bytes(values: &[f64], endianness: VectorEndianness) -> Vec<u8> { - let mut bytes = Vec::with_capacity(values.len() * 8); - for &value in values { - let value_bytes = match endianness { - VectorEndianness::BigEndian => value.to_be_bytes(), - VectorEndianness::LittleEndian => value.to_le_bytes(), - }; - bytes.extend_from_slice(&value_bytes); - } - bytes -} - -/// Read f64 values from bytes in a specific endianness -fn read_f64_values(bytes: &[u8], endianness: VectorEndianness) -> Vec<f64> { - let mut values = Vec::with_capacity(bytes.len() / 8); - for chunk in bytes.chunks_exact(8) { - let value = match endianness { - VectorEndianness::BigEndian => f64::from_be_bytes(chunk.try_into().unwrap()), - VectorEndianness::LittleEndian => f64::from_le_bytes(chunk.try_into().unwrap()), - }; - values.push(value); - } - values -} - -/// Create old-format vector properties (HashMap-based) -fn create_old_properties( - label: &str, - is_deleted: bool, - extra_props: HashMap<String, Value>, -) -> Vec<u8> { - let mut props = HashMap::new(); - props.insert("label".to_string(), Value::String(label.to_string())); - props.insert("is_deleted".to_string(), Value::Boolean(is_deleted)); - - for (k, v) in extra_props { - props.insert(k, v); - } - - bincode::serialize(&props).unwrap() -} - -/// Populate storage with test vectors in a specific endianness -fn populate_test_vectors( - storage: &mut HelixGraphStorage, - count: usize, - endianness: VectorEndianness, -) -> Result<(), GraphError> { - let mut txn = storage.graph_env.write_txn()?; - - for i in 0..count { - let id = i as u128; - let vector_data = create_test_vector_bytes( - &[i as f64, (i + 1) as f64, (i + 2) as f64], - endianness, - ); - - storage - .vectors - .vectors_db - .put(&mut txn, &id.to_be_bytes(), &vector_data)?; - } - - txn.commit()?; - Ok(()) -} - -/// Populate storage with old-format properties -fn populate_old_properties( - storage: &mut HelixGraphStorage, - count: usize, -) -> Result<(), GraphError> { - let mut txn = storage.graph_env.write_txn()?; - - for i in 0..count { - let id = i as u128; - let mut extra_props = HashMap::new(); - extra_props.insert("test_prop".to_string(), Value::F64(i as f64)); - - let property_bytes = - create_old_properties(&format!("label_{}", i), i % 2 == 0, extra_props); - - storage - .vectors - .vector_properties_db - .put(&mut txn, &id, &property_bytes)?; - } - - txn.commit()?; - Ok(()) -} - -///
Set storage metadata to a specific state -#[allow(dead_code)] -fn set_metadata( - storage: &mut HelixGraphStorage, - metadata: StorageMetadata, -) -> Result<(), GraphError> { - let mut txn = storage.graph_env.write_txn()?; - metadata.save(&mut txn, &storage.metadata_db)?; - txn.commit()?; - Ok(()) -} - -/// Read all vectors from storage and return as f64 values -fn read_all_vectors( - storage: &HelixGraphStorage, - endianness: VectorEndianness, -) -> Result<Vec<Vec<f64>>, GraphError> { - let txn = storage.graph_env.read_txn()?; - let mut all_vectors = Vec::new(); - - for kv in storage.vectors.vectors_db.iter(&txn)? { - let (_, value) = kv?; - let values = read_f64_values(value, endianness); - all_vectors.push(values); - } - - Ok(all_vectors) -} - -/// Clear all metadata from storage (simulates PreMetadata state) -fn clear_metadata(storage: &mut HelixGraphStorage) -> Result<(), GraphError> { - let mut txn = storage.graph_env.write_txn()?; - storage.metadata_db.clear(&mut txn)?; - txn.commit()?; - Ok(()) -} - -// ============================================================================ -// Unit Tests: Endianness Conversion -// ============================================================================ - -#[test] -fn test_convert_vector_endianness_empty_input() { - let arena = bumpalo::Bump::new(); - let result = convert_vector_endianness(&[], VectorEndianness::BigEndian, &arena); - - assert!(result.is_ok()); - assert_eq!(result.unwrap(), &[] as &[u8]); -} - -#[test] -fn test_convert_vector_endianness_single_f64() { - let arena = bumpalo::Bump::new(); - let value: f64 = 3.14159; - let big_endian_bytes = value.to_be_bytes(); - - let result = - convert_vector_endianness(&big_endian_bytes, VectorEndianness::BigEndian, &arena).unwrap(); - - // Result should be in native endianness - let native_value = f64::from_ne_bytes(result.try_into().unwrap()); - assert_eq!(native_value, value); -} - -#[test] -fn test_convert_vector_endianness_multiple_f64s() { - let arena = bumpalo::Bump::new(); - let values = vec![1.0, 2.5, -3.7, 4.2, 5.9]; - let big_endian_bytes = create_test_vector_bytes(&values, VectorEndianness::BigEndian); - - let result = - convert_vector_endianness(&big_endian_bytes, VectorEndianness::BigEndian, &arena).unwrap(); - - // Read back values in native endianness - let result_values: Vec<f64> = result - .chunks_exact(8) - .map(|chunk| f64::from_ne_bytes(chunk.try_into().unwrap())) - .collect(); - - for (original, converted) in values.iter().zip(result_values.iter()) { - assert_eq!(original, converted); - } -} - -#[test] -fn test_convert_vector_endianness_invalid_length() { - let arena = bumpalo::Bump::new(); - let invalid_bytes = vec![1, 2, 3, 4, 5]; // Not a multiple of 8 - - let result = convert_vector_endianness(&invalid_bytes, VectorEndianness::BigEndian, &arena); - - assert!(result.is_err()); - let err_msg = result.unwrap_err().to_string(); - assert!(err_msg.contains("not a multiple")); -} - -#[test] -fn test_convert_vector_endianness_roundtrip() { - let arena = bumpalo::Bump::new(); - let values = vec![1.0, 2.5, -3.7, 100.123, -999.999]; - - // Start with big endian - let big_endian_bytes = create_test_vector_bytes(&values, VectorEndianness::BigEndian); - - // Convert big -> native - let native_bytes = - convert_vector_endianness(&big_endian_bytes, VectorEndianness::BigEndian, &arena).unwrap(); - - // Read values back - let result_values: Vec<f64> = native_bytes - .chunks_exact(8) - .map(|chunk| f64::from_ne_bytes(chunk.try_into().unwrap())) - .collect(); - - for (original, converted) in
values.iter().zip(result_values.iter()) { - assert_eq!(original, converted); - } -} - -#[test] -fn test_convert_vector_endianness_special_values() { - let arena = bumpalo::Bump::new(); - let special_values = vec![ - 0.0, - -0.0, - f64::INFINITY, - f64::NEG_INFINITY, - f64::MIN, - f64::MAX, - f64::EPSILON, - ]; - - let big_endian_bytes = create_test_vector_bytes(&special_values, VectorEndianness::BigEndian); - - let result = - convert_vector_endianness(&big_endian_bytes, VectorEndianness::BigEndian, &arena).unwrap(); - - let result_values: Vec<f64> = result - .chunks_exact(8) - .map(|chunk| f64::from_ne_bytes(chunk.try_into().unwrap())) - .collect(); - - for (original, converted) in special_values.iter().zip(result_values.iter()) { - // Use bit equality for special values like NaN and -0.0 - assert_eq!(original.to_bits(), converted.to_bits()); - } -} - -#[test] -fn test_convert_vector_endianness_from_little_endian() { - let arena = bumpalo::Bump::new(); - let values = vec![1.1, 2.2, 3.3]; - let little_endian_bytes = create_test_vector_bytes(&values, VectorEndianness::LittleEndian); - - let result = convert_vector_endianness( - &little_endian_bytes, - VectorEndianness::LittleEndian, - &arena, - ) - .unwrap(); - - let result_values: Vec<f64> = result - .chunks_exact(8) - .map(|chunk| f64::from_ne_bytes(chunk.try_into().unwrap())) - .collect(); - - for (original, converted) in values.iter().zip(result_values.iter()) { - assert_eq!(original, converted); - } -} - -// ============================================================================ -// Unit Tests: Property Conversion -// ============================================================================ - -#[test] -fn test_convert_old_properties_basic() { - let arena = bumpalo::Bump::new(); - let old_bytes = create_old_properties("test_label", false, HashMap::new()); - - let result = convert_old_vector_properties_to_new_format(&old_bytes, &arena); - assert!(result.is_ok()); - - // We can't directly deserialize HVector, but we can verify the conversion succeeded - let new_bytes = result.unwrap(); - assert!(!new_bytes.is_empty()); -} - -#[test] -fn test_convert_old_properties_with_deleted_flag() { - let arena = bumpalo::Bump::new(); - let old_bytes = create_old_properties("deleted_vector", true, HashMap::new()); - - let result = convert_old_vector_properties_to_new_format(&old_bytes, &arena); - assert!(result.is_ok()); - assert!(!result.unwrap().is_empty()); -} - -#[test] -fn test_convert_old_properties_with_extra_props() { - let arena = bumpalo::Bump::new(); - let mut extra = HashMap::new(); - extra.insert("name".to_string(), Value::String("test".to_string())); - extra.insert("count".to_string(), Value::F64(42.0)); - extra.insert("active".to_string(), Value::Boolean(true)); - - let old_bytes = create_old_properties("test_label", false, extra); - - let result = convert_old_vector_properties_to_new_format(&old_bytes, &arena); - assert!(result.is_ok()); - assert!(!result.unwrap().is_empty()); -} - -#[test] -fn test_convert_old_properties_empty_extra_props() { - let arena = bumpalo::Bump::new(); - let old_bytes = create_old_properties("minimal", false, HashMap::new()); - - let result = convert_old_vector_properties_to_new_format(&old_bytes, &arena); - assert!(result.is_ok()); - assert!(!result.unwrap().is_empty()); -} - -#[test] -#[should_panic(expected = "all old vectors should have label")] -fn test_convert_old_properties_missing_label() { - let arena = bumpalo::Bump::new(); - let mut props = HashMap::new(); - props.insert("is_deleted".to_string(),
Value::Boolean(false)); - // Missing "label" - - let bytes = bincode::serialize(&props).unwrap(); - let _ = convert_old_vector_properties_to_new_format(&bytes, &arena); -} - -#[test] -#[should_panic(expected = "all old vectors should have deleted")] -fn test_convert_old_properties_missing_is_deleted() { - let arena = bumpalo::Bump::new(); - let mut props = HashMap::new(); - props.insert("label".to_string(), Value::String("test".to_string())); - // Missing "is_deleted" - - let bytes = bincode::serialize(&props).unwrap(); - let _ = convert_old_vector_properties_to_new_format(&bytes, &arena); -} - -#[test] -fn test_convert_old_properties_invalid_bincode() { - let arena = bumpalo::Bump::new(); - let invalid_bytes = vec![1, 2, 3, 4, 5]; // Not valid bincode - - let result = convert_old_vector_properties_to_new_format(&invalid_bytes, &arena); - assert!(result.is_err()); -} - -// ============================================================================ -// Integration Tests: Full Migration Scenarios -// ============================================================================ - -#[test] -fn test_migrate_empty_database() { - let (storage, _temp_dir) = setup_test_storage(); - - // Storage is already created with migrations run, but let's verify the state - let txn = storage.graph_env.read_txn().unwrap(); - let metadata = StorageMetadata::read(&txn, &storage.metadata_db).unwrap(); - - assert!(matches!( - metadata, - StorageMetadata::VectorNativeEndianness { .. } - )); -} - -#[test] -fn test_migrate_pre_metadata_to_native() { - let (mut storage, _temp_dir) = setup_test_storage(); - - // Clear metadata to simulate PreMetadata state - clear_metadata(&mut storage).unwrap(); - - // Populate with vectors in big-endian format (PreMetadata default) - populate_test_vectors(&mut storage, 10, VectorEndianness::BigEndian).unwrap(); - populate_old_properties(&mut storage, 10).unwrap(); - - // Run migration - let result = migrate(&mut storage); - assert!(result.is_ok()); - - // Verify metadata was updated - { - let txn = storage.graph_env.read_txn().unwrap(); - let metadata = StorageMetadata::read(&txn, &storage.metadata_db).unwrap(); - - match metadata { - StorageMetadata::VectorNativeEndianness { vector_endianness } => { - assert_eq!(vector_endianness, NATIVE_VECTOR_ENDIANNESS); - } - _ => panic!("Expected VectorNativeEndianness metadata"), - } - } // txn dropped here - - // Verify vectors are readable in native endianness - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 10); - - for (i, vector) in vectors.iter().enumerate() { - let expected = vec![i as f64, (i + 1) as f64, (i + 2) as f64]; - assert_eq!(vector, &expected); - } -} - -#[test] -fn test_migrate_single_vector() { - let (mut storage, _temp_dir) = setup_test_storage(); - - // Clear and repopulate - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 1, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 1); - assert_eq!(vectors[0], vec![0.0, 1.0, 2.0]); -} - -#[test] -fn test_migrate_exact_batch_size() { - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 1024, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, 
NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 1024); - - // Verify first and last vectors - assert_eq!(vectors[0], vec![0.0, 1.0, 2.0]); - assert_eq!(vectors[1023], vec![1023.0, 1024.0, 1025.0]); -} - -#[test] -fn test_migrate_multiple_batches() { - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 2500, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 2500); - - // Verify vectors across batch boundaries - assert_eq!(vectors[0], vec![0.0, 1.0, 2.0]); - assert_eq!(vectors[1023], vec![1023.0, 1024.0, 1025.0]); - assert_eq!(vectors[1024], vec![1024.0, 1025.0, 1026.0]); - assert_eq!(vectors[2499], vec![2499.0, 2500.0, 2501.0]); -} - -#[test] -fn test_migrate_already_native_endianness() { - let (mut storage, _temp_dir) = setup_test_storage(); - - // Add vectors already in native endianness - populate_test_vectors(&mut storage, 10, NATIVE_VECTOR_ENDIANNESS).unwrap(); - - // Migration should be a no-op (already done during setup_test_storage) - let result = migrate(&mut storage); - assert!(result.is_ok()); - - // Vectors should remain unchanged - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 10); -} - -#[test] -fn test_migrate_idempotency() { - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 100, VectorEndianness::BigEndian).unwrap(); - - // Run migration multiple times - migrate(&mut storage).unwrap(); - let vectors_after_first = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - - migrate(&mut storage).unwrap(); - let vectors_after_second = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - - migrate(&mut storage).unwrap(); - let vectors_after_third = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - - // All should be identical - assert_eq!(vectors_after_first, vectors_after_second); - assert_eq!(vectors_after_second, vectors_after_third); -} - -#[test] -fn test_migrate_with_properties() { - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 50, VectorEndianness::BigEndian).unwrap(); - populate_old_properties(&mut storage, 50).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - // Verify both vectors and properties were migrated - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 50); - - // Check properties count - let txn = storage.graph_env.read_txn().unwrap(); - let prop_count = storage.vectors.vector_properties_db.len(&txn).unwrap(); - assert_eq!(prop_count, 50); -} - -#[test] -fn test_migrate_cognee_vector_string_dates_error() { - // This test reproduces a bincode I/O error that occurs when migrating - // CogneeVector data where dates were stored as RFC3339 strings instead - // of proper Date types. - // - // Old schema had: - // created_at: String (RFC3339 format via chrono::Utc::now().to_rfc3339()) - // updated_at: String (RFC3339 format) - // - // New schema expects: - // created_at: Date - // updated_at: Date - // - // This mismatch can cause bincode deserialization errors during migration. 
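To see why this surfaces at read time rather than during migration, consider a minimal sketch with hypothetical enums standing in for the two schema versions of `Value` (bincode writes a variant tag followed by the payload, so bytes produced for a `String` payload do not round-trip as a `Date` payload):

```rust
use serde::{Deserialize, Serialize};

// Hypothetical stand-ins for the old and new property value types.
#[derive(Serialize, Deserialize, Debug)]
enum OldValue {
    String(String), // dates stored as RFC3339 strings
}

#[derive(Serialize, Deserialize, Debug)]
enum NewValue {
    Date(i64), // dates expected as an integer timestamp payload
}

fn main() {
    let written = bincode::serialize(&OldValue::String("2024-01-01T12:00:00Z".into())).unwrap();
    // The variant tags happen to line up (both are variant 0), so bincode does
    // not fail on the tag. It then reads the string's length prefix as if it
    // were the i64 payload, yielding either an error or a nonsense timestamp.
    let read: Result<NewValue, _> = bincode::deserialize(&written);
    println!("{read:?}");
}
```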
- - let (mut storage, _temp_dir) = setup_test_storage(); - - // Clear metadata to simulate PreMetadata state - clear_metadata(&mut storage).unwrap(); - - // Create old-format CogneeVector properties with dates as strings - // (matching how they were actually created in the old format) - let mut extra_props = HashMap::new(); - - // Add CogneeVector-specific fields - extra_props.insert( - "collection_name".to_string(), - Value::String("test_collection".to_string()), - ); - extra_props.insert( - "data_point_id".to_string(), - Value::String("dp_001".to_string()), - ); - extra_props.insert( - "payload".to_string(), - Value::String(r#"{"id":"123","created_at":"2024-01-01","updated_at":"2024-01-01","ontology_valid":true,"version":1,"topological_rank":0,"type":"DataPoint"}"#.to_string()), - ); - extra_props.insert( - "content".to_string(), - Value::String("Test content for CogneeVector".to_string()), - ); - - // Add dates as strings (RFC3339) - this is the problematic part - // In the old format, these were created as: - // Value::from(chrono::Utc::now().to_rfc3339()) - // which creates Value::String, not Value::Date - extra_props.insert( - "created_at".to_string(), - Value::String("2024-01-01T12:00:00.000000000Z".to_string()), - ); - extra_props.insert( - "updated_at".to_string(), - Value::String("2024-01-01T12:00:00.000000000Z".to_string()), - ); - - // Create old properties with CogneeVector label - let old_bytes = create_old_properties("CogneeVector", false, extra_props); - - // Insert into storage - { - let mut txn = storage.graph_env.write_txn().unwrap(); - let id = 123u128; - storage - .vectors - .vector_properties_db - .put(&mut txn, &id, &old_bytes) - .unwrap(); - txn.commit().unwrap(); - } - - // Verify the data was inserted - { - let txn = storage.graph_env.read_txn().unwrap(); - let stored_bytes = storage - .vectors - .vector_properties_db - .get(&txn, &123u128) - .unwrap(); - assert!(stored_bytes.is_some()); - - // Verify we can deserialize it as old format - let old_props: HashMap<String, Value> = bincode::deserialize(stored_bytes.unwrap()).unwrap(); - assert_eq!(old_props.get("label").unwrap(), &Value::String("CogneeVector".to_string())); - assert_eq!(old_props.get("collection_name").unwrap(), &Value::String("test_collection".to_string())); - - // Verify dates are strings, not Date types - match old_props.get("created_at").unwrap() { - Value::String(s) => assert!(s.contains("2024-01-01")), - _ => panic!("Expected created_at to be Value::String in old format"), - } - } - - // Run migration - this preserves the data as-is - let result = migrate(&mut storage); - - // Migration succeeds because it just copies the HashMap to the new format - match result { - Ok(_) => { - println!("✅ Migration succeeded (preserves old data as-is)"); - - // The real error occurs when trying to deserialize the migrated data - // This simulates what v_from_type does when querying by label - let txn = storage.graph_env.read_txn().unwrap(); - let migrated_bytes = storage - .vectors - .vector_properties_db - .get(&txn, &123u128) - .unwrap() - .unwrap(); - - println!("Migrated data exists: {} bytes", migrated_bytes.len()); - - // Try to deserialize as VectorWithoutData (what v_from_type does) - use crate::helix_engine::vector_core::vector_without_data::VectorWithoutData; - let arena2 = bumpalo::Bump::new(); - let deserialize_result = VectorWithoutData::from_bincode_bytes(&arena2, migrated_bytes, 123u128); - - match deserialize_result { - Ok(vector) => { - println!("⚠️ Deserialization succeeded!"); - println!("Vector label:
{}", vector.label); - println!("This means bincode preserved the string dates in properties."); - - // Check if dates are accessible - if let Some(created_at) = vector.get_property("created_at") { - println!("created_at type: {:?}", created_at); - match created_at { - Value::String(s) => println!(" Still a string: {}", s), - Value::Date(d) => println!(" Converted to Date: {:?}", d), - _ => println!(" Other type: {:?}", created_at), - } - } - } - Err(e) => { - println!("✅ REPRODUCED THE ERROR during deserialization!"); - println!("Error: {}", e); - println!(); - println!("This error occurs in the v_from_type query path:"); - println!(" 1. Migration preserves dates as Value::String"); - println!(" 2. v_from_type calls VectorWithoutData::from_bincode_bytes"); - println!(" 3. Bincode deserialization expects specific value types"); - println!(" 4. Type mismatch causes ConversionError"); - - // Verify it's the expected error type - let error_str = e.to_string(); - assert!( - error_str.contains("deserializ") || error_str.contains("Conversion"), - "Expected deserialization/conversion error, got: {}", - e - ); - } - } - } - Err(e) => { - println!("❌ Migration failed unexpectedly: {}", e); - panic!("Migration should succeed but preserve old data"); - } - } -} - -// ============================================================================ -// Integration Tests: Batch Boundary Conditions -// ============================================================================ - -#[test] -fn test_migrate_batch_boundary_1023() { - let (mut storage, _temp_dir) = setup_test_storage(); - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 1023, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 1023); -} - -#[test] -fn test_migrate_batch_boundary_1025() { - let (mut storage, _temp_dir) = setup_test_storage(); - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 1025, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 1025); -} - -#[test] -fn test_migrate_batch_boundary_2047() { - let (mut storage, _temp_dir) = setup_test_storage(); - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 2047, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 2047); -} - -#[test] -fn test_migrate_batch_boundary_2048() { - let (mut storage, _temp_dir) = setup_test_storage(); - clear_metadata(&mut storage).unwrap(); - populate_test_vectors(&mut storage, 2048, VectorEndianness::BigEndian).unwrap(); - - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 2048); -} - -// ============================================================================ -// Property-Based Tests -// ============================================================================ - -use proptest::prelude::*; - -proptest!
{ - #[test] - fn proptest_endianness_conversion_preserves_values( - values in prop::collection::vec(prop::num::f64::ANY, 1..100) - ) { - let arena = bumpalo::Bump::new(); - - // Filter out NaN for equality comparison - let values: Vec<f64> = values.into_iter().filter(|v| !v.is_nan()).collect(); - if values.is_empty() { - return Ok(()); - } - - // Test both endianness conversions - for source_endianness in [VectorEndianness::BigEndian, VectorEndianness::LittleEndian] { - let source_bytes = create_test_vector_bytes(&values, source_endianness); - - let result = convert_vector_endianness(&source_bytes, source_endianness, &arena) - .expect("conversion should succeed"); - - let result_values: Vec<f64> = result - .chunks_exact(8) - .map(|chunk| f64::from_ne_bytes(chunk.try_into().unwrap())) - .collect(); - - prop_assert_eq!(values.len(), result_values.len()); - - for (original, converted) in values.iter().zip(result_values.iter()) { - prop_assert_eq!(original, converted); - } - } - } - - #[test] - fn proptest_endianness_conversion_valid_length( - byte_count in 1usize..200 - ) { - let arena = bumpalo::Bump::new(); - let bytes = vec![0u8; byte_count]; - - let result = convert_vector_endianness(&bytes, VectorEndianness::BigEndian, &arena); - - if byte_count % 8 == 0 { - prop_assert!(result.is_ok()); - } else { - prop_assert!(result.is_err()); - } - } - - #[test] - fn proptest_property_migration_preserves_data( - label in "[a-z]{1,20}", - is_deleted in any::<bool>(), - prop_count in 0usize..10 - ) { - let arena = bumpalo::Bump::new(); - let mut extra_props = HashMap::new(); - - for i in 0..prop_count { - extra_props.insert( - format!("prop_{}", i), - Value::F64(i as f64), - ); - } - - let old_bytes = create_old_properties(&label, is_deleted, extra_props); - let result = convert_old_vector_properties_to_new_format(&old_bytes, &arena) - .expect("property conversion should succeed"); - - // Verify conversion succeeded by checking result is not empty - prop_assert!(!result.is_empty()); - } -} - -// ============================================================================ -// Error Handling Tests -// ============================================================================ - -#[test] -fn test_error_invalid_vector_data_length() { - let arena = bumpalo::Bump::new(); - let invalid_bytes = vec![1, 2, 3, 4, 5, 6, 7]; // 7 bytes, not multiple of 8 - - let result = convert_vector_endianness(&invalid_bytes, VectorEndianness::BigEndian, &arena); - - assert!(result.is_err()); - match result { - Err(GraphError::New(msg)) => { - assert!(msg.contains("not a multiple")); - } - _ => panic!("Expected GraphError::New with length error"), - } -} - -#[test] -fn test_error_corrupted_property_data() { - let arena = bumpalo::Bump::new(); - let corrupted = vec![255u8; 100]; // Random bytes, not valid bincode - - let result = convert_old_vector_properties_to_new_format(&corrupted, &arena); - assert!(result.is_err()); -} - -#[test] -#[ignore] -fn test_date_bincode_serialization() { - // Test that Date values serialize/deserialize correctly with bincode - use crate::protocol::date::Date; - - // Create a Date and wrap it in Value::Date - let date = Date::new(&Value::I64(1609459200)).unwrap(); // 2021-01-01 - let value = Value::Date(date); - - // Serialize with bincode - let serialized = bincode::serialize(&value).unwrap(); - println!("\nValue::Date serialized to {} bytes", serialized.len()); - println!("Format: [variant=12] [i64 timestamp]"); - println!("Bytes: {:?}", serialized); - - // Deserialize - let deserialized: Value =
bincode::deserialize(&serialized).unwrap(); - - // Verify it's a Date variant with correct value - match deserialized { - Value::Date(d) => { - assert_eq!(d.timestamp(), 1609459200); - assert!(d.to_rfc3339().starts_with("2021-01-01")); - println!("✅ Bincode serialization works correctly!"); - println!(" Date: {}", d.to_rfc3339()); - } - _ => panic!("Expected Value::Date variant"), - } - - // Also test JSON serialization still works - let json = sonic_rs::to_string(&value).unwrap(); - let from_json: Value = sonic_rs::from_str(&json).unwrap(); - // JSON deserializes dates as strings, which is expected - assert!(matches!(from_json, Value::String(_))); - println!("✅ JSON serialization also works (deserializes as Value::String as expected)!"); -} - -#[test] -fn test_error_handling_graceful_failure() { - // Test that errors don't corrupt the database - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - - // Add valid data - populate_test_vectors(&mut storage, 10, VectorEndianness::BigEndian).unwrap(); - - // Now add invalid data manually - { - let mut txn = storage.graph_env.write_txn().unwrap(); - let bad_id = 9999u128; - let bad_data = vec![1, 2, 3]; // Invalid length - - storage - .vectors - .vectors_db - .put(&mut txn, &bad_id.to_be_bytes(), &bad_data) - .unwrap(); - - txn.commit().unwrap(); - } - - // Migration should fail on invalid data - let result = migrate(&mut storage); - assert!(result.is_err()); - - // But the 10 valid vectors should still be there - let txn = storage.graph_env.read_txn().unwrap(); - let count = storage.vectors.vectors_db.len(&txn).unwrap(); - assert_eq!(count, 11); // 10 valid + 1 invalid -} - -// ============================================================================ -// Performance Tests -// ============================================================================ - -#[test] -#[ignore] // Run with: cargo test --release -- --ignored --nocapture -fn test_performance_large_dataset() { - use std::time::Instant; - - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - - // Create 100K vectors - println!("Populating 100K vectors..."); - let start = Instant::now(); - populate_test_vectors(&mut storage, 100_000, VectorEndianness::BigEndian).unwrap(); - println!("Population took: {:?}", start.elapsed()); - - // Migrate - println!("Running migration..."); - let start = Instant::now(); - let result = migrate(&mut storage); - let duration = start.elapsed(); - - assert!(result.is_ok()); - println!("Migration of 100K vectors took: {:?}", duration); - println!("Average: {:?} per vector", duration / 100_000); - - // Verify a sample - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 100_000); - assert_eq!(vectors[0], vec![0.0, 1.0, 2.0]); - assert_eq!(vectors[50_000], vec![50_000.0, 50_001.0, 50_002.0]); - assert_eq!(vectors[99_999], vec![99_999.0, 100_000.0, 100_001.0]); -} - -#[test] -#[ignore] -fn test_performance_property_migration() { - use std::time::Instant; - - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - - println!("Populating 50K properties..."); - populate_old_properties(&mut storage, 50_000).unwrap(); - - println!("Running property migration..."); - let start = Instant::now(); - let result = convert_all_vector_properties(&mut storage); - let duration = start.elapsed(); - - assert!(result.is_ok()); - println!("Property migration of 50K items took: {:?}",
duration); - println!("Average: {:?} per property", duration / 50_000); -} - -#[test] -fn test_memory_efficiency_batch_processing() { - // This test verifies that batch processing doesn't cause memory issues - let (mut storage, _temp_dir) = setup_test_storage(); - - clear_metadata(&mut storage).unwrap(); - - // Create 5000 vectors (multiple batches) - populate_test_vectors(&mut storage, 5000, VectorEndianness::BigEndian).unwrap(); - - // Migration should complete without OOM - let result = migrate(&mut storage); - assert!(result.is_ok()); - - let vectors = read_all_vectors(&storage, NATIVE_VECTOR_ENDIANNESS).unwrap(); - assert_eq!(vectors.len(), 5000); -} diff --git a/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_concurrent_tests.rs b/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_concurrent_tests.rs index d2091cb6..07c6a617 100644 --- a/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_concurrent_tests.rs +++ b/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_concurrent_tests.rs @@ -12,21 +12,14 @@ /// - Multiple inserts at same level could create invalid graph topology /// - Delete during search might return inconsistent results /// - LMDB transaction model provides MVCC but needs validation - use bumpalo::Bump; -use heed3::{Env, EnvOpenOptions, RoTxn, RwTxn}; +use heed3::{Env, EnvOpenOptions, RwTxn}; use rand::Rng; use std::sync::{Arc, Barrier}; use std::thread; use tempfile::TempDir; -use crate::helix_engine::vector_core::{ - hnsw::HNSW, - vector::HVector, - vector_core::{HNSWConfig, VectorCore}, -}; - -type Filter = fn(&HVector, &RoTxn) -> bool; +use crate::helix_engine::vector_core::{HNSWConfig, VectorCore}; /// Setup test environment with larger map size for concurrent access /// @@ -49,13 +42,18 @@ fn setup_concurrent_env() -> (TempDir, Env) { } /// Generate a random vector of given dimensionality -fn random_vector(dim: usize) -> Vec { - (0..dim).map(|_| rand::rng().random_range(0.0..1.0)).collect() +fn random_vector(dim: usize) -> Vec { + (0..dim) + .map(|_| rand::rng().random_range(0.0..1.0)) + .collect() } /// Open existing VectorCore databases (for concurrent access) /// Note: create_database opens existing database if it exists -fn open_vector_core(env: &Env, txn: &mut RwTxn) -> Result { +fn open_vector_core( + env: &Env, + txn: &mut RwTxn, +) -> Result { VectorCore::new(env, txn, HNSWConfig::new(None, None, None)) } @@ -96,11 +94,17 @@ fn test_concurrent_inserts_single_label() { let mut wtxn = env.write_txn().unwrap(); let arena = Bump::new(); let vector = random_vector(128); - let data = arena.alloc_slice_copy(&vector); // Open the existing databases and insert let index = open_vector_core(&env, &mut wtxn).unwrap(); - index.insert::(&mut wtxn, "concurrent_test", data, None, &arena) + index + .insert( + &mut wtxn, + "concurrent_test", + vector.as_slice(), + None, + &arena, + ) .expect("Insert should succeed"); wtxn.commit().expect("Commit should succeed"); } @@ -118,10 +122,10 @@ fn test_concurrent_inserts_single_label() { let index = open_vector_core(&env, &mut wtxn).unwrap(); wtxn.commit().unwrap(); let rtxn = env.read_txn().unwrap(); - let count = index.num_inserted_vectors(&rtxn).unwrap(); + let count = index.num_inserted_vectors(); // Note: count includes entry point (+1), so actual vectors inserted = count - 1 - let expected_inserted = (num_threads * vectors_per_thread) as u64; + let expected_inserted = num_threads * vectors_per_thread; assert!( count == expected_inserted || count == expected_inserted + 1, "Expected {} or {} vectors (with 
entry point), found {}", @@ -133,7 +137,7 @@ fn test_concurrent_inserts_single_label() { // Additional consistency check: Verify we can perform searches (entry point exists implicitly) let arena = Bump::new(); let query = [0.5; 128]; - let search_result = index.search::(&rtxn, &query, 10, "concurrent_test", None, false, &arena); + let search_result = index.search(&rtxn, query.to_vec(), 10, "concurrent_test", &arena); assert!( search_result.is_ok(), "Should be able to search after concurrent inserts (entry point exists)" @@ -161,7 +165,9 @@ fn test_concurrent_searches_during_inserts() { for _ in 0..50 { let vector = random_vector(128); let data = arena.alloc_slice_copy(&vector); - index.insert::(&mut txn, "search_test", data, None, &arena).unwrap(); + index + .insert(&mut txn, "search_test", data, None, &arena) + .unwrap(); } txn.commit().unwrap(); } @@ -188,33 +194,21 @@ fn test_concurrent_searches_during_inserts() { // Perform many searches // Open databases once per thread let mut wtxn_init = env.write_txn().unwrap(); - let index = open_vector_core(&env, &mut wtxn_init).unwrap(); + let index: VectorCore = open_vector_core(&env, &mut wtxn_init).unwrap(); wtxn_init.commit().unwrap(); for _ in 0..50 { let rtxn = env.read_txn().unwrap(); let arena = Bump::new(); - match index.search::( - &rtxn, - &query[..], - 10, - "search_test", - None, - false, - &arena, - ) { + match index.search(&rtxn, query.to_vec(), 10, "search_test", &arena) { Ok(results) => { total_searches += 1; - total_results += results.len(); + total_results += results.nns.len(); // Validate result consistency - for (i, result) in results.iter().enumerate() { - assert!( - result.distance.is_some(), - "Result {} should have distance", - i - ); + for (i, &(_, distance)) in results.into_nns().iter().enumerate() { + assert!(distance > 0_f32, "Result {} should have distance", i); } } Err(e) => { @@ -251,7 +245,8 @@ fn test_concurrent_searches_during_inserts() { let data = arena.alloc_slice_copy(&vector); let index = open_vector_core(&env, &mut wtxn).unwrap(); - index.insert::(&mut wtxn, "search_test", data, None, &arena) + index + .insert(&mut wtxn, "search_test", data, None, &arena) .expect("Insert should succeed"); wtxn.commit().expect("Commit should succeed"); @@ -270,7 +265,7 @@ fn test_concurrent_searches_during_inserts() { let index = open_vector_core(&env, &mut wtxn).unwrap(); wtxn.commit().unwrap(); let rtxn = env.read_txn().unwrap(); - let final_count = index.num_inserted_vectors(&rtxn).unwrap(); + let final_count = index.num_inserted_vectors(); assert!( final_count >= 50, @@ -281,9 +276,12 @@ fn test_concurrent_searches_during_inserts() { // Verify we can still search successfully let arena = Bump::new(); let results = index - .search::(&rtxn, &query[..], 10, "search_test", None, false, &arena) + .search(&rtxn, query.to_vec(), 10, "search_test", &arena) .unwrap(); - assert!(!results.is_empty(), "Should find results after concurrent operations"); + assert!( + !results.nns.is_empty(), + "Should find results after concurrent operations" + ); } #[test] @@ -324,9 +322,7 @@ fn test_concurrent_inserts_multiple_labels() { let vector = random_vector(64); let data = arena.alloc_slice_copy(&vector); - index - .insert::(&mut wtxn, &label, data, None, &arena) - .unwrap(); + index.insert(&mut wtxn, &label, data, None, &arena).unwrap(); wtxn.commit().unwrap(); if i % 10 == 0 { @@ -353,7 +349,7 @@ fn test_concurrent_inserts_multiple_labels() { // Verify we can search for each label (entry point exists implicitly) let query = [0.5; 64]; - 
@@ -361,8 +357,8 @@ ); } - let total_count = index.num_inserted_vectors(&rtxn).unwrap(); - let expected_total = (num_labels * vectors_per_label) as u64; + let total_count = index.num_inserted_vectors(); + let expected_total = num_labels * vectors_per_label; assert!( total_count == expected_total || total_count == expected_total + 1, "Expected {} or {} vectors (with entry point), found {}", @@ -412,7 +408,7 @@ fn test_entry_point_consistency() { let data = arena.alloc_slice_copy(&vector); index - .insert::<Filter>(&mut wtxn, "entry_test", data, None, &arena) + .insert(&mut wtxn, "entry_test", data, None, &arena) .unwrap(); wtxn.commit().unwrap(); } @@ -433,17 +429,26 @@ fn test_entry_point_consistency() { // If we can successfully search, entry point must be valid let query = [0.5; 32]; - let search_result = index.search::<Filter>(&rtxn, &query, 10, "entry_test", None, false, &arena); - assert!(search_result.is_ok(), "Entry point should exist and be valid"); + let search_result = index.search(&rtxn, query.to_vec(), 10, "entry_test", &arena); + assert!( + search_result.is_ok(), + "Entry point should exist and be valid" + ); let results = search_result.unwrap(); - assert!(!results.is_empty(), "Should return results if entry point is valid"); + assert!( + !results.nns.is_empty(), + "Should return results if entry point is valid" + ); // Verify results have valid properties - for result in results.iter() { - assert!(result.id > 0, "Result ID should be valid"); - assert!(!result.deleted, "Results should not be deleted"); - assert!(!result.data.is_empty(), "Results should have data"); + for &(_id, _distance) in results.into_nns().iter() { + // assert!(result.id > 0, "Result ID should be valid"); + // assert!(!result.deleted, "Results should not be deleted"); + // assert!( + // !result.data_borrowed().is_empty(), + // "Results should have data" + // ); } } @@ -484,7 +489,7 @@ fn test_graph_connectivity_after_concurrent_inserts() { let data = arena.alloc_slice_copy(&vector); index - .insert::<Filter>(&mut wtxn, "connectivity_test", data, None, &arena) + .insert(&mut wtxn, "connectivity_test", data, None, &arena) .unwrap(); wtxn.commit().unwrap(); } @@ -507,29 +512,18 @@ fn test_graph_connectivity_after_concurrent_inserts() { for i in 0..10 { let query = random_vector(64); let results = index - .search::<Filter>( - &rtxn, - &query, - 10, - "connectivity_test", - None, - false, - &arena, - ) + .search(&rtxn, query.to_vec(), 10, "connectivity_test", &arena) .unwrap(); assert!( - !results.is_empty(), + !results.nns.is_empty(), "Query {} should return results (graph should be connected)", i ); // All results should have valid distances - for result in results { - assert!( - result.distance.is_some() && result.distance.unwrap() >= 0.0, - "Result should have valid distance" - ); + for &(_, distance) in results.into_nns().iter() { + assert!(distance >= 0.0, "Result should have valid distance"); } } } @@ -552,8 +546,9 @@ fn test_transaction_isolation() { let arena = Bump::new(); for _ in 0..initial_count { let vector = random_vector(32); - let data = arena.alloc_slice_copy(&vector); - index.insert::<Filter>(&mut txn, "isolation_test", data, None, &arena).unwrap(); + index + .insert(&mut txn, "isolation_test", vector.as_slice(), None, &arena) + .unwrap(); } txn.commit().unwrap(); }
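// Aside: test_transaction_isolation (continuing below) exercises LMDB's MVCC
// guarantee: a read transaction pins the snapshot that existed when it was
// opened, no matter what writers commit afterwards. Reduced to a sketch:
fn snapshot_isolation_sketch(env: &heed3::Env) {
    let rtxn = env.read_txn().unwrap(); // pins snapshot S0
    // ... a writer thread inserts vectors and commits, producing snapshot S1 ...
    // Every read made through rtxn still observes S0 until rtxn is dropped.
    drop(rtxn);
    let _rtxn_new = env.read_txn().unwrap(); // a fresh reader observes S1
}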
@@ -564,7 +559,7 @@ wtxn_open.commit().unwrap(); let rtxn = env.read_txn().unwrap(); - let count_before = index.num_inserted_vectors(&rtxn).unwrap(); + let count_before = index.num_inserted_vectors(); // Entry point may be included in count (+1) assert!( @@ -584,8 +579,9 @@ let arena = Bump::new(); let vector = random_vector(32); - let data = arena.alloc_slice_copy(&vector); - index.insert::<Filter>(&mut wtxn, "isolation_test", data, None, &arena).unwrap(); + index + .insert(&mut wtxn, "isolation_test", vector.as_slice(), None, &arena) + .unwrap(); wtxn.commit().unwrap(); } }); @@ -593,7 +589,7 @@ handle.join().unwrap(); // Original read transaction should still see the same count (snapshot isolation) - let count_after = index.num_inserted_vectors(&rtxn).unwrap(); + let count_after = index.num_inserted_vectors(); assert_eq!( count_after, count_before, "Read transaction should see consistent snapshot" @@ -606,13 +602,14 @@ let index_new = open_vector_core(&env, &mut wtxn_new).unwrap(); wtxn_new.commit().unwrap(); - let rtxn_new = env.read_txn().unwrap(); - let count_new = index_new.num_inserted_vectors(&rtxn_new).unwrap(); + let count_new = index_new.num_inserted_vectors(); // Entry point may be included in counts (+1) let expected_new = initial_count + 20; assert!( - count_new == expected_new || count_new == expected_new + 1 || count_new == initial_count + 20 + 1, + count_new == expected_new + || count_new == expected_new + 1 + || count_new == initial_count + 20 + 1, "Expected around {} vectors, got {}", expected_new, count_new diff --git a/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_loom_tests.rs b/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_loom_tests.rs index 8ba2b425..f542bdf5 100644 --- a/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_loom_tests.rs +++ b/helix-db/src/helix_engine/tests/concurrency_tests/hnsw_loom_tests.rs @@ -10,9 +10,6 @@ /// /// NOTE: Loom tests are expensive - they explore all possible execution orderings. /// Keep the problem space small (few operations, few threads).
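// Aside: the harness shape used by the tests below. loom::model re-executes the
// closure under every legal thread interleaving, so a racy load-then-store is
// found deterministically instead of probabilistically (generic illustration,
// not one of this file's tests):
#[cfg(loom)]
fn lost_update_sketch() {
    loom::model(|| {
        use loom::sync::Arc;
        use loom::sync::atomic::{AtomicU64, Ordering};

        let counter = Arc::new(AtomicU64::new(0));
        let c1 = Arc::clone(&counter);
        let t1 = loom::thread::spawn(move || {
            // Read-modify-write as separate load/store: another thread's store
            // can land in between, losing one increment. fetch_add would not.
            let v = c1.load(Ordering::SeqCst);
            c1.store(v + 1, Ordering::SeqCst);
        });
        let v = counter.load(Ordering::SeqCst);
        counter.store(v + 1, Ordering::SeqCst);
        t1.join().unwrap();
        // loom will visit the interleaving where the counter ends at 1.
        assert!((1..=2).contains(&counter.load(Ordering::SeqCst)));
    });
}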
- - - use loom::sync::Arc; use loom::sync::atomic::{AtomicU64, Ordering}; use loom::thread; @@ -84,9 +81,7 @@ fn loom_entry_point_read_write_race() { }); // Reader thread: Reads entry point (might see 0 or 12345) - let reader = thread::spawn(move || { - reader_entry.load(Ordering::SeqCst) - }); + let reader = thread::spawn(move || reader_entry.load(Ordering::SeqCst)); writer.join().unwrap(); let read_value = reader.join().unwrap(); @@ -137,7 +132,7 @@ fn loom_neighbor_count_race() { // This test demonstrates the lost update problem // In real code, this should use fetch_add assert!( - final_count >= 1 && final_count <= 2, + (1..=2).contains(&final_count), "Expected 1 or 2, got {}", final_count ); @@ -176,7 +171,7 @@ fn loom_max_level_update_race() { // Should end up with max level of 3 let final_max = max_level.load(Ordering::SeqCst); assert!( - final_max >= 2 && final_max <= 3, + (2..=3).contains(&final_max), "Expected 2 or 3, got {}", final_max ); @@ -295,9 +290,7 @@ fn loom_two_writers_one_reader() { }); // Reader: Read value (should see 0, 1, or 2) - let reader = thread::spawn(move || { - r_value.load(Ordering::SeqCst) - }); + let reader = thread::spawn(move || r_value.load(Ordering::SeqCst)); w1.join().unwrap(); w2.join().unwrap(); diff --git a/helix-db/src/helix_engine/tests/concurrency_tests/integration_stress_tests.rs b/helix-db/src/helix_engine/tests/concurrency_tests/integration_stress_tests.rs index 233cadb5..a2b6108a 100644 --- a/helix-db/src/helix_engine/tests/concurrency_tests/integration_stress_tests.rs +++ b/helix-db/src/helix_engine/tests/concurrency_tests/integration_stress_tests.rs @@ -86,7 +86,8 @@ fn test_stress_mixed_read_write_operations() { G::new_mut(&storage, &arena, &mut wtxn) .add_edge("connects", None, id1, id2, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); wtxn.commit().unwrap(); write_ops.fetch_add(1, Ordering::Relaxed); @@ -196,7 +197,8 @@ fn test_stress_rapid_graph_growth() { let root_idx = local_count % root_ids.len(); G::new_mut(&storage, &arena, &mut wtxn) .add_edge("child_of", None, root_ids[root_idx], new_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); wtxn.commit().unwrap(); write_count.fetch_add(1, Ordering::Relaxed); diff --git a/helix-db/src/helix_engine/tests/concurrency_tests/mod.rs b/helix-db/src/helix_engine/tests/concurrency_tests/mod.rs index 801e05b7..6d2706c6 100644 --- a/helix-db/src/helix_engine/tests/concurrency_tests/mod.rs +++ b/helix-db/src/helix_engine/tests/concurrency_tests/mod.rs @@ -2,4 +2,4 @@ pub mod hnsw_concurrent_tests; pub mod hnsw_loom_tests; pub mod integration_stress_tests; -pub mod traversal_concurrent_tests; \ No newline at end of file +pub mod traversal_concurrent_tests; diff --git a/helix-db/src/helix_engine/tests/concurrency_tests/traversal_concurrent_tests.rs b/helix-db/src/helix_engine/tests/concurrency_tests/traversal_concurrent_tests.rs index 932874b7..7cfdaa7b 100644 --- a/helix-db/src/helix_engine/tests/concurrency_tests/traversal_concurrent_tests.rs +++ b/helix-db/src/helix_engine/tests/concurrency_tests/traversal_concurrent_tests.rs @@ -1,3 +1,4 @@ +use bumpalo::Bump; /// Concurrent access tests for Traversal Operations /// /// This test suite validates thread safety and concurrent operation correctness @@ -13,22 +14,18 @@ /// - MVCC ensures readers see consistent graph snapshots /// - Edge creation/deletion doesn't corrupt graph topology /// - No race conditions in neighbor list updates - use std::sync::{Arc, Barrier}; use std::thread; -use bumpalo::Bump; 
use tempfile::TempDir; use crate::helix_engine::storage_core::HelixGraphStorage; use crate::helix_engine::traversal_core::config::Config; use crate::helix_engine::traversal_core::ops::g::G; +use crate::helix_engine::traversal_core::ops::in_::in_::InAdapter; +use crate::helix_engine::traversal_core::ops::out::out::OutAdapter; use crate::helix_engine::traversal_core::ops::source::{ - add_n::AddNAdapter, - add_e::AddEAdapter, - n_from_id::NFromIdAdapter, + add_e::AddEAdapter, add_n::AddNAdapter, n_from_id::NFromIdAdapter, }; -use crate::helix_engine::traversal_core::ops::out::out::OutAdapter; -use crate::helix_engine::traversal_core::ops::in_::in_::InAdapter; /// Setup storage for concurrent testing fn setup_concurrent_storage() -> (TempDir, Arc<HelixGraphStorage>) { @@ -69,7 +66,8 @@ fn test_concurrent_node_additions() { let label = format!("person_t{}_n{}", thread_id, i); G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); wtxn.commit().unwrap(); } @@ -112,7 +110,8 @@ fn test_concurrent_edge_additions() { let label = format!("node_{}", i); G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::<Result<Vec<_>,_>>().unwrap()[0] + .collect::<Result<Vec<_>, _>>() + .unwrap()[0] .id() }) .collect(); @@ -144,8 +143,15 @@ fn test_concurrent_edge_additions() { let label = format!("knows_t{}_e{}", thread_id, i); G::new_mut(&storage, &arena, &mut wtxn) - .add_edge(&label, None, node_ids[source_idx], node_ids[target_idx], false) - .collect_to_obj().unwrap(); + .add_edge( + &label, + None, + node_ids[source_idx], + node_ids[target_idx], + false, + ) + .collect_to_obj() + .unwrap(); wtxn.commit().unwrap(); } @@ -184,7 +190,8 @@ fn test_concurrent_reads_during_writes() { let root = G::new_mut(&storage, &arena, &mut wtxn) .add_n("root", None, None) - .collect::<Result<Vec<_>,_>>().unwrap()[0] + .collect::<Result<Vec<_>,
_>>() + .unwrap()[0] .id(); for i in 0..5 { let label = format!("node_{}", i); let node_id = G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut wtxn) .add_edge("links", None, root, node_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); } wtxn.commit().unwrap(); @@ -338,7 +354,8 @@ fn test_traversal_snapshot_isolation() { let initial_neighbors = G::new(&storage, &rtxn, &arena) .n_from_id(&root_id) .out_node("links") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let initial_count = initial_neighbors.len(); assert_eq!(initial_count, 5); @@ -352,12 +369,14 @@ fn test_traversal_snapshot_isolation() { let label = format!("new_node_{}", i); let new_id = G::new_mut(&storage_clone, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage_clone, &arena, &mut wtxn) .add_edge("links", None, root_id, new_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); wtxn.commit().unwrap(); } @@ -370,7 +389,8 @@ fn test_traversal_snapshot_isolation() { let current_neighbors = G::new(&storage, &rtxn, &arena2) .n_from_id(&root_id) .out_node("links") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!( current_neighbors.len(), @@ -388,7 +408,8 @@ fn test_traversal_snapshot_isolation() { let final_neighbors = G::new(&storage, &rtxn_new, &arena3) .n_from_id(&root_id) .out_node("links") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(final_neighbors.len(), 15); } @@ -410,7 +431,8 @@ fn test_concurrent_bidirectional_traversals() { let label = format!("source_{}", i); G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id() }) .collect(); @@ -420,7 +442,8 @@ fn test_concurrent_bidirectional_traversals() { let label = format!("target_{}", i); G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id() }) .collect(); @@ -430,7 +453,8 @@ fn test_concurrent_bidirectional_traversals() { for target_id in &targets { G::new_mut(&storage, &arena, &mut wtxn) .add_edge("points_to", None, *source_id, *target_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); } } @@ -463,7 +487,8 @@ fn test_concurrent_bidirectional_traversals() { let neighbors = G::new(&storage, &rtxn, &arena) .n_from_id(source_id) .out_node("points_to") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(neighbors.len(), 5, "Source should have 5 outgoing edges"); } } else { @@ -472,7 +497,8 @@ fn test_concurrent_bidirectional_traversals() { let neighbors = G::new(&storage, &rtxn, &arena) .n_from_id(target_id) .in_node("points_to") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(neighbors.len(), 5, "Target should have 5 incoming edges"); } } @@ -503,7 +529,8 @@ fn test_concurrent_multi_hop_traversals() { let root = G::new_mut(&storage, &arena, &mut wtxn) .add_n("root", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); // Create level 1 nodes @@ -512,12 +539,14 @@ fn test_concurrent_multi_hop_traversals() { let label = format!("level1_{}", i); let id = G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); 
G::new_mut(&storage, &arena, &mut wtxn) .add_edge("to_l1", None, root, id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); id }) @@ -529,12 +558,14 @@ fn test_concurrent_multi_hop_traversals() { let label = format!("level2_{}", i); let l2_id = G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut wtxn) .add_edge("to_l2", None, l1_id, l2_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); } } @@ -561,7 +592,8 @@ fn test_concurrent_multi_hop_traversals() { let level1 = G::new(&storage, &rtxn, &arena) .n_from_id(&root_id) .out_node("to_l1") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(level1.len(), 3, "Should have 3 level1 nodes"); // For each level1, traverse to level2 @@ -570,7 +602,8 @@ fn test_concurrent_multi_hop_traversals() { let level2 = G::new(&storage, &rtxn, &arena2) .n_from_id(&l1_node.id()) .out_node("to_l2") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(level2.len(), 2, "Each level1 should have 2 level2 nodes"); } @@ -615,17 +648,20 @@ fn test_concurrent_graph_topology_consistency() { let node1_id = G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label1, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let node2_id = G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label2, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut wtxn) .add_edge("connects", None, node1_id, node2_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); wtxn.commit().unwrap(); } @@ -654,17 +690,26 @@ fn test_concurrent_graph_topology_consistency() { // Verify all edges point to valid nodes for result in storage.edges_db.iter(&rtxn).unwrap() { let (edge_id, edge_bytes) = result.unwrap(); - let edge = crate::utils::items::Edge::from_bincode_bytes(edge_id, &edge_bytes, &arena).unwrap(); + let edge = + crate::utils::items::Edge::from_bincode_bytes(edge_id, edge_bytes, &arena).unwrap(); // Verify source exists assert!( - storage.nodes_db.get(&rtxn, &edge.from_node).unwrap().is_some(), + storage + .nodes_db + .get(&rtxn, &edge.from_node) + .unwrap() + .is_some(), "Edge source node not found" ); // Verify target exists assert!( - storage.nodes_db.get(&rtxn, &edge.to_node).unwrap().is_some(), + storage + .nodes_db + .get(&rtxn, &edge.to_node) + .unwrap() + .is_some(), "Edge target node not found" ); } @@ -688,7 +733,8 @@ fn test_stress_concurrent_mixed_operations() { let label = format!("root_{}", i); G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id() }) .collect(); @@ -717,13 +763,15 @@ fn test_stress_concurrent_mixed_operations() { let label = format!("w{}_n{}", writer_id, write_count); let new_id = G::new_mut(&storage, &arena, &mut wtxn) .add_n(&label, None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let root_idx = write_count % root_ids.len(); G::new_mut(&storage, &arena, &mut wtxn) .add_edge("links", None, root_ids[root_idx], new_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); wtxn.commit().unwrap(); write_count += 1; @@ -747,7 +795,8 @@ fn test_stress_concurrent_mixed_operations() { let _neighbors = G::new(&storage, &rtxn, &arena) .n_from_id(root_id) .out_node("links") - .collect::,_>>().unwrap(); + .collect::, _>>() + 
.unwrap(); read_count += 1; } } @@ -770,9 +819,20 @@ let total_writes: usize = write_counts.iter().sum(); let total_reads: usize = read_counts.iter().sum(); - println!("Stress test: {} writes, {} reads in {:?}", total_writes, total_reads, duration); + println!( + "Stress test: {} writes, {} reads in {:?}", + total_writes, total_reads, duration + ); // Should process many operations - assert!(total_writes > 50, "Should perform many writes, got {}", total_writes); - assert!(total_reads > 100, "Should perform many reads, got {}", total_reads); + assert!( + total_writes > 50, + "Should perform many writes, got {}", + total_writes + ); + assert!( + total_reads > 100, + "Should perform many reads, got {}", + total_reads + ); } diff --git a/helix-db/src/helix_engine/tests/hnsw_tests.rs b/helix-db/src/helix_engine/tests/hnsw_tests.rs index 78f4a48c..7b030cd6 100644 --- a/helix-db/src/helix_engine/tests/hnsw_tests.rs +++ b/helix-db/src/helix_engine/tests/hnsw_tests.rs @@ -1,15 +1,9 @@ use bumpalo::Bump; -use heed3::{Env, EnvOpenOptions, RoTxn}; +use heed3::{Env, EnvOpenOptions}; use rand::Rng; use tempfile::TempDir; -use crate::helix_engine::vector_core::{ - hnsw::HNSW, - vector::HVector, - vector_core::{HNSWConfig, VectorCore}, -}; - -type Filter = fn(&HVector, &RoTxn) -> bool; +use crate::helix_engine::vector_core::{HNSWConfig, VectorCore}; fn setup_env() -> (Env, TempDir) { let temp_dir = tempfile::tempdir().unwrap(); @@ -31,18 +25,16 @@ fn test_hnsw_insert_and_count() { let mut txn = env.write_txn().unwrap(); let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None)).unwrap(); - let vector: Vec<f64> = (0..4).map(|_| rand::rng().random_range(0.0..1.0)).collect(); + let vector: Vec<f32> = (0..4).map(|_| rand::rng().random_range(0.0..1.0)).collect(); for _ in 0..10 { let arena = Bump::new(); - let data = arena.alloc_slice_copy(&vector); let _ = index - .insert::<Filter>(&mut txn, "vector", data, None, &arena) + .insert(&mut txn, "vector", vector.as_slice(), None, &arena) .unwrap(); } txn.commit().unwrap(); - let txn = env.read_txn().unwrap(); - assert!(index.num_inserted_vectors(&txn).unwrap() >= 10); + assert!(index.num_inserted_vectors() >= 10); } #[test] @@ -54,10 +46,9 @@ fn test_hnsw_search_returns_results() { let mut rng = rand::rng(); for _ in 0..128 { let arena = Bump::new(); - let vector: Vec<f64> = (0..4).map(|_| rng.random_range(0.0..1.0)).collect(); - let data = arena.alloc_slice_copy(&vector); + let vector: Vec<f32> = (0..4).map(|_| rng.random_range(0.0..1.0)).collect(); let _ = index - .insert::<Filter>(&mut txn, "vector", data, None, &arena) + .insert(&mut txn, "vector", vector.as_slice(), None, &arena) .unwrap(); } txn.commit().unwrap(); @@ -66,7 +57,7 @@ fn test_hnsw_search_returns_results() { let txn = env.read_txn().unwrap(); let query = [0.5, 0.5, 0.5, 0.5]; let results = index - .search::<Filter>(&txn, &query, 5, "vector", None, false, &arena) + .search(&txn, query.to_vec(), 5, "vector", &arena) .unwrap(); - assert!(!results.is_empty()); + assert!(!results.nns.is_empty()); }
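// Aside: taken together, the hnsw_tests changes pin down the post-patch calling
// convention — f32 slices in, (id, distance) pairs out, and a count that no
// longer needs a read transaction. One hedged end-to-end sketch (dimension and
// label mirror the tests; error handling elided):
fn round_trip_sketch(env: &heed3::Env) {
    let mut wtxn = env.write_txn().unwrap();
    let index = VectorCore::new(env, &mut wtxn, HNSWConfig::new(None, None, None)).unwrap();
    let arena = bumpalo::Bump::new();
    // insert takes &[f32] directly; no Filter turbofish, no arena-copied slice.
    index
        .insert(&mut wtxn, "vector", &[0.1_f32, 0.2, 0.3, 0.4], None, &arena)
        .unwrap();
    wtxn.commit().unwrap();

    // Counting no longer takes a transaction...
    assert!(index.num_inserted_vectors() >= 1);

    // ...while search still reads under a snapshot and takes the query by value.
    let rtxn = env.read_txn().unwrap();
    let results = index
        .search(&rtxn, vec![0.5_f32; 4], 5, "vector", &arena)
        .unwrap();
    assert!(!results.nns.is_empty());
}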
diff --git a/helix-db/src/helix_engine/tests/storage_tests.rs b/helix-db/src/helix_engine/tests/storage_tests.rs index 8fd061c2..b730feb6 100644 --- a/helix-db/src/helix_engine/tests/storage_tests.rs +++ b/helix-db/src/helix_engine/tests/storage_tests.rs @@ -1,5 +1,7 @@ use crate::helix_engine::{ - storage_core::{HelixGraphStorage, storage_methods::DBMethods, version_info::VersionInfo, StorageConfig}, + storage_core::{ + HelixGraphStorage, StorageConfig, storage_methods::DBMethods, version_info::VersionInfo, + }, traversal_core::config::Config, }; use tempfile::TempDir; diff --git a/helix-db/src/helix_engine/tests/traversal_tests/count_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/count_tests.rs index 8e03ee32..7aa752bc 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/count_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/count_tests.rs @@ -1,27 +1,21 @@ use std::{sync::Arc, time::Duration}; -use crate::{ - helix_engine::{ - storage_core::HelixGraphStorage, - traversal_core::{ - ops::{ - g::G, - out::out::OutAdapter, - source::{ - add_e::AddEAdapter, - add_n::AddNAdapter, - n_from_id::NFromIdAdapter, - n_from_type::NFromTypeAdapter, - }, - util::{count::CountAdapter, filter_ref::FilterRefAdapter, range::RangeAdapter}, - }, +use crate::helix_engine::{ + storage_core::HelixGraphStorage, + traversal_core::ops::{ + g::G, + out::out::OutAdapter, + source::{ + add_e::AddEAdapter, add_n::AddNAdapter, n_from_id::NFromIdAdapter, + n_from_type::NFromTypeAdapter, }, + util::{count::CountAdapter, filter_ref::FilterRefAdapter, range::RangeAdapter}, }, }; +use bumpalo::Bump; use rand::Rng; use tempfile::TempDir; -use bumpalo::Bump; fn setup_test_db() -> (TempDir, Arc<HelixGraphStorage>) { let temp_dir = TempDir::new().unwrap(); let db_path = temp_dir.path().to_str().unwrap(); @@ -41,7 +35,8 @@ fn test_count_single_node() { let mut txn = storage.graph_env.write_txn().unwrap(); let person = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); let person = person.first().unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -59,13 +54,16 @@ fn test_count_node_array() { let mut txn = storage.graph_env.write_txn().unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -84,35 +82,28 @@ fn test_count_mixed_steps() { // Create a graph with multiple paths let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); let person1 = person1.first().unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); let person2 = person2.first().unwrap(); let person3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::<Result<Vec<_>,_>>().unwrap(); + .collect::<Result<Vec<_>, _>>() + .unwrap(); let person3 = person3.first().unwrap(); G::new_mut(&storage, &arena, &mut txn) - .add_edge( - "knows", - None, - person1.id(), - person2.id(), - false, - ) - .collect::<Result<Vec<_>,_>>().unwrap(); + .add_edge("knows", None, person1.id(), person2.id(), false) + .collect::<Result<Vec<_>, _>>() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) - .add_edge( - "knows", - None, - person1.id(), - person3.id(), - false, - ) - .collect::<Result<Vec<_>,_>>().unwrap(); + .add_edge("knows", None, person1.id(), person3.id(), false) + .collect::<Result<Vec<_>, _>>() + .unwrap(); txn.commit().unwrap(); println!("person1: {person1:?},\nperson2: {person2:?},\nperson3: {person3:?}"); @@ -148,7 +139,8 @@ fn test_count_filter_ref() { for _ in
0..100 { let node = G::new_mut(&storage, &arena, &mut txn) .add_n("Country", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); nodes.push(node); } let mut num_countries = 0; @@ -157,16 +149,12 @@ fn test_count_filter_ref() { for _ in 0..rand_num { let city = G::new_mut(&storage, &arena, &mut txn) .add_n("City", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) - .add_edge( - "Country_to_City", - None, - node.id(), - city.id(), - false, - ) - .collect::,_>>().unwrap(); + .add_edge("Country_to_City", None, node.id(), city.id(), false) + .collect::, _>>() + .unwrap(); // sleep for one microsecond std::thread::sleep(Duration::from_micros(1)); } @@ -180,22 +168,20 @@ fn test_count_filter_ref() { .filter_ref(|val, txn| { if let Ok(val) = val { let val_id = val.id(); - Ok(G::new(&storage, &txn, &arena) + Ok(G::new(&storage, txn, &arena) .n_from_id(&val_id) .out_node("Country_to_City") .count_to_val() .map_value_or(false, |v| { - println!( - "v: {v:?}, res: {:?}", - *v > 10.clone() - ); - *v > 10.clone() + println!("v: {v:?}, res: {:?}", *v > 10); + *v > 10 })?) } else { Ok(false) } }) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); println!("count: {count:?}, num_countries: {num_countries}"); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs index cd237235..55561041 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs @@ -1,7 +1,6 @@ use std::sync::Arc; use bumpalo::Bump; -use heed3::RoTxn; use rand::Rng; use tempfile::TempDir; @@ -25,13 +24,10 @@ use crate::{ traversal_value::TraversalValue, }, types::GraphError, - vector_core::vector::HVector, }, props, }; -type Filter = fn(&HVector, &RoTxn) -> bool; - fn setup_test_db() -> (TempDir, Arc) { let temp_dir = TempDir::new().unwrap(); let db_path = temp_dir.path().to_str().unwrap(); @@ -170,7 +166,8 @@ fn test_drop_node() { let edges = G::new(&storage, &txn, &arena) .n_from_id(&node2_id) .in_e("knows") - .collect::, _>>().unwrap(); + .collect::, _>>() + .unwrap(); println!("edges: {:?}", edges); assert!(edges.is_empty()); } @@ -390,24 +387,22 @@ fn test_vector_deletion_in_existing_graph() { let mut vector_ids = Vec::new(); for _ in 0..10 { let id = match G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 1.0, 1.0, 1.0], "vector", None) + .insert_v(&[1.0, 1.0, 1.0, 1.0], "vector", None) .collect_to_obj() .unwrap() { TraversalValue::Vector(vector) => vector.id, - TraversalValue::VectorNodeWithoutVectorData(vector) => *vector.id(), other => panic!("unexpected value: {other:?}"), }; vector_ids.push(id); } let target_vector_id = match G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 1.0, 1.0, 1.0], "vector", None) + .insert_v(&[1.0, 1.0, 1.0, 1.0], "vector", None) .collect_to_obj() .unwrap() { TraversalValue::Vector(vector) => vector.id, - TraversalValue::VectorNodeWithoutVectorData(vector) => *vector.id(), other => panic!("unexpected value: {other:?}"), }; @@ -443,10 +438,8 @@ fn test_vector_deletion_in_existing_graph() { .n_from_id(&node_id) .out_vec("knows", false) .filter_ref(|val, _| match val { - Ok(TraversalValue::Vector(vector)) => Ok(*vector.id() == target_vector_id), - Ok(TraversalValue::VectorNodeWithoutVectorData(vector)) => { - Ok(*vector.id() == target_vector_id) - } + Ok(TraversalValue::Vector(vector)) => Ok(vector.id == target_vector_id), + 
Ok(_) => Ok(false), Err(err) => Err(GraphError::from(err.to_string())), }) diff --git a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs index 426154d1..ec676a0f 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs @@ -22,14 +22,10 @@ use crate::{ traversal_value::TraversalValue, }, types::GraphError, - vector_core::vector::HVector, }, props, protocol::value::Value, }; -use heed3::RoTxn; - -type Filter = fn(&HVector, &RoTxn) -> bool; fn setup_test_db() -> (TempDir, Arc) { let temp_dir = TempDir::new().unwrap(); @@ -58,23 +54,27 @@ fn test_add_edge_creates_relationship() { let source_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let target_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let edge = G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, source_id, target_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let fetched = G::new(&storage, &txn, &arena) .e_from_id(&edge.id()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(fetched.len(), 1); assert_eq!(edge_id(&fetched[0]), edge.id()); } @@ -87,15 +87,18 @@ fn test_out_e_returns_edge() { let source_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let target_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, source_id, target_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); @@ -103,7 +106,8 @@ fn test_out_e_returns_edge() { let edges = G::new(&storage, &txn, &arena) .n_from_id(&source_id) .out_e("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(edges.len(), 1); assert_eq!(edges[0].id(), edge_id(&edges[0])); } @@ -116,15 +120,18 @@ fn test_in_e_returns_edge() { let source_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let target_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, source_id, target_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); @@ -132,7 +139,8 @@ fn test_in_e_returns_edge() { let edges = G::new(&storage, &txn, &arena) .n_from_id(&target_id) .in_e("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(edges.len(), 1); assert_eq!(edge_id(&edges[0]), edges[0].id()); } @@ -145,15 +153,18 @@ fn test_out_node_returns_neighbor() { let source_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let neighbor_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) 
- .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, source_id, neighbor_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); @@ -161,7 +172,8 @@ fn test_out_node_returns_neighbor() { let neighbors = G::new(&storage, &txn, &arena) .n_from_id(&source_id) .out_node("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(neighbors.len(), 1); assert_eq!(neighbors[0].id(), neighbor_id); } @@ -174,11 +186,13 @@ fn test_edge_properties_can_be_read() { let source_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let target_id = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -188,14 +202,16 @@ fn test_edge_properties_can_be_read() { target_id, false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let edge = G::new(&storage, &txn, &arena) .e_from_type("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(edge.len(), 1); if let TraversalValue::Edge(edge) = &edge[0] { match edge.properties.as_ref().unwrap().get("since").unwrap() { @@ -216,19 +232,21 @@ fn test_vector_edges_roundtrip() { let node_id = G::new_mut(&storage, &arena, &mut txn) .add_n("doc", None, None) - .collect::,_>>().unwrap()[0] + .collect::, _>>() + .unwrap()[0] .id(); let vector_id = match G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 0.0, 0.0], "embedding", None) - .collect_to_obj().unwrap() + .insert_v(&[1.0, 0.0, 0.0], "embedding", None) + .collect_to_obj() + .unwrap() { TraversalValue::Vector(vector) => vector.id, - TraversalValue::VectorNodeWithoutVectorData(vector) => *vector.id(), other => panic!("unexpected traversal value: {other:?}"), }; G::new_mut(&storage, &arena, &mut txn) .add_edge("has_vector", None, node_id, vector_id, false) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); @@ -236,11 +254,11 @@ fn test_vector_edges_roundtrip() { let vectors = G::new(&storage, &txn, &arena) .n_from_id(&node_id) .out_vec("has_vector", true) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(vectors.len(), 1); match &vectors[0] { - TraversalValue::Vector(vec) => assert_eq!(*vec.id(), vector_id), - TraversalValue::VectorNodeWithoutVectorData(vec) => assert_eq!(*vec.id(), vector_id), + TraversalValue::Vector(vec) => assert_eq!(vec.id, vector_id), other => panic!("unexpected traversal value: {other:?}"), } } diff --git a/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs index 20a259e9..31ab1881 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/filter_tests.rs @@ -39,13 +39,16 @@ fn test_filter_nodes() { // Create nodes with different properties let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 25 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! 
{ "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let person3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 35 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -67,7 +70,8 @@ fn test_filter_nodes() { Ok(false) } }) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 1); assert_eq!(traversal[0].id(), person3.id()); } @@ -84,14 +88,16 @@ fn test_filter_macro_single_argument() { props_option(&arena, props! { "name" => "Alice" }), None, ) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n( "person", props_option(&arena, props! { "name" => "Bob" }), None, ) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); fn has_name(val: &Result) -> Result { if let Ok(TraversalValue::Node(node)) = val { @@ -106,7 +112,8 @@ fn test_filter_macro_single_argument() { let traversal = G::new(&storage, &txn, &arena) .n_from_type("person") .filter_ref(|val, _| has_name(val)) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 2); assert!( traversal @@ -131,10 +138,12 @@ fn test_filter_macro_multiple_arguments() { let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 25 }), None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); fn age_greater_than( @@ -160,7 +169,8 @@ fn test_filter_macro_multiple_arguments() { let traversal = G::new(&storage, &txn, &arena) .n_from_type("person") .filter_ref(|val, _| age_greater_than(val, 27)) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 1); assert_eq!(traversal[0].id(), person2.id()); @@ -174,10 +184,12 @@ fn test_filter_edges() { let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -187,7 +199,8 @@ fn test_filter_edges() { person2.id(), false, ) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let edge2 = G::new_mut(&storage, &arena, &mut txn) .add_edge( "knows", @@ -196,7 +209,8 @@ fn test_filter_edges() { person1.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -223,7 +237,8 @@ fn test_filter_edges() { let traversal = G::new(&storage, &txn, &arena) .e_from_type("knows") .filter_ref(|val, _| recent_edge(val, 2021)) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 1); assert_eq!(traversal[0].id(), edge2.id()); @@ -237,7 +252,8 @@ fn test_filter_empty_result() { let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! 
{ "age" => 25 }), None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -258,7 +274,8 @@ fn test_filter_empty_result() { Ok(false) } }) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert!(traversal.is_empty()); } @@ -274,17 +291,20 @@ fn test_filter_chain() { props_option(&arena, props! { "age" => 25, "name" => "Alice" }), None, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n( "person", props_option(&arena, props! { "age" => 30, "name" => "Bob" }), None, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 35 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -320,7 +340,8 @@ fn test_filter_chain() { .n_from_type("person") .filter_ref(|val, _| has_name(val)) .filter_ref(|val, _| age_greater_than(val, 27)) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 1); assert_eq!(traversal[0].id(), person2.id()); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs index 5b6de6da..0d78b544 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/node_traversal_tests.rs @@ -13,7 +13,7 @@ use crate::{ add_e::AddEAdapter, add_n::AddNAdapter, e_from_type::EFromTypeAdapter, n_from_id::NFromIdAdapter, n_from_type::NFromTypeAdapter, }, - util::{filter_ref::FilterRefAdapter, drop::Drop}, + util::{drop::Drop, filter_ref::FilterRefAdapter}, }, traversal_value::TraversalValue, }, @@ -88,23 +88,28 @@ fn test_out() { // Create graph: (person1)-[knows]->(person2)-[knows]->(person3) let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person1 = person1.first().unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person2 = person2.first().unwrap(); let person3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person3 = person3.first().unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, person1.id(), person2.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, person2.id(), person3.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -130,22 +135,26 @@ fn test_in() { // Create graph: (person1)-[knows]->(person2) let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person1 = person1.first().unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person2 = person2.first().unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, person1.id(), person2.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + 
.unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); let nodes = G::new(&storage, &txn, &arena) .n_from_id(&person2.id()) .in_node("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); // Check that current step is at person1 @@ -167,26 +176,32 @@ fn test_complex_traversal() { let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person1 = person1.first().unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person2 = person2.first().unwrap(); let person3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); let person3 = person3.first().unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, person1.id(), person2.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("likes", None, person2.id(), person3.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("follows", None, person3.id(), person1.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -194,7 +209,8 @@ fn test_complex_traversal() { let nodes = G::new(&storage, &txn, &arena) .n_from_id(&person1.id()) .out_node("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); // Check that current step is at person2 assert_eq!(nodes.len(), 1); @@ -205,7 +221,8 @@ fn test_complex_traversal() { let nodes = G::new(&storage, &txn, &arena) .n_from_id(&node_id) .out_node("likes") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); // Check that current step is at person3 assert_eq!(nodes.len(), 1); @@ -216,7 +233,8 @@ fn test_complex_traversal() { let nodes = G::new(&storage, &txn, &arena) .n_from_id(&node_id) .out_node("follows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); // Check that current step is at person1 assert_eq!(nodes.len(), 1); @@ -232,14 +250,16 @@ fn test_n_from_id() { // Create a test node let person = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node_id = person.id(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); let count = G::new(&storage, &txn, &arena) .n_from_id(&node_id) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(count.len(), 1); } @@ -253,20 +273,24 @@ fn test_n_from_id_with_traversal() { // Create test graph: (person1)-[knows]->(person2) let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, person1.id(), person2.id(), true) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); let count = G::new(&storage, &txn, &arena) .n_from_id(&person1.id()) .out_node("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); // Check that traversal reaches person2 
assert_eq!(count.len(), 1); @@ -281,7 +305,8 @@ fn test_n_from_id_nonexistent() { let txn = storage.graph_env.read_txn().unwrap(); G::new(&storage, &txn, &arena) .n_from_id(&100) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); } #[test] @@ -293,23 +318,29 @@ fn test_n_from_id_chain_operations() { // Create test graph: (person1)-[knows]->(person2)-[likes]->(person3) let person1 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let person2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let _ = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let person3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("knows", None, person1.id(), person2.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) .add_edge("likes", None, person2.id(), person3.id(), false) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); @@ -317,7 +348,8 @@ fn test_n_from_id_chain_operations() { .n_from_id(&person1.id()) .out_node("knows") .out_node("likes") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); // Check that the chain of traversals reaches person3 assert_eq!(nodes.len(), 1); @@ -336,7 +368,8 @@ fn test_with_id_type() { props_option(&arena, props! { "name" => "test" }), None, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); #[derive(Serialize, Deserialize, Debug)] struct Input { @@ -355,7 +388,8 @@ fn test_with_id_type() { let txn = storage.graph_env.read_txn().unwrap(); let traversal = G::new(&storage, &txn, &arena) .n_from_id(&input.id) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 1); assert_eq!(traversal[0].id(), input.id.inner()); @@ -368,57 +402,61 @@ fn test_double_add_and_double_fetch() { let arena = Bump::new(); let mut txn = db.graph_env.write_txn().unwrap(); - let original_node1 = G::new_mut(&db, &arena, &mut txn) + let original_node1 = G::new_mut(db, &arena, &mut txn) .add_n( "person", props_option(&arena, props! { "entity_name" => "person1" }), None, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); - let original_node2 = G::new_mut(&db, &arena, &mut txn) + let original_node2 = G::new_mut(db, &arena, &mut txn) .add_n( "person", props_option(&arena, props! 
{ "entity_name" => "person2" }), None, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let mut txn = db.graph_env.write_txn().unwrap(); - let node1 = G::new(&db, &txn, &arena) + let node1 = G::new(db, &txn, &arena) .n_from_type("person") .filter_ref(|val, _| { if let Ok(val) = val { Ok(val .get_property("entity_name") - .map_or(false, |v| *v == "person1")) + .is_some_and(|v| *v == "person1")) } else { Ok(false) } }) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); - let node2 = G::new(&db, &txn, &arena) + let node2 = G::new(db, &txn, &arena) .n_from_type("person") .filter_ref(|val, _| { if let Ok(val) = val { Ok(val .get_property("entity_name") - .map_or(false, |v| *v == "person2")) + .is_some_and(|v| *v == "person2")) } else { Ok(false) } }) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(node1.len(), 1); assert_eq!(node1[0].id(), original_node1.id()); assert_eq!(node2.len(), 1); assert_eq!(node2[0].id(), original_node2.id()); - let _e = G::new_mut(&db, &arena, &mut txn) + let _e = G::new_mut(db, &arena, &mut txn) .add_edge( "knows", None, @@ -426,14 +464,16 @@ fn test_double_add_and_double_fetch() { node2.first().unwrap().id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let txn = db.graph_env.read_txn().unwrap(); - let e = G::new(&db, &txn, &arena) + let e = G::new(db, &txn, &arena) .e_from_type("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(e.len(), 1); assert_eq!(e[0].id(), e.first().unwrap().id()); if let TraversalValue::Edge(e) = &e[0] { diff --git a/helix-db/src/helix_engine/tests/traversal_tests/range_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/range_tests.rs index 503a0350..63fe75f3 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/range_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/range_tests.rs @@ -1,26 +1,20 @@ -use std::sync::Arc; use super::test_utils::props_option; +use std::sync::Arc; -use tempfile::TempDir; -use bumpalo::Bump; use crate::{ helix_engine::{ storage_core::HelixGraphStorage, - traversal_core::{ - ops::{ - g::G, - out::out::OutAdapter, - source::{ - add_e::AddEAdapter, - add_n::AddNAdapter, - n_from_type::NFromTypeAdapter, - }, - util::range::RangeAdapter, - }, + traversal_core::ops::{ + g::G, + out::out::OutAdapter, + source::{add_e::AddEAdapter, add_n::AddNAdapter, n_from_type::NFromTypeAdapter}, + util::range::RangeAdapter, }, }, props, }; +use bumpalo::Bump; +use tempfile::TempDir; fn setup_test_db() -> (TempDir, Arc) { let temp_dir = TempDir::new().unwrap(); @@ -45,7 +39,8 @@ fn test_range_subset() { .map(|_| { G::new_mut(&storage, &arena, &mut txn) .add_n("person", None, None) - .collect::,_>>().unwrap() + .collect::, _>>() + .unwrap() .first() .unwrap(); }) @@ -72,7 +67,8 @@ fn test_range_chaining() { .map(|i| { G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! 
{ "name" => i }), None) - .collect::,_>>().unwrap() + .collect::, _>>() + .unwrap() .first() .unwrap() .clone() @@ -82,32 +78,23 @@ fn test_range_chaining() { // Create edges connecting nodes sequentially for i in 0..4 { G::new_mut(&storage, &arena, &mut txn) - .add_edge( - "knows", - None, - nodes[i].id(), - nodes[i + 1].id(), - false, - ) - .collect::,_>>().unwrap(); + .add_edge("knows", None, nodes[i].id(), nodes[i + 1].id(), false) + .collect::, _>>() + .unwrap(); } G::new_mut(&storage, &arena, &mut txn) - .add_edge( - "knows", - None, - nodes[4].id(), - nodes[0].id(), - false, - ) - .collect::,_>>().unwrap(); + .add_edge("knows", None, nodes[4].id(), nodes[0].id(), false) + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); let count = G::new(&storage, &txn, &arena) .n_from_type("person") // Get all nodes .range(0, 3) // Take first 3 nodes .out_node("knows") // Get their outgoing nodes - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(count.len(), 3); } @@ -121,7 +108,8 @@ fn test_range_empty() { let count = G::new(&storage, &txn, &arena) .n_from_type("person") // Get all nodes .range(0, 0) // Take first 3 nodes - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(count.len(), 0); } diff --git a/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs index 1b371d69..9abbb9a5 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/secondary_index_tests.rs @@ -50,25 +50,29 @@ fn test_delete_node_with_secondary_index() { props_option(&arena, props! { "name" => "John" }), Some(&["name"]), ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node_id = node.id(); G::new_mut_from_iter(&storage, &mut txn, std::iter::once(node), &arena) .update(&[("name", Value::from("Jane"))]) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let jane_nodes = G::new(&storage, &txn, &arena) .n_from_index("person", "name", &"Jane".to_string()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(jane_nodes.len(), 1); assert_eq!(jane_nodes[0].id(), node_id); let john_nodes = G::new(&storage, &txn, &arena) .n_from_index("person", "name", &"John".to_string()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert!(john_nodes.is_empty()); drop(txn); @@ -76,7 +80,8 @@ fn test_delete_node_with_secondary_index() { let txn = storage.graph_env.read_txn().unwrap(); let traversal = G::new(&storage, &txn, &arena) .n_from_id(&node_id) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); drop(txn); let mut txn = storage.graph_env.write_txn().unwrap(); @@ -87,7 +92,8 @@ fn test_delete_node_with_secondary_index() { let txn = storage.graph_env.read_txn().unwrap(); let node = G::new(&storage, &txn, &arena) .n_from_index("person", "name", &"Jane".to_string()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert!(node.is_empty()); } @@ -103,21 +109,24 @@ fn test_update_of_secondary_indices() { props_option(&arena, props! 
{ "name" => "John" }), Some(&["name"]), ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); let mut txn = storage.graph_env.write_txn().unwrap(); G::new_mut_from_iter(&storage, &mut txn, std::iter::once(node), &arena) .update(&[("name", Value::from("Jane"))]) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let nodes = G::new(&storage, &txn, &arena) .n_from_index("person", "name", &"Jane".to_string()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(nodes.len(), 1); if let TraversalValue::Node(node) = &nodes[0] { match node.properties.as_ref().unwrap().get("name").unwrap() { @@ -130,6 +139,7 @@ fn test_update_of_secondary_indices() { let john_nodes = G::new(&storage, &txn, &arena) .n_from_index("person", "name", &"John".to_string()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert!(john_nodes.is_empty()); } diff --git a/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs index b84b063a..f87b02ed 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/shortest_path_tests.rs @@ -261,8 +261,8 @@ fn test_dijkstra_custom_weight_function() { .as_ref() .and_then(|props| props.get("distance")) .and_then(|v| match v { - Value::F64(f) => Some(*f), - Value::F32(f) => Some(*f as f64), + Value::F64(f) => Some(*f as f32), + Value::F32(f) => Some(*f), _ => None, }) .ok_or_else(|| { @@ -399,8 +399,8 @@ fn test_dijkstra_multi_context_weight() { .as_ref() .and_then(|props| props.get("distance")) .and_then(|v| match v { - Value::F64(f) => Some(*f), - Value::F32(f) => Some(*f as f64), + Value::F64(f) => Some(*f as f32), + Value::F32(f) => Some(*f), _ => None, }) .ok_or_else(|| { @@ -414,8 +414,8 @@ fn test_dijkstra_multi_context_weight() { .as_ref() .and_then(|props| props.get("traffic_factor")) .and_then(|v| match v { - Value::F64(f) => Some(*f), - Value::F32(f) => Some(*f as f64), + Value::F64(f) => Some(*f as f32), + Value::F32(f) => Some(*f), _ => None, }) .ok_or_else(|| { @@ -837,7 +837,7 @@ fn test_astar_custom_weight_and_heuristic() { let start = G::new_mut(&storage, &arena, &mut txn) .add_n( "city", - props_option(&arena, props!("name" => "start", "h" => 10.0)), + props_option(&arena, props!("name" => "start", "h" => 10.0_f32)), None, ) .collect::, _>>() @@ -857,7 +857,10 @@ fn test_astar_custom_weight_and_heuristic() { G::new_mut(&storage, &arena, &mut txn) .add_edge( "road", - props_option(&arena, props!("distance" => 100.0, "traffic" => 0.5)), + props_option( + &arena, + props!("distance" => 100.0_f32, "traffic" => 0.5_f32), + ), start, end, false, @@ -874,18 +877,18 @@ fn test_astar_custom_weight_and_heuristic() { let custom_weight = |edge: &crate::utils::items::Edge, _src: &crate::utils::items::Node, _dst: &crate::utils::items::Node| { - let distance = edge - .get_property("distance") - .ok_or(crate::helix_engine::types::GraphError::New( - "distance property not found".to_string(), - ))? - .as_f64(); + let a = + edge.get_property("distance") + .ok_or(crate::helix_engine::types::GraphError::New( + "distance property not found".to_string(), + ))?; + let distance = a.as_f32(); let traffic = edge .get_property("traffic") .ok_or(crate::helix_engine::types::GraphError::New( "traffic property not found".to_string(), ))? 
- .as_f64(); + .as_f32(); Ok(distance * traffic) }; diff --git a/helix-db/src/helix_engine/tests/traversal_tests/test_utils.rs b/helix-db/src/helix_engine/tests/traversal_tests/test_utils.rs index 21c8094e..1a4cff1a 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/test_utils.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/test_utils.rs @@ -1,7 +1,4 @@ -use crate::{ - protocol::value::Value, - utils::properties::ImmutablePropertiesMap, -}; +use crate::{protocol::value::Value, utils::properties::ImmutablePropertiesMap}; use bumpalo::Bump; pub fn props_map<'arena>( diff --git a/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs index 4e02c02f..81b3479a 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/update_tests.rs @@ -16,8 +16,8 @@ use crate::{ traversal_value::TraversalValue, }, }, - protocol::value::Value, props, + protocol::value::Value, }; fn setup_test_db() -> (TempDir, Arc) { @@ -39,32 +39,45 @@ fn test_update_node() { let mut txn = storage.graph_env.write_txn().unwrap(); let node = G::new_mut(&storage, &arena, &mut txn) - .add_n("person", props_option(&arena, props!("name" => "test")), None) - .collect_to_obj().unwrap(); + .add_n( + "person", + props_option(&arena, props!("name" => "test")), + None, + ) + .collect_to_obj() + .unwrap(); G::new_mut(&storage, &arena, &mut txn) - .add_n("person", props_option(&arena, props!("name" => "test2")), None) - .collect_to_obj().unwrap(); + .add_n( + "person", + props_option(&arena, props!("name" => "test2")), + None, + ) + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); let arena_read = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let traversal = G::new(&storage, &txn, &arena_read) .n_from_id(&node.id()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); drop(txn); let arena = Bump::new(); let mut txn = storage.graph_env.write_txn().unwrap(); G::new_mut_from_iter(&storage, &mut txn, traversal.into_iter(), &arena) .update(&[("name", Value::from("john"))]) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); txn.commit().unwrap(); let arena = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let updated = G::new(&storage, &txn, &arena) .n_from_id(&node.id()) - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(updated.len(), 1); match &updated[0] { diff --git a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs index 737e9cbb..67030994 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs @@ -1,30 +1,24 @@ -use std::sync::Arc; use super::test_utils::props_option; +use std::sync::Arc; use crate::{ helix_engine::{ storage_core::HelixGraphStorage, - traversal_core::{ - ops::{ - g::G, - out::{out::OutAdapter, out_e::OutEdgesAdapter}, - source::{ - add_e::AddEAdapter, - add_n::AddNAdapter, - n_from_type::NFromTypeAdapter, - }, - util::{dedup::DedupAdapter, order::OrderByAdapter}, - vectors::{insert::InsertVAdapter, search::SearchVAdapter}, - }, + traversal_core::ops::{ + g::G, + out::{out::OutAdapter, out_e::OutEdgesAdapter}, + source::{add_e::AddEAdapter, add_n::AddNAdapter, n_from_type::NFromTypeAdapter}, + util::{dedup::DedupAdapter, order::OrderByAdapter}, + vectors::{insert::InsertVAdapter, search::SearchVAdapter}, }, - 
vector_core::vector::HVector, + vector_core::HVector, }, props, }; +use bumpalo::Bump; use heed3::RoTxn; use tempfile::TempDir; -use bumpalo::Bump; fn setup_test_db() -> (TempDir, Arc) { let temp_dir = TempDir::new().unwrap(); let db_path = temp_dir.path().to_str().unwrap(); @@ -45,15 +39,18 @@ fn test_order_node_by_asc() { let node = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 20 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 10 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -61,7 +58,8 @@ fn test_order_node_by_asc() { let traversal = G::new(&storage, &txn, &arena) .n_from_type("person") .order_by_asc("age") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 3); assert_eq!(traversal[0].id(), node3.id()); @@ -77,15 +75,18 @@ fn test_order_node_by_desc() { let node = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 20 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 10 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -93,7 +94,8 @@ fn test_order_node_by_desc() { let traversal = G::new(&storage, &txn, &arena) .n_from_type("person") .order_by_desc("age") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 3); assert_eq!(traversal[0].id(), node.id()); @@ -109,15 +111,18 @@ fn test_order_edge_by_asc() { let node = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 20 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 10 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let edge = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -127,7 +132,8 @@ fn test_order_edge_by_asc() { node2.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let edge2 = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -137,7 +143,8 @@ fn test_order_edge_by_asc() { node2.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -146,7 +153,8 @@ fn test_order_edge_by_asc() { .n_from_type("person") .out_e("knows") .order_by_asc("since") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 2); assert_eq!(traversal[0].id(), edge.id()); @@ -161,15 +169,18 @@ fn test_order_edge_by_desc() { let node = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! 
{ "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 20 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 10 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let edge = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -179,7 +190,8 @@ fn test_order_edge_by_desc() { node2.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let edge2 = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -189,7 +201,8 @@ fn test_order_edge_by_desc() { node2.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -198,7 +211,8 @@ fn test_order_edge_by_desc() { .n_from_type("person") .out_e("knows") .order_by_desc("since") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 2); assert_eq!(traversal[0].id(), edge2.id()); @@ -213,16 +227,31 @@ fn test_order_vector_by_asc() { type FnTy = fn(&HVector, &RoTxn) -> bool; let vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", props_option(&arena, props! { "age" => 30 })) - .collect_to_obj().unwrap(); + .insert_v( + &[1.0, 2.0, 3.0], + "vector", + props_option(&arena, props! { "age" => 30 }), + ) + .collect_to_obj() + .unwrap(); let vector2 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", props_option(&arena, props! { "age" => 20 })) - .collect_to_obj().unwrap(); + .insert_v( + &[1.0, 2.0, 3.0], + "vector", + props_option(&arena, props! { "age" => 20 }), + ) + .collect_to_obj() + .unwrap(); let vector3 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", props_option(&arena, props! { "age" => 10 })) - .collect_to_obj().unwrap(); + .insert_v( + &[1.0, 2.0, 3.0], + "vector", + props_option(&arena, props! { "age" => 10 }), + ) + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -230,7 +259,8 @@ fn test_order_vector_by_asc() { let traversal = G::new(&storage, &txn, &arena) .search_v::(&[1.0, 2.0, 3.0], 10, "vector", None) .order_by_asc("age") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 3); assert_eq!(traversal[0].id(), vector3.id()); @@ -246,16 +276,31 @@ fn test_order_vector_by_desc() { type FnTy = fn(&HVector, &RoTxn) -> bool; let vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", props_option(&arena, props! { "age" => 30 })) - .collect_to_obj().unwrap(); + .insert_v( + &[1.0, 2.0, 3.0], + "vector", + props_option(&arena, props! { "age" => 30 }), + ) + .collect_to_obj() + .unwrap(); let vector2 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", props_option(&arena, props! { "age" => 20 })) - .collect_to_obj().unwrap(); + .insert_v( + &[1.0, 2.0, 3.0], + "vector", + props_option(&arena, props! { "age" => 20 }), + ) + .collect_to_obj() + .unwrap(); let vector3 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", props_option(&arena, props! { "age" => 10 })) - .collect_to_obj().unwrap(); + .insert_v( + &[1.0, 2.0, 3.0], + "vector", + props_option(&arena, props! 
{ "age" => 10 }), + ) + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -263,7 +308,8 @@ fn test_order_vector_by_desc() { let traversal = G::new(&storage, &txn, &arena) .search_v::(&[1.0, 2.0, 3.0], 10, "vector", None) .order_by_desc("age") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 3); assert_eq!(traversal[0].id(), vector.id()); @@ -279,15 +325,18 @@ fn test_dedup() { let node = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 30 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node2 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 20 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let node3 = G::new_mut(&storage, &arena, &mut txn) .add_n("person", props_option(&arena, props! { "age" => 10 }), None) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let _edge = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -297,7 +346,8 @@ fn test_dedup() { node2.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); let _edge2 = G::new_mut(&storage, &arena, &mut txn) .add_edge( @@ -307,7 +357,8 @@ fn test_dedup() { node2.id(), false, ) - .collect_to_obj().unwrap(); + .collect_to_obj() + .unwrap(); txn.commit().unwrap(); @@ -315,7 +366,8 @@ fn test_dedup() { let traversal = G::new(&storage, &txn, &arena) .n_from_type("person") .out_node("knows") - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 2); @@ -323,7 +375,8 @@ fn test_dedup() { .n_from_type("person") .out_node("knows") .dedup() - .collect::,_>>().unwrap(); + .collect::, _>>() + .unwrap(); assert_eq!(traversal.len(), 1); assert_eq!(traversal[0].id(), node2.id()); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs index ed49fdac..2988de02 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs @@ -13,7 +13,8 @@ use crate::{ out::{out::OutAdapter, out_e::OutEdgesAdapter}, source::{ add_e::AddEAdapter, add_n::AddNAdapter, e_from_type::EFromTypeAdapter, - n_from_id::NFromIdAdapter, v_from_id::VFromIdAdapter, v_from_type::VFromTypeAdapter, + n_from_id::NFromIdAdapter, v_from_id::VFromIdAdapter, + v_from_type::VFromTypeAdapter, }, util::drop::Drop, vectors::{ @@ -22,7 +23,7 @@ use crate::{ }, }, types::GraphError, - vector_core::vector::HVector, + vector_core::HVector, }, utils::properties::ImmutablePropertiesMap, }; @@ -48,7 +49,7 @@ fn test_insert_and_fetch_vector() { let mut txn = storage.graph_env.write_txn().unwrap(); let vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[0.1, 0.2, 0.3], "embedding", None) + .insert_v(&[0.1, 0.2, 0.3], "embedding", None) .collect_to_obj() .unwrap(); txn.commit().unwrap(); @@ -81,7 +82,7 @@ fn test_vector_edges_from_and_to_node() { .unwrap()[0] .id(); let vector_id = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 0.0, 0.0], "embedding", None) + .insert_v(&[1.0, 0.0, 0.0], "embedding", None) .collect_to_obj() .unwrap() .id(); @@ -122,7 +123,7 @@ fn test_brute_force_vector_search_orders_by_distance() { let mut vector_ids = Vec::new(); for vector in vectors { let vec_id = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&vector, "vector", None) + .insert_v(&vector, "vector", None) 
.collect_to_obj() .unwrap() .id(); @@ -159,7 +160,7 @@ fn test_drop_vector_removes_edges() { .unwrap()[0] .id(); let vector_id = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[0.5, 0.5, 0.5], "vector", None) + .insert_v(&[0.5, 0.5, 0.5], "vector", None) .collect_to_obj() .unwrap() .id(); @@ -170,12 +171,11 @@ fn test_drop_vector_removes_edges() { txn.commit().unwrap(); let arena = Bump::new(); - let txn = storage.graph_env.read_txn().unwrap(); - let vectors = G::new(&storage, &txn, &arena) + let read_txn = storage.graph_env.read_txn().unwrap(); + let vectors = G::new(&storage, &read_txn, &arena) .search_v::(&[0.5, 0.5, 0.5], 10, "vector", None) .collect::, _>>() .unwrap(); - drop(txn); let mut txn = storage.graph_env.write_txn().unwrap(); Drop::drop_traversal( @@ -188,6 +188,8 @@ fn test_drop_vector_removes_edges() { .unwrap(); txn.commit().unwrap(); + drop(read_txn); + let arena = Bump::new(); let txn = storage.graph_env.read_txn().unwrap(); let remaining = G::new(&storage, &txn, &arena) @@ -210,7 +212,7 @@ fn test_v_from_type_basic_with_vector_data() { // Insert a vector with label "test_label" let vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "test_label", None) + .insert_v(&[1.0, 2.0, 3.0], "test_label", None) .collect_to_obj() .unwrap(); let vector_id = vector.id(); @@ -228,9 +230,11 @@ fn test_v_from_type_basic_with_vector_data() { assert_eq!(results[0].id(), vector_id); // Verify it's a full HVector with data - if let crate::helix_engine::traversal_core::traversal_value::TraversalValue::Vector(v) = &results[0] { - assert_eq!(v.data.len(), 3); - assert_eq!(v.data[0], 1.0); + if let crate::helix_engine::traversal_core::traversal_value::TraversalValue::Vector(v) = + &results[0] + { + assert_eq!(v.data_borrowed().len(), 3); + assert_eq!(v.data_borrowed()[0], 1.0); } else { panic!("Expected TraversalValue::Vector"); } @@ -244,7 +248,7 @@ fn test_v_from_type_without_vector_data() { // Insert a vector with label "no_data_label" let vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[4.0, 5.0, 6.0], "no_data_label", None) + .insert_v(&[4.0, 5.0, 6.0], "no_data_label", None) .collect_to_obj() .unwrap(); let vector_id = vector.id(); @@ -261,11 +265,12 @@ fn test_v_from_type_without_vector_data() { assert_eq!(results.len(), 1); assert_eq!(results[0].id(), vector_id); - // Verify it's a VectorWithoutData + // Verify it's a Vector with no data match &results[0] { - crate::helix_engine::traversal_core::traversal_value::TraversalValue::VectorNodeWithoutVectorData(v) => { - assert_eq!(*v.id(), vector_id); - assert_eq!(v.label(), "no_data_label"); + crate::helix_engine::traversal_core::traversal_value::TraversalValue::Vector(v) => { + assert_eq!(v.id, vector_id); + assert_eq!(v.label, "no_data_label"); + assert!(v.data.is_none()); } _ => panic!("Expected TraversalValue::VectorNodeWithoutVectorData"), } @@ -279,15 +284,15 @@ fn test_v_from_type_multiple_same_label() { // Insert multiple vectors with the same label let v1 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "shared_label", None) + .insert_v(&[1.0, 2.0, 3.0], "shared_label", None) .collect_to_obj() .unwrap(); let v2 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[4.0, 5.0, 6.0], "shared_label", None) + .insert_v(&[4.0, 5.0, 6.0], "shared_label", None) .collect_to_obj() .unwrap(); let v3 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[7.0, 8.0, 9.0], "shared_label", None) + .insert_v(&[7.0, 8.0, 9.0], "shared_label", None) .collect_to_obj() 
.unwrap(); @@ -319,15 +324,15 @@ fn test_v_from_type_multiple_different_labels() { // Insert vectors with different labels let v1 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "label_a", None) + .insert_v(&[1.0, 2.0, 3.0], "label_a", None) .collect_to_obj() .unwrap(); let _v2 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[4.0, 5.0, 6.0], "label_b", None) + .insert_v(&[4.0, 5.0, 6.0], "label_b", None) .collect_to_obj() .unwrap(); let _v3 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[7.0, 8.0, 9.0], "label_c", None) + .insert_v(&[7.0, 8.0, 9.0], "label_c", None) .collect_to_obj() .unwrap(); txn.commit().unwrap(); @@ -352,7 +357,7 @@ fn test_v_from_type_nonexistent_label() { // Insert a vector with a different label let _vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "existing_label", None) + .insert_v(&[1.0, 2.0, 3.0], "existing_label", None) .collect_to_obj() .unwrap(); txn.commit().unwrap(); @@ -385,8 +390,8 @@ fn test_v_from_type_empty_database() { #[test] fn test_v_from_type_with_properties() { - use std::collections::HashMap; use crate::protocol::value::Value; + use std::collections::HashMap; let (_temp_dir, storage) = setup_test_db(); let arena = Bump::new(); @@ -398,21 +403,26 @@ fn test_v_from_type_with_properties() { properties.insert("count".to_string(), Value::I64(42)); properties.insert("score".to_string(), Value::F64(3.14)); properties.insert("active".to_string(), Value::Boolean(true)); - properties.insert("tags".to_string(), Value::Array(vec![ - Value::String("tag1".to_string()), - Value::String("tag2".to_string()), - ])); + properties.insert( + "tags".to_string(), + Value::Array(vec![ + Value::String("tag1".to_string()), + Value::String("tag2".to_string()), + ]), + ); // Convert to ImmutablePropertiesMap let props_map = ImmutablePropertiesMap::new( properties.len(), - properties.iter().map(|(k, v)| (arena.alloc_str(k) as &str, v.clone())), + properties + .iter() + .map(|(k, v)| (arena.alloc_str(k) as &str, v.clone())), &arena, ); // Insert vector with properties let vector = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "prop_label", Some(props_map)) + .insert_v(&[1.0, 2.0, 3.0], "prop_label", Some(props_map)) .collect_to_obj() .unwrap(); let vector_id = vector.id(); @@ -430,9 +440,14 @@ fn test_v_from_type_with_properties() { assert_eq!(results[0].id(), vector_id); // Verify properties are preserved - if let crate::helix_engine::traversal_core::traversal_value::TraversalValue::VectorNodeWithoutVectorData(v) = &results[0] { + if let crate::helix_engine::traversal_core::traversal_value::TraversalValue::Vector(v) = + &results[0] + { let props = v.properties.as_ref().unwrap(); - assert_eq!(props.get("name"), Some(&Value::String("test_vector".to_string()))); + assert_eq!( + props.get("name"), + Some(&Value::String("test_vector".to_string())) + ); assert_eq!(props.get("count"), Some(&Value::I64(42))); assert_eq!(props.get("score"), Some(&Value::F64(3.14))); assert_eq!(props.get("active"), Some(&Value::Boolean(true))); @@ -449,11 +464,11 @@ fn test_v_from_type_deleted_vectors_filtered() { // Insert two vectors with the same label let v1 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "delete_test", None) + .insert_v(&[1.0, 2.0, 3.0], "delete_test", None) .collect_to_obj() .unwrap(); let v2 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[4.0, 5.0, 6.0], "delete_test", None) + .insert_v(&[4.0, 5.0, 6.0], "delete_test", None) .collect_to_obj() 
.unwrap(); txn.commit().unwrap(); @@ -506,11 +521,11 @@ fn test_v_from_type_with_edges_and_nodes() { // Create vectors and connect them to the node let v1 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[1.0, 0.0, 0.0], "embedding", None) + .insert_v(&[1.0, 0.0, 0.0], "embedding", None) .collect_to_obj() .unwrap(); let v2 = G::new_mut(&storage, &arena, &mut txn) - .insert_v::(&[0.0, 1.0, 0.0], "embedding", None) + .insert_v(&[0.0, 1.0, 0.0], "embedding", None) .collect_to_obj() .unwrap(); @@ -549,191 +564,6 @@ fn test_v_from_type_with_edges_and_nodes() { assert_eq!(from_node.len(), 2); } -#[test] -fn test_v_from_type_after_migration() { - use std::collections::HashMap; - use crate::protocol::value::Value; - use crate::helix_engine::storage_core::storage_migration::migrate; - - // Helper to create old-format vector properties (HashMap-based) - fn create_old_properties( - label: &str, - is_deleted: bool, - extra_props: HashMap, - ) -> Vec { - let mut props = HashMap::new(); - props.insert("label".to_string(), Value::String(label.to_string())); - props.insert("is_deleted".to_string(), Value::Boolean(is_deleted)); - - for (k, v) in extra_props { - props.insert(k, v); - } - - bincode::serialize(&props).unwrap() - } - - // Helper to clear metadata (simulates PreMetadata state) - fn clear_metadata(storage: &mut crate::helix_engine::storage_core::HelixGraphStorage) -> Result<(), crate::helix_engine::types::GraphError> { - let mut txn = storage.graph_env.write_txn()?; - storage.metadata_db.clear(&mut txn)?; - txn.commit()?; - Ok(()) - } - - let (_temp_dir, storage) = setup_test_db(); - let mut storage_mut = match Arc::try_unwrap(storage) { - Ok(s) => s, - Err(_) => panic!("Failed to unwrap Arc - there are multiple references"), - }; - - // Clear metadata to simulate PreMetadata state (before migration) - clear_metadata(&mut storage_mut).unwrap(); - - // Create old-format vectors with various properties - { - let mut txn = storage_mut.graph_env.write_txn().unwrap(); - - // Vector 1: Simple vector with test label - let mut props1 = HashMap::new(); - props1.insert("name".to_string(), Value::String("vector1".to_string())); - props1.insert("count".to_string(), Value::I64(100)); - let old_bytes1 = create_old_properties("test_migration", false, props1); - storage_mut - .vectors - .vector_properties_db - .put(&mut txn, &1u128, &old_bytes1) - .unwrap(); - - // Add actual vector data with proper key format - let vector_data1: Vec = vec![1.0, 2.0, 3.0]; - let bytes1: Vec = vector_data1.iter().flat_map(|f| f.to_be_bytes()).collect(); - let key1 = [b"v:".as_slice(), &1u128.to_be_bytes(), &0usize.to_be_bytes()].concat(); - storage_mut - .vectors - .vectors_db - .put(&mut txn, &key1, &bytes1) - .unwrap(); - - // Vector 2: Another vector with same label - let mut props2 = HashMap::new(); - props2.insert("name".to_string(), Value::String("vector2".to_string())); - props2.insert("score".to_string(), Value::F64(0.95)); - let old_bytes2 = create_old_properties("test_migration", false, props2); - storage_mut - .vectors - .vector_properties_db - .put(&mut txn, &2u128, &old_bytes2) - .unwrap(); - - // Add actual vector data with proper key format - let vector_data2: Vec = vec![4.0, 5.0, 6.0]; - let bytes2: Vec = vector_data2.iter().flat_map(|f| f.to_be_bytes()).collect(); - let key2 = [b"v:".as_slice(), &2u128.to_be_bytes(), &0usize.to_be_bytes()].concat(); - storage_mut - .vectors - .vectors_db - .put(&mut txn, &key2, &bytes2) - .unwrap(); - - // Vector 3: Different label - let mut props3 = HashMap::new(); - 
props3.insert("name".to_string(), Value::String("vector3".to_string())); - let old_bytes3 = create_old_properties("other_label", false, props3); - storage_mut - .vectors - .vector_properties_db - .put(&mut txn, &3u128, &old_bytes3) - .unwrap(); - - // Add actual vector data with proper key format - let vector_data3: Vec = vec![7.0, 8.0, 9.0]; - let bytes3: Vec = vector_data3.iter().flat_map(|f| f.to_be_bytes()).collect(); - let key3 = [b"v:".as_slice(), &3u128.to_be_bytes(), &0usize.to_be_bytes()].concat(); - storage_mut - .vectors - .vectors_db - .put(&mut txn, &key3, &bytes3) - .unwrap(); - - txn.commit().unwrap(); - } - - // Run migration - let result = migrate(&mut storage_mut); - assert!(result.is_ok(), "Migration should succeed"); - - // Now query using v_from_type on the migrated data - let storage = Arc::new(storage_mut); - let arena = Bump::new(); - let txn = storage.graph_env.read_txn().unwrap(); - - // Query for "test_migration" label - should find 2 vectors - let results_with_data = G::new(&storage, &txn, &arena) - .v_from_type("test_migration", true) - .collect::, _>>() - .unwrap(); - - assert_eq!(results_with_data.len(), 2, "Should find 2 vectors with test_migration label"); - - // Verify we got the right vectors - let ids: Vec = results_with_data.iter().map(|v| v.id()).collect(); - assert!(ids.contains(&1u128), "Should contain vector 1"); - assert!(ids.contains(&2u128), "Should contain vector 2"); - - // Verify vector data is accessible - if let crate::helix_engine::traversal_core::traversal_value::TraversalValue::Vector(v) = &results_with_data[0] { - assert_eq!(v.data.len(), 3, "Vector should have 3 dimensions"); - } else { - panic!("Expected TraversalValue::Vector"); - } - - // Query without vector data to check properties - let arena2 = Bump::new(); - let results_without_data = G::new(&storage, &txn, &arena2) - .v_from_type("test_migration", false) - .collect::, _>>() - .unwrap(); - - assert_eq!(results_without_data.len(), 2, "Should still find 2 vectors"); - - // Verify properties are preserved after migration - for result in &results_without_data { - if let crate::helix_engine::traversal_core::traversal_value::TraversalValue::VectorNodeWithoutVectorData(v) = result { - assert_eq!(v.label(), "test_migration"); - - // Check that properties are accessible - let props = v.properties.as_ref().unwrap(); - let name = props.get("name"); - assert!(name.is_some(), "name property should exist"); - - // Verify it's a string - match name.unwrap() { - Value::String(s) => assert!(s == "vector1" || s == "vector2"), - _ => panic!("Expected name to be a string"), - } - } - } - - // Query for "other_label" - should find 1 vector - let arena3 = Bump::new(); - let other_results = G::new(&storage, &txn, &arena3) - .v_from_type("other_label", true) - .collect::, _>>() - .unwrap(); - - assert_eq!(other_results.len(), 1, "Should find 1 vector with other_label"); - assert_eq!(other_results[0].id(), 3u128); - - // Query for non-existent label after migration - let arena4 = Bump::new(); - let empty_results = G::new(&storage, &txn, &arena4) - .v_from_type("nonexistent", true) - .collect::, _>>() - .unwrap(); - - assert!(empty_results.is_empty(), "Should find no vectors with nonexistent label"); -} - // ============================================================================ // Error Tests for v_from_id // ============================================================================ @@ -790,7 +620,7 @@ fn test_v_from_id_with_deleted_vector() { // Create a vector let vector = G::new_mut(&storage, 
&arena, &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "test_vector", None) + .insert_v(&[1.0, 2.0, 3.0], "test_vector", None) .collect_to_obj() .unwrap(); let vector_id = vector.id(); diff --git a/helix-db/src/helix_engine/tests/vector_tests.rs b/helix-db/src/helix_engine/tests/vector_tests.rs index 4799902d..0f45b47d 100644 --- a/helix-db/src/helix_engine/tests/vector_tests.rs +++ b/helix-db/src/helix_engine/tests/vector_tests.rs @@ -1,18 +1,20 @@ -use crate::helix_engine::vector_core::vector_distance::{MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL}; - -use crate::helix_engine::vector_core::vector::HVector; +use crate::helix_engine::vector_core::{ + HVector, + distance::{MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL}, +}; use bumpalo::Bump; -fn alloc_vector<'a>(arena: &'a Bump, data: &[f64]) -> HVector<'a> { - let slice = arena.alloc_slice_copy(data); - HVector::from_slice("vector", 0, slice) +fn alloc_vector<'a>(arena: &'a Bump, data: &[f32]) -> HVector<'a> { + let mut bump_vec = bumpalo::collections::Vec::new_in(arena); + bump_vec.extend_from_slice(data); + HVector::from_vec("test_vector", bump_vec) } #[test] fn test_hvector_from_slice() { let arena = Bump::new(); let vector = alloc_vector(&arena, &[1.0, 2.0, 3.0]); - assert_eq!(vector.data, &[1.0, 2.0, 3.0]); + assert_eq!(vector.data_borrowed(), &[1.0, 2.0, 3.0]); } #[test] @@ -30,7 +32,13 @@ fn test_hvector_distance_min() { let v1 = alloc_vector(&arena, &[1.0, 2.0, 3.0]); let v2 = alloc_vector(&arena, &[1.0, 2.0, 3.0]); let distance = v2.distance_to(&v1).unwrap(); - assert_eq!(distance, MIN_DISTANCE); + println!("Distance {}", distance); + assert!( + (distance - MIN_DISTANCE).abs() < 1e-6, + "Distance {} is not close enough to MIN_DISTANCE ({})", + distance, + MIN_DISTANCE + ); } #[test] @@ -97,5 +105,5 @@ fn test_hvector_cosine_similarity() { let arena2 = Bump::new(); let v2 = alloc_vector(&arena2, &[4.0, 5.0, 6.0]); let similarity = v1.distance_to(&v2).unwrap(); - assert!((similarity - (1.0 - 0.9746318461970762)).abs() < 1e-9); + assert!((similarity - (1.0 - 0.974_631_85)).abs() < 1e-7); } diff --git a/helix-db/src/helix_engine/traversal_core/config.rs b/helix-db/src/helix_engine/traversal_core/config.rs index 74721b61..fee59808 100644 --- a/helix-db/src/helix_engine/traversal_core/config.rs +++ b/helix-db/src/helix_engine/traversal_core/config.rs @@ -93,7 +93,7 @@ impl Config { let config = std::fs::read_to_string(config_path)?; let mut config = sonic_rs::from_str::(&config)?; - + // Schema will be populated from INTROSPECTION_DATA during code generation config.schema = None; diff --git a/helix-db/src/helix_engine/traversal_core/ops/bm25/hybrid_search_bm25.rs b/helix-db/src/helix_engine/traversal_core/ops/bm25/hybrid_search_bm25.rs index b7af7680..c1006b27 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/bm25/hybrid_search_bm25.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/bm25/hybrid_search_bm25.rs @@ -79,4 +79,3 @@ impl<'a, I: Iterator>> HybridSearchBM2 } } */ - diff --git a/helix-db/src/helix_engine/traversal_core/ops/bm25/mod.rs b/helix-db/src/helix_engine/traversal_core/ops/bm25/mod.rs index 6395c339..2e709c9b 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/bm25/mod.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/bm25/mod.rs @@ -1,3 +1,2 @@ -pub mod search_bm25; pub mod hybrid_search_bm25; - +pub mod search_bm25; diff --git a/helix-db/src/helix_engine/traversal_core/ops/bm25/search_bm25.rs b/helix-db/src/helix_engine/traversal_core/ops/bm25/search_bm25.rs index 63bf86b5..16d389a5 100644 --- 
a/helix-db/src/helix_engine/traversal_core/ops/bm25/search_bm25.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/bm25/search_bm25.rs
@@ -82,7 +82,7 @@ impl<'db, 'arena, 'txn, I: Iterator<Item = Result<TraversalValue<'arena>, GraphE
                     if label_in_lmdb == label_as_bytes {
                         match Node::<'arena>::from_bincode_bytes(id, value, self.arena) {
                             Ok(node) => {
-                                return Some(Ok(TraversalValue::NodeWithScore { node, score: score as f64 }));
+                                return Some(Ok(TraversalValue::NodeWithScore { node, score }));
                             }
                             Err(e) => {
                                 println!("{} Error decoding node: {:?}", line!(), e);
diff --git a/helix-db/src/helix_engine/traversal_core/ops/in_/in_.rs b/helix-db/src/helix_engine/traversal_core/ops/in_/in_.rs
index 92fe6d19..b64a77dd 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/in_/in_.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/in_/in_.rs
@@ -88,7 +88,7 @@ impl<'db, 'arena, 'txn, 's, I: Iterator<Item = Result<TraversalValue<'arena>, Gr
                         .vectors
                         .get_vector_properties(self.txn, item_id, self.arena)
                     {
-                        return Some(Ok(TraversalValue::VectorNodeWithoutVectorData(vec)));
+                        return Some(Ok(TraversalValue::Vector(vec)));
                     }
                     None
                 } else {
diff --git a/helix-db/src/helix_engine/traversal_core/ops/in_/mod.rs b/helix-db/src/helix_engine/traversal_core/ops/in_/mod.rs
index 13057d6d..afdee820 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/in_/mod.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/in_/mod.rs
@@ -1,4 +1,4 @@
 pub mod in_;
 pub mod in_e;
 pub mod to_n;
-pub mod to_v;
\ No newline at end of file
+pub mod to_v;
diff --git a/helix-db/src/helix_engine/traversal_core/ops/in_/to_v.rs b/helix-db/src/helix_engine/traversal_core/ops/in_/to_v.rs
index 0c627a60..f0c3dc6c 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/in_/to_v.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/in_/to_v.rs
@@ -47,9 +47,7 @@ impl<'db, 'arena, 'txn, I: Iterator<Item = Result<TraversalValue<'arena>, GraphE
                     item.to_node,
                     self.arena,
                 ) {
-                    Ok(Some(vector)) => {
-                        Some(Ok(TraversalValue::VectorNodeWithoutVectorData(vector)))
-                    }
+                    Ok(Some(vector)) => Some(Ok(TraversalValue::Vector(vector))),
                     Ok(None) => None,
                     Err(e) => Some(Err(GraphError::from(e))),
                 }
diff --git a/helix-db/src/helix_engine/traversal_core/ops/out/from_v.rs b/helix-db/src/helix_engine/traversal_core/ops/out/from_v.rs
index a3753d69..5b2fed9a 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/out/from_v.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/out/from_v.rs
@@ -53,7 +53,7 @@ where
                     item.from_node,
                     self.arena,
                 ) {
-                    Ok(Some(vector)) => TraversalValue::VectorNodeWithoutVectorData(vector),
+                    Ok(Some(vector)) => TraversalValue::Vector(vector),
                     Ok(None) => {
                         return Some(Err(GraphError::from(VectorError::VectorNotFound(
                             item.from_node.to_string(),
diff --git a/helix-db/src/helix_engine/traversal_core/ops/out/out.rs b/helix-db/src/helix_engine/traversal_core/ops/out/out.rs
index 4fcc6c3a..eecf82dc 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/out/out.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/out/out.rs
@@ -88,7 +88,7 @@ impl<'db, 'arena, 'txn, 's, I: Iterator<Item = Result<TraversalValue<'arena>, Gr
                         .vectors
                         .get_vector_properties(self.txn, item_id, self.arena)
                     {
-                        return Some(Ok(TraversalValue::VectorNodeWithoutVectorData(vec)));
+                        return Some(Ok(TraversalValue::Vector(vec)));
                     }
                     None
                 } else {
diff --git a/helix-db/src/helix_engine/traversal_core/ops/source/mod.rs b/helix-db/src/helix_engine/traversal_core/ops/source/mod.rs
index 774d9306..75293c67 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/source/mod.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/source/mod.rs
@@ -7,4 +7,4 @@ pub mod n_from_id;
 pub mod n_from_index;
 pub mod n_from_type;
 pub mod v_from_id;
-pub mod v_from_type;
\ No newline at end of file
+pub mod v_from_type;
diff --git a/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs b/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs
index e720fd6a..064312f5 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/source/n_from_index.rs
@@ -1,9 +1,13 @@
 use crate::{
     helix_engine::{
-        traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue, LMDB_STRING_HEADER_LENGTH},
+        traversal_core::{
+            LMDB_STRING_HEADER_LENGTH, traversal_iter::RoTraversalIterator,
+            traversal_value::TraversalValue,
+        },
         types::GraphError,
     },
-    protocol::value::Value, utils::items::Node,
+    protocol::value::Value,
+    utils::items::Node,
 };
 
 use serde::Serialize;
@@ -79,18 +83,18 @@ impl<
                 );
                 let length_of_label_in_lmdb =
                     u64::from_le_bytes(value[..LMDB_STRING_HEADER_LENGTH].try_into().unwrap()) as usize;
-                
+
                 if length_of_label_in_lmdb != label.len() {
                     return None;
                 }
-                
+
                 assert!(
                     value.len() >= length_of_label_in_lmdb + LMDB_STRING_HEADER_LENGTH,
                     "value length is not at least the header length plus the label length meaning there has been a corruption on node insertion"
                 );
                 let label_in_lmdb = &value[LMDB_STRING_HEADER_LENGTH
                     ..LMDB_STRING_HEADER_LENGTH + length_of_label_in_lmdb];
-                
+
                 if label_in_lmdb == label_as_bytes {
                     match Node::<'arena>::from_bincode_bytes(node_id, value, self.arena) {
                         Ok(node) => {
@@ -104,10 +108,10 @@ impl<
                 } else {
                     return None;
                 }
-                
+
             }
             None
-            
+
         });
diff --git a/helix-db/src/helix_engine/traversal_core/ops/source/n_from_type.rs b/helix-db/src/helix_engine/traversal_core/ops/source/n_from_type.rs
index 90c9fdc9..dfd49f24 100644
--- a/helix-db/src/helix_engine/traversal_core/ops/source/n_from_type.rs
+++ b/helix-db/src/helix_engine/traversal_core/ops/source/n_from_type.rs
@@ -15,7 +15,7 @@ pub trait NFromTypeAdapter<'db, 'arena, 'txn, 's>:
     /// Returns an iterator containing the nodes with the given label.
    ///
     /// Note that the `label` cannot be empty and must be a valid, existing node label.'
-    /// 
+    ///
     /// The label is stored before the node properties in LMDB.
     /// Bincode assures that the fields of a struct are stored in the same order as they are defined in the struct (first to last).
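     ///
     /// As a rough sketch of what that layout allows (a hypothetical helper, not
     /// part of the crate's API; the 8-byte little-endian length header is an
     /// assumption based on how `LMDB_STRING_HEADER_LENGTH` is used in the code
     /// above):
     ///
     /// ```rust,ignore
     /// const LMDB_STRING_HEADER_LENGTH: usize = 8; // assumed: bincode's u64 length prefix
     ///
     /// // Check the label without decoding the whole node: read the length
     /// // prefix, then compare the raw label bytes in place.
     /// fn label_matches(value: &[u8], label: &str) -> bool {
     ///     if value.len() < LMDB_STRING_HEADER_LENGTH {
     ///         return false;
     ///     }
     ///     let len = u64::from_le_bytes(
     ///         value[..LMDB_STRING_HEADER_LENGTH].try_into().unwrap(),
     ///     ) as usize;
     ///     len == label.len()
     ///         && value.len() >= LMDB_STRING_HEADER_LENGTH + len
     ///         && &value[LMDB_STRING_HEADER_LENGTH..LMDB_STRING_HEADER_LENGTH + len]
     ///             == label.as_bytes()
     /// }
     /// ```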
/// @@ -58,18 +58,18 @@ impl<'db, 'arena, 'txn, 's, I: Iterator, Gr ); let length_of_label_in_lmdb = u64::from_le_bytes(value[..LMDB_STRING_HEADER_LENGTH].try_into().unwrap()) as usize; - + if length_of_label_in_lmdb != label.len() { return None; } - + assert!( value.len() >= length_of_label_in_lmdb + LMDB_STRING_HEADER_LENGTH, "value length is not at least the header length plus the label length meaning there has been a corruption on node insertion" ); let label_in_lmdb = &value[LMDB_STRING_HEADER_LENGTH ..LMDB_STRING_HEADER_LENGTH + length_of_label_in_lmdb]; - + if label_in_lmdb == label_as_bytes { match Node::<'arena>::from_bincode_bytes(id, value, self.arena) { Ok(node) => { diff --git a/helix-db/src/helix_engine/traversal_core/ops/source/v_from_id.rs b/helix-db/src/helix_engine/traversal_core/ops/source/v_from_id.rs index 3de40fbc..4bfe07da 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/source/v_from_id.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/source/v_from_id.rs @@ -66,7 +66,7 @@ where if vec.deleted { Err(GraphError::from(VectorError::VectorDeleted)) } else { - Ok(TraversalValue::VectorNodeWithoutVectorData(vec)) + Ok(TraversalValue::Vector(vec)) } } Ok(None) => Err(GraphError::from(VectorError::VectorNotFound( diff --git a/helix-db/src/helix_engine/traversal_core/ops/source/v_from_type.rs b/helix-db/src/helix_engine/traversal_core/ops/source/v_from_type.rs index 76be4d09..ad9d90e8 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/source/v_from_type.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/source/v_from_type.rs @@ -1,8 +1,7 @@ use crate::helix_engine::{ - traversal_core::{LMDB_STRING_HEADER_LENGTH, traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, - types::{GraphError, VectorError}, - vector_core::{vector_without_data::VectorWithoutData}, - }; + traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, + types::GraphError, +}; pub trait VFromTypeAdapter<'db, 'arena, 'txn>: Iterator, GraphError>> @@ -36,76 +35,24 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE 'txn, impl Iterator, GraphError>>, > { - let label_bytes = label.as_bytes(); - let iter = self - .storage - .vectors - .vector_properties_db - .iter(self.txn) - .unwrap() - .filter_map(move |item| { - if let Ok((id, value)) = item { - - - // get label via bytes directly - assert!( - value.len() >= LMDB_STRING_HEADER_LENGTH, - "value length does not contain header which means the `label` field was missing from the node on insertion" - ); - let length_of_label_in_lmdb = - u64::from_le_bytes(value[..LMDB_STRING_HEADER_LENGTH].try_into().unwrap()) as usize; - assert!( - value.len() >= length_of_label_in_lmdb + LMDB_STRING_HEADER_LENGTH, - "value length is not at least the header length plus the label length meaning there has been a corruption on node insertion" - ); - let label_in_lmdb = &value[LMDB_STRING_HEADER_LENGTH - ..LMDB_STRING_HEADER_LENGTH + length_of_label_in_lmdb]; - - - // get deleted via bytes directly - - // skip single byte for version - let version_index = length_of_label_in_lmdb + LMDB_STRING_HEADER_LENGTH; - - // get bool for deleted - let deleted_index = version_index + 1; - let deleted = value[deleted_index] == 1; - - if deleted { - return None; - } - - if label_in_lmdb == label_bytes { - let vector_without_data = VectorWithoutData::from_bincode_bytes(self.arena, value, id) - .map_err(|e| VectorError::ConversionError(e.to_string())) - .ok()?; - - if get_vector_data { - let mut vector = match 
self.storage.vectors.get_raw_vector_data(self.txn, id, label, self.arena) { - Ok(bytes) => bytes, - Err(VectorError::VectorDeleted) => return None, - Err(e) => return Some(Err(GraphError::from(e))), - }; - vector.expand_from_vector_without_data(vector_without_data); - return Some(Ok(TraversalValue::Vector(vector))); - } else { - return Some(Ok(TraversalValue::VectorNodeWithoutVectorData( - vector_without_data - ))); - } - } else { - return None; - } - - } - None - }); + let mut inner = Vec::new(); + match self.storage.vectors.get_all_vectors_by_label( + self.txn, + label, + get_vector_data, + self.arena, + ) { + Ok(vec) => vec + .into_iter() + .for_each(|v| inner.push(Ok(TraversalValue::Vector(v)))), + Err(err) => inner.push(Err(GraphError::from(err))), + } RoTraversalIterator { storage: self.storage, arena: self.arena, txn: self.txn, - inner: iter, + inner: inner.into_iter(), } } } diff --git a/helix-db/src/helix_engine/traversal_core/ops/util/drop.rs b/helix-db/src/helix_engine/traversal_core/ops/util/drop.rs index b2f5ec86..a69d5068 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/util/drop.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/util/drop.rs @@ -42,12 +42,6 @@ where Ok(_) => Ok(()), Err(e) => Err(e), }, - TraversalValue::VectorNodeWithoutVectorData(vector) => { - match storage.drop_vector(txn, &vector.id) { - Ok(_) => Ok(()), - Err(e) => Err(e), - } - } TraversalValue::Empty => Ok(()), _ => Err(GraphError::ConversionError(format!( "Incorrect Type: {item:?}" diff --git a/helix-db/src/helix_engine/traversal_core/ops/util/paths.rs b/helix-db/src/helix_engine/traversal_core/ops/util/paths.rs index d562a91f..aa1024ac 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/util/paths.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/util/paths.rs @@ -5,7 +5,10 @@ use crate::{ types::GraphError, }, protocol::value::Value, - utils::{items::{Edge, Node}, label_hash::hash_label}, + utils::{ + items::{Edge, Node}, + label_hash::hash_label, + }, }; use heed3::RoTxn; use std::{ @@ -19,23 +22,23 @@ pub fn default_weight_fn<'arena>( edge: &Edge<'arena>, _src_node: &Node<'arena>, _dst_node: &Node<'arena>, -) -> Result { +) -> Result { Ok(edge .properties .as_ref() .and_then(|props| props.get("weight")) .and_then(|w| match w { - Value::F32(f) => Some(*f as f64), - Value::F64(f) => Some(*f), - Value::I8(i) => Some(*i as f64), - Value::I16(i) => Some(*i as f64), - Value::I32(i) => Some(*i as f64), - Value::I64(i) => Some(*i as f64), - Value::U8(i) => Some(*i as f64), - Value::U16(i) => Some(*i as f64), - Value::U32(i) => Some(*i as f64), - Value::U64(i) => Some(*i as f64), - Value::U128(i) => Some(*i as f64), + Value::F32(f) => Some(*f), + Value::F64(f) => Some(*f as f32), + Value::I8(i) => Some(*i as f32), + Value::I16(i) => Some(*i as f32), + Value::I32(i) => Some(*i as f32), + Value::I64(i) => Some(*i as f32), + Value::U8(i) => Some(*i as f32), + Value::U16(i) => Some(*i as f32), + Value::U32(i) => Some(*i as f32), + Value::U64(i) => Some(*i as f32), + Value::U128(i) => Some(*i as f32), _ => None, }) .unwrap_or(1.0)) @@ -46,22 +49,22 @@ pub fn default_weight_fn<'arena>( pub fn property_heuristic<'arena>( node: &Node<'arena>, property_name: &str, -) -> Result { +) -> Result { node.properties .as_ref() .and_then(|props| props.get(property_name)) .and_then(|v| match v { - Value::F64(f) => Some(*f), - Value::F32(f) => Some(*f as f64), - Value::I64(i) => Some(*i as f64), - Value::I32(i) => Some(*i as f64), - Value::I16(i) => Some(*i as f64), - Value::I8(i) => Some(*i as f64), - 
Value::U128(i) => Some(*i as f64), - Value::U64(i) => Some(*i as f64), - Value::U32(i) => Some(*i as f64), - Value::U16(i) => Some(*i as f64), - Value::U8(i) => Some(*i as f64), + Value::F32(f) => Some(*f), + Value::F64(f) => Some(*f as f32), + Value::I64(i) => Some(*i as f32), + Value::I32(i) => Some(*i as f32), + Value::I16(i) => Some(*i as f32), + Value::I8(i) => Some(*i as f32), + Value::U128(i) => Some(*i as f32), + Value::U64(i) => Some(*i as f32), + Value::U32(i) => Some(*i as f32), + Value::U16(i) => Some(*i as f32), + Value::U8(i) => Some(*i as f32), _ => None, }) .ok_or_else(|| { @@ -85,12 +88,18 @@ pub enum PathAlgorithm { AStar, } -pub struct ShortestPathIterator<'db, 'arena, 'txn, I, F, H = fn(&Node<'arena>) -> Result> -where +pub struct ShortestPathIterator< + 'db, + 'arena, + 'txn, + I, + F, + H = fn(&Node<'arena>) -> Result, +> where 'db: 'arena, 'arena: 'txn, - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, - H: Fn(&Node<'arena>) -> Result, + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + H: Fn(&Node<'arena>) -> Result, { pub arena: &'arena bumpalo::Bump, pub iter: I, @@ -106,7 +115,7 @@ where #[derive(Debug, Clone)] struct DijkstraState { node_id: u128, - distance: f64, + distance: f32, } impl Eq for DijkstraState {} @@ -136,8 +145,8 @@ impl PartialOrd for DijkstraState { #[derive(Debug, Clone)] struct AStarState { node_id: u128, - g_score: f64, - f_score: f64, + g_score: f32, + f_score: f32, } impl Eq for AStarState {} @@ -170,8 +179,8 @@ impl< 'arena: 'txn, 'txn, I: Iterator, GraphError>>, - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, - H: Fn(&Node<'arena>) -> Result, + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + H: Fn(&Node<'arena>) -> Result, > Iterator for ShortestPathIterator<'db, 'arena, 'txn, I, F, H> { type Item = Result, GraphError>; @@ -199,8 +208,8 @@ impl< impl<'db, 'arena, 'txn, I, F, H> ShortestPathIterator<'db, 'arena, 'txn, I, F, H> where - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, - H: Fn(&Node<'arena>) -> Result, + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + H: Fn(&Node<'arena>) -> Result, { fn reconstruct_path( &self, @@ -391,12 +400,12 @@ where None => { return Some(Err(GraphError::TraversalError( "A* algorithm requires a heuristic function".to_string(), - ))) + ))); } }; let mut heap = BinaryHeap::new(); - let mut g_scores: HashMap = HashMap::with_capacity(64); + let mut g_scores: HashMap = HashMap::with_capacity(64); let mut parent: HashMap = HashMap::with_capacity(32); // Calculate initial heuristic for start node @@ -541,7 +550,7 @@ pub trait ShortestPathAdapter<'db, 'arena, 'txn, 's, I>: 'arena, 'txn, I, - fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, >, >; @@ -554,7 +563,7 @@ pub trait ShortestPathAdapter<'db, 'arena, 'txn, 's, I>: weight_fn: F, ) -> RoTraversalIterator<'db, 'arena, 'txn, ShortestPathIterator<'db, 'arena, 'txn, I, F>> where - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result; + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result; fn shortest_path_astar( self, @@ -565,8 +574,8 @@ pub trait ShortestPathAdapter<'db, 'arena, 'txn, 's, I>: heuristic_fn: H, ) -> RoTraversalIterator<'db, 'arena, 'txn, ShortestPathIterator<'db, 'arena, 'txn, I, F, H>> where - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, - H: Fn(&Node<'arena>) -> Result; + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + H: 
Fn(&Node<'arena>) -> Result; } impl<'db, 'arena, 'txn, 's, I: Iterator, GraphError>>> @@ -587,10 +596,16 @@ impl<'db, 'arena, 'txn, 's, I: Iterator, Gr 'arena, 'txn, I, - fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, >, > { - self.shortest_path_with_algorithm(edge_label, from, to, PathAlgorithm::BFS, default_weight_fn) + self.shortest_path_with_algorithm( + edge_label, + from, + to, + PathAlgorithm::BFS, + default_weight_fn, + ) } #[inline] @@ -603,7 +618,7 @@ impl<'db, 'arena, 'txn, 's, I: Iterator, Gr weight_fn: F, ) -> RoTraversalIterator<'db, 'arena, 'txn, ShortestPathIterator<'db, 'arena, 'txn, I, F>> where - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, { RoTraversalIterator { arena: self.arena, @@ -637,8 +652,8 @@ impl<'db, 'arena, 'txn, 's, I: Iterator, Gr heuristic_fn: H, ) -> RoTraversalIterator<'db, 'arena, 'txn, ShortestPathIterator<'db, 'arena, 'txn, I, F, H>> where - F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, - H: Fn(&Node<'arena>) -> Result, + F: Fn(&Edge<'arena>, &Node<'arena>, &Node<'arena>) -> Result, + H: Fn(&Node<'arena>) -> Result, { RoTraversalIterator { arena: self.arena, diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs index 7aa0dd24..95ae6e1f 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs @@ -1,7 +1,10 @@ use crate::helix_engine::{ traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, types::GraphError, - vector_core::vector_distance::cosine_similarity, + vector_core::{ + distance::{Cosine, Distance}, + node::Item, + }, }; use itertools::Itertools; @@ -10,7 +13,7 @@ pub trait BruteForceSearchVAdapter<'db, 'arena, 'txn>: { fn brute_force_search_v( self, - query: &'arena [f64], + query: &'arena [f32], k: K, ) -> RoTraversalIterator< 'db, @@ -28,7 +31,7 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE { fn brute_force_search_v( self, - query: &'arena [f64], + _query: &'arena [f32], k: K, ) -> RoTraversalIterator< 'db, @@ -40,11 +43,15 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE K: TryInto, K::Error: std::fmt::Debug, { + let _arena = bumpalo::Bump::new(); let iter = self .inner .filter_map(|v| match v { Ok(TraversalValue::Vector(mut v)) => { - let d = cosine_similarity(v.data, query).unwrap(); + let d = Cosine::distance( + v.data.as_ref().unwrap(), + &Item::::from_slice(v.data_borrowed()), + ); v.set_distance(d); Some(v) } @@ -52,14 +59,15 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE }) .sorted_by(|v1, v2| v1.partial_cmp(v2).unwrap()) .take(k.try_into().unwrap()) - .filter_map(move |mut item| { + .filter_map(move |item| { match self .storage .vectors - .get_vector_properties(self.txn, *item.id(), self.arena) + .get_vector_properties(self.txn, item.id, self.arena) { - Ok(Some(vector_without_data)) => { - item.expand_from_vector_without_data(vector_without_data); + Ok(Some(_vector_without_data)) => { + // todo! 
+ // item.expand_from_vector_without_data(vector_without_data); Some(item) } diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs index 3c167ef1..7125533e 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs @@ -2,18 +2,17 @@ use crate::{ helix_engine::{ traversal_core::{traversal_iter::RwTraversalIterator, traversal_value::TraversalValue}, types::GraphError, - vector_core::{hnsw::HNSW, vector::HVector}, + vector_core::HVector, }, utils::properties::ImmutablePropertiesMap, }; -use heed3::RoTxn; pub trait InsertVAdapter<'db, 'arena, 'txn>: Iterator, GraphError>> { - fn insert_v( + fn insert_v( self, - query: &'arena [f64], + query: &'arena [f32], label: &'arena str, properties: Option>, ) -> RwTraversalIterator< @@ -21,17 +20,15 @@ pub trait InsertVAdapter<'db, 'arena, 'txn>: 'arena, 'txn, impl Iterator, GraphError>>, - > - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool; + >; } impl<'db, 'arena, 'txn, I: Iterator, GraphError>>> InsertVAdapter<'db, 'arena, 'txn> for RwTraversalIterator<'db, 'arena, 'txn, I> { - fn insert_v( + fn insert_v( self, - query: &'arena [f64], + query: &'arena [f32], label: &'arena str, properties: Option>, ) -> RwTraversalIterator< @@ -39,14 +36,11 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE 'arena, 'txn, impl Iterator, GraphError>>, - > - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - { + > { let vector: Result, crate::helix_engine::types::VectorError> = self .storage .vectors - .insert::(self.txn, label, query, properties, self.arena); + .insert(self.txn, label, query, properties, self.arena); let result = match vector { Ok(vector) => Ok(TraversalValue::Vector(vector)), diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs index df8e619e..023dbdb3 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs @@ -3,7 +3,7 @@ use heed3::RoTxn; use crate::helix_engine::{ traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, types::{GraphError, VectorError}, - vector_core::{hnsw::HNSW, vector::HVector}, + vector_core::HVector, }; use std::iter::once; @@ -12,10 +12,10 @@ pub trait SearchVAdapter<'db, 'arena, 'txn>: { fn search_v( self, - query: &'arena [f64], + query: &'arena [f32], k: K, label: &'arena str, - filter: Option<&'arena [F]>, + filter: Option, ) -> RoTraversalIterator< 'db, 'arena, @@ -33,10 +33,10 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE { fn search_v( self, - query: &'arena [f64], + query: &'arena [f32], k: K, label: &'arena str, - filter: Option<&'arena [F]>, + filter: Option, ) -> RoTraversalIterator< 'db, 'arena, @@ -50,20 +50,43 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE { let vectors = self.storage.vectors.search( self.txn, - query, + query.to_vec(), k.try_into().unwrap(), label, - filter, - false, self.arena, ); let iter = match vectors { - Ok(vectors) => vectors - .into_iter() - .map(|vector| Ok::(TraversalValue::Vector(vector))) - .collect::>() - .into_iter(), + Ok(vectors) => { + match self.storage.vectors.nns_to_hvectors( + self.txn, + vectors.into_nns(), + false, + self.arena, + ) { + Ok(hvectors) => match filter { + Some(filter) => hvectors + .into_iter() + .filter(|vector| filter(vector, self.txn)) + .map(|vector| { + 
Ok::(TraversalValue::Vector(vector)) + }) + .collect::>() + .into_iter(), + None => hvectors + .into_iter() + .map(|vector| { + Ok::(TraversalValue::Vector(vector)) + }) + .collect::>() + .into_iter(), + }, + Err(err) => { + let error = GraphError::VectorError(format!("{err}")); + once(Err(error)).collect::>().into_iter() + } + } + } Err(VectorError::VectorNotFound(id)) => { let error = GraphError::VectorError(format!("vector not found for id {id}")); once(Err(error)).collect::>().into_iter() diff --git a/helix-db/src/helix_engine/traversal_core/traversal_iter.rs b/helix-db/src/helix_engine/traversal_core/traversal_iter.rs index 30206239..22b58fae 100644 --- a/helix-db/src/helix_engine/traversal_core/traversal_iter.rs +++ b/helix-db/src/helix_engine/traversal_core/traversal_iter.rs @@ -49,7 +49,9 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE } pub fn collect_to_obj(mut self) -> Result, GraphError> { - self.inner.next().unwrap_or(Err(GraphError::New("No value found".to_string()))) + self.inner + .next() + .unwrap_or(Err(GraphError::New("No value found".to_string()))) } pub fn collect_to_value(self) -> Value { @@ -64,15 +66,14 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE default: bool, f: impl Fn(&Value) -> bool, ) -> Result { - let val = match &self.inner.next() { + match &self.inner.next() { Some(Ok(TraversalValue::Value(val))) => Ok(f(val)), Some(Ok(_)) => Err(GraphError::ConversionError( "Expected value, got something else".to_string(), )), Some(Err(err)) => Err(GraphError::from(err.to_string())), None => Ok(default), - }; - val + } } } @@ -130,7 +131,9 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE } pub fn collect_to_obj(mut self) -> Result, GraphError> { - self.inner.next().unwrap_or(Err(GraphError::New("No value found".to_string()))) + self.inner + .next() + .unwrap_or(Err(GraphError::New("No value found".to_string()))) } pub fn map_value_or( @@ -138,14 +141,13 @@ impl<'db, 'arena, 'txn, I: Iterator, GraphE default: bool, f: impl Fn(&Value) -> bool, ) -> Result { - let val = match &self.inner.next() { + match &self.inner.next() { Some(Ok(TraversalValue::Value(val))) => Ok(f(val)), Some(Ok(_)) => Err(GraphError::ConversionError( "Expected value, got something else".to_string(), )), Some(Err(err)) => Err(GraphError::from(err.to_string())), None => Ok(default), - }; - val + } } } diff --git a/helix-db/src/helix_engine/traversal_core/traversal_value.rs b/helix-db/src/helix_engine/traversal_core/traversal_value.rs index 56ad5d2f..6cc01365 100644 --- a/helix-db/src/helix_engine/traversal_core/traversal_value.rs +++ b/helix-db/src/helix_engine/traversal_core/traversal_value.rs @@ -1,7 +1,7 @@ use serde::Serialize; use crate::{ - helix_engine::vector_core::{vector::HVector, vector_without_data::VectorWithoutData}, + helix_engine::vector_core::HVector, protocol::value::Value, utils::items::{Edge, Node}, }; @@ -18,8 +18,6 @@ pub enum TraversalValue<'arena> { Edge(Edge<'arena>), /// A vector in the graph Vector(HVector<'arena>), - /// Vector node without vector data - VectorNodeWithoutVectorData(VectorWithoutData<'arena>), /// A count of the number of items /// A path between two nodes in the graph Path((Vec>, Vec>)), @@ -27,7 +25,7 @@ pub enum TraversalValue<'arena> { Value(Value), /// Item With Score - NodeWithScore { node: Node<'arena>, score: f64 }, + NodeWithScore { node: Node<'arena>, score: f32 }, /// An empty traversal value Empty, } @@ -38,7 +36,6 @@ impl<'arena> TraversalValue<'arena> { TraversalValue::Node(node) => node.id, TraversalValue::Edge(edge) => edge.id, 
TraversalValue::Vector(vector) => vector.id, - TraversalValue::VectorNodeWithoutVectorData(vector) => vector.id, TraversalValue::NodeWithScore { node, .. } => node.id, TraversalValue::Empty => 0, _ => 0, @@ -50,7 +47,6 @@ impl<'arena> TraversalValue<'arena> { TraversalValue::Node(node) => node.label, TraversalValue::Edge(edge) => edge.label, TraversalValue::Vector(vector) => vector.label, - TraversalValue::VectorNodeWithoutVectorData(vector) => vector.label, TraversalValue::NodeWithScore { node, .. } => node.label, TraversalValue::Empty => "", _ => "", @@ -71,18 +67,16 @@ impl<'arena> TraversalValue<'arena> { } } - pub fn data(&self) -> &'arena [f64] { + pub fn data(&'arena self) -> &'arena [f32] { match self { - TraversalValue::Vector(vector) => vector.data, - TraversalValue::VectorNodeWithoutVectorData(_) => &[], + TraversalValue::Vector(vector) => vector.data_borrowed(), _ => unimplemented!(), } } - pub fn score(&self) -> f64 { + pub fn score(&self) -> f32 { match self { TraversalValue::Vector(vector) => vector.score(), - TraversalValue::VectorNodeWithoutVectorData(_) => 2f64, TraversalValue::NodeWithScore { score, .. } => *score, _ => unimplemented!(), } @@ -93,7 +87,6 @@ impl<'arena> TraversalValue<'arena> { TraversalValue::Node(node) => node.label, TraversalValue::Edge(edge) => edge.label, TraversalValue::Vector(vector) => vector.label, - TraversalValue::VectorNodeWithoutVectorData(vector) => vector.label, TraversalValue::NodeWithScore { node, .. } => node.label, TraversalValue::Empty => "", _ => "", @@ -105,7 +98,6 @@ impl<'arena> TraversalValue<'arena> { TraversalValue::Node(node) => node.get_property(property), TraversalValue::Edge(edge) => edge.get_property(property), TraversalValue::Vector(vector) => vector.get_property(property), - TraversalValue::VectorNodeWithoutVectorData(vector) => vector.get_property(property), TraversalValue::NodeWithScore { node, .. } => node.get_property(property), TraversalValue::Empty => None, _ => None, @@ -119,7 +111,6 @@ impl Hash for TraversalValue<'_> { TraversalValue::Node(node) => node.id.hash(state), TraversalValue::Edge(edge) => edge.id.hash(state), TraversalValue::Vector(vector) => vector.id.hash(state), - TraversalValue::VectorNodeWithoutVectorData(vector) => vector.id.hash(state), TraversalValue::NodeWithScore { node, .. } => node.id.hash(state), TraversalValue::Empty => state.write_u8(0), _ => state.write_u8(0), @@ -134,20 +125,8 @@ impl PartialEq for TraversalValue<'_> { (TraversalValue::Node(node1), TraversalValue::Node(node2)) => node1.id == node2.id, (TraversalValue::Edge(edge1), TraversalValue::Edge(edge2)) => edge1.id == edge2.id, (TraversalValue::Vector(vector1), TraversalValue::Vector(vector2)) => { - vector1.id() == vector2.id() + vector1.id == vector2.id } - ( - TraversalValue::VectorNodeWithoutVectorData(vector1), - TraversalValue::VectorNodeWithoutVectorData(vector2), - ) => vector1.id() == vector2.id(), - ( - TraversalValue::Vector(vector1), - TraversalValue::VectorNodeWithoutVectorData(vector2), - ) => vector1.id() == vector2.id(), - ( - TraversalValue::VectorNodeWithoutVectorData(vector1), - TraversalValue::Vector(vector2), - ) => vector1.id() == vector2.id(), ( TraversalValue::NodeWithScore { node: n1, .. }, TraversalValue::NodeWithScore { node: n2, .. 
}, diff --git a/helix-db/src/helix_engine/types.rs b/helix-db/src/helix_engine/types.rs index 5be25a1d..ce0797dd 100644 --- a/helix-db/src/helix_engine/types.rs +++ b/helix-db/src/helix_engine/types.rs @@ -1,4 +1,8 @@ -use crate::{helix_gateway::router::router::IoContFn, helixc::parser::errors::ParserError}; +use crate::{ + helix_engine::vector_core::{ItemId, LayerId, key::Key, node_id::NodeMode}, + helix_gateway::router::router::IoContFn, + helixc::parser::errors::ParserError, +}; use core::fmt; use heed3::Error as HeedError; use sonic_rs::Error as SonicError; @@ -30,8 +34,6 @@ pub enum GraphError { ParamNotFound(&'static str), IoNeeded(IoContFn), RerankerError(String), - - } impl std::error::Error for GraphError {} @@ -155,6 +157,31 @@ pub enum VectorError { ConversionError(String), VectorCoreError(String), VectorAlreadyDeleted(String), + InvalidVecDimension { + expected: usize, + received: usize, + }, + MissingKey { + /// The index that caused the error + index: u16, + /// The kind of item that was being queried + mode: &'static str, + /// The item ID queried + item: ItemId, + /// The item's layer + layer: LayerId, + }, + Io(String), + NeedBuild(u16), + /// The user is trying to query a database with a distance that is not of the right type. + UnmatchingDistance { + /// The expected distance type. + expected: String, + /// The distance given by the user. + received: &'static str, + }, + MissingMetadata(u16), + HasNoData, } impl std::error::Error for VectorError {} @@ -170,6 +197,50 @@ impl fmt::Display for VectorError { VectorError::ConversionError(msg) => write!(f, "Conversion error: {msg}"), VectorError::VectorCoreError(msg) => write!(f, "Vector core error: {msg}"), VectorError::VectorAlreadyDeleted(id) => write!(f, "Vector already deleted: {id}"), + VectorError::InvalidVecDimension { expected, received } => { + write!( + f, + "Invalid vector dimension: expected {expected}, received {received}" + ) + } + VectorError::MissingKey { + index, mode, item, .. + } => write!( + f, + "Internal error: {mode}({item}) is missing in index `{index}`" + ), + VectorError::Io(error) => write!(f, "IO error: {error}"), + VectorError::NeedBuild(idx) => write!( + f, + "The graph has not been built after an update on index {idx}" + ), + VectorError::UnmatchingDistance { expected, received } => { + write!( + f, + "Invalid distance provided. 
Got {received} but expected {expected}" ) } + VectorError::MissingMetadata(idx) => write!( f, "Metadata is missing on index {idx}; you must build your database before attempting to read it" ), + VectorError::HasNoData => write!(f, "Trying to access data where there is none"), + } } } + +impl VectorError { + pub(crate) fn missing_key(key: Key) -> Self { + Self::MissingKey { + index: key.index, + mode: match key.node.mode { + NodeMode::Item => "Item", + NodeMode::Links => "Links", + NodeMode::Metadata => "Metadata", + NodeMode::Updated => "Updated", + }, + item: key.node.item, + layer: key.node.layer, } } } @@ -203,3 +274,9 @@ impl From<bincode::Error> for VectorError { VectorError::ConversionError(format!("bincode error: {error}")) } } + +impl From<std::io::Error> for VectorError { + fn from(error: std::io::Error) -> Self { + VectorError::Io(format!("IO error: {error}")) + } +} diff --git a/helix-db/src/helix_engine/vector_core/binary_heap.rs b/helix-db/src/helix_engine/vector_core/binary_heap.rs deleted file mode 100644 index 5c802f1f..00000000 --- a/helix-db/src/helix_engine/vector_core/binary_heap.rs +++ /dev/null @@ -1,567 +0,0 @@ -use core::mem::{ManuallyDrop, swap}; -use core::ptr; -use core::slice; -use std::iter::FusedIterator; -pub struct BinaryHeap<'arena, T> { - pub arena: &'arena bumpalo::Bump, - data: bumpalo::collections::Vec<'arena, T>, -} - -impl<'arena, T: Ord> BinaryHeap<'arena, T> { - pub fn new(arena: &'arena bumpalo::Bump) -> BinaryHeap<'arena, T> { - BinaryHeap { - arena, - data: bumpalo::collections::Vec::with_capacity_in(0, arena), - } - } - - pub fn with_capacity(arena: &'arena bumpalo::Bump, capacity: usize) -> BinaryHeap<'arena, T> { - BinaryHeap { - arena, - data: bumpalo::collections::Vec::with_capacity_in(capacity, arena), - } - } - - #[inline] - pub fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { - let guard = RebuildOnDrop { - rebuild_from: self.len(), - heap: self, - }; - guard.heap.data.extend(iter); - } - - pub fn pop(&mut self) -> Option<T> { - self.data.pop().map(|mut item| { - if !self.is_empty() { - swap(&mut item, &mut self.data[0]); - // SAFETY: !self.is_empty() means that self.len() > 0 - unsafe { self.sift_down_to_bottom(0) }; - } - item - }) - } - - #[must_use] - pub fn peek(&self) -> Option<&T> { - self.data.first() - } - - pub fn from( - arena: &'arena bumpalo::Bump, - data: bumpalo::collections::Vec<'arena, T>, - ) -> BinaryHeap<'arena, T> { - BinaryHeap { arena, data } - } - - pub fn push(&mut self, item: T) { - let old_len = self.len(); - self.data.push(item); - // SAFETY: Since we pushed a new item it means that - // old_len = self.len() - 1 < self.len() - unsafe { self.sift_up(0, old_len) }; - } - - // The implementations of sift_up and sift_down use unsafe blocks in - // order to move an element out of the vector (leaving behind a - // hole), shift along the others and move the removed element back into the - // vector at the final location of the hole. - // The `Hole` type is used to represent this, and make sure - // the hole is filled back at the end of its scope, even on panic. - // Using a hole reduces the constant factor compared to using swaps, - // which involves twice as many moves. - - /// # Safety - /// - /// The caller must guarantee that `pos < self.len()`. - /// - /// Returns the new position of the element. - unsafe fn sift_up(&mut self, start: usize, pos: usize) -> usize { - // Take out the value at `pos` and create a hole.
- // SAFETY: The caller guarantees that pos < self.len() - let mut hole = unsafe { Hole::new(&mut self.data, pos) }; - - while hole.pos() > start { - let parent = (hole.pos() - 1) / 2; - - // SAFETY: hole.pos() > start >= 0, which means hole.pos() > 0 - // and so hole.pos() - 1 can't underflow. - // This guarantees that parent < hole.pos() so - // it's a valid index and also != hole.pos(). - if hole.element() <= unsafe { hole.get(parent) } { - break; - } - - // SAFETY: Same as above - unsafe { hole.move_to(parent) }; - } - - hole.pos() - } - - /// Take an element at `pos` and move it down the heap, - /// while its children are larger. - /// - /// Returns the new position of the element. - /// - /// # Safety - /// - /// The caller must guarantee that `pos < end <= self.len()`. - unsafe fn sift_down_range(&mut self, pos: usize, end: usize) -> usize { - // SAFETY: The caller guarantees that pos < end <= self.len(). - let mut hole = unsafe { Hole::new(&mut self.data, pos) }; - let mut child = 2 * hole.pos() + 1; - - // Loop invariant: child == 2 * hole.pos() + 1. - while child <= end.saturating_sub(2) { - // compare with the greater of the two children - // SAFETY: child < end - 1 < self.len() and - // child + 1 < end <= self.len(), so they're valid indexes. - // child == 2 * hole.pos() + 1 != hole.pos() and - // child + 1 == 2 * hole.pos() + 2 != hole.pos(). - // FIXME: 2 * hole.pos() + 1 or 2 * hole.pos() + 2 could overflow - // if T is a ZST - child += unsafe { hole.get(child) <= hole.get(child + 1) } as usize; - - // if we are already in order, stop. - // SAFETY: child is now either the old child or the old child+1 - // We already proven that both are < self.len() and != hole.pos() - if hole.element() >= unsafe { hole.get(child) } { - return hole.pos(); - } - - // SAFETY: same as above. - unsafe { hole.move_to(child) }; - child = 2 * hole.pos() + 1; - } - - // SAFETY: && short circuit, which means that in the - // second condition it's already true that child == end - 1 < self.len(). - if child == end - 1 && hole.element() < unsafe { hole.get(child) } { - // SAFETY: child is already proven to be a valid index and - // child == 2 * hole.pos() + 1 != hole.pos(). - unsafe { hole.move_to(child) }; - } - - hole.pos() - } - - /// # Safety - /// - /// The caller must guarantee that `pos < self.len()`. - unsafe fn sift_down(&mut self, pos: usize) -> usize { - let len = self.len(); - // SAFETY: pos < len is guaranteed by the caller and - // obviously len = self.len() <= self.len(). - unsafe { self.sift_down_range(pos, len) } - } - - /// Take an element at `pos` and move it all the way down the heap, - /// then sift it up to its position. - /// - /// Note: This is faster when the element is known to be large / should - /// be closer to the bottom. - /// - /// # Safety - /// - /// The caller must guarantee that `pos < self.len()`. - unsafe fn sift_down_to_bottom(&mut self, mut pos: usize) { - let end = self.len(); - let start = pos; - - // SAFETY: The caller guarantees that pos < self.len(). - let mut hole = unsafe { Hole::new(&mut self.data, pos) }; - let mut child = 2 * hole.pos() + 1; - - // Loop invariant: child == 2 * hole.pos() + 1. - while child <= end.saturating_sub(2) { - // SAFETY: child < end - 1 < self.len() and - // child + 1 < end <= self.len(), so they're valid indexes. - // child == 2 * hole.pos() + 1 != hole.pos() and - // child + 1 == 2 * hole.pos() + 2 != hole.pos(). 
- // FIXME: 2 * hole.pos() + 1 or 2 * hole.pos() + 2 could overflow - // if T is a ZST - child += unsafe { hole.get(child) <= hole.get(child + 1) } as usize; - - // SAFETY: Same as above - unsafe { hole.move_to(child) }; - child = 2 * hole.pos() + 1; - } - - if child == end - 1 { - // SAFETY: child == end - 1 < self.len(), so it's a valid index - // and child == 2 * hole.pos() + 1 != hole.pos(). - unsafe { hole.move_to(child) }; - } - pos = hole.pos(); - drop(hole); - - // SAFETY: pos is the position in the hole and was already proven - // to be a valid index. - unsafe { self.sift_up(start, pos) }; - } - - /// Rebuild assuming data[0..start] is still a proper heap. - fn rebuild_tail(&mut self, start: usize) { - if start == self.len() { - return; - } - - let tail_len = self.len() - start; - - #[inline(always)] - fn log2_fast(x: usize) -> usize { - (usize::BITS - x.leading_zeros() - 1) as usize - } - - // `rebuild` takes O(self.len()) operations - // and about 2 * self.len() comparisons in the worst case - // while repeating `sift_up` takes O(tail_len * log(start)) operations - // and about 1 * tail_len * log_2(start) comparisons in the worst case, - // assuming start >= tail_len. For larger heaps, the crossover point - // no longer follows this reasoning and was determined empirically. - let better_to_rebuild = if start < tail_len { - true - } else if self.len() <= 2048 { - 2 * self.len() < tail_len * log2_fast(start) - } else { - 2 * self.len() < tail_len * 11 - }; - - if better_to_rebuild { - self.rebuild(); - } else { - for i in start..self.len() { - // SAFETY: The index `i` is always less than self.len(). - unsafe { self.sift_up(0, i) }; - } - } - } - - fn rebuild(&mut self) { - let mut n = self.len() / 2; - while n > 0 { - n -= 1; - // SAFETY: n starts from self.len() / 2 and goes down to 0. - // The only case when !(n < self.len()) is if - // self.len() == 0, but it's ruled out by the loop condition. - unsafe { self.sift_down(n) }; - } - } - - /// Moves all the elements of `other` into `self`, leaving `other` empty. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::collections::BinaryHeap; - /// - /// let mut a = BinaryHeap::from([-10, 1, 2, 3, 3]); - /// let mut b = BinaryHeap::from([-20, 5, 43]); - /// - /// a.append(&mut b); - /// - /// assert_eq!(a.into_sorted_vec(), [-20, -10, 1, 2, 3, 3, 5, 43]); - /// assert!(b.is_empty()); - /// ``` - pub fn append(&mut self, other: &mut Self) { - if self.len() < other.len() { - swap(self, other); - } - - let start = self.data.len(); - - self.data.append(&mut other.data); - - self.rebuild_tail(start); - } - - /// Returns the length of the binary heap. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::collections::BinaryHeap; - /// let heap = BinaryHeap::from([1, 3]); - /// - /// assert_eq!(heap.len(), 2); - /// ``` - #[must_use] - pub fn len(&self) -> usize { - self.data.len() - } - - /// Checks if the binary heap is empty. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::collections::BinaryHeap; - /// let mut heap = BinaryHeap::new(); - /// - /// assert!(heap.is_empty()); - /// - /// heap.push(3); - /// heap.push(5); - /// heap.push(1); - /// - /// assert!(!heap.is_empty()); - /// ``` - #[must_use] - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Clears the binary heap, returning an iterator over the removed elements - /// in arbitrary order. 
If the iterator is dropped before being fully - /// consumed, it drops the remaining elements in arbitrary order. - /// - /// The returned iterator keeps a mutable borrow on the heap to optimize - /// its implementation. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::collections::BinaryHeap; - /// let mut heap = BinaryHeap::from([1, 3]); - /// - /// assert!(!heap.is_empty()); - /// - /// for x in heap.drain() { - /// println!("{x}"); - /// } - /// - /// assert!(heap.is_empty()); - /// ``` - #[inline] - pub fn drain(&'arena mut self) -> Drain<'arena, 'arena, T> { - Drain { - iter: self.data.drain(..), - } - } - - pub fn reserve(&mut self, additional: usize) { - self.data.reserve(additional); - } - - pub fn iter(&self) -> Iter<'_, T> { - Iter { - iter: self.data.iter(), - } - } -} - -/// Hole represents a hole in a slice i.e., an index without valid value -/// (because it was moved from or duplicated). -/// In drop, `Hole` will restore the slice by filling the hole -/// position with the value that was originally removed. -struct Hole<'a, T: 'a> { - data: &'a mut [T], - elt: ManuallyDrop, - pos: usize, -} - -impl<'a, T> Hole<'a, T> { - /// Creates a new `Hole` at index `pos`. - /// - /// Unsafe because pos must be within the data slice. - #[inline] - unsafe fn new(data: &'a mut [T], pos: usize) -> Self { - debug_assert!(pos < data.len()); - // SAFE: pos should be inside the slice - let elt = unsafe { ptr::read(data.get_unchecked(pos)) }; - Hole { - data, - elt: ManuallyDrop::new(elt), - pos, - } - } - - #[inline] - fn pos(&self) -> usize { - self.pos - } - - /// Returns a reference to the element removed. - #[inline] - fn element(&self) -> &T { - &self.elt - } - - /// Returns a reference to the element at `index`. - /// - /// Unsafe because index must be within the data slice and not equal to pos. - #[inline] - unsafe fn get(&self, index: usize) -> &T { - debug_assert!(index != self.pos); - debug_assert!(index < self.data.len()); - unsafe { self.data.get_unchecked(index) } - } - - /// Move hole to new location - /// - /// Unsafe because index must be within the data slice and not equal to pos. 
- #[inline] - unsafe fn move_to(&mut self, index: usize) { - debug_assert!(index != self.pos); - debug_assert!(index < self.data.len()); - unsafe { - let ptr = self.data.as_mut_ptr(); - let index_ptr: *const _ = ptr.add(index); - let hole_ptr = ptr.add(self.pos); - ptr::copy_nonoverlapping(index_ptr, hole_ptr, 1); - } - self.pos = index; - } -} - -impl Drop for Hole<'_, T> { - #[inline] - fn drop(&mut self) { - // fill the hole again - unsafe { - let pos = self.pos; - ptr::copy_nonoverlapping(&*self.elt, self.data.get_unchecked_mut(pos), 1); - } - } -} - -#[derive(Debug)] -pub struct Drain<'a, 'arena, T: 'a> { - iter: bumpalo::collections::vec::Drain<'a, 'arena, T>, -} - -impl<'arena, T> Iterator for Drain<'_, 'arena, T> { - type Item = T; - - #[inline] - fn next(&mut self) -> Option { - self.iter.next() - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl<'arena, T> DoubleEndedIterator for Drain<'_, 'arena, T> { - #[inline] - fn next_back(&mut self) -> Option { - self.iter.next_back() - } -} - -impl<'arena, T> FusedIterator for Drain<'_, 'arena, T> {} - -pub struct Iter<'a, T: 'a> { - iter: slice::Iter<'a, T>, -} - -impl<'a, T> Iterator for Iter<'a, T> { - type Item = &'a T; - - #[inline] - fn next(&mut self) -> Option<&'a T> { - self.iter.next() - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } - - #[inline] - fn last(self) -> Option<&'a T> { - self.iter.last() - } -} - -impl<'a, T> DoubleEndedIterator for Iter<'a, T> { - #[inline] - fn next_back(&mut self) -> Option<&'a T> { - self.iter.next_back() - } -} -impl FusedIterator for Iter<'_, T> {} - -struct RebuildOnDrop<'a, 'arena, T: Ord> { - heap: &'a mut BinaryHeap<'arena, T>, - rebuild_from: usize, -} - -impl<'arena, T: Ord> Drop for RebuildOnDrop<'_, 'arena, T> { - fn drop(&mut self) { - self.heap.rebuild_tail(self.rebuild_from); - } -} - -/// An owning iterator over the elements of a `BinaryHeap`. -/// -/// This `struct` is created by [`BinaryHeap::into_iter()`] -/// (provided by the [`IntoIterator`] trait). See its documentation for more. -/// -/// [`into_iter`]: BinaryHeap::into_iter -pub struct IntoIter<'arena, T> { - iter: bumpalo::collections::vec::IntoIter<'arena, T>, -} - -impl<'arena, T> Iterator for IntoIter<'arena, T> { - type Item = T; - - #[inline] - fn next(&mut self) -> Option { - self.iter.next() - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -impl<'arena, T> DoubleEndedIterator for IntoIter<'arena, T> { - #[inline] - fn next_back(&mut self) -> Option { - self.iter.next_back() - } -} - -impl FusedIterator for IntoIter<'_, T> {} - -impl<'arena, T> IntoIterator for BinaryHeap<'arena, T> { - type Item = T; - type IntoIter = IntoIter<'arena, T>; - - /// Creates a consuming iterator, that is, one that moves each value out of - /// the binary heap in arbitrary order. The binary heap cannot be used - /// after calling this. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::collections::BinaryHeap; - /// let heap = BinaryHeap::from([1, 2, 3, 4]); - /// - /// // Print 1, 2, 3, 4 in arbitrary order - /// for x in heap.into_iter() { - /// // x has type i32, not &i32 - /// println!("{x}"); - /// } - /// ``` - fn into_iter(self) -> IntoIter<'arena, T> { - IntoIter { - iter: self.data.into_iter(), - } - } -} diff --git a/helix-db/src/helix_engine/vector_core/distance/cosine.rs b/helix-db/src/helix_engine/vector_core/distance/cosine.rs new file mode 100644 index 00000000..5b790960 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/distance/cosine.rs @@ -0,0 +1,68 @@ +use std::fmt; + +use bytemuck::{Pod, Zeroable}; +use serde::Serialize; + +use crate::helix_engine::vector_core::{ + distance::{Distance, MAX_DISTANCE}, + node::Item, + spaces::simple::dot_product, + unaligned_vector::UnalignedVector, +}; + +/// The Cosine similarity is a measure of similarity between two +/// non-zero vectors defined in an inner product space. Cosine similarity +/// is the cosine of the angle between the vectors. +#[derive(Debug, Serialize, Clone)] +pub enum Cosine {} + +/// The header of Cosine item nodes. +#[repr(C)] +#[derive(Pod, Serialize, Zeroable, Clone, Copy)] +pub struct NodeHeaderCosine { + norm: f32, +} +impl fmt::Debug for NodeHeaderCosine { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("NodeHeaderCosine") + .field("norm", &format!("{:.4}", self.norm)) + .finish() + } +} + +impl Distance for Cosine { + type Header = NodeHeaderCosine; + type VectorCodec = f32; + + fn name() -> &'static str { + "cosine" + } + + fn new_header(vector: &UnalignedVector<Self::VectorCodec>) -> Self::Header { + NodeHeaderCosine { + norm: Self::norm_no_header(vector), + } + } + + fn distance(p: &Item<Self>, q: &Item<Self>) -> f32 { + let pn = p.header.norm; + let qn = q.header.norm; + let pq = dot_product(&p.vector, &q.vector); + let pnqn = pn * qn; + if pnqn > f32::EPSILON { + let cos = pq / pnqn; + let cos = cos.clamp(-1.0, 1.0); + // cos is [-1; 1] + // cos = 0. -> 1.0 + // cos = -1. -> 2.0 + // cos = 1. -> 0.0 + 1.0 - cos + } else { + MAX_DISTANCE + } + } + + fn norm_no_header(v: &UnalignedVector<Self::VectorCodec>) -> f32 { + dot_product(v, v).sqrt() + } +} diff --git a/helix-db/src/helix_engine/vector_core/distance/mod.rs b/helix-db/src/helix_engine/vector_core/distance/mod.rs new file mode 100644 index 00000000..78672671 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/distance/mod.rs @@ -0,0 +1,42 @@ +use core::fmt; + +use bytemuck::{Pod, Zeroable}; + +use crate::helix_engine::vector_core::{ + node::Item, + unaligned_vector::{UnalignedVector, VectorCodec}, +}; + +pub use cosine::{Cosine, NodeHeaderCosine}; + +mod cosine; + +pub type DistanceValue = f32; + +pub const MAX_DISTANCE: f32 = 2.0; +pub const ORTHOGONAL: f32 = 1.0; +pub const MIN_DISTANCE: f32 = 0.0; + +pub trait Distance: Send + Sync + Sized + Clone + fmt::Debug + 'static { + /// A header structure with information related to the node. + type Header: Pod + Zeroable + fmt::Debug; + type VectorCodec: VectorCodec; + + /// The name of the distance. + /// + /// Note that the name is used to identify the distance and enables some performance improvements. + /// For example, the "cosine" distance is matched against the "binary quantized cosine" to avoid + /// recomputing links when moving from the former to the latter distance. + fn name() -> &'static str; + + fn new_header(vector: &UnalignedVector<Self::VectorCodec>) -> Self::Header; + + /// Returns a non-normalized distance.
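+ /// As a concrete example (derived from the `Cosine` implementation above): the value is + /// `1.0 - cos(angle)`, so identical directions score `MIN_DISTANCE` (0.0), orthogonal + /// vectors score `ORTHOGONAL` (1.0), and opposite directions score `MAX_DISTANCE` (2.0).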
+ fn distance(p: &Item, q: &Item) -> DistanceValue; + + fn norm(item: &Item) -> f32 { + Self::norm_no_header(&item.vector) + } + + fn norm_no_header(v: &UnalignedVector) -> f32; +} diff --git a/helix-db/src/helix_engine/vector_core/hnsw.rs b/helix-db/src/helix_engine/vector_core/hnsw.rs index e110f248..defec54c 100644 --- a/helix-db/src/helix_engine/vector_core/hnsw.rs +++ b/helix-db/src/helix_engine/vector_core/hnsw.rs @@ -1,63 +1,561 @@ -use crate::helix_engine::vector_core::vector::HVector; -use crate::{helix_engine::types::VectorError, utils::properties::ImmutablePropertiesMap}; - -use heed3::{RoTxn, RwTxn}; - -pub trait HNSW { - /// Search for the k nearest neighbors of a query vector - /// - /// # Arguments - /// - /// * `txn` - The transaction to use - /// * `query` - The query vector - /// * `k` - The number of nearest neighbors to search for - /// - /// # Returns - /// - /// A vector of tuples containing the id and distance of the nearest neighbors - fn search<'db, 'arena, 'txn, F>( - &'db self, - txn: &'txn RoTxn<'db>, - query: &'arena [f64], - k: usize, - label: &'arena str, - filter: Option<&'arena [F]>, - should_trickle: bool, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> +use core::fmt; +use std::cmp::Reverse; +use std::collections::BinaryHeap; +use std::marker::PhantomData; +use std::{borrow::Cow, fmt::Debug}; + +use heed3::RwTxn; +use min_max_heap::MinMaxHeap; +use papaya::HashMap; +use rand::Rng; +use rand::distr::Distribution; +use rand::distr::weighted::WeightedIndex; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use roaring::RoaringBitmap; + +use crate::helix_engine::vector_core::node::{Item, Node}; +use crate::helix_engine::vector_core::{ + CoreDatabase, ItemId, + distance::Distance, + key::Key, + node::Links, + ordered_float::OrderedFloat, + parallel::{ImmutableItems, ImmutableLinks}, + stats::BuildStats, + writer::{BuildOption, FrozenReader}, +}; +use crate::helix_engine::vector_core::{VectorCoreResult, VectorError}; + +pub(crate) type ScoredLink = (OrderedFloat, ItemId); + +pub struct NodeState { + links: Vec, +} + +impl Debug for NodeState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // from [crate::unaligned_vector] + struct Number(f32); + impl fmt::Debug for Number { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:0.3}", self.0) + } + } + let mut list = f.debug_list(); + + for &(OrderedFloat(dist), id) in &self.links { + let tup = (id, Number(dist)); + list.entry(&tup); + } + + list.finish() + } +} + +pub struct HnswBuilder { + assign_probas: Vec, + ef_construction: usize, + alpha: f32, + m: usize, + m_max_0: usize, + pub max_level: usize, + pub entry_points: Vec, + pub layers: Vec>, + distance: PhantomData, +} + +impl HnswBuilder { + pub fn new(opts: &BuildOption) -> Self { + let assign_probas = Self::get_default_probas(opts.m); + Self { + assign_probas, + ef_construction: opts.ef_construction, + alpha: opts.alpha, + max_level: 0, + entry_points: Vec::new(), + layers: vec![], + distance: PhantomData, + m: opts.m, + m_max_0: opts.m_max_0, + } + } + + pub fn with_entry_points(mut self, entry_points: Vec) -> Self { + self.entry_points = entry_points; + self + } + + pub fn with_max_level(mut self, max_level: usize) -> Self { + self.max_level = max_level; + self + } + + // can probably even be u8's ... 
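+ // Illustrative schedule (numbers from the standard HNSW formula, not measured here): + // with the geometric distribution induced by level_factor = 1/ln(m) and m = 16, + // P(level >= l) ~ 16^-l, so roughly 94% of items stay on level 0, about 6% reach + // level 1, and fewer than 0.5% reach level 2; get_random_level below just samples + // that precomputed table via WeightedIndex.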
+ fn get_random_level(&mut self, rng: &mut R) -> usize where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - 'db: 'arena, - 'arena: 'txn; - - /// Insert a new vector into the index - /// - /// # Arguments - /// - /// * `txn` - The transaction to use - /// * `data` - The vector data - /// - /// # Returns - /// - /// An HVector of the data inserted - fn insert<'db, 'arena, 'txn, F>( - &'db self, - txn: &'txn mut RwTxn<'db>, - label: &'arena str, - data: &'arena [f64], - properties: Option>, - arena: &'arena bumpalo::Bump, - ) -> Result, VectorError> + R: Rng + ?Sized, + { + let dist = WeightedIndex::new(&self.assign_probas).unwrap(); + dist.sample(rng) + } + + fn get_default_probas(m: usize) -> Vec { + let mut assign_probas = Vec::with_capacity(m); + let level_factor = 1.0 / (m as f32 + f32::EPSILON).ln(); + let mut level = 0; + loop { + // P(L( + &mut self, + mut to_insert: RoaringBitmap, + to_delete: &RoaringBitmap, + database: CoreDatabase, + index: u16, + wtxn: &mut RwTxn, + rng: &mut R, + ) -> VectorCoreResult> where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - 'db: 'arena, - 'arena: 'txn; - - /// Delete a vector from the index - /// - /// # Arguments - /// - /// * `txn` - The transaction to use - /// * `id` - The id of the vector - fn delete(&self, txn: &mut RwTxn, id: u128, arena: &bumpalo::Bump) -> Result<(), VectorError>; + R: Rng + ?Sized, + { + let mut build_stats = BuildStats::new(); + + let items = ImmutableItems::new(wtxn, database, index)?; + let links = ImmutableLinks::new(wtxn, database, index, database.len(wtxn)?)?; + let lmdb = FrozenReader { + index, + items: &items, + links: &links, + }; + + // Generate a random level for each point + let mut cur_max_level = usize::MIN; + let mut levels: Vec<_> = to_insert + .iter() + .map(|item_id| { + let level = self.get_random_level(rng); + cur_max_level = cur_max_level.max(level); + (item_id, level) + }) + .collect(); + + let ok_eps = + self.prepare_levels_and_entry_points(&mut levels, cur_max_level, to_delete, &lmdb)?; + to_insert |= ok_eps; + + let level_groups: Vec<_> = levels.chunk_by(|(_, la), (_, lb)| la == lb).collect(); + + // Insert layers L...0 multi-threaded + level_groups.into_iter().try_for_each(|grp| { + grp.into_par_iter().try_for_each(|&(item_id, lvl)| { + self.insert(item_id, lvl, &lmdb, &build_stats)?; + Ok(()) as Result<(), VectorError> + })?; + + build_stats.layer_dist.insert(grp[0].1, grp.len()); + + Ok(()) as Result<(), VectorError> + })?; + + self.maybe_patch_old_links(&lmdb, to_delete)?; + + // Single-threaded write to lmdb + for lvl in 0..=self.max_level { + let Some(map) = self.layers.get(lvl) else { + break; + }; + let map_guard = map.pin(); + + for (item_id, node_state) in &map_guard { + let key = Key::links(index, *item_id, lvl as u8); + let links = Links { + links: Cow::Owned(RoaringBitmap::from_iter( + node_state.links.iter().map(|(_, i)| *i), + )), + }; + + database.put(wtxn, &key, &Node::Links(links))?; + } + } + + build_stats.compute_mean_degree(wtxn, &database, index)?; + Ok(build_stats) + } + + fn prepare_levels_and_entry_points( + &mut self, + levels: &mut Vec<(u32, usize)>, + cur_max_level: usize, + to_delete: &RoaringBitmap, + lmdb: &FrozenReader, + ) -> VectorCoreResult { + let old_eps = RoaringBitmap::from_iter(self.entry_points.iter()); + let mut ok_eps = &old_eps - to_delete; + + // If any old entry points were deleted we need to replace them + for _ in (old_eps & to_delete).iter() { + let mut l = self.max_level; + loop { + for result in lmdb.links.iter_layer(l as u8) { + let 
((item_id, _), _) = result?; + + if !to_delete.contains(item_id) && ok_eps.insert(item_id) { + break; + } + } + + // no points found in layer, continue to next one + l = match l.checked_sub(1) { + Some(new_level) => new_level, + None => break, + }; + } + } + // If the loop above added no points, we must have deleted the entire previous graph! + if ok_eps.is_empty() { + self.max_level = 0; + } + + // Schedule old entry point ids for re-indexing, otherwise we end up building a completely + // isolated sub-graph. + levels.extend(ok_eps.iter().map(|id| (id, self.max_level))); + + if cur_max_level > self.max_level { + self.entry_points.clear(); + } + + self.max_level = self.max_level.max(cur_max_level); + for _ in 0..=self.max_level { + self.layers.push(HashMap::new()); + } + + levels.sort_unstable_by(|(_, a), (_, b)| b.cmp(a)); + + let upper_layer: Vec<_> = levels + .iter() + .take_while(|(_, l)| *l == self.max_level) + .filter(|&(item_id, _)| !self.entry_points.contains(item_id)) + .collect(); + + for &(item_id, _) in upper_layer { + ok_eps.insert(item_id); + self.add_in_layers_below(item_id, self.max_level); + } + + self.entry_points = ok_eps.iter().collect(); + Ok(ok_eps) + } + + fn insert( + &self, + query: ItemId, + level: usize, + lmdb: &FrozenReader<'_, D>, + build_stats: &BuildStats, + ) -> VectorCoreResult<()> { + let mut eps = Vec::from_iter(self.entry_points.clone()); + + let q = lmdb.get_item(query)?; + + // Greedy search with: ef = 1 + for lvl in (level + 1..=self.max_level).rev() { + let neighbours = self.walk_layer(&q, &eps, lvl, 1, lmdb, build_stats)?; + let closest = neighbours + .peek_min() + .map(|(_, n)| *n) + .expect("No neighbor was found"); + eps = vec![closest]; + } + + self.add_in_layers_below(query, level); + + // Beam search with: ef = ef_construction + for lvl in (0..=level).rev() { + let neighbours = self + .walk_layer(&q, &eps, lvl, self.ef_construction, lmdb, build_stats)? + .into_vec(); + + eps.clear(); + for (dist, n) in self.robust_prune(neighbours, level, self.alpha, lmdb)? { + // add links in both directions + self.add_link(query, (dist, n), lvl, lmdb)?; + self.add_link(n, (dist, query), lvl, lmdb)?; + eps.push(n); + + build_stats.incr_link_count(2); + } + } + + Ok(()) + } + + /// During incremental updates we store a working copy of potential links to the new items. At + /// the end of indexing we need to merge the old and new links and prune ones pointing to + /// deleted items. + /// Algorithm 4 from the FreshDiskANN paper. + fn maybe_patch_old_links( + &mut self, + lmdb: &FrozenReader<'_, D>, + to_delete: &RoaringBitmap, + ) -> VectorCoreResult<()> { + let links_in_db: Vec<_> = lmdb + .links + .iter() + .map(|result| { + result.map(|((id, lvl), v)| { + // Resize the layers if necessary. We must do this to accommodate links from + // previous builds that exist on levels larger than our current one. + if self.layers.len() <= lvl as usize { + self.layers.resize_with(lvl as usize + 1, HashMap::new); + } + ((id, lvl as usize), v.into_owned()) + }) + }) + .collect(); + + links_in_db.into_par_iter().try_for_each(|result| { + let ((id, lvl), links) = result?; + + // Since we delete links AFTER a build (we need to do this to apply the DiskANN + // approach for patching), links belonging to deleted items may still be present. + // We don't care about patching them. + if to_delete.contains(id) { + return Ok(()) as Result<(), VectorError>; + } + let del_subset = &links & to_delete; + + // This is safe because we resized layers above.
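+ // Worked example (ids illustrative): if node 3 links to a now-deleted node 7, the + // code below folds 7's own neighbours into node 3's candidate set, drops everything + // in `to_delete`, re-scores the survivors against 3 with D::distance, and + // robust-prunes the result back down to the layer's capacity.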
+ let map_guard = self.layers[lvl].pin(); + let mut new_links = map_guard + .get(&id) + .map(|s| s.links.to_vec()) + .unwrap_or_default(); + + // No work to be done, continue + if del_subset.is_empty() && new_links.is_empty() { + return Ok(()); + } + + // Iterate through each deleted item and explore its neighbours + let mut bitmap = RoaringBitmap::new(); + for item_id in del_subset.iter() { + bitmap.extend(lmdb.get_links(item_id, lvl)?.iter()); + } + bitmap |= links; + bitmap -= to_delete; + + // TODO: abstract this layer search and pruning bit as it's duplicated a lot in + // this file + for other in bitmap { + let dist = D::distance(&lmdb.get_item(id)?, &lmdb.get_item(other)?); + new_links.push((OrderedFloat(dist), other)); + } + let pruned = self.robust_prune(new_links, lvl, self.alpha, lmdb)?; + let _ = map_guard.insert( + id, + NodeState { + links: Vec::from_iter(pruned), + }, + ); + Ok(()) + })?; + + Ok(()) + } + + /// Rather than simply inserting, we make this a no-op so we can re-insert the same item without + /// overwriting its links in memory. This is useful in cases like a Vamana build. + fn add_in_layers_below(&self, item_id: ItemId, level: usize) { + for level in 0..=level { + let Some(map) = self.layers.get(level) else { + break; + }; + map.pin() + .get_or_insert(item_id, NodeState { links: vec![] }); + } + } + + /// Returns only the IDs of our neighbours. Always check LMDB first. + fn get_neighbours( + &self, + lmdb: &FrozenReader<'_, D>, + item_id: ItemId, + level: usize, + build_stats: &BuildStats, + ) -> VectorCoreResult<Vec<ItemId>> { + let mut res = Vec::new(); + + // O(1) from the frozen reader + if let Ok(Links { links }) = lmdb.get_links(item_id, level) { + build_stats.incr_lmdb_hits(); + res.extend(links.iter()); + } + + // O(1) from self.layers + let Some(map) = self.layers.get(level) else { + return Ok(res); + }; + match map.pin().get(&item_id) { + Some(node_state) => res.extend(node_state.links.iter().map(|(_, i)| *i)), + None => { + if res.is_empty() { + build_stats.incr_link_misses(); + } + } + } + + Ok(res) + } + + #[allow(clippy::too_many_arguments)] + fn walk_layer( + &self, + query: &Item<D>, + eps: &[ItemId], + level: usize, + ef: usize, + lmdb: &FrozenReader<'_, D>, + build_stats: &BuildStats, + ) -> VectorCoreResult<MinMaxHeap<ScoredLink>> { + let mut candidates = BinaryHeap::new(); + let mut res = MinMaxHeap::with_capacity(ef); + let mut visited = RoaringBitmap::new(); + + // Register all entry points as visited and populate candidates + for &ep in eps { + let ve = lmdb.get_item(ep)?; + let dist = D::distance(query, &ve); + + candidates.push((Reverse(OrderedFloat(dist)), ep)); + res.push((OrderedFloat(dist), ep)); + visited.insert(ep); + } + + while let Some(&(Reverse(OrderedFloat(f)), _)) = candidates.peek() { + let &(OrderedFloat(f_max), _) = res.peek_max().unwrap(); + if f > f_max { + break; + } + let (_, c) = candidates.pop().unwrap(); // Now safe to pop + + // Get neighborhood of candidate either from self or LMDB + let proximity = self.get_neighbours(lmdb, c, level, build_stats)?; + for point in proximity { + if !visited.insert(point) { + continue; + } + // If the item isn't in the frozen reader it must have been deleted from the index, + // in which case it's OK not to explore it + let item = match lmdb.get_item(point) { + Ok(item) => item, + Err(VectorError::MissingKey { ..
}) => continue, + Err(e) => return Err(e), + }; + let dist = D::distance(query, &item); + + if res.len() < ef || dist < f_max { + candidates.push((Reverse(OrderedFloat(dist)), point)); + + if res.len() == ef { + let _ = res.push_pop_max((OrderedFloat(dist), point)); + } else { + res.push((OrderedFloat(dist), point)); + } + } + } + } + + Ok(res) + } + + /// Tries to add a new link between nodes in a single direction. + // TODO: prevent duplicate links the other way. I think this arises ONLY for entry points since + // we pre-emptively add them to each layer beforehand + fn add_link( + &self, + p: ItemId, + q: ScoredLink, + level: usize, + lmdb: &FrozenReader<'_, D>, + ) -> VectorCoreResult<()> { + if p == q.1 { + return Ok(()); + } + + let Some(map) = self.layers.get(level) else { + return Ok(()); + }; + let map_guard = map.pin(); + + // 'pure' links update function + let _add_link = |node_state: &NodeState| { + let mut links = node_state.links.clone(); + let cap = if level == 0 { self.m_max_0 } else { self.m }; + + if node_state.links.len() < cap { + links.push(q); + return NodeState { links }; + } + + let new_links = self + .robust_prune(links, level, self.alpha, lmdb) + .map(Vec::from_iter) + .unwrap_or_else(|_| node_state.links.clone()); + + NodeState { links: new_links } + }; + + map_guard.update_or_insert_with(p, _add_link, || NodeState { links: vec![q] }); + + Ok(()) + } + + /// Naively choosing the nearest neighbours performs poorly on clustered data since we can never + /// escape our local neighbourhood. The "Sparse Neighbourhood Graph" (SNG) condition is sufficient + /// for quick convergence. + fn robust_prune( + &self, + mut candidates: Vec<ScoredLink>, + level: usize, + alpha: f32, + lmdb: &FrozenReader<'_, D>, + ) -> VectorCoreResult<Vec<ScoredLink>> { + let cap = if level == 0 { self.m_max_0 } else { self.m }; + candidates.sort_by(|a, b| b.cmp(a)); + let mut selected: Vec<ScoredLink> = Vec::with_capacity(cap); + + while let Some((dist_to_query, c)) = candidates.pop() { + if selected.len() == cap { + break; + } + + // ensure we're closer to the query than we are to other candidates + let mut ok_to_add = true; + for i in selected.iter().map(|(_, i)| *i) { + let d = D::distance(&lmdb.get_item(c)?, &lmdb.get_item(i)?); + if OrderedFloat(d * alpha) < dist_to_query { + ok_to_add = false; + break; + } + } + + if ok_to_add { + selected.push((dist_to_query, c)); + } + } + + Ok(selected) + } } diff --git a/helix-db/src/helix_engine/vector_core/item_iter.rs b/helix-db/src/helix_engine/vector_core/item_iter.rs new file mode 100644 index 00000000..2c19a33a --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/item_iter.rs @@ -0,0 +1,64 @@ +use heed3::RoTxn; + +use crate::helix_engine::vector_core::{ + CoreDatabase, LmdbResult, + distance::Distance, + key::{KeyCodec, Prefix, PrefixCodec}, + node::{Item, Node, NodeCodec}, + node_id::NodeId, +}; + +// used by the reader +pub struct ItemIter<'t, D: Distance> { + pub inner: heed3::RoPrefix<'t, KeyCodec, NodeCodec<D>>, + dimensions: usize, +} + +impl<'t, D: Distance> ItemIter<'t, D> { + pub fn new( + database: CoreDatabase<D>, + index: u16, + dimensions: usize, + rtxn: &'t RoTxn, + ) -> heed3::Result<Self> { + Ok(ItemIter { + inner: database + .remap_key_type::<PrefixCodec>() + .prefix_iter(rtxn, &Prefix::item(index))?
+ .remap_key_type::<KeyCodec>(), + dimensions, + }) + } + + pub fn next_id(&mut self) -> Option<LmdbResult<NodeId>> { + match self.inner.next() { + Some(Ok((key, node))) => match node { + Node::Item(_) => Some(Ok(key.node)), + Node::Links(_) => unreachable!("Node must not be a link"), + }, + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} + +impl<'t, D: Distance> Iterator for ItemIter<'t, D> { + type Item = LmdbResult<(NodeId, Item<'t, D>)>; + + fn next(&mut self) -> Option<Self::Item> { + match self.inner.next() { + Some(Ok((key, node))) => match node { + Node::Item(mut item) => { + if item.vector.len() != self.dimensions { + // quantized codecs pad to 8 bytes, so we truncate to recover the true length + item.vector.to_mut().truncate(self.dimensions); + } + Some(Ok((key.node, item))) + } + Node::Links(_) => unreachable!("Node must not be a link"), + }, + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} diff --git a/helix-db/src/helix_engine/vector_core/key.rs b/helix-db/src/helix_engine/vector_core/key.rs new file mode 100644 index 00000000..73b82e0f --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/key.rs @@ -0,0 +1,174 @@ +use std::borrow::Cow; +use std::mem::size_of; + +use byteorder::{BigEndian, ByteOrder}; +use heed3::BoxedError; + +use crate::helix_engine::vector_core::node_id::{NodeId, NodeMode}; + +/// This whole structure must fit in a u64 so we can tell LMDB to optimize its storage. +/// The `index` is specified by the user and is used to differentiate between multiple indexes. +/// The `mode` indicates what we're looking at. +/// The `item` points to a specific node. +/// If the mode is: +/// - `Item`: we're looking at an `Item` node. +/// - `Links`: we're looking at the `Links` bitmap of neighbours for a node. +/// - `Updated`: the list of items that have been updated since the last build of the database. +/// - `Metadata`: there is only one item at `0` that contains the header required to read the index. +#[derive(Debug, Copy, Clone)] +pub struct Key { + /// The prefix specified by the user. + pub index: u16, + pub node: NodeId, +} + +impl Key { + pub const fn new(index: u16, node: NodeId) -> Self { + Self { index, node } + } + + pub const fn metadata(index: u16) -> Self { + Self::new(index, NodeId::metadata()) + } + + pub const fn version(index: u16) -> Self { + Self::new(index, NodeId::version()) + } + + pub const fn updated(index: u16, item: u32) -> Self { + Self::new(index, NodeId::updated(item)) + } + + pub const fn item(index: u16, item: u32) -> Self { + Self::new(index, NodeId::item(item)) + } + + pub const fn links(index: u16, item: u32, layer: u8) -> Self { + Self::new(index, NodeId::links(item, layer)) + } +} + +/// The heed codec used internally to encode/decode the internal key type.
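+/// Layout sketch (read off the encoder below): the 8 encoded bytes are +/// `[index: u16 BE][mode: u8][item: u32 BE][layer: u8]`, so `Key::links(0, 1, 42)` +/// becomes `00 00 | MM | 00 00 00 01 | 2a`, where `MM` stands for whatever discriminant +/// `NodeMode::Links` is given in `node_id.rs`.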
+pub enum KeyCodec {} + +impl<'a> heed3::BytesEncode<'a> for KeyCodec { + type EItem = Key; + + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + let mut output = Vec::with_capacity(size_of::()); + output.extend_from_slice(&item.index.to_be_bytes()); + output.extend_from_slice(&(item.node.mode as u8).to_be_bytes()); + output.extend_from_slice(&item.node.item.to_be_bytes()); + output.extend_from_slice(&(item.node.layer).to_be_bytes()); + + Ok(Cow::Owned(output)) + } +} + +impl heed3::BytesDecode<'_> for KeyCodec { + type DItem = Key; + + fn bytes_decode(bytes: &[u8]) -> Result { + let prefix = BigEndian::read_u16(bytes); + let bytes = &bytes[size_of::()..]; + let mode = bytes[0].try_into()?; + let bytes = &bytes[size_of::()..]; + let item = BigEndian::read_u32(bytes); + let bytes = &bytes[size_of::()..]; + let layer = bytes[0]; + + Ok(Key { + index: prefix, + node: NodeId { mode, item, layer }, + }) + } +} + +/// This is used to query part of a key. +#[derive(Debug, Copy, Clone)] +pub struct Prefix { + /// The index specified by the user. + index: u16, + // Indicate what the item represent. + mode: Option, +} + +impl Prefix { + pub const fn all(index: u16) -> Self { + Self { index, mode: None } + } + + pub const fn item(index: u16) -> Self { + Self { + index, + mode: Some(NodeMode::Item), + } + } + + pub const fn links(index: u16) -> Self { + Self { + index, + mode: Some(NodeMode::Links), + } + } + + pub const fn updated(index: u16) -> Self { + Self { + index, + mode: Some(NodeMode::Updated), + } + } +} + +pub enum PrefixCodec {} + +impl<'a> heed3::BytesEncode<'a> for PrefixCodec { + type EItem = Prefix; + + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + let mode_used = item.mode.is_some() as usize; + let mut output = Vec::with_capacity(size_of::() + mode_used); + + output.extend_from_slice(&item.index.to_be_bytes()); + if let Some(mode) = item.mode { + output.extend_from_slice(&(mode as u8).to_be_bytes()); + } + + Ok(Cow::Owned(output)) + } +} + +#[cfg(test)] +mod test { + use heed3::{BytesDecode, BytesEncode}; + + use super::*; + + #[test] + fn check_size_of_types() { + let key = Key::metadata(0); + let encoded = KeyCodec::bytes_encode(&key).unwrap(); + assert_eq!(encoded.len(), size_of::()); + } + + // TODO: fuzz this + #[test] + fn test_links_key() { + let key = Key::links(0, 1, 42); + let bytes = KeyCodec::bytes_encode(&key).unwrap(); + let key2 = KeyCodec::bytes_decode(&bytes).unwrap(); + assert_eq!(key.node.item, key2.node.item); + assert_eq!(key.node.layer, key2.node.layer); + assert_eq!(key.node.mode, key2.node.mode); + } + + #[test] + fn test_item_key() { + let key = Key::item(0, 42); + let bytes = KeyCodec::bytes_encode(&key).unwrap(); + let key2 = KeyCodec::bytes_decode(&bytes).unwrap(); + assert_eq!(key.node.item, key2.node.item); + assert_eq!(key.node.layer, key2.node.layer); + assert_eq!(key.node.mode, key2.node.mode); + } +} diff --git a/helix-db/src/helix_engine/vector_core/metadata.rs b/helix-db/src/helix_engine/vector_core/metadata.rs new file mode 100644 index 00000000..0d251ba5 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/metadata.rs @@ -0,0 +1,75 @@ +use std::{borrow::Cow, ffi::CStr}; + +use byteorder::{BigEndian, ByteOrder}; +use heed3::BoxedError; +use roaring::RoaringBitmap; + +use crate::helix_engine::vector_core::node::ItemIds; + +#[derive(Debug)] +pub struct Metadata<'a> { + pub dimensions: u32, + pub items: RoaringBitmap, + pub distance: &'a str, + pub entry_points: ItemIds<'a>, + pub max_level: u8, +} + +pub enum 
MetadataCodec {} + +impl<'a> heed3::BytesEncode<'a> for MetadataCodec { + type EItem = Metadata<'a>; + + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + let Metadata { + dimensions, + items, + entry_points, + distance, + max_level, + } = item; + debug_assert!(!distance.as_bytes().contains(&0)); + + let mut output = Vec::with_capacity( + size_of::() + + items.serialized_size() + + entry_points.len() * size_of::() + + distance.len() + + 1, + ); + output.extend_from_slice(distance.as_bytes()); + output.push(0); + output.extend_from_slice(&dimensions.to_be_bytes()); + output.extend_from_slice(&(items.serialized_size() as u32).to_be_bytes()); + items.serialize_into(&mut output)?; + output.extend_from_slice(entry_points.raw_bytes()); + output.push(*max_level); + + Ok(Cow::Owned(output)) + } +} + +impl<'a> heed3::BytesDecode<'a> for MetadataCodec { + type DItem = Metadata<'a>; + + fn bytes_decode(bytes: &'a [u8]) -> Result { + let distance = CStr::from_bytes_until_nul(bytes)?.to_str()?; + let bytes = &bytes[distance.len() + 1..]; + let dimensions = BigEndian::read_u32(bytes); + let bytes = &bytes[size_of::()..]; + let items_size = BigEndian::read_u32(bytes) as usize; + let bytes = &bytes[size_of::()..]; + let items = RoaringBitmap::deserialize_from(&bytes[..items_size])?; + let bytes = &bytes[items_size..]; + let entry_points = ItemIds::from_bytes(&bytes[..bytes.len() - 1]); + let max_level = bytes[bytes.len() - 1]; + + Ok(Metadata { + dimensions, + items, + distance, + entry_points, + max_level, + }) + } +} diff --git a/helix-db/src/helix_engine/vector_core/migration.rs b/helix-db/src/helix_engine/vector_core/migration.rs new file mode 100644 index 00000000..55493acc --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/migration.rs @@ -0,0 +1,515 @@ +use std::{collections::HashMap, sync::atomic}; + +use byteorder::BE; +use heed3::{ + Database, Env, RwTxn, + types::{Bytes, U32, U128, Unit}, +}; +use rand::{SeedableRng, rngs::StdRng}; +use serde::Deserialize; + +use crate::{ + helix_engine::{ + types::VectorError, + vector_core::{HNSWConfig, VectorCore, VectorCoreResult}, + }, + protocol::value::Value, + utils::properties::ImmutablePropertiesMap, +}; + +// Constants from old version +const OLD_VECTOR_PREFIX: &[u8] = b"v:"; + +#[derive(Debug)] +struct OldVectorData { + id: u128, + label: String, + data: Vec, + properties: Option, + deleted: bool, +} + +/// Old vector properties structure for deserialization +#[derive(Deserialize)] +struct OldVectorProperties { + label: String, + #[serde(default)] + deleted: bool, + #[serde(default)] + properties: Option, +} + +pub fn needs_migration_from_old_format(env: &Env, txn: &RwTxn) -> VectorCoreResult { + // Check for old database structure + let old_vectors_db = env + .database_options() + .types::() + .name("vectors") + .open(txn)?; + let old_edges_db = env + .database_options() + .types::() + .name("hnsw_out_nodes") + .open(txn)?; + let new_id_map_db = env + .database_options() + .types::, U128>() + .name("id_map") + .open(txn)?; + + // If old DBs exist but new ID map doesn't, we need migration + match (old_vectors_db, old_edges_db, new_id_map_db) { + (Some(_), Some(_), None) => { + // Old DBs exist, new doesn't - need migration + Ok(true) + } + (Some(_), Some(_), Some(id_map)) => { + // Check if ID map is empty (incomplete migration) + Ok(id_map.is_empty(txn)?) 
+ } + _ => Ok(false), + } +} + +pub fn migrate_from_old_format( + env: &Env, + txn: &mut RwTxn, + config: HNSWConfig, +) -> VectorCoreResult { + // Open old databases for reading + let old_vectors_db: Database = env + .database_options() + .types::() + .name("vectors") + .open(txn)? + .ok_or_else(|| { + VectorError::VectorCoreError("Old vectors database not found".to_string()) + })?; + let old_vector_properties_db: Database, Bytes> = env + .database_options() + .types::, Bytes>() + .name("vector_data") + .open(txn)? + .ok_or_else(|| { + VectorError::VectorCoreError("Old vector_data database not found".to_string()) + })?; + + // Create new VectorCore with empty databases + let new_core = VectorCore::new(env, txn, config)?; + + // Migrate all vectors + let migrated_vectors = extract_old_vectors(txn, &old_vectors_db, &old_vector_properties_db)?; + + if migrated_vectors.is_empty() { + return Ok(new_core); + } + + // Group vectors by label and migrate each group + let mut label_groups = HashMap::>::new(); + + for vector_data in migrated_vectors { + label_groups + .entry(vector_data.label.clone()) + .or_default() + .push(vector_data); + } + + // Migrate each label group + for (label, vectors) in label_groups { + migrate_label_group(&new_core, txn, &label, vectors)?; + } + + backup_old_databases(env, txn)?; + + Ok(new_core) +} + +fn extract_old_vectors( + txn: &RwTxn, + old_vectors_db: &Database, + old_vector_properties_db: &Database, Bytes>, +) -> VectorCoreResult> { + let mut vectors = Vec::new(); + let mut seen_ids = std::collections::HashSet::new(); + + // Iterate through old vector database + let prefix_iter = old_vectors_db + .prefix_iter(txn, OLD_VECTOR_PREFIX) + .map_err(|e| { + VectorError::VectorCoreError(format!("Failed to iterate old vectors: {}", e)) + })?; + + for result in prefix_iter { + let (key, vector_data_bytes) = result.map_err(|e| { + VectorError::VectorCoreError(format!("Failed to read old vector key: {}", e)) + })?; + + // Parse old key format: [v:][id][level] + if key.len() < OLD_VECTOR_PREFIX.len() + 16 + 8 { + panic!("Malformed key: {:?}", key); + } + + let mut id_bytes = [0u8; 16]; + id_bytes.copy_from_slice(&key[OLD_VECTOR_PREFIX.len()..OLD_VECTOR_PREFIX.len() + 16]); + let id = u128::from_be_bytes(id_bytes); + + // Only process level 0 vectors to avoid duplicates + let mut level_bytes = [0u8; 8]; + level_bytes + .copy_from_slice(&key[OLD_VECTOR_PREFIX.len() + 16..OLD_VECTOR_PREFIX.len() + 16 + 8]); + let level = usize::from_be_bytes(level_bytes); + + // Skip if we've already processed this ID or if it's not level 0 + if level != 0 || !seen_ids.insert(id) { + continue; + } + + // Get properties from old properties database + let properties_bytes = old_vector_properties_db.get(txn, &id).map_err(|e| { + VectorError::VectorCoreError(format!("Failed to read old vector properties: {}", e)) + })?; + + // Parse old vector format + let old_vector = parse_old_vector_format(id, level, vector_data_bytes, properties_bytes)?; + + vectors.push(old_vector); + } + + Ok(vectors) +} + +fn parse_old_vector_format( + id: u128, + _level: usize, + vector_data_bytes: &[u8], + properties_bytes: Option<&[u8]>, +) -> VectorCoreResult { + // Parse vector data (assuming old format was f64) + let data = convert_old_vector_data(vector_data_bytes)?; + + // Parse properties using old format deserializer + let (label, properties, deleted) = if let Some(props_bytes) = properties_bytes { + parse_old_properties_format(props_bytes)? 
+ } else { + ("unknown".to_string(), None, false) + }; + + Ok(OldVectorData { + id, + label, + data, + properties, + deleted, + }) +} + +fn convert_old_vector_data(vector_data_bytes: &[u8]) -> VectorCoreResult> { + // Assume f64 format + if vector_data_bytes.len().is_multiple_of(8) { + let f64_slice: &[f64] = bytemuck::cast_slice(vector_data_bytes); + Ok(f64_slice.iter().map(|&x| x as f32).collect()) + } else { + Err(VectorError::ConversionError( + "Invalid vector data format in old database".to_string(), + )) + } +} + +fn parse_old_properties_format( + properties_bytes: &[u8], +) -> VectorCoreResult<(String, Option, bool)> { + match bincode::deserialize::(properties_bytes) { + Ok(old_props) => Ok((old_props.label, old_props.properties, old_props.deleted)), + Err(e) => { + println!("Warning: Could not parse old properties format: {}", e); + Ok(("migrated".to_string(), None, false)) + } + } +} + +fn migrate_label_group( + core: &VectorCore, + txn: &mut RwTxn, + label: &str, + vectors: Vec, +) -> VectorCoreResult<()> { + if vectors.is_empty() { + return Ok(()); + } + + println!("Migrating label '{}' with {} vectors", label, vectors.len()); + + let arena = bumpalo::Bump::new(); + + // Get dimension from first vector + let dimension = vectors[0].data.len(); + + // Create writer for this label + let writer = core.get_writer_or_create_index(label, dimension, txn)?; + + // Insert all vectors for this label + for (local_idx, old_vector) in vectors.into_iter().enumerate() { + let local_id = local_idx as u32; + + // Skip deleted vectors during migration + if old_vector.deleted { + continue; + } + + // Add to HNSW index + writer + .add_item(txn, local_id, &old_vector.data) + .map_err(|e| { + VectorError::VectorCoreError(format!("Failed to add vector to HNSW: {}", e)) + })?; + + // Convert old properties to new format + let properties = if let Some(props_json) = old_vector.properties { + convert_old_properties_to_new(&arena, props_json)? 
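+            // Old vectors saved without a JSON blob simply carry no
+            // properties in the new layout; they are still indexed.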
+ } else { + None + }; + + // Store mappings + core.global_to_local_id + .write() + .unwrap() + .insert(old_vector.id, (local_id, label.to_string())); + core.local_to_global_id + .put(txn, &local_id, &old_vector.id) + .map_err(|e| { + VectorError::VectorCoreError(format!("Failed to store ID mapping: {}", e)) + })?; + + // Store properties in new format + if let Some(props) = &properties { + core.vector_properties_db + .put( + txn, + &old_vector.id, + &bincode::serialize(props).map_err(|e| { + VectorError::ConversionError(format!( + "Failed to serialize properties: {}", + e + )) + })?, + ) + .map_err(|e| { + VectorError::VectorCoreError(format!( + "Failed to store vector properties: {}", + e + )) + })?; + } + + // Update vector count + core.label_to_index + .read() + .unwrap() + .get(label) + .unwrap() + .num_vectors + .fetch_add(1, atomic::Ordering::SeqCst); + } + + // Rebuild HNSW index + let mut rng = StdRng::from_os_rng(); + let mut builder = writer.builder(&mut rng); + builder + .ef_construction(core.config.ef_construct) + .build(txn) + .map_err(|e| VectorError::VectorCoreError(format!("Failed to build HNSW index: {}", e)))?; + + Ok(()) +} + +fn convert_old_properties_to_new<'arena>( + arena: &'arena bumpalo::Bump, + old_props: serde_json::Value, +) -> VectorCoreResult>> { + // Convert from serde_json::Value to ImmutablePropertiesMap + if let serde_json::Value::Object(map) = old_props { + let mut new_props = HashMap::new(); + + for (key, value) in map { + let helix_value = match value { + serde_json::Value::String(s) => Value::String(s), + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + Value::I64(i) + } else if let Some(f) = n.as_f64() { + Value::F64(f) + } else { + continue; + } + } + serde_json::Value::Bool(b) => Value::Boolean(b), + _ => continue, // Skip complex types for now + }; + new_props.insert(arena.alloc_str(&key), helix_value); + } + + let props_vec: Vec<(&str, Value)> = + new_props.into_iter().map(|(k, v)| (k as &str, v)).collect(); + Ok(Some(ImmutablePropertiesMap::new( + props_vec.len(), + props_vec.into_iter(), + arena, + ))) + } else { + Ok(None) + } +} + +fn backup_old_databases(env: &Env, txn: &mut RwTxn) -> VectorCoreResult<()> { + // Note: LMDB doesn't support database renaming directly. + // Instead, we'll clear them after successful migration + + // Clear old databases after successful migration + if let Some(old_vectors_db) = env + .database_options() + .types::() + .name("vectors") + .open(txn)? + { + old_vectors_db.clear(txn).map_err(|e| { + VectorError::VectorCoreError(format!("Failed to clear old vectors database: {}", e)) + })?; + } + + if let Some(old_edges_db) = env + .database_options() + .types::() + .name("hnsw_out_nodes") + .open(txn)? 
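+    // `clear` empties the named database but keeps it registered, so
+    // re-opening the environment later still sees a (now empty) handle.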
+ { + old_edges_db.clear(txn).map_err(|e| { + VectorError::VectorCoreError(format!("Failed to clear old edges database: {}", e)) + })?; + } + + Ok(()) +} + +#[cfg(test)] +mod migration_tests { + use super::*; + use crate::helix_engine::vector_core::HNSWConfig; + use heed3::EnvOpenOptions; + + use tempfile::tempdir; + + #[test] + fn test_migration_detection() { + let dir = tempdir().unwrap(); + let env = unsafe { + EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) + .max_dbs(10) + .open(dir.path()) + .unwrap() + }; + + let mut txn = env.write_txn().unwrap(); + + // Initially no migration needed + assert!(!needs_migration_from_old_format(&env, &txn).unwrap()); + + // Create old databases + let _old_vectors = env + .database_options() + .types::() + .name("vectors") + .create(&mut txn) + .unwrap(); + let _old_edges = env + .database_options() + .types::() + .name("hnsw_out_nodes") + .create(&mut txn) + .unwrap(); + + // Now migration should be needed + assert!(needs_migration_from_old_format(&env, &txn).unwrap()); + + txn.commit().unwrap(); + } + + #[test] + fn test_old_vector_data_conversion() { + // Test f64 to f32 conversion + let f64_data: Vec = vec![1.0, 2.0, 3.0]; + let f64_bytes = bytemuck::cast_slice::(&f64_data); + let converted = convert_old_vector_data(f64_bytes).unwrap(); + assert_eq!(converted, vec![1.0f32, 2.0f32, 3.0f32]); + } + + #[test] + fn test_old_properties_conversion() { + let arena = bumpalo::Bump::new(); + + // Test simple JSON object conversion + let json_value = serde_json::json!({ + "name": "test_vector", + "count": 42, + "score": 0.85, + "active": true + }); + + let converted = convert_old_properties_to_new(&arena, json_value).unwrap(); + assert!(converted.is_some()); + + let props = converted.unwrap(); + assert_eq!(props.len(), 4); + + // Verify the properties were converted correctly + assert!(props.get("name").is_some()); + assert!(props.get("count").is_some()); + assert!(props.get("score").is_some()); + assert!(props.get("active").is_some()); + } + + #[test] + fn test_empty_migration() { + let dir = tempdir().unwrap(); + let env = unsafe { + EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) + .max_dbs(10) + .open(dir.path()) + .unwrap() + }; + + let mut txn = env.write_txn().unwrap(); + let config = HNSWConfig::new(None, None, None); + + // Create old empty databases + let _old_vectors = env + .database_options() + .types::() + .name("vectors") + .create(&mut txn) + .unwrap(); + let _old_data = env + .database_options() + .types::, Bytes>() + .name("vector_data") + .create(&mut txn) + .unwrap(); + let _old_edges = env + .database_options() + .types::() + .name("hnsw_out_nodes") + .create(&mut txn) + .unwrap(); + + // Migration should succeed even with empty databases + let result = migrate_from_old_format(&env, &mut txn, config); + assert!(result.is_ok()); + + let vector_core = result.unwrap(); + assert_eq!(vector_core.num_inserted_vectors(), 0); + + txn.commit().unwrap(); + } +} diff --git a/helix-db/src/helix_engine/vector_core/mod.rs b/helix-db/src/helix_engine/vector_core/mod.rs index 279803d8..9048c70e 100644 --- a/helix-db/src/helix_engine/vector_core/mod.rs +++ b/helix-db/src/helix_engine/vector_core/mod.rs @@ -1,7 +1,854 @@ -pub mod binary_heap; +use std::{ + cmp::Ordering, + sync::{ + RwLock, + atomic::{self, AtomicU16, AtomicU32, AtomicUsize}, + }, +}; + +use bincode::Options; +use byteorder::BE; +use hashbrown::HashMap; +use heed3::{ + Database, Env, Error as LmdbError, RoTxn, RwTxn, + types::{Bytes, U32, U128}, +}; +use rand::{SeedableRng, 
rngs::StdRng}; +use serde::{Deserialize, Serialize, Serializer, ser::SerializeMap}; + +use crate::{ + helix_engine::{ + types::VectorError, + vector_core::{ + distance::{Cosine, Distance}, + key::KeyCodec, + node::{Item, NodeCodec}, + reader::{Reader, Searched, get_item}, + writer::Writer, + }, + }, + protocol::{ + custom_serde::vector_serde::{ + OptionPropertiesMapDeSeed, VectoWithoutDataDeSeed, VectorDeSeed, + }, + value::Value, + }, + utils::{ + id::{uuid_str_from_buf, v6_uuid}, + properties::ImmutablePropertiesMap, + }, +}; + +pub mod distance; pub mod hnsw; -pub mod utils; -pub mod vector; -pub mod vector_core; -pub mod vector_distance; -pub mod vector_without_data; +pub mod item_iter; +pub mod key; +pub mod metadata; +pub mod migration; +pub mod node; +pub mod node_id; +pub mod ordered_float; +pub mod parallel; +pub mod reader; +pub mod spaces; +pub mod stats; +pub mod unaligned_vector; +pub mod version; +pub mod writer; + +const DB_VECTORS: &str = "vectors"; // for vector data (v:) +const DB_VECTOR_DATA: &str = "vector_data"; // for vector's properties +const DB_ID_MAP: &str = "id_map"; // for map ids + +pub type ItemId = u32; + +pub type LayerId = u8; + +pub type VectorCoreResult = std::result::Result; + +pub type LmdbResult = std::result::Result; + +pub type CoreDatabase = heed3::Database>; + +#[derive(Debug, Clone)] +pub struct HVector<'arena> { + pub id: u128, + pub distance: Option, + // TODO: String Interning. We do a lot of unnecessary string allocations + // for the same set of labels. + pub label: &'arena str, + pub deleted: bool, + pub level: Option, + pub version: u8, + pub properties: Option>, + pub data: Option>, +} + +impl<'arena> serde::Serialize for HVector<'arena> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + use serde::ser::SerializeStruct; + + // Check if this is a human-readable format (like JSON) + if serializer.is_human_readable() { + // Include id for JSON serialization + let mut buffer = [0u8; 36]; + let mut state = serializer.serialize_map(Some( + 5 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0), + ))?; + state.serialize_entry("id", uuid_str_from_buf(self.id, &mut buffer))?; + state.serialize_entry("label", &self.label)?; + state.serialize_entry("version", &self.version)?; + state.serialize_entry("deleted", &self.deleted)?; + if let Some(properties) = &self.properties { + for (key, value) in properties.iter() { + state.serialize_entry(key, value)?; + } + } + state.end() + } else { + // Skip id, level, distance, and data for bincode serialization + let mut state = serializer.serialize_struct("HVector", 4)?; + state.serialize_field("label", &self.label)?; + state.serialize_field("version", &self.version)?; + state.serialize_field("deleted", &self.deleted)?; + state.serialize_field("properties", &self.properties)?; + state.end() + } + } +} + +impl<'arena> HVector<'arena> { + pub fn data_borrowed(&self) -> &[f32] { + bytemuck::cast_slice(self.data.as_ref().unwrap().vector.as_bytes()) + } + + pub fn from_vec(label: &'arena str, data: bumpalo::collections::Vec<'arena, f32>) -> Self { + HVector { + label, + id: v6_uuid(), + version: 1, + data: Some(Item::::from_vec(data)), + distance: None, + properties: None, + deleted: false, + level: None, + } + } + + pub fn score(&self) -> f32 { + self.distance.unwrap_or(2.0) + } + + /// Converts HVector's data to a vec of bytes by accessing the data field directly + /// and converting each f32 to a byte slice + #[inline(always)] + pub fn vector_data_to_bytes(&self) -> 
VectorCoreResult<&[u8]> { + Ok(self + .data + .as_ref() + .ok_or(VectorError::HasNoData)? + .vector + .as_ref() + .as_bytes()) + } + + /// Deserializes bytes into an vector using a custom deserializer that allocates into the provided arena + /// + /// Both the properties bytes (if present) and the raw vector data are combined to generate the final vector struct + /// + /// NOTE: in this method, fixint encoding is used + pub fn from_bincode_bytes<'txn>( + arena: &'arena bumpalo::Bump, + properties: Option<&'txn [u8]>, + raw_vector_data: &'txn [u8], + id: u128, + get_data: bool, + ) -> Result { + if get_data { + bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed( + VectorDeSeed { + arena, + id, + raw_vector_data, + }, + properties.unwrap_or(&[]), + ) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + }) + } else { + bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed( + VectoWithoutDataDeSeed { arena, id }, + properties.unwrap_or(&[]), + ) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + }) + } + } + + #[inline(always)] + pub fn to_bincode_bytes(&self) -> Result, bincode::Error> { + bincode::serialize(self) + } + + pub fn distance_to(&self, rhs: &HVector<'arena>) -> VectorCoreResult { + match (self.data.as_ref(), rhs.data.as_ref()) { + (None, _) | (_, None) => Err(VectorError::HasNoData), + (Some(a), Some(b)) => { + if a.vector.len() != b.vector.len() { + return Err(VectorError::InvalidVecDimension { + expected: a.vector.len(), + received: b.vector.len(), + }); + } + + Ok(Cosine::distance(a, b)) + } + } + } + + pub fn set_distance(&mut self, distance: f32) { + self.distance = Some(distance); + } + + pub fn get_distance(&self) -> f32 { + self.distance.unwrap() + } + + pub fn len(&self) -> usize { + self.data.as_ref().unwrap().vector.len() + } + + pub fn is_empty(&self) -> bool { + self.data.as_ref().unwrap().vector.is_empty() + } + + #[inline(always)] + pub fn get_property(&self, key: &str) -> Option<&'arena Value> { + self.properties.as_ref().and_then(|value| value.get(key)) + } + + pub fn raw_vector_data_to_vec<'txn>( + raw_vector_data: &'txn [u8], + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + let mut bump_vec = bumpalo::collections::Vec::<'arena, f32>::new_in(arena); + bump_vec.extend_from_slice(bytemuck::try_cast_slice(raw_vector_data).map_err(|err| { + VectorError::ConversionError(format!("Error casting raw bytes to &[f32]: {}", err)) + })?); + + Ok(bump_vec) + } + + pub fn from_raw_vector_data<'txn>( + id: u128, + label: &'arena str, + raw_vector_data: &'txn [u8], + ) -> VectorCoreResult> + where + 'arena: 'txn, + { + Ok(HVector { + id, + label, + data: Some(Item::::from_raw_slice(raw_vector_data)), + properties: None, + distance: None, + deleted: false, + level: Some(0), + version: 1, + }) + } +} + +impl PartialEq for HVector<'_> { + fn eq(&self, other: &Self) -> bool { + self.id == other.id + } +} +impl Eq for HVector<'_> {} +impl PartialOrd for HVector<'_> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} +impl Ord for HVector<'_> { + fn cmp(&self, other: &Self) -> Ordering { + other + .distance + .partial_cmp(&self.distance) + .unwrap_or(Ordering::Equal) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HNSWConfig { + /// max num of bi-directional links per element + pub m: usize, + /// max num of links for lower layers + pub m_max_0: usize, + /// size 
of the dynamic candidate list for construction + pub ef_construct: usize, + /// level generation factor + pub m_l: f64, + /// search param, num of cands to search + pub ef: usize, + /// for get_neighbors, always 512 + pub min_neighbors: usize, +} + +impl HNSWConfig { + /// Constructor for the configs of the HNSW vector similarity search algorithm + /// - m (5 <= m <= 48): max num of bi-directional links per element + /// - m_max_0 (2 * m): max num of links for level 0 (level that stores all vecs) + /// - ef_construct (40 <= ef_construct <= 512): size of the dynamic candidate list + /// for construction + /// - m_l (ln(1/m)): level generation factor (multiplied by a random number) + /// - ef (10 <= ef <= 512): num of candidates to search + pub fn new(m: Option, ef_construct: Option, ef: Option) -> Self { + let m = m.unwrap_or(16).clamp(5, 48); + let ef_construct = ef_construct.unwrap_or(128).clamp(40, 512); + let ef = ef.unwrap_or(768).clamp(10, 512); + + Self { + m, + m_max_0: 2 * m, + ef_construct, + m_l: 1.0 / (m as f64).ln(), + ef, + min_neighbors: 512, + } + } +} + +pub struct HnswIndex { + pub id: u16, + pub dimension: usize, + pub num_vectors: AtomicUsize, +} + +// TODO: Properties filters +// TODO: Support different distances for each database +pub struct VectorCore { + pub hsnw: CoreDatabase, + pub vector_properties_db: Database, Bytes>, + pub config: HNSWConfig, + + /// Map labels to a different [HnswIndex] + pub label_to_index: RwLock>, + /// Track the last index + curr_index: AtomicU16, + + /// Maps global id (u128) to internal id (u32) and label + pub global_to_local_id: RwLock>, + pub local_to_global_id: Database, U128>, + curr_id: AtomicU32, +} + +impl VectorCore { + pub fn new(env: &Env, txn: &mut RwTxn, config: HNSWConfig) -> VectorCoreResult { + let vectors_db: CoreDatabase = env.create_database(txn, Some(DB_VECTORS))?; + let vector_properties_db = env + .database_options() + .types::, Bytes>() + .name(DB_VECTOR_DATA) + .create(txn)?; + + let local_to_global_id = env + .database_options() + .types::, U128>() + .name(DB_ID_MAP) + .create(txn)?; + + Ok(Self { + hsnw: vectors_db, + vector_properties_db, + config, + local_to_global_id, + label_to_index: RwLock::new(HashMap::new()), + curr_index: AtomicU16::new(0), + global_to_local_id: RwLock::new(HashMap::new()), + curr_id: AtomicU32::new(0), + }) + } + + pub fn new_with_migration( + env: &Env, + txn: &mut RwTxn, + config: HNSWConfig, + ) -> VectorCoreResult { + // Check if we need to migrate from old format + if migration::needs_migration_from_old_format(env, txn)? { + migration::migrate_from_old_format(env, txn, config) + } else { + Self::new(env, txn, config) + } + } + + pub fn search<'arena>( + &self, + txn: &RoTxn, + query: Vec, + k: usize, + label: &'arena str, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + match self.label_to_index.read().unwrap().get(label) { + Some(index) => { + if index.dimension != query.len() { + return Err(VectorError::InvalidVectorLength); + } + + let reader = Reader::open(txn, index.id, self.hsnw)?; + reader.nns(k).by_vector(txn, query.as_slice(), arena) + } + None => Ok(Searched::new(bumpalo::vec![in &arena])), + } + } + + /// Get a writer based on label. 
If it doesn't exist build a new index + /// and return a writer to it + pub(crate) fn get_writer_or_create_index( + &self, + label: &str, + dimension: usize, + txn: &mut RwTxn, + ) -> VectorCoreResult> { + if let Some(index) = self.label_to_index.read().unwrap().get(label) { + Ok(Writer::new(self.hsnw, index.id, dimension)) + } else { + // Index do not exist, we should build it + let idx = self.curr_index.fetch_add(1, atomic::Ordering::SeqCst); + self.label_to_index.write().unwrap().insert( + label.to_string(), + HnswIndex { + id: idx, + dimension, + num_vectors: AtomicUsize::new(0), + }, + ); + let writer = Writer::new(self.hsnw, idx, dimension); + let mut rng = StdRng::from_os_rng(); + let mut builder = writer.builder(&mut rng); + + builder + .ef_construction(self.config.ef_construct) + .build(txn)?; + Ok(writer) + } + } + + pub fn insert<'arena>( + &self, + txn: &mut RwTxn, + label: &'arena str, + data: &'arena [f32], + properties: Option>, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + let writer = self.get_writer_or_create_index(label, data.len(), txn)?; + + let idx = self.curr_id.fetch_add(1, atomic::Ordering::SeqCst); + writer.add_item(txn, idx, data).inspect_err(|_| { + self.curr_id.fetch_sub(1, atomic::Ordering::SeqCst); + })?; + + let mut bump_vec = bumpalo::collections::Vec::new_in(arena); + bump_vec.extend_from_slice(data); + let mut hvector = HVector::from_vec(label, bump_vec); + hvector.properties = properties; + + self.global_to_local_id + .write() + .unwrap() + .insert(hvector.id, (idx, label.to_string())); + self.local_to_global_id.put(txn, &idx, &hvector.id)?; + + self.label_to_index + .read() + .unwrap() + .get(label) + .unwrap() + .num_vectors + .fetch_add(1, atomic::Ordering::SeqCst); + + let mut rng = StdRng::from_os_rng(); + let mut builder = writer.builder(&mut rng); + + self.vector_properties_db + .put(txn, &hvector.id, &bincode::serialize(&properties)?)?; + + // FIXME: We shouldn't rebuild on every insertion + builder + .ef_construction(self.config.ef_construct) + .build(txn)?; + + Ok(hvector) + } + + pub fn delete(&self, txn: &mut RwTxn, id: u128) -> VectorCoreResult<()> { + let mut global_to_local_id = self.global_to_local_id.write().unwrap(); + match global_to_local_id.get(&id) { + Some(&(idx, ref label)) => { + let label_to_index = self.label_to_index.read().unwrap(); + let index = label_to_index + .get(label) + .expect("if index exist label should also exist"); + let writer = Writer::new(self.hsnw, index.id, index.dimension); + writer.del_item(txn, idx)?; + + // TODO: do we actually need to delete here? 
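+                // Dropping the reverse mapping keeps `local_to_global_id`
+                // consistent with the in-memory map; a stale entry would let
+                // `nns_to_hvectors` resurrect the deleted id from a search hit.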
+ self.local_to_global_id.delete(txn, &idx)?; + global_to_local_id.remove(&id); + + index.num_vectors.fetch_sub(1, atomic::Ordering::SeqCst); + + let mut rng = StdRng::from_os_rng(); + let mut builder = writer.builder(&mut rng); + + builder + .ef_construction(self.config.ef_construct) + .build(txn)?; + Ok(()) + } + None => Err(VectorError::VectorNotFound(format!( + "vector {} doesn't exist", + id + ))), + } + } + + pub fn nns_to_hvectors<'arena>( + &self, + txn: &RoTxn, + nns: bumpalo::collections::Vec<'arena, (ItemId, f32)>, + with_data: bool, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + let mut results = bumpalo::collections::Vec::<'arena, HVector<'arena>>::with_capacity_in( + nns.len(), + arena, + ); + + let label_to_index = self.label_to_index.read().unwrap(); + let global_to_local_id = self.global_to_local_id.read().unwrap(); + + let (item_id, _) = nns.first().unwrap(); + let global_id = self + .local_to_global_id + .get(txn, item_id)? + .ok_or_else(|| VectorError::VectorNotFound("Vector not found".to_string()))?; + let (_, label) = global_to_local_id.get(&global_id).unwrap(); + let index = label_to_index.get(label).unwrap(); + let label = arena.alloc_str(label); + + if with_data { + for (item_id, distance) in nns.into_iter() { + let global_id = self + .local_to_global_id + .get(txn, &item_id)? + .ok_or_else(|| VectorError::VectorNotFound("Vector not found".to_string()))?; + + let properties = match self.vector_properties_db.get(txn, &global_id)? { + Some(bytes) => bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed(OptionPropertiesMapDeSeed { arena }, bytes) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + })?, + None => None, + }; + + results.push(HVector { + id: global_id, + distance: Some(distance), + label, + properties, + deleted: false, + level: None, + version: 0, + data: get_item(self.hsnw, index.id, txn, item_id)? + .map(|data| data.clone_in(arena)), + }); + } + } else { + for (item_id, distance) in nns.into_iter() { + let global_id = self + .local_to_global_id + .get(txn, &item_id)? + .ok_or_else(|| VectorError::VectorNotFound("Vector not found".to_string()))?; + + let properties = match self.vector_properties_db.get(txn, &global_id)? { + Some(bytes) => bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed(OptionPropertiesMapDeSeed { arena }, bytes) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + })?, + None => None, + }; + results.push(HVector { + id: global_id, + distance: Some(distance), + label, + deleted: false, + version: 0, + properties, + level: None, + data: None, + }); + } + } + + Ok(results) + } + + pub fn get_full_vector<'arena>( + &self, + txn: &RoTxn, + global_id: u128, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + let label_to_index = self.label_to_index.read().unwrap(); + let global_to_local_id = self.global_to_local_id.read().unwrap(); + + let (item_id, label) = global_to_local_id + .get(&global_id) + .ok_or_else(|| VectorError::VectorNotFound(format!("Vector {global_id} not found")))?; + + let index = label_to_index.get(label).unwrap(); + let properties = match self.vector_properties_db.get(txn, &global_id)? 
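+        // Properties are decoded with an arena-seeded deserializer; fixint
+        // encoding with trailing bytes allowed matches the legacy
+        // `bincode::serialize` call used on the write path in `insert`.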
{ + Some(bytes) => bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed(OptionPropertiesMapDeSeed { arena }, bytes) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + })?, + None => None, + }; + + Ok(HVector { + id: global_id, + properties, + distance: None, + label: arena.alloc_str(label), + deleted: false, + version: 0, + level: None, + data: get_item(self.hsnw, index.id, txn, *item_id)?.map(|i| i.clone_in(arena)), + }) + } + + pub fn get_vector_properties<'arena>( + &self, + txn: &RoTxn, + id: u128, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + let global_to_local_id = self.global_to_local_id.read().unwrap(); + let (_, label) = global_to_local_id + .get(&id) + .ok_or_else(|| VectorError::VectorNotFound(format!("Vector not found: {}", id)))?; + + let properties = match self.vector_properties_db.get(txn, &id)? { + Some(bytes) => bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed(OptionPropertiesMapDeSeed { arena }, bytes) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + })?, + None => None, + }; + + Ok(Some(HVector { + id, + properties, + distance: None, + label: arena.alloc_str(label.as_str()), + deleted: false, + version: 0, + level: None, + data: None, + })) + } + + pub fn num_inserted_vectors(&self) -> usize { + self.label_to_index + .read() + .unwrap() + .iter() + .map(|(_, i)| i.num_vectors.load(atomic::Ordering::SeqCst)) + .sum() + } + + pub fn get_all_vectors_by_label<'arena>( + &self, + txn: &RoTxn, + label: &'arena str, + get_vector_data: bool, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + let mut result = bumpalo::collections::Vec::new_in(arena); + let label_to_index = self.label_to_index.read().unwrap(); + let index = match label_to_index.get(label) { + Some(index) => index, + None => return Ok(bumpalo::collections::Vec::new_in(arena)), + }; + + let reader = Reader::open(txn, index.id, self.hsnw)?; + let mut iter = reader.iter(txn)?; + + if get_vector_data { + while let Some((key, item)) = iter.next().transpose()? { + let id = self + .local_to_global_id + .get(txn, &key.item)? + .ok_or_else(|| VectorError::VectorNotFound("Vector not found".to_string()))?; + + let properties = match self.vector_properties_db.get(txn, &id)? { + Some(bytes) => bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed(OptionPropertiesMapDeSeed { arena }, bytes) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + })?, + None => None, + }; + + result.push(HVector { + id, + label, + distance: None, + deleted: false, + level: Some(key.layer as usize), + version: 0, + properties, + data: Some(item.clone_in(arena)), + }); + } + } else { + while let Some(key) = iter.next_id().transpose()? { + let id = self + .local_to_global_id + .get(txn, &key.item)? + .ok_or_else(|| VectorError::VectorNotFound("Vector not found".to_string()))?; + + let properties = match self.vector_properties_db.get(txn, &id)? 
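+                // Id-only iteration: `next_id` walks keys without decoding
+                // the stored vectors, so skipping the data stays cheap.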
{ + Some(bytes) => bincode::options() + .with_fixint_encoding() + .allow_trailing_bytes() + .deserialize_seed(OptionPropertiesMapDeSeed { arena }, bytes) + .map_err(|e| { + VectorError::ConversionError(format!("Error deserializing vector: {e}")) + })?, + None => None, + }; + + result.push(HVector { + id, + label, + distance: None, + deleted: false, + level: Some(key.layer as usize), + version: 0, + properties, + data: None, + }); + } + } + + Ok(result) + } + + pub fn get_all_vectors<'arena>( + &self, + txn: &RoTxn, + get_vector_data: bool, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + let label_to_index = self.label_to_index.read().unwrap(); + let mut result = bumpalo::collections::Vec::new_in(arena); + + for (label, index) in label_to_index.iter() { + let reader = Reader::open(txn, index.id, self.hsnw)?; + let mut iter = reader.iter(txn)?; + + if get_vector_data { + while let Some((key, item)) = iter.next().transpose()? { + let id = self + .local_to_global_id + .get(txn, &key.item)? + .ok_or_else(|| { + VectorError::VectorNotFound("Vector not found".to_string()) + })?; + + result.push(HVector { + id, + label: arena.alloc_str(label), + distance: None, + deleted: false, + level: Some(key.layer as usize), + version: 0, + properties: None, + data: Some(item.clone_in(arena)), + }); + } + } else { + while let Some(key) = iter.next_id().transpose()? { + let id = self + .local_to_global_id + .get(txn, &key.item)? + .ok_or_else(|| { + VectorError::VectorNotFound("Vector not found".to_string()) + })?; + + result.push(HVector { + id, + label: arena.alloc_str(label), + distance: None, + deleted: false, + level: Some(key.layer as usize), + version: 0, + properties: None, + data: None, + }); + } + } + } + + Ok(result) + } + + pub fn into_global_id( + &self, + txn: &RoTxn, + searched: &Searched, + ) -> VectorCoreResult> { + let mut result = Vec::new(); + for &(id, distance) in searched.nns.iter() { + result.push(( + self.local_to_global_id + .get(txn, &id)? + .ok_or_else(|| VectorError::VectorNotFound("Vector not found".to_string()))?, + distance, + )) + } + + Ok(result) + } +} diff --git a/helix-db/src/helix_engine/vector_core/node.rs b/helix-db/src/helix_engine/vector_core/node.rs new file mode 100644 index 00000000..dc0b7880 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/node.rs @@ -0,0 +1,315 @@ +use core::fmt; +use std::{borrow::Cow, ops::Deref}; + +use bytemuck::{bytes_of, cast_slice, pod_read_unaligned}; +use byteorder::{ByteOrder, NativeEndian}; +use heed3::{BoxedError, BytesDecode, BytesEncode}; +use roaring::RoaringBitmap; +use serde::Serialize; + +use crate::helix_engine::vector_core::{ + ItemId, distance::Distance, unaligned_vector::UnalignedVector, +}; + +#[derive(Clone, Debug)] +pub enum Node<'a, D: Distance> { + Item(Item<'a, D>), + Links(Links<'a>), +} + +const NODE_TAG: u8 = 0; +const LINKS_TAG: u8 = 1; + +impl<'a, D: Distance> Node<'a, D> { + pub fn item(self) -> Option> { + if let Node::Item(item) = self { + Some(item) + } else { + None + } + } + + pub fn links(self) -> Option> { + if let Node::Links(links) = self { + Some(links) + } else { + None + } + } +} + +/// An item node which corresponds to the vector inputed +/// by the user and the distance header. +#[derive(Serialize)] +pub struct Item<'a, D: Distance> { + /// The header of this item. + pub header: D::Header, + /// The vector of this item. 
+ pub vector: Cow<'a, UnalignedVector>, +} + +impl fmt::Debug for Item<'_, D> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Item") + .field("header", &self.header) + .field("vector", &self.vector) + .finish() + } +} + +impl Clone for Item<'_, D> { + fn clone(&self) -> Self { + Self { + header: self.header, + vector: self.vector.clone(), + } + } +} + +impl<'a, D: Distance> Item<'a, D> { + /// Converts the item into an owned version of itself by cloning + /// the internal vector. Doing so will make it mutable. + pub fn into_owned(self) -> Item<'static, D> { + Item { + header: self.header, + vector: Cow::Owned(self.vector.into_owned()), + } + } + + /// Clones the item into the provided arena, returning a new Item + /// with a lifetime tied to the arena. + pub fn clone_in<'bump>(&self, arena: &'bump bumpalo::Bump) -> Item<'bump, D> { + // TODO: This does two allocations, we should do only one! + let vec_data = self.vector.to_vec(arena); + let vector = UnalignedVector::from_vec(vec_data); + + Item { + header: self.header, + vector, + } + } + + /// Builds a new borrowed item from a `&[u8]` slice. + /// This function do not allocates + pub fn from_raw_slice(slice: &'a [u8]) -> Self { + let vector = UnalignedVector::from_slice(bytemuck::cast_slice(slice)); + let header = D::new_header(&vector); + Self { header, vector } + } + + /// Builds a new borrowed item from a `&[f32]` slice. + /// This function do not allocates + pub fn from_slice(slice: &'a [f32]) -> Self { + let vector = UnalignedVector::from_slice(slice); + let header = D::new_header(&vector); + Self { header, vector } + } + + /// Builds a new item from a `Vec`. + /// This function allocates + pub fn from_vec(vec: bumpalo::collections::Vec) -> Self { + let vector = UnalignedVector::from_vec(vec); + let header = D::new_header(&vector); + Self { header, vector } + } +} + +#[derive(Clone, Debug)] +pub struct Links<'a> { + pub links: Cow<'a, RoaringBitmap>, +} + +impl<'a> Deref for Links<'a> { + type Target = Cow<'a, RoaringBitmap>; + fn deref(&self) -> &Self::Target { + &self.links + } +} + +#[derive(Clone)] +pub struct ItemIds<'a> { + bytes: &'a [u8], +} + +impl<'a> ItemIds<'a> { + pub fn from_slice(slice: &[u32]) -> ItemIds<'_> { + ItemIds::from_bytes(cast_slice(slice)) + } + + pub fn from_bytes(bytes: &[u8]) -> ItemIds<'_> { + ItemIds { bytes } + } + + pub fn raw_bytes(&self) -> &[u8] { + self.bytes + } + + pub fn len(&self) -> usize { + self.bytes.len() / size_of::() + } + + pub fn is_empty(&self) -> bool { + self.bytes.is_empty() + } + + pub fn iter(&self) -> impl Iterator + 'a { + self.bytes + .chunks_exact(size_of::()) + .map(NativeEndian::read_u32) + } +} + +impl fmt::Debug for ItemIds<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = f.debug_list(); + self.iter().for_each(|integer| { + list.entry(&integer); + }); + list.finish() + } +} + +/// The codec used internally to encode and decode nodes. 
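+//
+// On-disk layout (sketch, from the encoder below):
+//   Item:  [NODE_TAG = 0][D::Header bytes][unaligned vector bytes]
+//   Links: [LINKS_TAG = 1][RoaringBitmap written via `serialize_into`]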
+pub struct NodeCodec(D); + +impl<'a, D: Distance> BytesEncode<'a> for NodeCodec { + type EItem = Node<'a, D>; + + fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { + let mut bytes = Vec::new(); + match item { + Node::Item(Item { header, vector }) => { + bytes.push(NODE_TAG); + bytes.extend_from_slice(bytes_of(header)); + bytes.extend(vector.as_bytes()); + } + Node::Links(Links { links }) => { + bytes.push(LINKS_TAG); + links.serialize_into(&mut bytes)?; + } + } + Ok(Cow::Owned(bytes)) + } +} + +impl<'a, D: Distance> BytesDecode<'a> for NodeCodec { + type DItem = Node<'a, D>; + + fn bytes_decode(bytes: &'a [u8]) -> Result { + match bytes { + [NODE_TAG, bytes @ ..] => { + let (header_bytes, remaining) = bytes.split_at(size_of::()); + let header = pod_read_unaligned(header_bytes); + let vector = UnalignedVector::::from_bytes(remaining)?; + + Ok(Node::Item(Item { header, vector })) + } + [LINKS_TAG, bytes @ ..] => { + let links: Cow<'_, RoaringBitmap> = + Cow::Owned(RoaringBitmap::deserialize_from(bytes).unwrap()); + Ok(Node::Links(Links { links })) + } + + [unknown_tag, ..] => Err(Box::new(InvalidNodeDecoding { + unknown_tag: Some(*unknown_tag), + })), + [] => Err(Box::new(InvalidNodeDecoding { unknown_tag: None })), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub struct InvalidNodeDecoding { + unknown_tag: Option, +} + +impl fmt::Display for InvalidNodeDecoding { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.unknown_tag { + Some(unknown_tag) => write!(f, "Invalid node decoding: unknown tag {unknown_tag}"), + None => write!(f, "Invalid node decoding: empty array of bytes"), + } + } +} + +#[cfg(test)] +mod tests { + use crate::helix_engine::vector_core::{ + distance::{Cosine, Distance}, + unaligned_vector::UnalignedVector, + }; + + use super::{Item, Links, Node, NodeCodec}; + use bumpalo::Bump; + use heed3::{BytesDecode, BytesEncode}; + use roaring::RoaringBitmap; + use std::borrow::Cow; + + #[test] + fn check_bytes_encode_decode() { + type D = Cosine; + + let b = Bump::new(); + let vector = UnalignedVector::from_vec(bumpalo::vec![in &b; 1.0, 2.0]); + let header = D::new_header(&vector); + let item = Item { vector, header }; + let db_item = Node::Item(item); + + let bytes = NodeCodec::::bytes_encode(&db_item); + assert!(bytes.is_ok()); + let bytes = bytes.unwrap(); + dbg!("{}, {}", std::mem::size_of_val(&db_item), bytes.len()); + // dbg!("{:?}", &bytes); + + let db_item2 = NodeCodec::::bytes_decode(bytes.as_ref()); + assert!(db_item2.is_ok()); + let db_item2 = db_item2.unwrap(); + + dbg!("{:?}", &db_item2); + dbg!("{:?}", &db_item); + } + + #[test] + fn test_codec() { + type D = Cosine; + + let b = Bump::new(); + let vector = UnalignedVector::from_vec(bumpalo::vec![in &b; 1.0, 2.0]); + let header = D::new_header(&vector); + let item = Item { vector, header }; + let db_item = Node::Item(item.clone()); + + let bytes = NodeCodec::::bytes_encode(&db_item); + assert!(bytes.is_ok()); + let bytes = bytes.unwrap(); + + let new_item = NodeCodec::::bytes_decode(bytes.as_ref()); + assert!(new_item.is_ok()); + let new_item = new_item.unwrap().item().unwrap(); + + assert!(matches!(new_item.vector, Cow::Borrowed(_))); + assert_eq!(new_item.vector.as_bytes(), item.vector.as_bytes()); + } + + #[test] + fn test_bitmap_codec() { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(1); + bitmap.insert(42); + + let links = Links { + links: Cow::Owned(bitmap), + }; + let db_item = Node::Links(links); + let bytes = NodeCodec::::bytes_encode(&db_item).unwrap(); + + 
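+        // Round-trip: decoding the encoded node must preserve the exact
+        // bitmap membership asserted below.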
let node = NodeCodec::::bytes_decode(&bytes).unwrap(); + assert!(matches!(node, Node::Links(_))); + let new_links = match node { + Node::Links(links) => links, + _ => unreachable!(), + }; + assert!(new_links.links.contains(1)); + assert!(new_links.links.contains(42)); + } +} diff --git a/helix-db/src/helix_engine/vector_core/node_id.rs b/helix-db/src/helix_engine/vector_core/node_id.rs new file mode 100644 index 00000000..024b8caf --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/node_id.rs @@ -0,0 +1,160 @@ +use core::fmt; + +use byteorder::{BigEndian, ByteOrder}; + +use crate::helix_engine::vector_core::{ItemId, LayerId}; + +/// /!\ Changing the value of the enum can be DB-breaking /!\ +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[repr(u8)] +pub enum NodeMode { + /// Stores the metadata under the `ItemId` 0 + Metadata = 0, + /// Stores the list of all the `ItemId` that have been updated. + /// We only stores `Unit` values under the keys. + Updated = 1, + /// The graph edges re stored under this id + Links = 2, + /// The original vectors are stored under this id in `Item` structures. + Item = 3, +} + +impl TryFrom for NodeMode { + type Error = String; + + fn try_from(v: u8) -> std::result::Result { + match v { + v if v == NodeMode::Item as u8 => Ok(NodeMode::Item), + v if v == NodeMode::Links as u8 => Ok(NodeMode::Links), + v if v == NodeMode::Updated as u8 => Ok(NodeMode::Updated), + v if v == NodeMode::Metadata as u8 => Ok(NodeMode::Metadata), + v => Err(format!("Could not convert {v} as a `NodeMode`.")), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct NodeId { + /// Indicate what the item represent. + pub mode: NodeMode, + /// The item we want to get. + pub item: ItemId, + /// Store Hnsw layer ID after ItemId for co-locality of (vec, its_links) in lmdb (?) + /// Safe to store in a u8 since impossible the graph will have >256 layers + pub layer: LayerId, +} + +impl fmt::Debug for NodeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}({},{})", self.mode, self.item, self.layer) + } +} + +impl NodeId { + pub const fn metadata() -> Self { + Self { + mode: NodeMode::Metadata, + item: 0, + layer: 0, + } + } + + pub const fn version() -> Self { + Self { + mode: NodeMode::Metadata, + item: 1, + layer: 0, + } + } + + pub const fn updated(item: u32) -> Self { + Self { + mode: NodeMode::Updated, + item, + layer: 0, + } + } + + pub const fn links(item: u32, layer: u8) -> Self { + Self { + mode: NodeMode::Links, + item, + layer, + } + } + + pub const fn item(item: u32) -> Self { + Self { + mode: NodeMode::Item, + item, + layer: 0, + } + } + + /// Return the underlying `ItemId` if it is an item. + /// Panic otherwise. + #[track_caller] + pub fn unwrap_item(&self) -> ItemId { + assert_eq!(self.mode, NodeMode::Item); + self.item + } + + /// Return the underlying `ItemId` if it is a links node. + /// Panic otherwise. 
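+    //
+    // NOTE: the on-disk key bytes sort as [mode][layer][item] (see
+    // `to_bytes` below), while the derived `Ord` on this struct compares
+    // `mode`, then `item`, then `layer`, following field order.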
+ #[track_caller] + pub fn unwrap_node(&self) -> (ItemId, LayerId) { + assert_eq!(self.mode, NodeMode::Links); + (self.item, self.layer) + } + + pub fn to_bytes(self) -> [u8; 6] { + let mut output = [0; 6]; + + output[0] = self.mode as u8; + output[1] = self.layer; + let item_bytes = self.item.to_be_bytes(); + output[2..=5].copy_from_slice(&item_bytes); + + output + } + + pub fn from_bytes(bytes: &[u8]) -> (Self, &[u8]) { + let mode = NodeMode::try_from(bytes[0]).expect("Could not parse the node mode"); + let layer = bytes[1]; + let item = BigEndian::read_u32(&bytes[2..]); + + ( + Self { mode, item, layer }, + &bytes[size_of::() + size_of::()..], + ) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn check_node_id_ordering() { + // NOTE: `layer`s take precedence over item_ids + assert!(NodeId::item(0) == NodeId::item(0)); + assert!(NodeId::item(1) > NodeId::item(0)); + assert!(NodeId::item(0) < NodeId::item(1)); + + assert!(NodeId::links(0, 0) == NodeId::links(0, 0)); + assert!(NodeId::links(1, 0) > NodeId::links(0, 0)); + assert!(NodeId::links(0, 1) > NodeId::links(0, 0)); + assert!(NodeId::links(1, 0) > NodeId::links(0, 1)); + + assert!(NodeId::updated(0) == NodeId::updated(0)); + assert!(NodeId::updated(1) > NodeId::updated(0)); + assert!(NodeId::updated(0) < NodeId::updated(1)); + + assert!(NodeId::links(u32::MAX, 0) < NodeId::item(0)); + + assert!(NodeId::metadata() == NodeId::metadata()); + assert!(NodeId::metadata() < NodeId::links(u32::MIN, 0)); + assert!(NodeId::metadata() < NodeId::updated(u32::MIN)); + assert!(NodeId::metadata() < NodeId::item(u32::MIN)); + } +} diff --git a/helix-db/src/helix_engine/vector_core/ordered_float.rs b/helix-db/src/helix_engine/vector_core/ordered_float.rs new file mode 100644 index 00000000..641094dc --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/ordered_float.rs @@ -0,0 +1,47 @@ +/// A wrapper type around f32s implementing `Ord` +/// +/// Since distance metrics satisfy d(x,x)=0 and d(x,y)>0 for x!=y we don't need to operate on the +/// full range of f32's. Comparing the u32 representation of a non-negative f32 should suffice and +/// is actually a lot quicker. +/// +/// https://en.wikipedia.org/wiki/IEEE_754-1985#NaN +#[derive(Default, Debug, Clone, Copy)] +pub struct OrderedFloat(pub f32); + +impl PartialEq for OrderedFloat { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits().eq(&other.0.to_bits()) + } +} + +impl Eq for OrderedFloat {} + +impl PartialOrd for OrderedFloat { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for OrderedFloat { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.to_bits().cmp(&other.0.to_bits()) + } +} + +#[cfg(test)] +mod tests { + use proptest::prelude::*; + + use crate::helix_engine::vector_core::ordered_float::OrderedFloat; + + proptest! 
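+    // For non-negative IEEE-754 floats the bit patterns increase
+    // monotonically with the value, so comparing `to_bits()` agrees with
+    // numeric order; the property below exercises pairs in [0.0, f32::MAX].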
{ + #[test] + fn ordering_makes_sense( + (upper, lower) in (0.0f32..=f32::MAX).prop_flat_map(|u|{ + (Just(u), 0.0f32..=u) + }) + ){ + assert!(OrderedFloat(upper) > OrderedFloat(lower)); + } + } +} diff --git a/helix-db/src/helix_engine/vector_core/parallel.rs b/helix-db/src/helix_engine/vector_core/parallel.rs new file mode 100644 index 00000000..a12d63a6 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/parallel.rs @@ -0,0 +1,172 @@ +use core::slice; +use std::borrow::Cow; +use std::marker; + +use hashbrown::HashMap; +use heed3::types::Bytes; +use heed3::{BytesDecode, RoTxn}; +use roaring::RoaringBitmap; +use rustc_hash::FxBuildHasher; + +use crate::helix_engine::vector_core::distance::Distance; +use crate::helix_engine::vector_core::key::{KeyCodec, Prefix, PrefixCodec}; +use crate::helix_engine::vector_core::node::{Item, Links, Node, NodeCodec}; +use crate::helix_engine::vector_core::{CoreDatabase, ItemId, LayerId, LmdbResult}; + +/// A struture used to keep a list of the item nodes in the graph. +/// +/// It is safe to share between threads as the pointer are pointing +/// in the mmapped file and the transaction is kept here and therefore +/// no longer touches the database. +pub struct ImmutableItems<'t, D> { + items: HashMap, + constant_length: Option, + _marker: marker::PhantomData<(&'t (), D)>, +} + +// NOTE: this previously took an arg `items: &RoaringBitmap` which corresponded to the `to_insert`. +// When building the hnsw in multiple dumps we need vecs from previous dumps in order to "glue" +// things together. +// To accomodate this we use a cursor over ALL Key::items in the db. +impl<'t, D: Distance> ImmutableItems<'t, D> { + /// Creates the structure by fetching all the item vector pointers + /// and keeping the transaction making the pointers valid. + /// Do not take more items than memory allows. + /// Remove from the list of candidates all the items that were selected and return them. + pub fn new(rtxn: &'t RoTxn, database: CoreDatabase, index: u16) -> LmdbResult { + let mut map = + HashMap::with_capacity_and_hasher(database.len(rtxn)? as usize, FxBuildHasher); + let mut constant_length = None; + + let cursor = database + .remap_types::() + .prefix_iter(rtxn, &Prefix::item(index))? + .remap_key_type::(); + + for res in cursor { + let (item_id, bytes) = res?; + assert_eq!(*constant_length.get_or_insert(bytes.len()), bytes.len()); + let ptr = bytes.as_ptr(); + map.insert(item_id.node.item, ptr); + } + + Ok(ImmutableItems { + items: map, + constant_length, + _marker: marker::PhantomData, + }) + } + + /// Returns the items identified by the given ID. + pub fn get(&self, item_id: ItemId) -> LmdbResult>> { + let len = match self.constant_length { + Some(len) => len, + None => return Ok(None), + }; + let ptr = match self.items.get(&item_id) { + Some(ptr) => *ptr, + None => return Ok(None), + }; + + // safety: + // - ptr: The pointer comes from LMDB. Since the database cannot be written to, it is still valid. + // - len: All the items share the same dimensions and are the same size + let bytes = unsafe { slice::from_raw_parts(ptr, len) }; + NodeCodec::bytes_decode(bytes) + .map_err(heed3::Error::Decoding) + .map(|node| node.item()) + } +} + +unsafe impl Sync for ImmutableItems<'_, D> {} + +/// A struture used to keep a list of all the links. +/// It is safe to share between threads as the pointers are pointing +/// in the mmapped file and the transaction is kept here and therefore +/// no longer touches the database. 
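+//
+// Safety sketch: every stored pointer targets LMDB's mmap, and the `'t`
+// lifetime ties the structure to the read transaction, so the pages cannot
+// be reclaimed or remapped while a lookup dereferences them.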
+pub struct ImmutableLinks<'t, D> { + links: HashMap<(u32, u8), (usize, *const u8), FxBuildHasher>, + _marker: marker::PhantomData<(&'t (), D)>, +} + +impl<'t, D: Distance> ImmutableLinks<'t, D> { + /// Creates the structure by fetching all the root pointers + /// and keeping the transaction making the pointers valid. + pub fn new( + rtxn: &'t RoTxn, + database: CoreDatabase, + index: u16, + nb_links: u64, + ) -> LmdbResult { + let mut links = HashMap::with_capacity_and_hasher(nb_links as usize, FxBuildHasher); + + let iter = database + .remap_types::() + .prefix_iter(rtxn, &Prefix::links(index))? + .remap_key_type::(); + + for result in iter { + let (key, bytes) = result?; + let links_id = key.node.unwrap_node(); + links.insert(links_id, (bytes.len(), bytes.as_ptr())); + } + + Ok(ImmutableLinks { + links, + _marker: marker::PhantomData, + }) + } + + /// Returns the node identified by the given ID. + pub fn get(&self, item_id: ItemId, level: LayerId) -> LmdbResult>> { + let key = (item_id, level); + let (ptr, len) = match self.links.get(&key) { + Some((len, ptr)) => (*ptr, *len), + None => return Ok(None), + }; + + // safety: + // - ptr: The pointer comes from LMDB. Since the database cannot be written to, it is still valid. + // - len: The len cannot change either + let bytes = unsafe { slice::from_raw_parts(ptr, len) }; + NodeCodec::bytes_decode(bytes) + .map_err(heed3::Error::Decoding) + .map(|node: Node<'t, D>| node.links()) + } + + pub fn iter(&self) -> impl Iterator)>> { + self.links.keys().map(|&k| { + let (item_id, level) = k; + match self.get(item_id, level) { + Ok(Some(Links { links })) => Ok((k, links)), + Ok(None) => { + unreachable!("link at level {level} with item_id {item_id} not found") + } + Err(e) => Err(e), + } + }) + } + + /// `Iter`s only over links in a given level + pub fn iter_layer( + &self, + layer: u8, + ) -> impl Iterator)>> { + self.links.keys().filter_map(move |&k| { + let (item_id, level) = k; + if level != layer { + return None; + } + + match self.get(item_id, level) { + Ok(Some(Links { links })) => Some(Ok((k, links))), + Ok(None) => { + unreachable!("link at level {level} with item_id {item_id} not found") + } + Err(e) => Some(Err(e)), + } + }) + } +} + +unsafe impl Sync for ImmutableLinks<'_, D> {} diff --git a/helix-db/src/helix_engine/vector_core/reader.rs b/helix-db/src/helix_engine/vector_core/reader.rs new file mode 100644 index 00000000..9da913d5 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/reader.rs @@ -0,0 +1,752 @@ +use std::cmp::Reverse; +use std::collections::BinaryHeap; +use std::marker; +use std::num::NonZeroUsize; + +use bumpalo::collections::CollectIn; +use heed3::RoTxn; +use heed3::types::DecodeIgnore; +use min_max_heap::MinMaxHeap; +use roaring::RoaringBitmap; + +use crate::helix_engine::vector_core::VectorCoreResult; +use crate::helix_engine::vector_core::VectorError; +use crate::helix_engine::vector_core::distance::Distance; +use crate::helix_engine::vector_core::distance::DistanceValue; +use crate::helix_engine::vector_core::hnsw::ScoredLink; +use crate::helix_engine::vector_core::item_iter::ItemIter; +use crate::helix_engine::vector_core::key::{Key, KeyCodec, Prefix, PrefixCodec}; +use crate::helix_engine::vector_core::metadata::{Metadata, MetadataCodec}; +use crate::helix_engine::vector_core::node::Node; +use crate::helix_engine::vector_core::node::{Item, Links}; +use crate::helix_engine::vector_core::ordered_float::OrderedFloat; +use crate::helix_engine::vector_core::unaligned_vector::UnalignedVector; +use 
crate::helix_engine::vector_core::version::{Version, VersionCodec}; +use crate::helix_engine::vector_core::{CoreDatabase, ItemId}; + +#[cfg(not(windows))] +use { + crate::helix_engine::vector_core::unaligned_vector::VectorCodec, heed3::types::Bytes, + tracing::warn, +}; + +/// A good default value for the `ef` parameter. +const DEFAULT_EF_SEARCH: usize = 100; + +#[cfg(not(windows))] +const READER_AVAILABLE_MEMORY: &str = "HANNOY_READER_PREFETCH_MEMORY"; + +#[cfg(not(test))] +/// The threshold at which linear search is used instead of the HNSW algorithm. +const LINEAR_SEARCH_THRESHOLD: u64 = 1000; +#[cfg(test)] +/// Note that for tests purposes, we use set this threshold +/// to zero to make sure we test the HNSW algorithm. +const LINEAR_SEARCH_THRESHOLD: u64 = 0; + +/// Container storing nearest neighbour search result +#[derive(Debug)] +pub struct Searched<'arena> { + /// The nearest neighbours for the performed query + pub nns: bumpalo::collections::Vec<'arena, (ItemId, f32)>, +} + +impl<'arena> Searched<'arena> { + pub(crate) fn new(nns: bumpalo::collections::Vec<'arena, (ItemId, f32)>) -> Self { + Searched { nns } + } + + /// Consumes `self` and returns vector of nearest neighbours + pub fn into_nns(self) -> bumpalo::collections::Vec<'arena, (ItemId, f32)> { + self.nns + } +} + +/// Options used to make a query against an hannoy [`Reader`]. +pub struct QueryBuilder<'a, D: Distance> { + reader: &'a Reader, + candidates: Option<&'a RoaringBitmap>, + count: usize, + ef: usize, +} + +impl<'a, D: Distance> QueryBuilder<'a, D> { + pub fn by_item<'arena>( + &self, + rtxn: &RoTxn, + item: ItemId, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + let res = self + .reader + .nns_by_item(rtxn, item, self, arena)? + .map(Searched::new); + Ok(res) + } + + pub fn by_vector<'arena>( + &self, + rtxn: &RoTxn, + vector: &'a [f32], + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + if vector.len() != self.reader.dimensions() { + return Err(VectorError::InvalidVecDimension { + expected: self.reader.dimensions(), + received: vector.len(), + }); + } + + let vector = UnalignedVector::from_slice(vector); + let item = Item { + header: D::new_header(&vector), + vector, + }; + + let neighbours = self.reader.nns_by_vec(rtxn, &item, self, arena)?; + + Ok(Searched::new(neighbours)) + } + + /// Specify a subset of candidates to inspect. Filters out everything else. + /// + /// # Examples + /// + /// ```no_run + /// # use hannoy::{Reader, distances::Euclidean}; + /// # let (reader, rtxn): (Reader, heed::RoTxn) = todo!(); + /// let candidates = roaring::RoaringBitmap::from_iter([1, 3, 4, 5, 6, 7, 8, 9, 15, 16]); + /// reader.nns(20).candidates(&candidates).by_item(&rtxn, 6); + /// ``` + pub fn candidates(&mut self, candidates: &'a RoaringBitmap) -> &mut Self { + self.candidates = Some(candidates); + self + } + + /// Specify a search buffer size from which the closest elements are returned. Increasing this + /// value improves the search relevancy but increases latency as more neighbours need to be + /// searched. + /// In an ideal graph `ef`=`count` would suffice. 
+ /// + /// # Examples + /// + /// ```no_run + /// # use hannoy::{Reader, distances::Euclidean}; + /// # let (reader, rtxn): (Reader, heed::RoTxn) = todo!(); + /// reader.nns(20).ef_search(21).by_item(&rtxn, 6); + /// ``` + pub fn ef_search(&mut self, ef: usize) -> &mut Self { + self.ef = ef.max(self.count); + self + } +} + +struct Visitor<'a> { + pub eps: Vec, + pub level: usize, + pub ef: usize, + pub candidates: Option<&'a RoaringBitmap>, +} +impl<'a> Visitor<'a> { + pub fn new( + eps: Vec, + level: usize, + ef: usize, + candidates: Option<&'a RoaringBitmap>, + ) -> Self { + Self { + eps, + level, + ef, + candidates, + } + } + + /// Iteratively traverse a given level of the HNSW graph, updating the search path history. + /// Returns a Min-Max heap of size ef nearest neighbours to the query in that layer. + #[allow(clippy::too_many_arguments)] + pub fn visit( + &self, + query: &Item, + reader: &Reader, + rtxn: &RoTxn, + path: &mut RoaringBitmap, + ) -> VectorCoreResult> { + let mut search_queue = BinaryHeap::new(); + let mut res = MinMaxHeap::with_capacity(self.ef); + + // Register all entry points as visited and populate candidates + for &ep in &self.eps[..] { + let ve = get_item(reader.database, reader.index, rtxn, ep)?.unwrap(); + let dist = D::distance(query, &ve); + + search_queue.push((Reverse(OrderedFloat(dist)), ep)); + path.insert(ep); + + if self.candidates.is_none_or(|c| c.contains(ep)) { + res.push((OrderedFloat(dist), ep)); + } + } + + // Stop occurs either once we've done at least ef searches and notice no improvements, or + // when we've exhausted the search queue. + while let Some(&(Reverse(OrderedFloat(f)), _)) = search_queue.peek() { + let f_max = res + .peek_max() + .map(|&(OrderedFloat(d), _)| d) + .unwrap_or(f32::MAX); + if f > f_max { + break; + } + let (_, c) = search_queue.pop().unwrap(); + + let Links { links } = get_links(rtxn, reader.database, reader.index, c, self.level)? + .expect("Links must exist"); + + for point in links.iter() { + if !path.insert(point) { + continue; + } + let dist = D::distance( + query, + &get_item(reader.database, reader.index, rtxn, point)?.unwrap(), + ); + + // The search queue can take points that aren't included in the (optional) + // candidates bitmap, but the final result must *not* include them. + if res.len() < self.ef || dist < f_max { + search_queue.push((Reverse(OrderedFloat(dist)), point)); + if let Some(c) = self.candidates + && !c.contains(point) + { + continue; + } + if res.len() == self.ef { + let _ = res.push_pop_max((OrderedFloat(dist), point)); + } else { + res.push((OrderedFloat(dist), point)); + } + } + } + } + + Ok(res) + } +} + +/// A reader over the hannoy hnsw graph +#[derive(Debug)] +pub struct Reader { + pub(crate) database: CoreDatabase, + pub(crate) index: u16, + entry_points: Vec, + max_level: usize, + dimensions: usize, + items: RoaringBitmap, + version: Version, + _marker: marker::PhantomData, +} + +impl Reader { + /// Returns a reader over the database with the specified [`Distance`] type. + pub fn open( + rtxn: &RoTxn, + index: u16, + database: CoreDatabase, + ) -> VectorCoreResult> { + let metadata_key = Key::metadata(index); + + let metadata = match database + .remap_data_type::() + .get(rtxn, &metadata_key)? + { + Some(metadata) => metadata, + None => return Err(VectorError::MissingMetadata(index)), + }; + let version = match database + .remap_data_type::() + .get(rtxn, &Key::version(index))? 
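+        // Indexes written before versioning was introduced carry no
+        // version entry; they decode as 0.0.0 below.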
+ { + Some(version) => version, + None => Version { + major: 0, + minor: 0, + patch: 0, + }, + }; + + if D::name() != metadata.distance { + return Err(VectorError::UnmatchingDistance { + expected: metadata.distance.to_owned(), + received: D::name(), + }); + } + + // check if we need to rebuild + if database + .remap_types::() + .prefix_iter(rtxn, &Prefix::updated(index))? + .remap_key_type::() + .next() + .is_some() + { + return Err(VectorError::NeedBuild(index)); + } + + // Hint to the kernel that we'll probably need some vectors in RAM. + Self::prefetch_graph(rtxn, &database, index, &metadata)?; + + Ok(Reader { + database: database.remap_data_type(), + index, + entry_points: Vec::from_iter(metadata.entry_points.iter()), + max_level: metadata.max_level as usize, + dimensions: metadata.dimensions.try_into().unwrap(), + items: metadata.items, + version, + _marker: marker::PhantomData, + }) + } + + #[cfg(windows)] + fn prefetch_graph( + _rtxn: &RoTxn, + _database: &CoreDatabase, + _index: u16, + _metadata: &Metadata, + ) -> VectorCoreResult<()> { + // madvise crate does not support windows. + Ok(()) + } + + /// Instructs kernel to fetch nodes based on a fixed memory budget. It's OK for this operation + /// to fail, it's not integral for search to work. + #[cfg(not(windows))] + fn prefetch_graph( + rtxn: &RoTxn, + database: &CoreDatabase, + index: u16, + metadata: &Metadata, + ) -> VectorCoreResult<()> { + use std::{collections::VecDeque, sync::atomic::AtomicUsize}; + + let page_size = page_size::get(); + let mut available_memory: usize = std::env::var(READER_AVAILABLE_MEMORY) + .ok() + .and_then(|num| num.parse::().ok()) + .unwrap_or(0); + + if available_memory < page_size { + return Ok(()); + } + + let largest_alloc = AtomicUsize::new(0); + + // adjusted length in memory of a vector + let item_length = (metadata.dimensions as usize).div_ceil(::word_size()); + + let madvise_page = |item: &[u8]| -> VectorCoreResult { + use std::sync::atomic::Ordering; + + let start_ptr = item.as_ptr() as usize; + let end_ptr = start_ptr + item_length; + let start_page = start_ptr - (start_ptr % page_size); + let end_page = end_ptr + ((end_ptr + page_size - 1) % page_size); + let advised_size = end_page - start_page; + + unsafe { + use madvise::AccessPattern; + + madvise::madvise( + start_page as *const u8, + advised_size, + AccessPattern::WillNeed, + )?; + } + + largest_alloc.fetch_max(advised_size, Ordering::Relaxed); + Ok(advised_size) + }; + + // Load links and vectors for layers > 0. + let mut added = RoaringBitmap::new(); + for lvl in (1..=metadata.max_level).rev() { + use heed3::types::Bytes; + + for result in database.remap_data_type::().iter(rtxn)? { + use std::sync::atomic::Ordering; + + if available_memory < largest_alloc.load(Ordering::Relaxed) { + return Ok(()); + } + let (key, item) = result?; + if key.node.layer != lvl { + continue; + } + match madvise_page(item) { + Ok(usage) => available_memory -= usage, + Err(e) => { + use tracing::warn; + + warn!(e=?e); + return Ok(()); + } + } + added.insert(key.node.item); + } + } + + // If we still have memory left over try fetching other nodes in layer zero. + let mut queue = VecDeque::from_iter(added.iter()); + while let Some(item) = queue.pop_front() { + use std::sync::atomic::Ordering; + + use crate::helix_engine::vector_core::node::Node; + + if available_memory < largest_alloc.load(Ordering::Relaxed) { + return Ok(()); + } + if let Some(Node::Links(links)) = database.get(rtxn, &Key::links(index, item, 0))? 
{ + for l in links.iter() { + if !added.insert(l) { + continue; + } + if let Some(bytes) = database + .remap_data_type::() + .get(rtxn, &Key::item(index, l))? + { + match madvise_page(bytes) { + Ok(usage) => available_memory -= usage, + Err(e) => { + warn!(e=?e); + return Ok(()); + } + } + queue.push_back(l); + } + } + } + } + + Ok(()) + } + + /// Returns the number of dimensions in the index. + pub fn dimensions(&self) -> usize { + self.dimensions + } + + /// Returns the number of entry points to the hnsw index. + pub fn n_entrypoints(&self) -> usize { + self.entry_points.len() + } + + /// Returns the number of vectors stored in the index. + pub fn n_items(&self) -> u64 { + self.items.len() + } + + /// Returns all the item ids contained in this index. + pub fn item_ids(&self) -> &RoaringBitmap { + &self.items + } + + /// Returns the index of this reader in the database. + pub fn index(&self) -> u16 { + self.index + } + + /// Returns the version of the database. + pub fn version(&self) -> Version { + self.version + } + + /// Returns the number of nodes in the index. Useful to run an exhaustive search. + pub fn n_nodes(&self, rtxn: &RoTxn) -> VectorCoreResult> { + Ok(NonZeroUsize::new(self.database.len(rtxn)? as usize)) + } + + /// Returns the vector for item `i` that was previously added. + pub fn item_vector<'arena>( + &self, + rtxn: &RoTxn, + item_id: ItemId, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + Ok( + get_item(self.database, self.index, rtxn, item_id)?.map(|item| { + let mut vec = item.vector.to_vec(arena); + vec.truncate(self.dimensions()); + vec + }), + ) + } + + /// Returns `true` if the index is empty. + pub fn is_empty(&self, rtxn: &RoTxn) -> VectorCoreResult { + self.iter(rtxn).map(|mut iter| iter.next().is_none()) + } + + /// Returns `true` if the database contains the given item. + pub fn contains_item(&self, rtxn: &RoTxn, item_id: ItemId) -> VectorCoreResult { + self.database + .remap_data_type::() + .get(rtxn, &Key::item(self.index, item_id)) + .map(|opt| opt.is_some()) + .map_err(Into::into) + } + + /// Returns an iterator over the items vector. + pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> VectorCoreResult> { + ItemIter::new(self.database, self.index, self.dimensions, rtxn).map_err(Into::into) + } + + /// Return a [`QueryBuilder`] that lets you configure and execute a search request. + /// + /// You must provide the number of items you want to receive. + pub fn nns(&self, count: usize) -> QueryBuilder<'_, D> { + QueryBuilder { + reader: self, + candidates: None, + count, + ef: DEFAULT_EF_SEARCH, + } + } + + fn nns_by_vec<'arena>( + &self, + rtxn: &RoTxn, + query: &Item, + opt: &QueryBuilder, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + // If we will never find any candidates, return an empty vector + if opt + .candidates + .is_some_and(|c| self.item_ids().is_disjoint(c)) + { + return Ok(bumpalo::collections::Vec::new_in(arena)); + } + + // If the number of candidates is less than a given threshold, perform linear search + if let Some(candidates) = opt.candidates.filter(|c| c.len() < LINEAR_SEARCH_THRESHOLD) { + return self.brute_force_search(query, rtxn, candidates, opt.count, arena); + } + + // exhaustive search + self.hnsw_search(query, rtxn, opt, arena) + } + + /// Directly retrieves items in the candidate list and ranks them by distance to the query. 
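+    /// Cost is one distance evaluation per candidate, so this path beats graph
+    /// traversal whenever the candidate set is small (see
+    /// `LINEAR_SEARCH_THRESHOLD` in `nns_by_vec`).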
+ fn brute_force_search<'arena>( + &self, + query: &Item, + rtxn: &RoTxn, + candidates: &RoaringBitmap, + count: usize, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + let mut item_distances = + bumpalo::collections::Vec::with_capacity_in(candidates.len() as usize, arena); + + for item_id in candidates { + let Some(vector) = self.item_vector(rtxn, item_id, arena)? else { + continue; + }; + let vector = UnalignedVector::from_vec(vector); + let item = Item { + header: D::new_header(&vector), + vector, + }; + let distance = D::distance(&item, query); + item_distances.push((item_id, distance)); + } + item_distances.sort_by_key(|(_, dist)| OrderedFloat(*dist)); + item_distances.truncate(count); + + Ok(item_distances) + } + + /// Hnsw search according to arXiv:1603.09320. + /// + /// We perform greedy beam search from the top layer to the bottom, where the search frontier + /// is controlled by `opt.ef`. Since the graph is not necessarily acyclic, search may become + /// "trapped" in a local sub-graph with fewer elements than `opt.count` - to account for this + /// we run an expensive exhaustive search at the end if fewer nns were returned. + /// + /// To break out of search early, users may wish to provide a `cancel_fn` which terminates the + /// execution of the hnsw search and returns partial results so far. + fn hnsw_search<'arena>( + &self, + query: &Item, + rtxn: &RoTxn, + opt: &QueryBuilder, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult> { + let mut visitor = Visitor::new(self.entry_points.clone(), self.max_level, 1, None); + + let mut path = RoaringBitmap::new(); + for _ in (1..=self.max_level).rev() { + let neighbours = visitor.visit(query, self, rtxn, &mut path)?; + let closest = neighbours + .peek_min() + .map(|(_, n)| n) + .expect("No neighbor was found"); + + visitor.eps = vec![*closest]; + visitor.level -= 1; + } + + // clear visited set as we only care about level 0 + path.clear(); + debug_assert!(visitor.level == 0); + + visitor.ef = opt.ef.max(opt.count); + visitor.candidates = opt.candidates; + + let mut neighbours = visitor.visit(query, self, rtxn, &mut path)?; + + // If we still don't have enough nns (e.g. search encountered cyclic subgraphs) then do exhaustive + // search over remaining unseen items. + if neighbours.len() < opt.count { + let mut cursor = self + .database + .remap_types::() + .prefix_iter(rtxn, &Prefix::item(self.index))? + .remap_key_type::(); + + while let Some((key, _)) = cursor.next().transpose()? { + let id = key.node.item; + if path.contains(id) { + continue; + } + + visitor.eps = vec![id]; + visitor.ef = opt.count - neighbours.len(); + + let more_nns = visitor.visit(query, self, rtxn, &mut path)?; + + neighbours.extend(more_nns.into_iter()); + if neighbours.len() >= opt.count { + break; + } + } + } + + Ok(neighbours + .drain_asc() + .map(|(OrderedFloat(f), i)| (i, f)) + .take(opt.count) + .collect_in(arena)) + } + + /// Returns the nearest points to the item id, not including the point itself. + /// + /// Nearly identical behaviour to `Reader.nns_by_vec` except we only search layer 0 and use the + /// `&[item]` instead of the hnsw entrypoints. Since search starts in the true neighbourhood of + /// the item fewer comparisons are needed to retrieve the nearest neighbours, making it more + /// efficient than simply calling `Reader.nns_by_vec` with the associated vector. 
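+    /// Reached through [`QueryBuilder::by_item`], e.g.
+    /// `reader.nns(10).by_item(&rtxn, item_id, &arena)`; returns `None` when
+    /// `item` is not present in the index.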
+ #[allow(clippy::type_complexity)] + fn nns_by_item<'arena>( + &self, + rtxn: &RoTxn, + item: ItemId, + opt: &QueryBuilder, + arena: &'arena bumpalo::Bump, + ) -> VectorCoreResult>> { + // If we will never find any candidates, return none + if opt + .candidates + .is_some_and(|c| self.item_ids().is_disjoint(c)) + { + return Ok(None); + } + + let Some(vector) = self.item_vector(rtxn, item, arena)? else { + return Ok(None); + }; + let vector = UnalignedVector::from_vec(vector); + let query = Item { + header: D::new_header(&vector), + vector, + }; + + // If the number of candidates is less than a given threshold, perform linear search + if let Some(candidates) = opt.candidates.filter(|c| c.len() < LINEAR_SEARCH_THRESHOLD) { + let nns = self.brute_force_search(&query, rtxn, candidates, opt.count, arena)?; + return Ok(Some(nns)); + } + + // Search over all items except `item` + let ef = opt.ef.max(opt.count); + let mut path = RoaringBitmap::new(); + let mut candidates = opt.candidates.unwrap_or_else(|| self.item_ids()).clone(); + candidates.remove(item); + + let mut visitor = Visitor::new(vec![item], 0, ef, Some(&candidates)); + + let mut neighbours = visitor.visit(&query, self, rtxn, &mut path)?; + + // If we still don't have enough nns (e.g. search encountered cyclic subgraphs) then do exhaustive + // search over remaining unseen items. + if neighbours.len() < opt.count { + let mut cursor = self + .database + .remap_types::() + .prefix_iter(rtxn, &Prefix::item(self.index))? + .remap_key_type::(); + + while let Some((key, _)) = cursor.next().transpose()? { + let id = key.node.item; + if path.contains(id) { + continue; + } + + // update walker + visitor.eps = vec![id]; + visitor.ef = opt.count - neighbours.len(); + + let more_nns = visitor.visit(&query, self, rtxn, &mut path)?; + neighbours.extend(more_nns.into_iter()); + if neighbours.len() >= opt.count { + break; + } + } + } + + Ok(Some( + neighbours + .drain_asc() + .map(|(OrderedFloat(f), i)| (i, f)) + .take(opt.count) + .collect_in(arena), + )) + } +} + +pub fn get_item<'txn, D: Distance>( + database: CoreDatabase, + index: u16, + rtxn: &'txn RoTxn, + item: ItemId, +) -> VectorCoreResult>> { + match database.get(rtxn, &Key::item(index, item))? { + Some(Node::Item(item)) => Ok(Some(item)), + Some(Node::Links(_)) => Ok(None), + None => Ok(None), + } +} + +pub fn get_links<'a, D: Distance>( + rtxn: &'a RoTxn, + database: CoreDatabase, + index: u16, + item_id: ItemId, + level: usize, +) -> VectorCoreResult>> { + match database.get(rtxn, &Key::links(index, item_id, level as u8))? 
{ + Some(Node::Links(links)) => Ok(Some(links)), + Some(Node::Item(_)) => Ok(None), + None => Ok(None), + } +} diff --git a/helix-db/src/helix_engine/vector_core/spaces/mod.rs b/helix-db/src/helix_engine/vector_core/spaces/mod.rs new file mode 100644 index 00000000..15009da1 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/spaces/mod.rs @@ -0,0 +1,10 @@ +pub mod simple; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod simple_sse; + +#[cfg(target_arch = "x86_64")] +mod simple_avx; + +#[cfg(target_arch = "aarch64")] +mod simple_neon; diff --git a/helix-db/src/helix_engine/vector_core/spaces/simple.rs b/helix-db/src/helix_engine/vector_core/spaces/simple.rs new file mode 100644 index 00000000..98c78e21 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/spaces/simple.rs @@ -0,0 +1,84 @@ +use crate::helix_engine::vector_core::unaligned_vector::UnalignedVector; + +#[cfg(target_arch = "x86_64")] +use super::simple_avx::*; +#[cfg(all(target_arch = "aarch64", target_feature = "neon"))] +use super::simple_neon::*; +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use super::simple_sse::*; + +#[cfg(target_arch = "x86_64")] +const MIN_DIM_SIZE_AVX: usize = 32; + +#[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + all(target_arch = "aarch64", target_feature = "neon") +))] +const MIN_DIM_SIZE_SIMD: usize = 16; + +pub fn euclidean_distance(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && u.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { euclid_similarity_avx(u, v) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && u.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { euclid_similarity_sse(u, v) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { euclid_similarity_neon(u, v) }; + } + } + + euclidean_distance_non_optimized(u, v) +} + +// Don't use dot-product: avoid catastrophic cancellation in +// https://github.com/spotify/annoy/issues/314. 
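+// The scalar fallback below computes the *squared* L2 distance
+// sum_i (u_i - v_i)^2; e.g. u = [1.0, 2.0] and v = [4.0, 6.0] give
+// 3^2 + 4^2 = 25.0. The SIMD variants accumulate the same sum, and the
+// tests in each SIMD module assert exact equality against this fallback.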
+pub fn euclidean_distance_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + u.iter().zip(v.iter()).map(|(u, v)| (u - v) * (u - v)).sum() +} + +pub fn dot_product(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + #[cfg(target_arch = "x86_64")] + { + if is_x86_feature_detected!("avx") + && is_x86_feature_detected!("fma") + && u.len() >= MIN_DIM_SIZE_AVX + { + return unsafe { dot_similarity_avx(u, v) }; + } + } + + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + { + if is_x86_feature_detected!("sse") && u.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { dot_similarity_sse(u, v) }; + } + } + + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + { + if std::arch::is_aarch64_feature_detected!("neon") && u.len() >= MIN_DIM_SIZE_SIMD { + return unsafe { dot_similarity_neon(u, v) }; + } + } + + dot_product_non_optimized(u, v) +} + +pub fn dot_product_non_optimized(u: &UnalignedVector, v: &UnalignedVector) -> f32 { + u.iter().zip(v.iter()).map(|(a, b)| a * b).sum() +} diff --git a/helix-db/src/helix_engine/vector_core/spaces/simple_avx.rs b/helix-db/src/helix_engine/vector_core/spaces/simple_avx.rs new file mode 100644 index 00000000..ee18236a --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/spaces/simple_avx.rs @@ -0,0 +1,168 @@ +use std::arch::x86_64::*; +use std::ptr::read_unaligned; + +use crate::helix_engine::vector_core::unaligned_vector::UnalignedVector; + +#[target_feature(enable = "avx")] +#[target_feature(enable = "fma")] +unsafe fn hsum256_ps_avx(x: __m256) -> f32 { + let x128: __m128 = _mm_add_ps(_mm256_extractf128_ps(x, 1), _mm256_castps256_ps128(x)); + let x64: __m128 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + _mm_cvtss_f32(x32) +} + +#[target_feature(enable = "avx")] +#[target_feature(enable = "fma")] +pub(crate) unsafe fn euclid_similarity_avx( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { + unsafe { + // It is safe to load unaligned floats from a pointer. 
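+        // The main loop consumes 32 floats per iteration using four
+        // independent 256-bit FMA accumulators to hide instruction latency;
+        // the scalar tail below handles the remaining n % 32 elements with
+        // `read_unaligned`.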
+ // + + let n = v1.len(); + let m = n - (n % 32); + let mut ptr1 = v1.as_ptr() as *const f32; + let mut ptr2 = v2.as_ptr() as *const f32; + let mut sum256_1: __m256 = _mm256_setzero_ps(); + let mut sum256_2: __m256 = _mm256_setzero_ps(); + let mut sum256_3: __m256 = _mm256_setzero_ps(); + let mut sum256_4: __m256 = _mm256_setzero_ps(); + let mut i: usize = 0; + while i < m { + let sub256_1: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(0)), _mm256_loadu_ps(ptr2.add(0))); + sum256_1 = _mm256_fmadd_ps(sub256_1, sub256_1, sum256_1); + + let sub256_2: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(8)), _mm256_loadu_ps(ptr2.add(8))); + sum256_2 = _mm256_fmadd_ps(sub256_2, sub256_2, sum256_2); + + let sub256_3: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(16)), _mm256_loadu_ps(ptr2.add(16))); + sum256_3 = _mm256_fmadd_ps(sub256_3, sub256_3, sum256_3); + + let sub256_4: __m256 = + _mm256_sub_ps(_mm256_loadu_ps(ptr1.add(24)), _mm256_loadu_ps(ptr2.add(24))); + sum256_4 = _mm256_fmadd_ps(sub256_4, sub256_4, sum256_4); + + ptr1 = ptr1.add(32); + ptr2 = ptr2.add(32); + i += 32; + } + + let mut result = hsum256_ps_avx(sum256_1) + + hsum256_ps_avx(sum256_2) + + hsum256_ps_avx(sum256_3) + + hsum256_ps_avx(sum256_4); + for i in 0..n - m { + let a = read_unaligned(ptr1.add(i)); + let b = read_unaligned(ptr2.add(i)); + result += (a - b).powi(2); + } + result + } +} + +#[target_feature(enable = "avx")] +#[target_feature(enable = "fma")] +pub(crate) unsafe fn dot_similarity_avx( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { + unsafe { + // It is safe to load unaligned floats from a pointer. + // + + let n = v1.len(); + let m = n - (n % 32); + let mut ptr1 = v1.as_ptr() as *const f32; + let mut ptr2 = v2.as_ptr() as *const f32; + let mut sum256_1: __m256 = _mm256_setzero_ps(); + let mut sum256_2: __m256 = _mm256_setzero_ps(); + let mut sum256_3: __m256 = _mm256_setzero_ps(); + let mut sum256_4: __m256 = _mm256_setzero_ps(); + let mut i: usize = 0; + while i < m { + sum256_1 = _mm256_fmadd_ps(_mm256_loadu_ps(ptr1), _mm256_loadu_ps(ptr2), sum256_1); + sum256_2 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(8)), + _mm256_loadu_ps(ptr2.add(8)), + sum256_2, + ); + sum256_3 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(16)), + _mm256_loadu_ps(ptr2.add(16)), + sum256_3, + ); + sum256_4 = _mm256_fmadd_ps( + _mm256_loadu_ps(ptr1.add(24)), + _mm256_loadu_ps(ptr2.add(24)), + sum256_4, + ); + + ptr1 = ptr1.add(32); + ptr2 = ptr2.add(32); + i += 32; + } + + let mut result = hsum256_ps_avx(sum256_1) + + hsum256_ps_avx(sum256_2) + + hsum256_ps_avx(sum256_3) + + hsum256_ps_avx(sum256_4); + + for i in 0..n - m { + let a = read_unaligned(ptr1.add(i)); + let b = read_unaligned(ptr2.add(i)); + result += a * b; + } + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::helix_engine::vector_core::spaces::simple::{ + dot_product_non_optimized, euclidean_distance_non_optimized, + }; + + #[test] + fn test_spaces_avx() { + if is_x86_feature_detected!("avx") && is_x86_feature_detected!("fma") { + let v1: Vec = vec![ + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., + ]; + let v2: Vec = vec![ + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55., + 10., 11., 12., 
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 56., 57., 58., 59., 60., 61., + ]; + + let v1 = UnalignedVector::from_slice(&v1[..]); + let v2 = UnalignedVector::from_slice(&v2[..]); + + let euclid_simd = unsafe { euclid_similarity_avx(&v1, &v2) }; + let euclid = euclidean_distance_non_optimized(&v1, &v2); + assert_eq!(euclid_simd, euclid); + + let dot_simd = unsafe { dot_similarity_avx(&v1, &v2) }; + let dot = dot_product_non_optimized(&v1, &v2); + assert_eq!(dot_simd, dot); + + // let cosine_simd = unsafe { cosine_preprocess_avx(v1.clone()) }; + // let cosine = cosine_preprocess(v1); + // assert_eq!(cosine_simd, cosine); + } else { + println!("avx test skipped"); + } + } +} diff --git a/helix-db/src/helix_engine/vector_core/spaces/simple_neon.rs b/helix-db/src/helix_engine/vector_core/spaces/simple_neon.rs new file mode 100644 index 00000000..05665fa7 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/spaces/simple_neon.rs @@ -0,0 +1,160 @@ +#[cfg(target_feature = "neon")] +use crate::helix_engine::vector_core::unaligned_vector::UnalignedVector; +use std::arch::aarch64::*; +use std::ptr::read_unaligned; + +#[cfg(target_feature = "neon")] +pub(crate) unsafe fn euclid_similarity_neon( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { + unsafe { + // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types + // from potentially unaligned memory locations safely. + // https://github.com/meilisearch/arroy/pull/13 + + let n = v1.len(); + let m = n - (n % 16); + let mut ptr1 = v1.as_ptr() as *const f32; + let mut ptr2 = v2.as_ptr() as *const f32; + let mut sum1 = vdupq_n_f32(0.); + let mut sum2 = vdupq_n_f32(0.); + let mut sum3 = vdupq_n_f32(0.); + let mut sum4 = vdupq_n_f32(0.); + + let mut i: usize = 0; + while i < m { + let sub1 = vsubq_f32(unaligned_float32x4_t(ptr1), unaligned_float32x4_t(ptr2)); + sum1 = vfmaq_f32(sum1, sub1, sub1); + + let sub2 = vsubq_f32( + unaligned_float32x4_t(ptr1.add(4)), + unaligned_float32x4_t(ptr2.add(4)), + ); + sum2 = vfmaq_f32(sum2, sub2, sub2); + + let sub3 = vsubq_f32( + unaligned_float32x4_t(ptr1.add(8)), + unaligned_float32x4_t(ptr2.add(8)), + ); + sum3 = vfmaq_f32(sum3, sub3, sub3); + + let sub4 = vsubq_f32( + unaligned_float32x4_t(ptr1.add(12)), + unaligned_float32x4_t(ptr2.add(12)), + ); + sum4 = vfmaq_f32(sum4, sub4, sub4); + + ptr1 = ptr1.add(16); + ptr2 = ptr2.add(16); + i += 16; + } + let mut result = vaddvq_f32(sum1) + vaddvq_f32(sum2) + vaddvq_f32(sum3) + vaddvq_f32(sum4); + for i in 0..n - m { + let a = read_unaligned(ptr1.add(i)); + let b = read_unaligned(ptr2.add(i)); + result += (a - b).powi(2); + } + result + } +} + +#[cfg(target_feature = "neon")] +pub(crate) unsafe fn dot_similarity_neon( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { + unsafe { + // We use the unaligned_float32x4_t helper function to read f32x4 NEON SIMD types + // from potentially unaligned memory locations safely. 
+ // https://github.com/meilisearch/arroy/pull/13 + + let n = v1.len(); + let m = n - (n % 16); + let mut ptr1 = v1.as_ptr() as *const f32; + let mut ptr2 = v2.as_ptr() as *const f32; + let mut sum1 = vdupq_n_f32(0.); + let mut sum2 = vdupq_n_f32(0.); + let mut sum3 = vdupq_n_f32(0.); + let mut sum4 = vdupq_n_f32(0.); + + let mut i: usize = 0; + while i < m { + sum1 = vfmaq_f32( + sum1, + unaligned_float32x4_t(ptr1), + unaligned_float32x4_t(ptr2), + ); + sum2 = vfmaq_f32( + sum2, + unaligned_float32x4_t(ptr1.add(4)), + unaligned_float32x4_t(ptr2.add(4)), + ); + sum3 = vfmaq_f32( + sum3, + unaligned_float32x4_t(ptr1.add(8)), + unaligned_float32x4_t(ptr2.add(8)), + ); + sum4 = vfmaq_f32( + sum4, + unaligned_float32x4_t(ptr1.add(12)), + unaligned_float32x4_t(ptr2.add(12)), + ); + ptr1 = ptr1.add(16); + ptr2 = ptr2.add(16); + i += 16; + } + let mut result = vaddvq_f32(sum1) + vaddvq_f32(sum2) + vaddvq_f32(sum3) + vaddvq_f32(sum4); + for i in 0..n - m { + let a = read_unaligned(ptr1.add(i)); + let b = read_unaligned(ptr2.add(i)); + result += a * b; + } + result + } +} + +/// Reads 4xf32 in a stack-located array aligned on a f32 and reads a `float32x4_t` from it. +unsafe fn unaligned_float32x4_t(ptr: *const f32) -> float32x4_t { + unsafe { vld1q_f32(read_unaligned(ptr as *const [f32; 4]).as_ptr()) } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::helix_engine::vector_core::spaces::simple::{ + dot_product_non_optimized, euclidean_distance_non_optimized, + }; + + #[cfg(target_feature = "neon")] + #[test] + fn test_spaces_neon() { + if std::arch::is_aarch64_feature_detected!("neon") { + let v1: Vec = vec![ + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., + ]; + let v2: Vec = vec![ + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55., + 56., 57., 58., 59., 60., 61., + ]; + + let v1 = UnalignedVector::from_slice(&v1[..]); + let v2 = UnalignedVector::from_slice(&v2[..]); + + let euclid_simd = unsafe { euclid_similarity_neon(&v1, &v2) }; + let euclid = euclidean_distance_non_optimized(&v1, &v2); + assert_eq!(euclid_simd, euclid); + + let dot_simd = unsafe { dot_similarity_neon(&v1, &v2) }; + let dot = dot_product_non_optimized(&v1, &v2); + assert_eq!(dot_simd, dot); + + // let cosine_simd = unsafe { cosine_preprocess_neon(v1.clone()) }; + // let cosine = cosine_preprocess(v1); + // assert_eq!(cosine_simd, cosine); + } else { + println!("neon test skipped"); + } + } +} diff --git a/helix-db/src/helix_engine/vector_core/spaces/simple_sse.rs b/helix-db/src/helix_engine/vector_core/spaces/simple_sse.rs new file mode 100644 index 00000000..c24b4e26 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/spaces/simple_sse.rs @@ -0,0 +1,161 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +use std::ptr::read_unaligned; + +use crate::helix_engine::vector_core::unaligned_vector::UnalignedVector; + +#[target_feature(enable = "sse")] +unsafe fn hsum128_ps_sse(x: __m128) -> f32 { + let x64: __m128 = _mm_add_ps(x, _mm_movehl_ps(x, x)); + let x32: __m128 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); + _mm_cvtss_f32(x32) +} + +#[target_feature(enable = "sse")] +pub(crate) unsafe fn euclid_similarity_sse( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { + unsafe { + // It is safe to load unaligned floats from a pointer. 
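+        // SSE variant: 16 floats per iteration across four __m128
+        // accumulators using mul + add (FMA is not assumed at this feature
+        // level); the scalar tail covers the n % 16 remainder.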
+ // + + let n = v1.len(); + let m = n - (n % 16); + let mut ptr1 = v1.as_ptr() as *const f32; + let mut ptr2 = v2.as_ptr() as *const f32; + let mut sum128_1: __m128 = _mm_setzero_ps(); + let mut sum128_2: __m128 = _mm_setzero_ps(); + let mut sum128_3: __m128 = _mm_setzero_ps(); + let mut sum128_4: __m128 = _mm_setzero_ps(); + let mut i: usize = 0; + while i < m { + let sub128_1 = _mm_sub_ps(_mm_loadu_ps(ptr1), _mm_loadu_ps(ptr2)); + sum128_1 = _mm_add_ps(_mm_mul_ps(sub128_1, sub128_1), sum128_1); + + let sub128_2 = _mm_sub_ps(_mm_loadu_ps(ptr1.add(4)), _mm_loadu_ps(ptr2.add(4))); + sum128_2 = _mm_add_ps(_mm_mul_ps(sub128_2, sub128_2), sum128_2); + + let sub128_3 = _mm_sub_ps(_mm_loadu_ps(ptr1.add(8)), _mm_loadu_ps(ptr2.add(8))); + sum128_3 = _mm_add_ps(_mm_mul_ps(sub128_3, sub128_3), sum128_3); + + let sub128_4 = _mm_sub_ps(_mm_loadu_ps(ptr1.add(12)), _mm_loadu_ps(ptr2.add(12))); + sum128_4 = _mm_add_ps(_mm_mul_ps(sub128_4, sub128_4), sum128_4); + + ptr1 = ptr1.add(16); + ptr2 = ptr2.add(16); + i += 16; + } + + let mut result = hsum128_ps_sse(sum128_1) + + hsum128_ps_sse(sum128_2) + + hsum128_ps_sse(sum128_3) + + hsum128_ps_sse(sum128_4); + for i in 0..n - m { + let a = read_unaligned(ptr1.add(i)); + let b = read_unaligned(ptr2.add(i)); + result += (a - b).powi(2); + } + result + } +} + +#[target_feature(enable = "sse")] +pub(crate) unsafe fn dot_similarity_sse( + v1: &UnalignedVector, + v2: &UnalignedVector, +) -> f32 { + unsafe { + // It is safe to load unaligned floats from a pointer. + // + + let n = v1.len(); + let m = n - (n % 16); + let mut ptr1 = v1.as_ptr() as *const f32; + let mut ptr2 = v2.as_ptr() as *const f32; + let mut sum128_1: __m128 = _mm_setzero_ps(); + let mut sum128_2: __m128 = _mm_setzero_ps(); + let mut sum128_3: __m128 = _mm_setzero_ps(); + let mut sum128_4: __m128 = _mm_setzero_ps(); + + let mut i: usize = 0; + while i < m { + sum128_1 = _mm_add_ps(_mm_mul_ps(_mm_loadu_ps(ptr1), _mm_loadu_ps(ptr2)), sum128_1); + + sum128_2 = _mm_add_ps( + _mm_mul_ps(_mm_loadu_ps(ptr1.add(4)), _mm_loadu_ps(ptr2.add(4))), + sum128_2, + ); + + sum128_3 = _mm_add_ps( + _mm_mul_ps(_mm_loadu_ps(ptr1.add(8)), _mm_loadu_ps(ptr2.add(8))), + sum128_3, + ); + + sum128_4 = _mm_add_ps( + _mm_mul_ps(_mm_loadu_ps(ptr1.add(12)), _mm_loadu_ps(ptr2.add(12))), + sum128_4, + ); + + ptr1 = ptr1.add(16); + ptr2 = ptr2.add(16); + i += 16; + } + + let mut result = hsum128_ps_sse(sum128_1) + + hsum128_ps_sse(sum128_2) + + hsum128_ps_sse(sum128_3) + + hsum128_ps_sse(sum128_4); + for i in 0..n - m { + let a = read_unaligned(ptr1.add(i)); + let b = read_unaligned(ptr2.add(i)); + result += a * b; + } + result + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_spaces_sse() { + use super::*; + use crate::helix_engine::vector_core::spaces::simple::{ + dot_product_non_optimized, euclidean_distance_non_optimized, + }; + + if is_x86_feature_detected!("sse") { + let v1: Vec = vec![ + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 26., 27., 28., 29., 30., 31., + ]; + let v2: Vec = vec![ + 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 53., 54., 55., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., + 56., 57., 58., 59., 60., 61., + ]; + + let v1 = UnalignedVector::from_slice(&v1[..]); + 
let v2 = UnalignedVector::from_slice(&v2[..]); + + let euclid_simd = unsafe { euclid_similarity_sse(&v1, &v2) }; + let euclid = euclidean_distance_non_optimized(&v1, &v2); + assert_eq!(euclid_simd, euclid); + + let dot_simd = unsafe { dot_similarity_sse(&v1, &v2) }; + let dot = dot_product_non_optimized(&v1, &v2); + assert_eq!(dot_simd, dot); + + // let cosine_simd = unsafe { cosine_preprocess_sse(v1.clone()) }; + // let cosine = cosine_preprocess(v1); + // assert_eq!(cosine_simd, cosine); + } else { + println!("sse test skipped"); + } + } +} diff --git a/helix-db/src/helix_engine/vector_core/stats.rs b/helix-db/src/helix_engine/vector_core/stats.rs new file mode 100644 index 00000000..e88e72a2 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/stats.rs @@ -0,0 +1,90 @@ +use std::marker::PhantomData; +use std::sync::atomic::{AtomicUsize, Ordering}; + +use hashbrown::HashMap; +use heed3::{Result, RoTxn}; + +use crate::helix_engine::vector_core::CoreDatabase; +use crate::helix_engine::vector_core::distance::Distance; +use crate::helix_engine::vector_core::key::{KeyCodec, Prefix, PrefixCodec}; +use crate::helix_engine::vector_core::node::{Links, Node}; + +// TODO: ignore the phantom +#[derive(Debug)] +pub struct BuildStats { + /// a counter to see how many times `HnswBuilder.add_link` is invoked + pub n_links_added: AtomicUsize, + /// a counter tracking how many times we hit lmdb + pub lmdb_hits: AtomicUsize, + /// average rank of a node, calculated at the end of build + pub mean_degree: f32, + /// number of elements per layer + pub layer_dist: HashMap, + /// track some race condition violations + pub link_misses: AtomicUsize, + + _phantom: PhantomData, +} + +impl Default for BuildStats { + fn default() -> Self { + Self::new() + } +} + +impl BuildStats { + pub fn new() -> BuildStats { + BuildStats { + n_links_added: AtomicUsize::new(0), + lmdb_hits: AtomicUsize::new(0), + mean_degree: 0.0, + layer_dist: HashMap::default(), + link_misses: AtomicUsize::new(0), + _phantom: PhantomData, + } + } + + pub fn incr_link_count(&self, val: usize) { + self.n_links_added.fetch_add(val, Ordering::Relaxed); + } + + pub fn incr_lmdb_hits(&self) { + self.lmdb_hits.fetch_add(1, Ordering::Relaxed); + } + + pub fn incr_link_misses(&self) { + self.link_misses.fetch_add(1, Ordering::Relaxed); + } + + /// iterate over all links in db and average out node rank + pub fn compute_mean_degree( + &mut self, + rtxn: &RoTxn, + db: &CoreDatabase, + index: u16, + ) -> Result<()> { + let iter = db + .remap_key_type::() + .prefix_iter(rtxn, &Prefix::links(index))? + .remap_key_type::(); + + let mut n_links = 0; + let mut total_links = 0; + + for res in iter { + let (_key, node) = res?; + + let links = match node { + Node::Links(Links { links }) => links, + Node::Item(_) => unreachable!("Node must not be an item"), + }; + + total_links += links.len(); + n_links += 1; + } + + self.mean_degree = (total_links as f32) / (n_links as f32); + + Ok(()) + } +} diff --git a/helix-db/src/helix_engine/vector_core/unaligned_vector/f32.rs b/helix-db/src/helix_engine/vector_core/unaligned_vector/f32.rs new file mode 100644 index 00000000..98ed2ee7 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/unaligned_vector/f32.rs @@ -0,0 +1,70 @@ +use std::{ + borrow::Cow, + mem::{size_of, transmute}, +}; + +use bytemuck::cast_slice; +use byteorder::{ByteOrder, NativeEndian}; + +use super::{SizeMismatch, UnalignedVector, VectorCodec}; + +impl VectorCodec for f32 { + /// Creates an unaligned slice of f32 wrapper from a slice of bytes. 
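+    /// Fails with `SizeMismatch` when `bytes.len()` is not a multiple of
+    /// `size_of::<f32>()`; on success the bytes are borrowed, not copied.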
+ fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch> { + let rem = bytes.len() % size_of::(); + if rem == 0 { + // safety: `UnalignedF32Slice` is transparent + Ok(Cow::Borrowed(unsafe { + transmute::<&[u8], &UnalignedVector>(bytes) + })) + } else { + Err(SizeMismatch { + vector_codec: "f32", + rem, + }) + } + } + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + fn from_slice(slice: &[f32]) -> Cow<'_, UnalignedVector> { + Self::from_bytes(cast_slice(slice)).unwrap() + } + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + fn from_vec<'arena>( + vec: bumpalo::collections::Vec<'arena, f32>, + ) -> Cow<'static, UnalignedVector> { + let bytes = vec.into_iter().flat_map(|f| f.to_ne_bytes()).collect(); + Cow::Owned(bytes) + } + + // todo: add arena + fn to_vec<'arena>( + vec: &UnalignedVector, + arena: &'arena bumpalo::Bump, + ) -> bumpalo::collections::Vec<'arena, f32> { + let iter = vec.iter(); + let mut ret = bumpalo::collections::Vec::with_capacity_in(iter.len(), arena); + ret.extend(iter); + ret + } + + /// Returns an iterator of f32 that are read from the slice. + /// The f32 are copied in memory and are therefore, aligned. + fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_ { + vec.vector + .chunks_exact(size_of::()) + .map(NativeEndian::read_f32) + } + + /// Return the number of f32 that fits into this slice. + fn len(vec: &UnalignedVector) -> usize { + vec.vector.len() / size_of::() + } + + fn is_zero(vec: &UnalignedVector) -> bool { + vec.iter().all(|v| v == 0.0) + } +} diff --git a/helix-db/src/helix_engine/vector_core/unaligned_vector/mod.rs b/helix-db/src/helix_engine/vector_core/unaligned_vector/mod.rs new file mode 100644 index 00000000..48d77fc9 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/unaligned_vector/mod.rs @@ -0,0 +1,182 @@ +use std::{ + borrow::{Borrow, Cow}, + fmt, + marker::PhantomData, + mem::transmute, +}; + +use bytemuck::pod_collect_to_vec; +use serde::Serialize; + +mod f32; + +/// Determine the way the vectors should be read and written from the database +pub trait VectorCodec: std::borrow::ToOwned + Sized { + /// Creates an unaligned vector from a slice of bytes. + /// Don't allocate. + fn from_bytes(bytes: &[u8]) -> Result>, SizeMismatch>; + + /// Creates an unaligned vector from a slice of f32. + /// May allocate depending on the codec. + fn from_slice(slice: &[f32]) -> Cow<'_, UnalignedVector>; + + /// Creates an unaligned slice of f32 wrapper from a slice of f32. + /// The slice is already known to be of the right length. + fn from_vec<'arena>( + vec: bumpalo::collections::Vec<'arena, f32>, + ) -> Cow<'static, UnalignedVector>; + + /// Converts the `UnalignedVector` to an aligned vector of `f32`. + /// It's strictly equivalent to `.iter().collect()` but the performances + /// are better. + fn to_vec<'arena>( + vec: &UnalignedVector, + arena: &'arena bumpalo::Bump, + ) -> bumpalo::collections::Vec<'arena, f32>; + + /// Returns an iterator of f32 that are read from the vector. + /// The f32 are copied in memory and are therefore, aligned. + fn iter(vec: &UnalignedVector) -> impl ExactSizeIterator + '_; + + /// Returns the len of the vector in terms of elements. + fn len(vec: &UnalignedVector) -> usize; + + /// Returns true if all the elements in the vector are equal to 0. 
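+    /// The `f32` codec implements this by scanning the decoded values; a
+    /// quantized codec could instead perform a cheaper byte-level check.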
+    fn is_zero(vec: &UnalignedVector<Self>) -> bool;
+
+    /// Returns the bit-packing size if quantized
+    fn word_size() -> usize {
+        1
+    }
+}
+
+/// A wrapper struct that is used to read unaligned vectors directly from memory.
+#[repr(transparent)]
+#[derive(Serialize)]
+pub struct UnalignedVector<Codec: VectorCodec> {
+    format: PhantomData<fn() -> Codec>,
+    vector: [u8],
+}
+
+impl<Codec: VectorCodec> UnalignedVector<Codec> {
+    /// Creates an unaligned vector from a slice of bytes.
+    /// Doesn't allocate.
+    pub fn from_bytes(bytes: &[u8]) -> Result<Cow<'_, UnalignedVector<Codec>>, SizeMismatch> {
+        Codec::from_bytes(bytes)
+    }
+
+    /// Creates an unaligned vector from a slice of f32.
+    /// May allocate depending on the codec.
+    pub fn from_slice(slice: &[f32]) -> Cow<'_, UnalignedVector<Codec>> {
+        Codec::from_slice(slice)
+    }
+
+    /// Creates an unaligned slice of f32 wrapper from a slice of f32.
+    /// The slice is already known to be of the right length.
+    pub fn from_vec<'arena>(
+        vec: bumpalo::collections::Vec<'arena, f32>,
+    ) -> Cow<'static, UnalignedVector<Codec>> {
+        Codec::from_vec(vec)
+    }
+
+    /// Returns an iterator of f32 that are read from the vector.
+    /// The f32 are copied in memory and are therefore aligned.
+    pub fn iter(&self) -> impl ExactSizeIterator<Item = f32> + '_ {
+        Codec::iter(self)
+    }
+
+    /// Returns true if all the elements in the vector are equal to 0.
+    pub fn is_zero(&self) -> bool {
+        Codec::is_zero(self)
+    }
+
+    /// Returns an allocated and aligned vector of `f32`.
+    pub fn to_vec<'arena>(
+        &self,
+        arena: &'arena bumpalo::Bump,
+    ) -> bumpalo::collections::Vec<'arena, f32> {
+        Codec::to_vec(self, arena)
+    }
+
+    /// Returns the length of the vector in number of elements.
+    pub fn len(&self) -> usize {
+        Codec::len(self)
+    }
+
+    /// Creates an unaligned slice of something. It's up to the caller to ensure
+    /// it is used with the same codec it was created with.
+    pub(crate) fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
+        unsafe { transmute(bytes) }
+    }
+
+    /// Returns the original raw slice of bytes.
+    pub(crate) fn as_bytes(&self) -> &[u8] {
+        &self.vector
+    }
+
+    /// Returns whether it is empty or not.
+    pub fn is_empty(&self) -> bool {
+        self.vector.is_empty()
+    }
+
+    /// Returns the raw pointer to the start of this slice.
+    pub(crate) fn as_ptr(&self) -> *const u8 {
+        self.vector.as_ptr()
+    }
+}
+
+/// Returned in case you tried to make an unaligned vector from a slice of bytes that doesn't have the right number of elements
+#[derive(Debug, thiserror::Error)]
+#[error(
+    "Slice of bytes contains {rem} too many bytes to be decoded with the {vector_codec} codec."
+)]
+pub struct SizeMismatch {
+    /// The name of the codec used.
+    vector_codec: &'static str,
+    /// The number of bytes remaining after decoding as many words as possible.
+ rem: usize, +} + +impl ToOwned for UnalignedVector { + type Owned = Vec; + + fn to_owned(&self) -> Self::Owned { + pod_collect_to_vec(&self.vector) + } +} + +impl Borrow> for Vec { + fn borrow(&self) -> &UnalignedVector { + UnalignedVector::from_bytes_unchecked(self) + } +} + +impl fmt::Debug for UnalignedVector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut list = f.debug_list(); + + struct Number(f32); + impl fmt::Debug for Number { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:0.4}", self.0) + } + } + + let arena = bumpalo::Bump::new(); + let vec = self.to_vec(&arena); + for v in vec.iter().take(10) { + list.entry(&Number(*v)); + } + if vec.len() < 10 { + return list.finish(); + } + + // With binary quantization we may be padding with a lot of zeros + if vec[10..].iter().all(|v| *v == 0.0) { + list.entry(&"0.0, ..."); + } else { + list.entry(&"other ..."); + } + + list.finish() + } +} diff --git a/helix-db/src/helix_engine/vector_core/utils.rs b/helix-db/src/helix_engine/vector_core/utils.rs deleted file mode 100644 index f7704f51..00000000 --- a/helix-db/src/helix_engine/vector_core/utils.rs +++ /dev/null @@ -1,167 +0,0 @@ -use super::binary_heap::BinaryHeap; -use crate::helix_engine::{ - traversal_core::LMDB_STRING_HEADER_LENGTH, - types::VectorError, - vector_core::{vector::HVector, vector_without_data::VectorWithoutData}, -}; -use heed3::{ - Database, RoTxn, - byteorder::BE, - types::{Bytes, U128}, -}; -use std::cmp::Ordering; - -#[derive(PartialEq)] -pub(super) struct Candidate { - pub id: u128, - pub distance: f64, -} - -impl Eq for Candidate {} - -impl PartialOrd for Candidate { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Candidate { - fn cmp(&self, other: &Self) -> Ordering { - other - .distance - .partial_cmp(&self.distance) - .unwrap_or(Ordering::Equal) - } -} - -pub(super) trait HeapOps<'a, T> { - /// Take the top k elements from the heap - /// Used because using `.iter()` does not keep the order - fn take_inord(&mut self, k: usize) -> BinaryHeap<'a, T> - where - T: Ord; - - /// Get the maximum element from the heap - fn get_max<'q>(&'q self) -> Option<&'a T> - where - T: Ord, - 'q: 'a; -} - -impl<'a, T> HeapOps<'a, T> for BinaryHeap<'a, T> { - #[inline(always)] - fn take_inord(&mut self, k: usize) -> BinaryHeap<'a, T> - where - T: Ord, - { - let mut result = BinaryHeap::with_capacity(self.arena, k); - for _ in 0..k { - if let Some(item) = self.pop() { - result.push(item); - } else { - break; - } - } - result - } - - #[inline(always)] - fn get_max<'q>(&'q self) -> Option<&'a T> - where - T: Ord, - 'q: 'a, - { - self.iter().max() - } -} - -pub trait VectorFilter<'db, 'arena, 'txn, 'q> { - fn to_vec_with_filter( - self, - k: usize, - filter: Option<&'arena [F]>, - label: &'arena str, - txn: &'txn RoTxn<'db>, - db: Database, Bytes>, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> - where - F: Fn(&HVector<'arena>, &'txn RoTxn<'db>) -> bool; -} - -impl<'db, 'arena, 'txn, 'q> VectorFilter<'db, 'arena, 'txn, 'q> - for BinaryHeap<'arena, HVector<'arena>> -{ - #[inline(always)] - fn to_vec_with_filter( - mut self, - k: usize, - filter: Option<&'arena [F]>, - label: &'arena str, - txn: &'txn RoTxn<'db>, - db: Database, Bytes>, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> - where - F: Fn(&HVector<'arena>, &'txn RoTxn<'db>) -> bool, - { - let mut result = bumpalo::collections::Vec::with_capacity_in(k, arena); - for _ in 0..k { - // while pop check 
filters and pop until one passes - while let Some(mut item) = self.pop() { - let properties = match db.get(txn, &item.id)? { - Some(bytes) => { - // println!("decoding"); - let res = Some(VectorWithoutData::from_bincode_bytes( - arena, bytes, item.id, - )?); - // println!("decoded: {res:?}"); - res - } - None => None, // TODO: maybe should be an error? - }; - - if let Some(properties) = properties - && SHOULD_CHECK_DELETED - && properties.deleted - { - continue; - } - - if item.label() == label - && (filter.is_none() || filter.unwrap().iter().all(|f| f(&item, txn))) - { - assert!( - properties.is_some(), - "properties should be some, otherwise there has been an error on vector insertion as properties are always inserted" - ); - item.expand_from_vector_without_data(properties.unwrap()); - result.push(item); - break; - } - } - } - - Ok(result) - } -} - -pub fn check_deleted(data: &[u8]) -> bool { - assert!( - data.len() >= LMDB_STRING_HEADER_LENGTH, - "value length does not contain header which means the `label` field was missing from the node on insertion" - ); - let length_of_label_in_lmdb = - u64::from_le_bytes(data[..LMDB_STRING_HEADER_LENGTH].try_into().unwrap()) as usize; - - let length_of_version_in_lmdb = 1; - - let deleted_index = - LMDB_STRING_HEADER_LENGTH + length_of_label_in_lmdb + length_of_version_in_lmdb; - - assert!( - data.len() >= deleted_index, - "data length is not at least the deleted index plus the length of the deleted field meaning there has been a corruption on node insertion" - ); - data[deleted_index] == 1 -} diff --git a/helix-db/src/helix_engine/vector_core/vector.rs b/helix-db/src/helix_engine/vector_core/vector.rs deleted file mode 100644 index 30c3223c..00000000 --- a/helix-db/src/helix_engine/vector_core/vector.rs +++ /dev/null @@ -1,305 +0,0 @@ -use crate::{ - helix_engine::{ - types::VectorError, - vector_core::{vector_distance::DistanceCalc, vector_without_data::VectorWithoutData}, - }, - protocol::{custom_serde::vector_serde::VectorDeSeed, value::Value}, - utils::{ - id::{uuid_str_from_buf, v6_uuid}, - properties::ImmutablePropertiesMap, - }, -}; -use bincode::Options; -use core::fmt; -use serde::{Serialize, Serializer, ser::SerializeMap}; -use std::{alloc, cmp::Ordering, fmt::Debug, mem, ptr, slice}; - -// TODO: make this generic over the type of encoding (f32, f64, etc) -// TODO: use const param to set dimension -// TODO: set level as u8 - -#[repr(C, align(16))] // TODO: see performance impact of repr(C) and align(16) -#[derive(Clone, Copy)] -pub struct HVector<'arena> { - /// The id of the HVector - pub id: u128, - /// The label of the HVector - pub label: &'arena str, - /// the version of the vector - pub version: u8, - /// whether the vector is deleted - pub deleted: bool, - /// The level of the HVector - pub level: usize, - /// The distance of the HVector - pub distance: Option, - /// The actual vector - pub data: &'arena [f64], - /// The properties of the HVector - pub properties: Option>, -} - -impl<'arena> Serialize for HVector<'arena> { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - use serde::ser::SerializeStruct; - - // Check if this is a human-readable format (like JSON) - if serializer.is_human_readable() { - // Include id for JSON serialization - let mut buffer = [0u8; 36]; - let mut state = serializer.serialize_map(Some( - 5 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0), - ))?; - state.serialize_entry("id", uuid_str_from_buf(self.id, &mut buffer))?; - state.serialize_entry("label", &self.label)?; 
- state.serialize_entry("version", &self.version)?; - state.serialize_entry("deleted", &self.deleted)?; - if let Some(properties) = &self.properties { - for (key, value) in properties.iter() { - state.serialize_entry(key, value)?; - } - } - state.end() - } else { - // Skip id, level, distance, and data for bincode serialization - let mut state = serializer.serialize_struct("HVector", 4)?; - state.serialize_field("label", &self.label)?; - state.serialize_field("version", &self.version)?; - state.serialize_field("deleted", &self.deleted)?; - state.serialize_field("properties", &self.properties)?; - state.end() - } - } -} - -impl PartialEq for HVector<'_> { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} -impl Eq for HVector<'_> {} -impl PartialOrd for HVector<'_> { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} -impl Ord for HVector<'_> { - fn cmp(&self, other: &Self) -> Ordering { - other - .distance - .partial_cmp(&self.distance) - .unwrap_or(Ordering::Equal) - } -} - -impl Debug for HVector<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{{ \nid: {},\nlevel: {},\ndistance: {:?},\ndata: {:?}, }}", - uuid::Uuid::from_u128(self.id), - // self.is_deleted, - self.level, - self.distance, - self.data, - ) - } -} - -impl<'arena> HVector<'arena> { - #[inline(always)] - pub fn from_slice(label: &'arena str, level: usize, data: &'arena [f64]) -> Self { - let id = v6_uuid(); - HVector { - id, - // is_deleted: false, - version: 1, - level, - label, - data, - distance: None, - properties: None, - deleted: false, - } - } - - /// Converts the HVector to an vec of bytes by accessing the data field directly - /// and converting each f64 to a byte slice - #[inline(always)] - pub fn vector_data_to_bytes(&self) -> Result<&[u8], VectorError> { - bytemuck::try_cast_slice(self.data).map_err(|_| { - VectorError::ConversionError("Invalid vector data: vector data".to_string()) - }) - } - - /// Deserializes bytes into an vector using a custom deserializer that allocates into the provided arena - /// - /// Both the properties bytes (if present) and the raw vector data are combined to generate the final vector struct - /// - /// NOTE: in this method, fixint encoding is used - #[inline] - pub fn from_bincode_bytes<'txn>( - arena: &'arena bumpalo::Bump, - properties: Option<&'txn [u8]>, - raw_vector_data: &'txn [u8], - id: u128, - ) -> Result { - bincode::options() - .with_fixint_encoding() - .allow_trailing_bytes() - .deserialize_seed( - VectorDeSeed { - arena, - id, - raw_vector_data, - }, - properties.unwrap_or(&[]), - ) - .map_err(|e| VectorError::ConversionError(format!("Error deserializing vector: {e}"))) - } - - #[inline(always)] - pub fn to_bincode_bytes(&self) -> Result, bincode::Error> { - bincode::serialize(self) - } - - /// Casts the raw bytes to a f64 slice by copying them once into the arena - #[inline] - pub fn cast_raw_vector_data<'txn>( - arena: &'arena bumpalo::Bump, - raw_vector_data: &'txn [u8], - ) -> &'arena [f64] { - assert!(!raw_vector_data.is_empty(), "raw_vector_data.len() == 0"); - assert!( - raw_vector_data.len().is_multiple_of(mem::size_of::()), - "raw_vector_data bytes len is not a multiple of size_of::()" - ); - let dimensions = raw_vector_data.len() / mem::size_of::(); - - assert!( - raw_vector_data.len().is_multiple_of(dimensions), - "raw_vector_data does not have the exact required number of dimensions" - ); - - let layout = alloc::Layout::array::(dimensions) - .expect("vector_data array 
arithmetic overflow or total size exceeds isize::MAX"); - - let vector_data: ptr::NonNull = arena.alloc_layout(layout); - - // 'arena because the destination pointer is allocated in the arena - let data: &'arena [f64] = unsafe { - // SAFETY: - // - We assert data is present and that we are within bounds in asserts above - ptr::copy_nonoverlapping( - raw_vector_data.as_ptr(), - vector_data.as_ptr(), - raw_vector_data.len(), - ); - - // We allocated with the layout of an f64 array - let vector_data: ptr::NonNull = vector_data.cast(); - - // SAFETY: - // - `vector_data`` is guaranteed to be valid by being NonNull - // - the asserts above guarantee that there are enough valid bytes to be read - slice::from_raw_parts(vector_data.as_ptr(), dimensions) - }; - - data - } - - /// Uses just the vector data to generate a HVector struct - pub fn from_raw_vector_data<'txn>( - arena: &'arena bumpalo::Bump, - raw_vector_data: &'txn [u8], - label: &'arena str, - id: u128, - ) -> Result { - let data = Self::cast_raw_vector_data(arena, raw_vector_data); - Ok(HVector { - id, - label, - data, - version: 1, - level: 0, - distance: None, - properties: None, - deleted: false, - }) - } - - #[inline(always)] - pub fn len(&self) -> usize { - self.data.len() - } - - #[inline(always)] - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - #[inline(always)] - pub fn distance_to(&self, other: &HVector) -> Result { - HVector::<'arena>::distance(self, other) - } - - #[inline(always)] - pub fn set_distance(&mut self, distance: f64) { - self.distance = Some(distance); - } - - #[inline(always)] - pub fn get_distance(&self) -> f64 { - self.distance.unwrap_or(2.0) - } - - #[inline(always)] - pub fn get_label(&self) -> Option<&Value> { - match &self.properties { - Some(p) => p.get("label"), - None => None, - } - } - - #[inline(always)] - pub fn get_property(&self, key: &str) -> Option<&'arena Value> { - self.properties.as_ref().and_then(|value| value.get(key)) - } - - pub fn id(&self) -> &u128 { - &self.id - } - - pub fn label(&self) -> &'arena str { - self.label - } - - pub fn score(&self) -> f64 { - self.distance.unwrap_or(2.0) - } - - pub fn expand_from_vector_without_data(&mut self, vector: VectorWithoutData<'arena>) { - self.label = vector.label; - self.version = vector.version; - self.level = vector.level; - self.properties = vector.properties; - } -} - -impl<'arena> From> for HVector<'arena> { - fn from(value: VectorWithoutData<'arena>) -> Self { - HVector { - id: value.id, - label: value.label, - version: value.version, - level: value.level, - distance: None, - data: &[], - properties: value.properties, - deleted: value.deleted, - } - } -} diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs deleted file mode 100644 index 49521258..00000000 --- a/helix-db/src/helix_engine/vector_core/vector_core.rs +++ /dev/null @@ -1,664 +0,0 @@ -use super::binary_heap::BinaryHeap; -use crate::{ - debug_println, - helix_engine::{ - types::VectorError, - vector_core::{ - hnsw::HNSW, - utils::{Candidate, HeapOps, VectorFilter}, - vector::HVector, - vector_without_data::VectorWithoutData, - }, - }, - utils::{id::uuid_str, properties::ImmutablePropertiesMap}, -}; -use heed3::{ - Database, Env, RoTxn, RwTxn, - byteorder::BE, - types::{Bytes, U128, Unit}, -}; -use rand::prelude::Rng; -use serde::{Deserialize, Serialize}; -use std::collections::HashSet; - -const DB_VECTORS: &str = "vectors"; // for vector data (v:) -const DB_VECTOR_DATA: &str = "vector_data"; // 
for vector data (v:) -const DB_HNSW_EDGES: &str = "hnsw_out_nodes"; // for hnsw out node data -const VECTOR_PREFIX: &[u8] = b"v:"; -pub const ENTRY_POINT_KEY: &[u8] = b"entry_point"; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct HNSWConfig { - pub m: usize, // max num of bi-directional links per element - pub m_max_0: usize, // max num of links for lower layers - pub ef_construct: usize, // size of the dynamic candidate list for construction - pub m_l: f64, // level generation factor - pub ef: usize, // search param, num of cands to search - pub min_neighbors: usize, // for get_neighbors, always 512 -} - -impl HNSWConfig { - /// Constructor for the configs of the HNSW vector similarity search algorithm - /// - m (5 <= m <= 48): max num of bi-directional links per element - /// - m_max_0 (2 * m): max num of links for level 0 (level that stores all vecs) - /// - ef_construct (40 <= ef_construct <= 512): size of the dynamic candidate list - /// for construction - /// - m_l (ln(1/m)): level generation factor (multiplied by a random number) - /// - ef (10 <= ef <= 512): num of candidates to search - pub fn new(m: Option, ef_construct: Option, ef: Option) -> Self { - let m = m.unwrap_or(16).clamp(5, 48); - let ef_construct = ef_construct.unwrap_or(128).clamp(40, 512); - let ef = ef.unwrap_or(768).clamp(10, 512); - - Self { - m, - m_max_0: 2 * m, - ef_construct, - m_l: 1.0 / (m as f64).ln(), - ef, - min_neighbors: 512, - } - } -} - -pub struct VectorCore { - pub vectors_db: Database, - pub vector_properties_db: Database, Bytes>, - pub edges_db: Database, - pub config: HNSWConfig, -} - -impl VectorCore { - pub fn new(env: &Env, txn: &mut RwTxn, config: HNSWConfig) -> Result { - let vectors_db = env.create_database(txn, Some(DB_VECTORS))?; - let vector_properties_db = env - .database_options() - .types::, Bytes>() - .name(DB_VECTOR_DATA) - .create(txn)?; - let edges_db = env.create_database(txn, Some(DB_HNSW_EDGES))?; - - Ok(Self { - vectors_db, - vector_properties_db, - edges_db, - config, - }) - } - - /// Vector key: [v, id, ] - #[inline(always)] - pub fn vector_key(id: u128, level: usize) -> Vec { - [VECTOR_PREFIX, &id.to_be_bytes(), &level.to_be_bytes()].concat() - } - - #[inline(always)] - pub fn out_edges_key(source_id: u128, level: usize, sink_id: Option) -> Vec { - match sink_id { - Some(sink_id) => [ - source_id.to_be_bytes().as_slice(), - level.to_be_bytes().as_slice(), - sink_id.to_be_bytes().as_slice(), - ] - .concat() - .to_vec(), - None => [ - source_id.to_be_bytes().as_slice(), - level.to_be_bytes().as_slice(), - ] - .concat() - .to_vec(), - } - } - - #[inline] - fn get_new_level(&self) -> usize { - let mut rng = rand::rng(); - let r: f64 = rng.random::(); - (-r.ln() * self.config.m_l).floor() as usize - } - - #[inline] - fn get_entry_point<'db: 'arena, 'arena: 'txn, 'txn>( - &self, - txn: &'txn RoTxn<'db>, - label: &'arena str, - arena: &'arena bumpalo::Bump, - ) -> Result, VectorError> { - let ep_id = self.vectors_db.get(txn, ENTRY_POINT_KEY)?; - if let Some(ep_id) = ep_id { - let mut arr = [0u8; 16]; - let len = std::cmp::min(ep_id.len(), 16); - arr[..len].copy_from_slice(&ep_id[..len]); - - let ep = self - .get_raw_vector_data(txn, u128::from_be_bytes(arr), label, arena) - .map_err(|_| VectorError::EntryPointNotFound)?; - Ok(ep) - } else { - Err(VectorError::EntryPointNotFound) - } - } - - #[inline] - fn set_entry_point(&self, txn: &mut RwTxn, entry: &HVector) -> Result<(), VectorError> { - self.vectors_db - .put(txn, ENTRY_POINT_KEY, &entry.id.to_be_bytes()) - 
.map_err(VectorError::from)?; - Ok(()) - } - - #[inline(always)] - pub fn put_vector<'arena>( - &self, - txn: &mut RwTxn, - vector: &HVector<'arena>, - ) -> Result<(), VectorError> { - self.vectors_db - .put( - txn, - &Self::vector_key(vector.id, vector.level), - vector.vector_data_to_bytes()?, - ) - .map_err(VectorError::from)?; - self.vector_properties_db - .put(txn, &vector.id, &bincode::serialize(&vector)?)?; - Ok(()) - } - - #[inline(always)] - fn get_neighbors<'db: 'arena, 'arena: 'txn, 'txn, F>( - &self, - txn: &'txn RoTxn<'db>, - label: &'arena str, - id: u128, - level: usize, - filter: Option<&[F]>, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - { - let out_key = Self::out_edges_key(id, level, None); - let mut neighbors = bumpalo::collections::Vec::with_capacity_in( - self.config.m_max_0.min(self.config.min_neighbors), - arena, - ); - - let iter = self - .edges_db - .lazily_decode_data() - .prefix_iter(txn, &out_key)?; - - let prefix_len = out_key.len(); - - for result in iter { - let (key, _) = result?; - - let mut arr = [0u8; 16]; - arr[..16].copy_from_slice(&key[prefix_len..(prefix_len + 16)]); - let neighbor_id = u128::from_be_bytes(arr); - - if neighbor_id == id { - continue; - } - let vector = self.get_raw_vector_data(txn, neighbor_id, label, arena)?; - - let passes_filters = match filter { - Some(filter_slice) => filter_slice.iter().all(|f| f(&vector, txn)), - None => true, - }; - - if passes_filters { - neighbors.push(vector); - } - } - neighbors.shrink_to_fit(); - - Ok(neighbors) - } - - #[inline(always)] - fn set_neighbours<'db: 'arena, 'arena: 'txn, 'txn, 's>( - &'db self, - txn: &'txn mut RwTxn<'db>, - id: u128, - neighbors: &BinaryHeap<'arena, HVector<'arena>>, - level: usize, - ) -> Result<(), VectorError> { - let prefix = Self::out_edges_key(id, level, None); - - let mut keys_to_delete: HashSet> = self - .edges_db - .prefix_iter(txn, prefix.as_ref())? - .filter_map(|result| result.ok().map(|(key, _)| key.to_vec())) - .collect(); - - neighbors - .iter() - .try_for_each(|neighbor| -> Result<(), VectorError> { - let neighbor_id = neighbor.id; - if neighbor_id == id { - return Ok(()); - } - - let out_key = Self::out_edges_key(id, level, Some(neighbor_id)); - keys_to_delete.remove(&out_key); - self.edges_db.put(txn, &out_key, &())?; - - let in_key = Self::out_edges_key(neighbor_id, level, Some(id)); - keys_to_delete.remove(&in_key); - self.edges_db.put(txn, &in_key, &())?; - - Ok(()) - })?; - - for key in keys_to_delete { - self.edges_db.delete(txn, &key)?; - } - - Ok(()) - } - - fn select_neighbors<'db: 'arena, 'arena: 'txn, 'txn, 's, F>( - &'db self, - txn: &'txn RoTxn<'db>, - label: &'arena str, - query: &'s HVector<'arena>, - mut cands: BinaryHeap<'arena, HVector<'arena>>, - level: usize, - should_extend: bool, - filter: Option<&[F]>, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - { - let m = self.config.m; - - if !should_extend { - return Ok(cands.take_inord(m)); - } - - let mut visited: HashSet = HashSet::new(); - let mut result = BinaryHeap::with_capacity(arena, m * cands.len()); - for candidate in cands.iter() { - for mut neighbor in - self.get_neighbors(txn, label, candidate.id, level, filter, arena)? 
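`out_edges_key` and the neighbor decoding in `get_neighbors` above agree on one fixed key layout: 16 big-endian source-id bytes, 8 level bytes, then 16 sink-id bytes, so a source/level prefix scan yields the sink id at a fixed offset. A standalone round-trip of that layout (assuming a 64-bit `usize`, as the 8-byte level slice implies; these helpers are illustrative, not from the codebase):

```rust
// Hypothetical mirror of the edge-key layout:
// [source_id: 16 bytes BE][level: 8 bytes BE][sink_id: 16 bytes BE]
fn edge_key(source: u128, level: usize, sink: u128) -> Vec<u8> {
    [
        source.to_be_bytes().as_slice(),
        level.to_be_bytes().as_slice(),
        sink.to_be_bytes().as_slice(),
    ]
    .concat()
}

fn sink_from_key(key: &[u8]) -> u128 {
    // Same arithmetic as get_neighbors: the sink id starts right after the
    // 24-byte (16 + 8) prefix that the iterator scanned by.
    let mut arr = [0u8; 16];
    arr.copy_from_slice(&key[24..40]);
    u128::from_be_bytes(arr)
}

fn main() {
    let key = edge_key(42, 3, 7);
    assert_eq!(key.len(), 40);
    assert_eq!(sink_from_key(&key), 7);
}
```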
- { - if !visited.insert(neighbor.id) { - continue; - } - - neighbor.set_distance(neighbor.distance_to(query)?); - - /* - let passes_filters = match filter { - Some(filter_slice) => filter_slice.iter().all(|f| f(&neighbor, txn)), - None => true, - }; - - if passes_filters { - result.push(neighbor); - } - */ - - if filter.is_none() || filter.unwrap().iter().all(|f| f(&neighbor, txn)) { - result.push(neighbor); - } - } - } - - result.extend(cands); - Ok(result.take_inord(m)) - } - - fn search_level<'db: 'arena, 'arena: 'txn, 'txn, 'q, F>( - &self, - txn: &'txn RoTxn<'db>, - label: &'arena str, - query: &'q HVector<'arena>, - entry_point: &'q mut HVector<'arena>, - ef: usize, - level: usize, - filter: Option<&[F]>, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - { - let mut visited: HashSet = HashSet::new(); - let mut candidates: BinaryHeap<'arena, Candidate> = - BinaryHeap::with_capacity(arena, self.config.ef_construct); - let mut results: BinaryHeap<'arena, HVector<'arena>> = BinaryHeap::new(arena); - - entry_point.set_distance(entry_point.distance_to(query)?); - candidates.push(Candidate { - id: entry_point.id, - distance: entry_point.get_distance(), - }); - results.push(*entry_point); - visited.insert(entry_point.id); - - while let Some(curr_cand) = candidates.pop() { - if results.len() >= ef - && results - .get_max() - .is_none_or(|f| curr_cand.distance > f.get_distance()) - { - break; - } - - let max_distance = if results.len() >= ef { - results.get_max().map(|f| f.get_distance()) - } else { - None - }; - - self.get_neighbors(txn, label, curr_cand.id, level, filter, arena)? - .into_iter() - .filter(|neighbor| visited.insert(neighbor.id)) - .filter_map(|mut neighbor| { - let distance = neighbor.distance_to(query).ok()?; - - if max_distance.is_none_or(|max| distance < max) { - neighbor.set_distance(distance); - Some((neighbor, distance)) - } else { - None - } - }) - .for_each(|(neighbor, distance)| { - candidates.push(Candidate { - id: neighbor.id, - distance, - }); - - results.push(neighbor); - - if results.len() > ef { - results = results.take_inord(ef); - } - }); - } - Ok(results) - } - - pub fn num_inserted_vectors(&self, txn: &RoTxn) -> Result { - Ok(self.vectors_db.len(txn)?) - } - - #[inline] - pub fn get_vector_properties<'db: 'arena, 'arena: 'txn, 'txn>( - &self, - txn: &'txn RoTxn<'db>, - id: u128, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> { - let vector: Option> = - match self.vector_properties_db.get(txn, &id)? { - Some(bytes) => Some(VectorWithoutData::from_bincode_bytes(arena, bytes, id)?), - None => None, - }; - - if let Some(vector) = vector - && vector.deleted - { - return Err(VectorError::VectorDeleted); - } - - Ok(vector) - } - - #[inline(always)] - pub fn get_full_vector<'arena>( - &self, - txn: &RoTxn, - id: u128, - arena: &'arena bumpalo::Bump, - ) -> Result, VectorError> { - let vector_data_bytes = self - .vectors_db - .get(txn, &Self::vector_key(id, 0))? 
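The break in `search_level` above is the standard HNSW stopping rule: once `ef` results are held and the closest unexpanded candidate is already farther than the worst result kept, expanding it cannot displace anything. Restated as a tiny predicate over plain slices (a hypothetical helper; the real code works on the arena-backed heaps):

```rust
// True when the search loop may stop: the frontier's nearest candidate
// cannot improve on the worst of the ef results already collected.
fn can_stop(next_candidate: f64, kept_distances: &[f64], ef: usize) -> bool {
    kept_distances.len() >= ef
        && kept_distances
            .iter()
            .copied()
            .fold(f64::NEG_INFINITY, f64::max) // worst (largest) kept distance
            < next_candidate
}

fn main() {
    let kept = [0.1, 0.2, 0.4];
    assert!(!can_stop(0.3, &kept, 3)); // 0.3 could displace 0.4: keep going
    assert!(can_stop(0.5, &kept, 3)); // worse than everything kept: stop
}
```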
- .ok_or(VectorError::VectorNotFound(uuid_str(id, arena).to_string()))?; - - let properties_bytes = self.vector_properties_db.get(txn, &id)?; - - let vector = HVector::from_bincode_bytes(arena, properties_bytes, vector_data_bytes, id)?; - if vector.deleted { - return Err(VectorError::VectorDeleted); - } - Ok(vector) - } - - #[inline(always)] - pub fn get_raw_vector_data<'db: 'arena, 'arena: 'txn, 'txn>( - &self, - txn: &'txn RoTxn<'db>, - id: u128, - label: &'arena str, - arena: &'arena bumpalo::Bump, - ) -> Result, VectorError> { - let vector_data_bytes = self - .vectors_db - .get(txn, &Self::vector_key(id, 0))? - .ok_or(VectorError::VectorNotFound(uuid_str(id, arena).to_string()))?; - HVector::from_raw_vector_data(arena, vector_data_bytes, label, id) - } - - /// Get all vectors from the database, optionally filtered by level - pub fn get_all_vectors<'db: 'arena, 'arena: 'txn, 'txn>( - &self, - txn: &'txn RoTxn<'db>, - level: Option, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> { - let mut vectors = bumpalo::collections::Vec::new_in(arena); - - // Iterate over all vectors in the database - let prefix_iter = self.vectors_db.prefix_iter(txn, VECTOR_PREFIX)?; - - for result in prefix_iter { - let (key, _) = result?; - - // Extract id from the key: v: (2 bytes) + id (16 bytes) + level (8 bytes) - if key.len() < VECTOR_PREFIX.len() + 16 { - continue; // Skip malformed keys - } - - let mut id_bytes = [0u8; 16]; - id_bytes.copy_from_slice(&key[VECTOR_PREFIX.len()..VECTOR_PREFIX.len() + 16]); - let id = u128::from_be_bytes(id_bytes); - - // Get the full vector using the existing method - match self.get_full_vector(txn, id, arena) { - Ok(vector) => { - // Filter by level if specified - if let Some(lvl) = level { - if vector.level == lvl { - vectors.push(vector); - } - } else { - vectors.push(vector); - } - } - Err(_) => { - // Skip vectors that can't be loaded (e.g., deleted) - continue; - } - } - } - - Ok(vectors) - } -} - -impl HNSW for VectorCore { - fn search<'db, 'arena, 'txn, F>( - &self, - txn: &'txn RoTxn<'db>, - query: &'arena [f64], - k: usize, - label: &'arena str, - filter: Option<&'arena [F]>, - should_trickle: bool, - arena: &'arena bumpalo::Bump, - ) -> Result>, VectorError> - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - 'db: 'arena, - 'arena: 'txn, - { - let query = HVector::from_slice(label, 0, query); - // let temp_arena = bumpalo::Bump::new(); - - let mut entry_point = self.get_entry_point(txn, label, arena)?; - - let ef = self.config.ef; - let curr_level = entry_point.level; - // println!("curr_level: {curr_level}"); - for level in (1..=curr_level).rev() { - let mut nearest = self.search_level( - txn, - label, - &query, - &mut entry_point, - ef, - level, - match should_trickle { - true => filter, - false => None, - }, - arena, - )?; - if let Some(closest) = nearest.pop() { - entry_point = closest; - } - } - // println!("entry_point: {entry_point:?}"); - let candidates = self.search_level( - txn, - label, - &query, - &mut entry_point, - ef, - 0, - match should_trickle { - true => filter, - false => None, - }, - arena, - )?; - // println!("candidates"); - let results = candidates.to_vec_with_filter::( - k, - filter, - label, - txn, - self.vector_properties_db, - arena, - )?; - - debug_println!("vector search found {} results", results.len()); - Ok(results) - } - - fn insert<'db, 'arena, 'txn, F>( - &'db self, - txn: &'txn mut RwTxn<'db>, - label: &'arena str, - data: &'arena [f64], - properties: Option>, - arena: &'arena bumpalo::Bump, - ) -> Result, 
VectorError> - where - F: Fn(&HVector<'arena>, &RoTxn<'db>) -> bool, - 'db: 'arena, - 'arena: 'txn, - { - let new_level = self.get_new_level(); - - let mut query = HVector::from_slice(label, 0, data); - query.properties = properties; - self.put_vector(txn, &query)?; - - query.level = new_level; - - let entry_point = match self.get_entry_point(txn, label, arena) { - Ok(ep) => ep, - Err(_) => { - // TODO: use proper error handling - self.set_entry_point(txn, &query)?; - query.set_distance(0.0); - - return Ok(query); - } - }; - - let l = entry_point.level; - let mut curr_ep = entry_point; - for level in (new_level + 1..=l).rev() { - let mut nearest = - self.search_level::<F>(txn, label, &query, &mut curr_ep, 1, level, None, arena)?; - curr_ep = nearest.pop().ok_or(VectorError::VectorCoreError( - "empty search result".to_string(), - ))?; - } - - for level in (0..=l.min(new_level)).rev() { - let nearest = self.search_level::<F>( - txn, - label, - &query, - &mut curr_ep, - self.config.ef_construct, - level, - None, - arena, - )?; - curr_ep = *nearest.peek().ok_or(VectorError::VectorCoreError( - "empty search result".to_string(), - ))?; - - let neighbors = - self.select_neighbors::<F>(txn, label, &query, nearest, level, true, None, arena)?; - self.set_neighbours(txn, query.id, &neighbors, level)?; - - for e in neighbors { - let id = e.id; - let e_conns = BinaryHeap::from( - arena, - self.get_neighbors::<F>(txn, label, id, level, None, arena)?, - ); - let e_new_conn = self - .select_neighbors::<F>(txn, label, &query, e_conns, level, true, None, arena)?; - self.set_neighbours(txn, id, &e_new_conn, level)?; - } - } - - if new_level > l { - self.set_entry_point(txn, &query)?; - } - - debug_println!("vector inserted with id {}", query.id); - Ok(query) - } - - fn delete(&self, txn: &mut RwTxn, id: u128, arena: &bumpalo::Bump) -> Result<(), VectorError> { - match self.get_vector_properties(txn, id, arena)? { - Some(mut properties) => { - debug_println!("properties: {properties:?}"); - if properties.deleted { - return Err(VectorError::VectorAlreadyDeleted(id.to_string())); - } - - properties.deleted = true; - self.vector_properties_db - .put(txn, &id, &bincode::serialize(&properties)?)?; - debug_println!("vector deleted with id {}", &id); - Ok(()) - } - None => Err(VectorError::VectorNotFound(id.to_string())), - } - } -} diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs deleted file mode 100644 index d92737e2..00000000 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ /dev/null @@ -1,157 +0,0 @@ -use crate::helix_engine::{types::VectorError, vector_core::vector::HVector}; - -pub const MAX_DISTANCE: f64 = 2.0; -pub const ORTHOGONAL: f64 = 1.0; -pub const MIN_DISTANCE: f64 = 0.0; - -pub trait DistanceCalc { - fn distance(from: &HVector, to: &HVector) -> Result<f64, VectorError>; -} -impl<'a> DistanceCalc for HVector<'a> { - /// Calculates the distance between two vectors. - /// - /// It normalizes the distance to be between 0 and 2.
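The normalization described here is `distance = 1.0 - cosine_similarity`, folding similarity in [-1, 1] onto distance in [0, 2]; that is why `MAX_DISTANCE` above is 2.0 and why `get_distance`/`score` default to 2.0 when no distance has been set. A worked check of the mapping:

```rust
// distance = 1 - cosine similarity, per the doc comment above.
fn to_distance(cosine_similarity: f64) -> f64 {
    1.0 - cosine_similarity
}

fn main() {
    assert_eq!(to_distance(1.0), 0.0); // identical direction: MIN_DISTANCE
    assert_eq!(to_distance(0.0), 1.0); // orthogonal: ORTHOGONAL
    assert_eq!(to_distance(-1.0), 2.0); // opposite direction: MAX_DISTANCE
}
```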
- /// - /// - 1.0 (most similar) β†’ Distance 0.0 (closest) - /// - 0.0 (orthogonal) β†’ Distance 1.0 - /// - -1.0 (most dissimilar) β†’ Distance 2.0 (furthest) - #[inline(always)] - #[cfg(feature = "cosine")] - fn distance(from: &HVector, to: &HVector) -> Result { - cosine_similarity(from.data, to.data).map(|sim| 1.0 - sim) - } -} - -#[inline] -#[cfg(feature = "cosine")] -pub fn cosine_similarity(from: &[f64], to: &[f64]) -> Result { - let len = from.len(); - let other_len = to.len(); - - if len != other_len { - println!("mis-match in vector dimensions!\n{len} != {other_len}"); - return Err(VectorError::InvalidVectorLength); - } - //debug_assert_eq!(len, other.data.len(), "Vectors must have the same length"); - - #[cfg(target_feature = "avx2")] - { - return cosine_similarity_avx2(from, to); - } - - let mut dot_product = 0.0; - let mut magnitude_a = 0.0; - let mut magnitude_b = 0.0; - - const CHUNK_SIZE: usize = 8; - let chunks = len / CHUNK_SIZE; - let remainder = len % CHUNK_SIZE; - - for i in 0..chunks { - let offset = i * CHUNK_SIZE; - let a_chunk = &from[offset..offset + CHUNK_SIZE]; - let b_chunk = &to[offset..offset + CHUNK_SIZE]; - - let mut local_dot = 0.0; - let mut local_mag_a = 0.0; - let mut local_mag_b = 0.0; - - for j in 0..CHUNK_SIZE { - let a_val = a_chunk[j]; - let b_val = b_chunk[j]; - local_dot += a_val * b_val; - local_mag_a += a_val * a_val; - local_mag_b += b_val * b_val; - } - - dot_product += local_dot; - magnitude_a += local_mag_a; - magnitude_b += local_mag_b; - } - - let remainder_offset = chunks * CHUNK_SIZE; - for i in 0..remainder { - let a_val = from[remainder_offset + i]; - let b_val = to[remainder_offset + i]; - dot_product += a_val * b_val; - magnitude_a += a_val * a_val; - magnitude_b += b_val * b_val; - } - - if magnitude_a.abs() == 0.0 || magnitude_b.abs() == 0.0 { - return Ok(-1.0); - } - - Ok(dot_product / (magnitude_a.sqrt() * magnitude_b.sqrt())) -} - -// SIMD implementation using AVX2 (256-bit vectors) -#[cfg(target_feature = "avx2")] -#[inline(always)] -pub fn cosine_similarity_avx2(a: &[f64], b: &[f64]) -> f64 { - use std::arch::x86_64::*; - - let len = a.len(); - let chunks = len / 4; // AVX2 processes 4 f64 values at once - - unsafe { - let mut dot_product = _mm256_setzero_pd(); - let mut magnitude_a = _mm256_setzero_pd(); - let mut magnitude_b = _mm256_setzero_pd(); - - for i in 0..chunks { - let offset = i * 4; - - // Load data - handle unaligned data - let a_chunk = _mm256_loadu_pd(&a[offset]); - let b_chunk = _mm256_loadu_pd(&b[offset]); - - // Calculate dot product and magnitudes in parallel - dot_product = _mm256_add_pd(dot_product, _mm256_mul_pd(a_chunk, b_chunk)); - magnitude_a = _mm256_add_pd(magnitude_a, _mm256_mul_pd(a_chunk, a_chunk)); - magnitude_b = _mm256_add_pd(magnitude_b, _mm256_mul_pd(b_chunk, b_chunk)); - } - - // Horizontal sum of 4 doubles in each vector - let dot_sum = horizontal_sum_pd(dot_product); - let mag_a_sum = horizontal_sum_pd(magnitude_a); - let mag_b_sum = horizontal_sum_pd(magnitude_b); - - // Handle remainder elements - let mut dot_remainder = 0.0; - let mut mag_a_remainder = 0.0; - let mut mag_b_remainder = 0.0; - - let remainder_offset = chunks * 4; - for i in remainder_offset..len { - let a_val = a[i]; - let b_val = b[i]; - dot_remainder += a_val * b_val; - mag_a_remainder += a_val * a_val; - mag_b_remainder += b_val * b_val; - } - - // Combine SIMD and scalar results - let dot_product_total = dot_sum + dot_remainder; - let magnitude_a_total = (mag_a_sum + mag_a_remainder).sqrt(); - let magnitude_b_total 
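The 8-wide chunk/remainder split above is a manual unroll of the textbook formula `dot(a, b) / (|a| * |b|)`; any refactor of it, including the AVX2 path, should agree with a naive reference up to float rounding. One such reference, purely illustrative and not part of this diff:

```rust
// Naive cosine similarity, usable as an oracle for the chunked version above.
fn cosine_naive(a: &[f64], b: &[f64]) -> f64 {
    let dot: f64 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let mag_a = a.iter().map(|x| x * x).sum::<f64>().sqrt();
    let mag_b = b.iter().map(|x| x * x).sum::<f64>().sqrt();
    dot / (mag_a * mag_b)
}

fn main() {
    let a = [1.0, 2.0, 3.0];
    let b = [2.0, 4.0, 6.0]; // same direction, so similarity is exactly 1.0
    assert!((cosine_naive(&a, &b) - 1.0).abs() < 1e-12);
}
```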
= (mag_b_sum + mag_b_remainder).sqrt(); - - dot_product_total / (magnitude_a_total * magnitude_b_total) - } -} - -// Helper function to sum the 4 doubles in an AVX2 vector -#[cfg(target_feature = "avx2")] -#[inline(always)] -unsafe fn horizontal_sum_pd(__v: __m256d) -> f64 { - use std::arch::x86_64::*; - - // Extract the high 128 bits and add to the low 128 bits - let sum_hi_lo = _mm_add_pd(_mm256_castpd256_pd128(__v), _mm256_extractf128_pd(__v, 1)); - - // Add the high 64 bits to the low 64 bits - let sum = _mm_add_sd(sum_hi_lo, _mm_unpackhi_pd(sum_hi_lo, sum_hi_lo)); - - // Extract the low 64 bits as a scalar - _mm_cvtsd_f64(sum) -} diff --git a/helix-db/src/helix_engine/vector_core/vector_without_data.rs b/helix-db/src/helix_engine/vector_core/vector_without_data.rs deleted file mode 100644 index 8d756094..00000000 --- a/helix-db/src/helix_engine/vector_core/vector_without_data.rs +++ /dev/null @@ -1,153 +0,0 @@ -use crate::{ - helix_engine::types::VectorError, - protocol::{custom_serde::vector_serde::VectoWithoutDataDeSeed, value::Value}, - utils::{id::uuid_str_from_buf, properties::ImmutablePropertiesMap}, -}; -use bincode::Options; -use core::fmt; -use serde::{Serialize, ser::SerializeMap}; -use std::fmt::Debug; -// TODO: make this generic over the type of encoding (f32, f64, etc) -// TODO: use const param to set dimension -// TODO: set level as u8 - -#[repr(C, align(16))] -#[derive(Clone, Copy)] -pub struct VectorWithoutData<'arena> { - /// The id of the HVector - pub id: u128, - /// The label of the HVector - pub label: &'arena str, - /// the version of the vector - pub version: u8, - /// whether the vector is deleted - pub deleted: bool, - /// The level of the HVector - pub level: usize, - - /// The properties of the HVector - pub properties: Option>, -} - -// Custom Serialize implementation to conditionally include id field -// For JSON serialization, the id field is included, but for bincode it is skipped -impl<'arena> Serialize for VectorWithoutData<'arena> { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeStruct; - - // Check if this is a human-readable format (like JSON) - if serializer.is_human_readable() { - // Include id for JSON serialization - let mut buffer = [0u8; 36]; - let mut state = serializer.serialize_map(Some( - 6 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0), - ))?; - state.serialize_entry("id", uuid_str_from_buf(self.id, &mut buffer))?; - state.serialize_entry("label", self.label)?; - state.serialize_entry("version", &self.version)?; - state.serialize_entry("deleted", &self.deleted)?; - state.serialize_entry("level", &self.level)?; - if let Some(properties) = &self.properties { - for (key, value) in properties.iter() { - state.serialize_entry(key, value)?; - } - } - state.end() - } else { - // Skip id for bincode serialization - let mut state = serializer.serialize_struct("VectorWithoutData", 5)?; - state.serialize_field("label", self.label)?; - state.serialize_field("version", &self.version)?; - state.serialize_field("deleted", &self.deleted)?; - state.serialize_field("level", &self.level)?; - state.serialize_field("properties", &self.properties)?; - state.end() - } - } -} - -impl Debug for VectorWithoutData<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{{ \nid: {},\nlevel: {} }}", - uuid::Uuid::from_u128(self.id), - self.level, - ) - } -} - -impl<'arena> VectorWithoutData<'arena> { - #[inline(always)] - pub fn from_properties( - id: u128, - label: 
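The `Serialize` impl above keys its behavior off `Serializer::is_human_readable()`: JSON output carries the id (rendered as a UUID) while bincode omits it, since the id already travels as the LMDB key. A minimal self-contained illustration of that serde pattern on a hypothetical type:

```rust
use serde::ser::{Serialize, SerializeStruct, Serializer};

struct Row {
    id: u128,
    level: usize,
}

impl Serialize for Row {
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        if serializer.is_human_readable() {
            // JSON-like formats: include the id for consumers.
            let mut s = serializer.serialize_struct("Row", 2)?;
            s.serialize_field("id", &self.id.to_string())?;
            s.serialize_field("level", &self.level)?;
            s.end()
        } else {
            // Compact formats such as bincode: the id is already the db key.
            let mut s = serializer.serialize_struct("Row", 1)?;
            s.serialize_field("level", &self.level)?;
            s.end()
        }
    }
}
```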
&'arena str, - level: usize, - properties: ImmutablePropertiesMap<'arena>, - ) -> Self { - VectorWithoutData { - id, - label, - version: 1, - level, - properties: Some(properties), - deleted: false, - } - } - - pub fn from_bincode_bytes<'txn>( - arena: &'arena bumpalo::Bump, - properties: &'txn [u8], - id: u128, - ) -> Result { - bincode::options() - .with_fixint_encoding() - .allow_trailing_bytes() - .deserialize_seed(VectoWithoutDataDeSeed { arena, id }, properties) - .map_err(|e| VectorError::ConversionError(format!("Error deserializing vector: {e}"))) - } - - #[inline(always)] - pub fn to_bincode_bytes(&self) -> Result, bincode::Error> { - bincode::serialize(self) - } - /// Returns the id of the HVector - #[inline(always)] - pub fn get_id(&self) -> u128 { - self.id - } - - /// Returns the level of the HVector - #[inline(always)] - pub fn get_level(&self) -> usize { - self.level - } - - #[inline(always)] - pub fn get_label(&self) -> &'arena str { - self.label - } - - #[inline(always)] - pub fn get_property(&self, key: &str) -> Option<&'arena Value> { - self.properties.as_ref().and_then(|value| value.get(key)) - } - - pub fn id(&self) -> &u128 { - &self.id - } - - pub fn label(&self) -> &'arena str { - self.label - } -} - -impl PartialEq for VectorWithoutData<'_> { - fn eq(&self, other: &Self) -> bool { - self.id == other.id - } -} -impl Eq for VectorWithoutData<'_> {} diff --git a/helix-db/src/helix_engine/vector_core/version.rs b/helix-db/src/helix_engine/vector_core/version.rs new file mode 100644 index 00000000..322d44d1 --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/version.rs @@ -0,0 +1,90 @@ +use std::mem::size_of; +use std::{borrow::Cow, fmt}; + +use byteorder::{BigEndian, ByteOrder}; +use heed3::BoxedError; + +#[derive(Debug, Clone, Copy)] +pub struct Version { + pub major: u32, + pub minor: u32, + pub patch: u32, +} + +impl fmt::Display for Version { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "v{}.{}.{}", self.major, self.minor, self.patch) + } +} + +impl Version { + pub fn current() -> Self { + Version { + major: env!("CARGO_PKG_VERSION_MAJOR").parse().unwrap(), + minor: env!("CARGO_PKG_VERSION_MINOR").parse().unwrap(), + patch: env!("CARGO_PKG_VERSION_PATCH").parse().unwrap(), + } + } +} + +pub enum VersionCodec {} + +impl<'a> heed3::BytesEncode<'a> for VersionCodec { + type EItem = Version; + + fn bytes_encode(item: &'a Self::EItem) -> Result, BoxedError> { + let Version { + major, + minor, + patch, + } = item; + + let mut output = Vec::with_capacity(size_of::() * 3); + output.extend_from_slice(&major.to_be_bytes()); + output.extend_from_slice(&minor.to_be_bytes()); + output.extend_from_slice(&patch.to_be_bytes()); + + Ok(Cow::Owned(output)) + } +} + +impl heed3::BytesDecode<'_> for VersionCodec { + type DItem = Version; + + fn bytes_decode(bytes: &'_ [u8]) -> Result { + let major = BigEndian::read_u32(bytes); + let bytes = &bytes[size_of_val(&major)..]; + let minor = BigEndian::read_u32(bytes); + let bytes = &bytes[size_of_val(&minor)..]; + let patch = BigEndian::read_u32(bytes); + + Ok(Version { + major, + minor, + patch, + }) + } +} + +#[cfg(test)] +mod test { + use heed3::{BytesDecode, BytesEncode}; + + use super::*; + + #[test] + fn version_codec() { + let version = Version { + major: 0, + minor: 10, + patch: 100, + }; + + let encoded = VersionCodec::bytes_encode(&version).unwrap(); + let decoded = VersionCodec::bytes_decode(&encoded).unwrap(); + + assert_eq!(version.major, decoded.major); + assert_eq!(version.minor, 
decoded.minor); + assert_eq!(version.patch, decoded.patch); + } +} diff --git a/helix-db/src/helix_engine/vector_core/writer.rs b/helix-db/src/helix_engine/vector_core/writer.rs new file mode 100644 index 00000000..36a7572d --- /dev/null +++ b/helix-db/src/helix_engine/vector_core/writer.rs @@ -0,0 +1,377 @@ +use std::path::PathBuf; + +use heed3::{ + RoTxn, RwTxn, + types::{DecodeIgnore, Unit}, +}; +use rand::{Rng, SeedableRng}; +use roaring::RoaringBitmap; + +use crate::helix_engine::vector_core::{ + CoreDatabase, ItemId, VectorCoreResult, VectorError, + distance::Distance, + hnsw::HnswBuilder, + item_iter::ItemIter, + key::{Key, KeyCodec, Prefix, PrefixCodec}, + metadata::{Metadata, MetadataCodec}, + node::{Item, ItemIds, Links, Node}, + parallel::{ImmutableItems, ImmutableLinks}, + unaligned_vector::UnalignedVector, + version::{Version, VersionCodec}, +}; + +pub struct VectorBuilder<'a, D: Distance, R: Rng + SeedableRng> { + writer: &'a Writer, + rng: &'a mut R, + inner: BuildOption, +} + +pub struct BuildOption { + pub ef_construction: usize, + pub alpha: f32, + pub available_memory: Option, + pub m: usize, + pub m_max_0: usize, +} + +impl BuildOption { + fn default() -> Self { + Self { + ef_construction: 100, + alpha: 1.0, + available_memory: None, + m: 16, + m_max_0: 32, + } + } +} + +impl<'a, D: Distance, R: Rng + SeedableRng> VectorBuilder<'a, D, R> { + /// Controls the search range when inserting a new item into the graph. + /// + /// Typical values range from 50 to 500, with larger `ef_construction` producing higher + /// quality hnsw graphs at the expense of longer builds. The default value used in hannoy is + /// 100. + pub fn ef_construction(&mut self, ef_construction: usize) -> &mut Self { + self.inner.ef_construction = ef_construction; + self + } + + /// Tunable hyperparameter for the graph building process. Alpha decreases the tolerance for + /// link creation during index time. Alpha = 1 is the normal HNSW build while alpha > 1 is + /// more similar to DiskANN. Increasing alpha increases indexing times as more neighbours are + /// considered per linking step, but results in higher recall. + /// + /// DiskANN authors suggest using alpha=1.1 or alpha=1.2. By default alpha=1.0. + pub fn alpha(&mut self, alpha: f32) -> &mut Self { + self.inner.alpha = alpha; + self + } + + /// Generates an HNSW graph with max `M` links per node in layers > 0 and max `M0` links in layer 0. + /// + /// A general rule of thumb is to take `M0`= 2*`M`, with `M` >=3. Some common choices for + /// `M` include : 8, 12, 16, 32. Note that increasing `M` produces a denser graph at the cost + /// of longer build times. + pub fn build(&mut self, wtxn: &mut RwTxn) -> VectorCoreResult<()> { + self.writer.build::(wtxn, self.rng, &self.inner) + } +} + +/// A writer to store new items, remove existing ones, and build the search +/// index to query the nearest neighbors to items or vectors. +#[derive(Debug)] +pub struct Writer { + database: CoreDatabase, + index: u16, + dimensions: usize, + /// The folder in which tempfile will write its temporary files. + tmpdir: Option, +} + +impl Writer { + /// Creates a new writer from a database, index and dimensions. + pub fn new(database: CoreDatabase, index: u16, dimensions: usize) -> Writer { + Writer { + database, + index, + dimensions, + tmpdir: None, + } + } + + /// Sets the path to the temporary directory where files are written. + pub fn set_tmpdir(&mut self, path: impl Into) { + self.tmpdir = Some(path.into()); + } + + /// Returns `true` if the index is empty. 
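Read together, the builder methods above imply a write path of roughly the following shape. Everything here is a sketch under assumptions: an already-open heed3 environment, a `Distance` impl named `Cosine`, `StdRng` as the seedable RNG, and `build::<M, M0>` taking the link counts as const generics per the doc comment; none of this is lifted from the codebase.

```rust
use rand::{SeedableRng, rngs::StdRng};

// Assumed context: `db` comes from an open heed3 Env, vectors are 768-d,
// and index 0 is this writer's namespace.
fn index_batch(db: CoreDatabase<Cosine>, wtxn: &mut heed3::RwTxn) -> VectorCoreResult<()> {
    let writer = Writer::<Cosine>::new(db, 0, 768);

    writer.add_item(wtxn, 1, &vec![0.1; 768])?; // queued under an "updated" key
    writer.del_item(wtxn, 2)?; // removals are queued the same way

    let mut rng = StdRng::seed_from_u64(42);
    writer
        .builder(&mut rng)
        .ef_construction(100) // the default; larger = better graph, slower build
        .alpha(1.1) // DiskANN-style link tolerance, per the doc comment above
        .build::<16, 32>(wtxn)?; // M = 16 links per node, M0 = 2 * M on layer 0

    Ok(())
}
```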
+ pub fn is_empty(&self, rtxn: &RoTxn) -> VectorCoreResult { + self.iter(rtxn).map(|mut iter| iter.next().is_none()) + } + + /// Returns `true` if the index needs to be built before being able to read in it. + pub fn need_build(&self, rtxn: &RoTxn) -> VectorCoreResult { + Ok(self + .database + .remap_types::() + .prefix_iter(rtxn, &Prefix::updated(self.index))? + .remap_key_type::() + .next() + .is_some() + || self + .database + .remap_data_type::() + .get(rtxn, &Key::metadata(self.index))? + .is_none()) + } + + /// Returns `true` if the database contains the given item. + pub fn contains_item(&self, rtxn: &RoTxn, item: ItemId) -> VectorCoreResult { + self.database + .remap_data_type::() + .get(rtxn, &Key::item(self.index, item)) + .map(|opt| opt.is_some()) + .map_err(Into::into) + } + + /// Returns an iterator over the items vector. + pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> VectorCoreResult> { + Ok(ItemIter::new( + self.database, + self.index, + self.dimensions, + rtxn, + )?) + } + + /// Add an item associated to a vector in the database. + pub fn add_item(&self, wtxn: &mut RwTxn, item: ItemId, vector: &[f32]) -> VectorCoreResult<()> { + if vector.len() != self.dimensions { + return Err(VectorError::InvalidVecDimension { + expected: self.dimensions, + received: vector.len(), + }); + } + + let vector = UnalignedVector::from_slice(vector); + let db_item = Item { + header: D::new_header(&vector), + vector, + }; + self.database + .put(wtxn, &Key::item(self.index, item), &Node::Item(db_item))?; + self.database + .remap_data_type::() + .put(wtxn, &Key::updated(self.index, item), &())?; + + Ok(()) + } + + /// Deletes an item stored in this database and returns `true` if it existed. + pub fn del_item(&self, wtxn: &mut RwTxn, item: ItemId) -> VectorCoreResult { + if self.database.delete(wtxn, &Key::item(self.index, item))? { + self.database.remap_data_type::().put( + wtxn, + &Key::updated(self.index, item), + &(), + )?; + + Ok(true) + } else { + Ok(false) + } + } + + /// Removes everything in the database, user items and internal graph links. + pub fn clear(&self, wtxn: &mut RwTxn) -> VectorCoreResult<()> { + let mut cursor = self + .database + .remap_key_type::() + .prefix_iter_mut(wtxn, &Prefix::all(self.index))? + .remap_types::(); + + while let Some((_id, _node)) = cursor.next().transpose()? 
{ + // SAFETY: Safe because we don't keep any references to the entry + unsafe { cursor.del_current() }?; + } + + Ok(()) + } + + pub fn builder<'a, R>(&'a self, rng: &'a mut R) -> VectorBuilder<'a, D, R> + where + R: Rng + SeedableRng, + { + VectorBuilder { + writer: self, + rng, + inner: BuildOption::default(), + } + } + + fn build(&self, wtxn: &mut RwTxn, rng: &mut R, options: &BuildOption) -> VectorCoreResult<()> + where + R: Rng + SeedableRng, + { + let item_indices = self.item_indices(wtxn)?; + // updated items can be an update, an addition or a removed item + let updated_items = self.reset_and_retrieve_updated_items(wtxn)?; + + let to_delete = updated_items.clone() - &item_indices; + let to_insert = &item_indices & &updated_items; + + let metadata = self + .database + .remap_data_type::() + .get(wtxn, &Key::metadata(self.index))?; + + let (entry_points, max_level) = metadata.as_ref().map_or_else( + || (Vec::new(), usize::MIN), + |metadata| { + ( + metadata.entry_points.iter().collect(), + metadata.max_level as usize, + ) + }, + ); + + // we should not keep a reference to the metadata since they're going to be moved by LMDB + drop(metadata); + + let mut hnsw = HnswBuilder::::new(options) + .with_entry_points(entry_points) + .with_max_level(max_level); + + let _ = hnsw.build(to_insert, &to_delete, self.database, self.index, wtxn, rng)?; + + // Remove deleted links from lmdb AFTER build; in DiskANN we use a deleted item's + // neighbours when filling in the "gaps" left in the graph from deletions. See + // [`HnswBuilder::maybe_patch_old_links`] for more details. + self.delete_links_from_db(to_delete, wtxn)?; + + let metadata = Metadata { + dimensions: self.dimensions.try_into().unwrap(), + items: item_indices, + entry_points: ItemIds::from_slice(&hnsw.entry_points), + max_level: hnsw.max_level as u8, + distance: D::name(), + }; + self.database.remap_data_type::().put( + wtxn, + &Key::metadata(self.index), + &metadata, + )?; + self.database.remap_data_type::().put( + wtxn, + &Key::version(self.index), + &Version::current(), + )?; + + Ok(()) + } + + fn reset_and_retrieve_updated_items( + &self, + wtxn: &mut RwTxn, + ) -> VectorCoreResult { + let mut updated_items = RoaringBitmap::new(); + let mut updated_iter = self + .database + .remap_types::() + .prefix_iter_mut(wtxn, &Prefix::updated(self.index))? + .remap_key_type::(); + + while let Some((key, _)) = updated_iter.next().transpose()? { + let inserted = updated_items.insert(key.node.item); + debug_assert!(inserted, "The keys should be sorted by LMDB"); + // SAFETY: Safe because we don't hold any reference to the database currently + unsafe { updated_iter.del_current()? }; + } + Ok(updated_items) + } + + // Fetches the item's ids, not the links. + fn item_indices(&self, wtxn: &mut RwTxn) -> VectorCoreResult { + let mut indices = RoaringBitmap::new(); + for result in self + .database + .remap_types::() + .prefix_iter(wtxn, &Prefix::item(self.index))? + .remap_key_type::() + { + let (i, _) = result?; + indices.insert(i.node.unwrap_item()); + } + + Ok(indices) + } + + // Iterates over links in lmdb and deletes those in `to_delete`. There can be several links + // with the same NodeId.item, each differing by their layer + fn delete_links_from_db( + &self, + to_delete: RoaringBitmap, + wtxn: &mut RwTxn, + ) -> VectorCoreResult<()> { + let mut cursor = self + .database + .remap_key_type::() + .prefix_iter_mut(wtxn, &Prefix::links(self.index))? + .remap_types::(); + + while let Some((key, _)) = cursor.next().transpose()? 
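The two bitmap expressions in `build` above carry the whole update protocol: ids marked updated but no longer stored are deletions, and ids both updated and still stored are (re)insertions. A concrete check of that set arithmetic with `roaring` (the values are illustrative):

```rust
use roaring::RoaringBitmap;

fn main() {
    let item_indices = RoaringBitmap::from_iter([1u32, 2, 3]); // currently stored
    let updated_items = RoaringBitmap::from_iter([2u32, 3, 4]); // touched since last build

    let to_delete = updated_items.clone() - &item_indices; // updated, no longer stored
    let to_insert = &item_indices & &updated_items; // updated and still present

    assert_eq!(to_delete.into_iter().collect::<Vec<_>>(), vec![4]);
    assert_eq!(to_insert.into_iter().collect::<Vec<_>>(), vec![2, 3]);
}
```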
{ + if to_delete.contains(key.node.item) { + // SAFETY: Safe because we don't keep any references to the entry + unsafe { cursor.del_current() }?; + } + } + + Ok(()) + } +} + +#[derive(Clone)] +pub(crate) struct FrozenReader<'a, D: Distance> { + pub index: u16, + pub items: &'a ImmutableItems<'a, D>, + pub links: &'a ImmutableLinks<'a, D>, +} + +impl<'a, D: Distance> FrozenReader<'a, D> { + pub fn get_item(&self, item_id: ItemId) -> VectorCoreResult> { + let key = Key::item(self.index, item_id); + // key is a `Key::item` so returned result must be a Node::Item + self.items + .get(item_id)? + .ok_or(VectorError::missing_key(key)) + } + + pub fn get_links(&self, item_id: ItemId, level: usize) -> VectorCoreResult> { + let key = Key::links(self.index, item_id, level as u8); + // key is a `Key::item` so returned result must be a Node::Item + self.links + .get(item_id, level as u8)? + .ok_or(VectorError::missing_key(key)) + } +} + +/// Clears all the links. Starts from the last node and stops at the first item. +#[allow(dead_code)] +fn clear_links( + wtxn: &mut RwTxn, + database: CoreDatabase, + index: u16, +) -> VectorCoreResult<()> { + let mut cursor = database + .remap_types::() + .prefix_iter_mut(wtxn, &Prefix::links(index))? + .remap_key_type::(); + + while let Some((_id, _node)) = cursor.next().transpose()? { + // SAFETY: Safe because we don't keep any references to the entry + unsafe { cursor.del_current()? }; + } + + Ok(()) +} diff --git a/helix-db/src/helix_gateway/builtin/all_nodes_and_edges.rs b/helix-db/src/helix_gateway/builtin/all_nodes_and_edges.rs index 8a67d7ae..09b18a1c 100644 --- a/helix-db/src/helix_gateway/builtin/all_nodes_and_edges.rs +++ b/helix-db/src/helix_gateway/builtin/all_nodes_and_edges.rs @@ -98,7 +98,7 @@ pub fn nodes_edges_inner(input: HandlerInput) -> Result = vecs .iter() @@ -108,7 +108,7 @@ pub fn nodes_edges_inner(input: HandlerInput) -> Result Result, GraphError>; - async fn fetch_embedding_async(&self, text: &str) -> Result, GraphError>; + fn fetch_embedding(&self, text: &str) -> Result, GraphError>; + async fn fetch_embedding_async(&self, text: &str) -> Result, GraphError>; } #[derive(Debug, Clone)] @@ -111,12 +111,12 @@ impl EmbeddingModelImpl { impl EmbeddingModel for EmbeddingModelImpl { /// Must be called with an active tokio context - fn fetch_embedding(&self, text: &str) -> Result, GraphError> { + fn fetch_embedding(&self, text: &str) -> Result, GraphError> { let handle = tokio::runtime::Handle::current(); handle.block_on(self.fetch_embedding_async(text)) } - async fn fetch_embedding_async(&self, text: &str) -> Result, GraphError> { + async fn fetch_embedding_async(&self, text: &str) -> Result, GraphError> { match &self.provider { EmbeddingProvider::OpenAI => { let api_key = self @@ -151,8 +151,9 @@ impl EmbeddingModel for EmbeddingModelImpl { .map(|v| { v.as_f64() .ok_or_else(|| GraphError::from("Invalid float value")) + .map(|f| f as f32) }) - .collect::, GraphError>>()?; + .collect::, GraphError>>()?; Ok(embedding) } @@ -198,8 +199,9 @@ impl EmbeddingModel for EmbeddingModelImpl { .map(|v| { v.as_f64() .ok_or_else(|| GraphError::from("Invalid float value")) + .map(|f| f as f32) }) - .collect::, GraphError>>()?; + .collect::, GraphError>>()?; Ok(embedding) } @@ -237,8 +239,9 @@ impl EmbeddingModel for EmbeddingModelImpl { .map(|v| { v.as_f64() .ok_or_else(|| GraphError::from("Invalid float value")) + .map(|f| f as f32) }) - .collect::, GraphError>>()?; + .collect::, GraphError>>()?; Ok(embedding) } diff --git 
a/helix-db/src/helix_gateway/introspect_schema.rs b/helix-db/src/helix_gateway/introspect_schema.rs index 8eff78e6..ad52a2e3 100644 --- a/helix-db/src/helix_gateway/introspect_schema.rs +++ b/helix-db/src/helix_gateway/introspect_schema.rs @@ -18,4 +18,3 @@ pub async fn introspect_schema_handler( _ => (StatusCode::INTERNAL_SERVER_ERROR, "Could not find schema").into_response(), } } - diff --git a/helix-db/src/helix_gateway/mcp/mcp.rs b/helix-db/src/helix_gateway/mcp/mcp.rs index 239a8c18..be63687f 100644 --- a/helix-db/src/helix_gateway/mcp/mcp.rs +++ b/helix-db/src/helix_gateway/mcp/mcp.rs @@ -964,13 +964,13 @@ pub fn search_vector_text(input: &mut MCPToolInput) -> Result bool, _>( + .search_v:: bool, _>( query_vec_arena, k_value, label_arena, - None + None, ) - .collect::,_>>()?; + .collect::, _>>()?; tracing::debug!("[VECTOR_SEARCH] Search returned {} results", results.len()); @@ -1012,9 +1012,9 @@ pub fn search_vector_text(input: &mut MCPToolInput) -> Result, + pub vector: Vec, pub k: usize, - pub min_score: Option, + pub min_score: Option, } #[derive(Debug, Deserialize)] diff --git a/helix-db/src/helix_gateway/mcp/tools.rs b/helix-db/src/helix_gateway/mcp/tools.rs index d130a685..9d5431c2 100644 --- a/helix-db/src/helix_gateway/mcp/tools.rs +++ b/helix-db/src/helix_gateway/mcp/tools.rs @@ -75,10 +75,10 @@ pub enum ToolArgs { k: usize, }, SearchVec { - vector: Vec, + vector: Vec, k: usize, - min_score: Option, cutoff: Option, + min_score: Option, }, } diff --git a/helix-db/src/helix_gateway/router/router.rs b/helix-db/src/helix_gateway/router/router.rs index 7220118b..fc70d33a 100644 --- a/helix-db/src/helix_gateway/router/router.rs +++ b/helix-db/src/helix_gateway/router/router.rs @@ -47,8 +47,6 @@ impl Debug for IoContFn { } } - - // basic type for function pointer pub type BasicHandlerFn = fn(HandlerInput) -> Result; diff --git a/helix-db/src/helix_gateway/tests/gateway_tests.rs b/helix-db/src/helix_gateway/tests/gateway_tests.rs index e168fcfa..f49581bf 100644 --- a/helix-db/src/helix_gateway/tests/gateway_tests.rs +++ b/helix-db/src/helix_gateway/tests/gateway_tests.rs @@ -337,9 +337,10 @@ fn test_gateway_opts_default_workers_per_core() { #[cfg(feature = "api-key")] mod api_key_tests { + use crate::helix_gateway::key_verification::verify_key; - use crate::protocol::request::Request; - use crate::protocol::{Format, HelixError}; + use crate::protocol::Format; + use crate::protocol::{HelixError, request::Request}; use axum::body::Bytes; #[test] diff --git a/helix-db/src/helix_gateway/tests/mcp_tests.rs b/helix-db/src/helix_gateway/tests/mcp_tests.rs index 27a65b68..4bd50cda 100644 --- a/helix-db/src/helix_gateway/tests/mcp_tests.rs +++ b/helix-db/src/helix_gateway/tests/mcp_tests.rs @@ -340,7 +340,7 @@ mod mcp_tests { let response = out_step(&mut input).unwrap(); let body = String::from_utf8(response.body.clone()).unwrap(); - assert!(body.contains(&uuid_str(person2.id(), &arena))); + assert!(body.contains(uuid_str(person2.id(), &arena))); } #[test] @@ -906,7 +906,7 @@ mod mcp_tests { .unwrap(); let results = stream.collect().unwrap(); - assert!(results.len() > 0); + assert!(!results.is_empty()); } #[test] @@ -1141,7 +1141,7 @@ mod mcp_tests { .unwrap(); let results = stream.collect().unwrap(); - assert!(results.len() > 0); + assert!(!results.is_empty()); } #[test] diff --git a/helix-db/src/helix_gateway/tests/mod.rs b/helix-db/src/helix_gateway/tests/mod.rs index 4c6449d4..c4296573 100644 --- a/helix-db/src/helix_gateway/tests/mod.rs +++ b/helix-db/src/helix_gateway/tests/mod.rs @@ 
-3,5 +3,5 @@ pub mod gateway_tests; pub mod introspect_schema_tests; pub mod mcp_tests; pub mod router_tests; -pub mod worker_pool_tests; pub mod worker_pool_concurrency_tests; +pub mod worker_pool_tests; diff --git a/helix-db/src/helixc/analyzer/diagnostic.rs b/helix-db/src/helixc/analyzer/diagnostic.rs index d3a33f67..86dc0da9 100644 --- a/helix-db/src/helixc/analyzer/diagnostic.rs +++ b/helix-db/src/helixc/analyzer/diagnostic.rs @@ -1,9 +1,5 @@ use crate::helixc::{ - analyzer::{ - error_codes::ErrorCode, - fix::Fix, - pretty, - }, + analyzer::{error_codes::ErrorCode, fix::Fix, pretty}, parser::location::Loc, }; diff --git a/helix-db/src/helixc/analyzer/error_codes.rs b/helix-db/src/helixc/analyzer/error_codes.rs index 27e157d2..1b8ab787 100644 --- a/helix-db/src/helixc/analyzer/error_codes.rs +++ b/helix-db/src/helixc/analyzer/error_codes.rs @@ -1,5 +1,5 @@ use paste::paste; -use std::fmt::{Debug}; +use std::fmt::Debug; #[allow(dead_code)] #[derive(Debug, Clone, PartialEq)] @@ -127,7 +127,6 @@ pub enum ErrorCode { /// `E653` - `inner type of in variable is not an object` E653, - /// `W101` - `query has no return` W101, } diff --git a/helix-db/src/helixc/analyzer/methods/exclude_validation.rs b/helix-db/src/helixc/analyzer/methods/exclude_validation.rs index b0f63bfe..0297c135 100644 --- a/helix-db/src/helixc/analyzer/methods/exclude_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/exclude_validation.rs @@ -8,7 +8,7 @@ use crate::{ fix::Fix, types::Type, }, - parser::{types::*, location::Loc}, + parser::{location::Loc, types::*}, }, }; use paste::paste; @@ -127,7 +127,13 @@ pub(crate) fn validate_exclude<'a>( validate_exclude(ctx, ty, tr, ex, excluded, original_query); } _ => { - generate_error!(ctx, original_query, ex.fields[0].0.clone(), E203, cur_ty.kind_str()); + generate_error!( + ctx, + original_query, + ex.fields[0].0.clone(), + E203, + cur_ty.kind_str() + ); } } } @@ -135,7 +141,7 @@ pub(crate) fn validate_exclude<'a>( #[cfg(test)] mod tests { use crate::helixc::analyzer::error_codes::ErrorCode; - use crate::helixc::parser::{write_to_temp_file, HelixParser}; + use crate::helixc::parser::{HelixParser, write_to_temp_file}; // ============================================================================ // Field Exclusion Tests diff --git a/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs b/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs index 8b5f74e2..5b86da31 100644 --- a/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/graph_step_validation.rs @@ -13,12 +13,13 @@ use crate::{ utils::{gen_identifier_or_param, is_valid_identifier}, }, generator::{ - math_functions::{generate_math_expr, ExpressionContext}, + math_functions::{ExpressionContext, generate_math_expr}, queries::Query as GeneratedQuery, traversal_steps::{ FromV as GeneratedFromV, In as GeneratedIn, InE as GeneratedInE, Out as GeneratedOut, OutE as GeneratedOutE, SearchVectorStep, - ShortestPath as GeneratedShortestPath, ShortestPathAStar as GeneratedShortestPathAStar, + ShortestPath as GeneratedShortestPath, + ShortestPathAStar as GeneratedShortestPathAStar, ShortestPathBFS as GeneratedShortestPathBFS, ShortestPathDijkstras as GeneratedShortestPathDijkstras, ShouldCollect, Step as GeneratedStep, ToV as GeneratedToV, Traversal as GeneratedTraversal, @@ -388,7 +389,13 @@ pub(crate) fn apply_graph_step<'a>( algorithm, }, (None, None) => { - generate_error!(ctx, original_query, sp.loc.clone(), E627, "ShortestPath"); + generate_error!( + 
ctx, + original_query, + sp.loc.clone(), + E627, + "ShortestPath" + ); return None; } }, @@ -407,9 +414,7 @@ pub(crate) fn apply_graph_step<'a>( Some(WeightExpression::Expression(expr)) => { // Generate Rust code for the math expression match generate_math_expr(expr, ExpressionContext::WeightCalculation) { - Ok(math_expr) => { - WeightCalculation::Expression(format!("{}", math_expr)) - } + Ok(math_expr) => WeightCalculation::Expression(format!("{}", math_expr)), Err(e) => { generate_error!( ctx, @@ -424,9 +429,7 @@ pub(crate) fn apply_graph_step<'a>( } } } - Some(WeightExpression::Default) | None => { - WeightCalculation::Default - } + Some(WeightExpression::Default) | None => WeightCalculation::Default, }; // Extract weight property for validation (if it's a simple property) @@ -515,7 +518,13 @@ pub(crate) fn apply_graph_step<'a>( weight_calculation: weight_calculation.clone(), }, (None, None) => { - generate_error!(ctx, original_query, sp.loc.clone(), E627, "ShortestPathDijkstras"); + generate_error!( + ctx, + original_query, + sp.loc.clone(), + E627, + "ShortestPathDijkstras" + ); return None; } }, @@ -546,7 +555,13 @@ pub(crate) fn apply_graph_step<'a>( to: Some(GenRef::from(to)), }, (None, None) => { - generate_error!(ctx, original_query, sp.loc.clone(), E627, "ShortestPathBFS"); + generate_error!( + ctx, + original_query, + sp.loc.clone(), + E627, + "ShortestPathBFS" + ); return None; } }, @@ -564,9 +579,7 @@ pub(crate) fn apply_graph_step<'a>( } Some(WeightExpression::Expression(expr)) => { match generate_math_expr(expr, ExpressionContext::WeightCalculation) { - Ok(math_expr) => { - WeightCalculation::Expression(format!("{}", math_expr)) - } + Ok(math_expr) => WeightCalculation::Expression(format!("{}", math_expr)), Err(e) => { generate_error!( ctx, @@ -588,35 +601,33 @@ pub(crate) fn apply_graph_step<'a>( traversal .steps - .push(Separator::Period(GeneratedStep::ShortestPathAStar( - match (sp.from.clone(), sp.to.clone()) { - (Some(from), Some(to)) => GeneratedShortestPathAStar { - label: type_arg, - from: Some(GenRef::from(from)), - to: Some(GenRef::from(to)), - weight_calculation, - heuristic_property, - }, - (Some(from), None) => GeneratedShortestPathAStar { - label: type_arg, - from: Some(GenRef::from(from)), - to: None, - weight_calculation, - heuristic_property, - }, - (None, Some(to)) => GeneratedShortestPathAStar { - label: type_arg, - from: None, - to: Some(GenRef::from(to)), - weight_calculation, - heuristic_property, - }, - (None, None) => { - generate_error!(ctx, original_query, sp.loc.clone(), E627, "ShortestPathAStar"); - return None; - } + .push(Separator::Period(GeneratedStep::ShortestPathAStar(match ( + sp.from.clone(), + sp.to.clone(), + ) { + (Some(from), Some(to)) => GeneratedShortestPathAStar { + label: type_arg, + from: Some(GenRef::from(from)), + to: Some(GenRef::from(to)), + weight_calculation, + heuristic_property, }, - ))); + (Some(from), None) => GeneratedShortestPathAStar { + label: type_arg, + from: Some(GenRef::from(from)), + to: None, + weight_calculation, + heuristic_property, + }, + (None, Some(to)) => GeneratedShortestPathAStar { + label: type_arg, + from: None, + to: Some(GenRef::from(to)), + weight_calculation, + heuristic_property, + }, + (None, None) => panic!("Invalid shortest path astar"), + }))); traversal.should_collect = ShouldCollect::ToVec; Some(Type::Unknown) } diff --git a/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs b/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs index e28710bb..2bf58003 100644 --- 
a/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs +++ b/helix-db/src/helixc/analyzer/methods/infer_expr_type.rs @@ -87,7 +87,7 @@ pub(crate) fn infer_expr_type<'a>( Some(GeneratedStatement::Literal(GenRef::Literal(i.to_string()))), ), FloatLiteral(f) => ( - Type::Scalar(FieldType::F64), + Type::Scalar(FieldType::F32), Some(GeneratedStatement::Literal(GenRef::Literal(f.to_string()))), ), StringLiteral(s) => ( @@ -288,16 +288,19 @@ pub(crate) fn infer_expr_type<'a>( match value { ValueType::Literal { value, loc } => { match ctx.node_fields.get(ty.as_str()) { - Some(fields) => match fields.get(field_name.as_str()) + Some(fields) => match fields + .get(field_name.as_str()) { Some(field) => { match field.field_type == FieldType::Date { true => match Date::new(value) { - Ok(date) => GeneratedValue::Literal( - GenRef::Literal( - date.to_rfc3339(), - ), - ), + Ok(date) => { + GeneratedValue::Literal( + GenRef::Literal( + date.to_rfc3339(), + ), + ) + } Err(_) => { generate_error!( ctx, @@ -542,16 +545,19 @@ pub(crate) fn infer_expr_type<'a>( match value { ValueType::Literal { value, loc } => { match ctx.edge_fields.get(ty.as_str()) { - Some(fields) => match fields.get(field_name.as_str()) + Some(fields) => match fields + .get(field_name.as_str()) { Some(field) => { match field.field_type == FieldType::Date { true => match Date::new(value) { - Ok(date) => GeneratedValue::Literal( - GenRef::Literal( - date.to_rfc3339(), - ), - ), + Ok(date) => { + GeneratedValue::Literal( + GenRef::Literal( + date.to_rfc3339(), + ), + ) + } Err(_) => { generate_error!( ctx, @@ -616,12 +622,10 @@ pub(crate) fn infer_expr_type<'a>( Some(properties.into_iter().collect()) } - None => { - match default_properties.is_empty() { - true => None, - false => Some(default_properties), - } - } + None => match default_properties.is_empty() { + true => None, + false => Some(default_properties), + }, }; let (to, to_is_plural) = match &add.connection.to_id { @@ -629,7 +633,9 @@ pub(crate) fn infer_expr_type<'a>( IdType::Identifier { value, loc } => { is_valid_identifier(ctx, original_query, loc.clone(), value.as_str()); // Validate that the identifier exists in scope or is a parameter - if !scope.contains_key(value.as_str()) && is_param(original_query, value.as_str()).is_none() { + if !scope.contains_key(value.as_str()) + && is_param(original_query, value.as_str()).is_none() + { generate_error!( ctx, original_query, @@ -677,7 +683,9 @@ pub(crate) fn infer_expr_type<'a>( IdType::Identifier { value, loc } => { is_valid_identifier(ctx, original_query, loc.clone(), value.as_str()); // Validate that the identifier exists in scope or is a parameter - if !scope.contains_key(value.as_str()) && is_param(original_query, value.as_str()).is_none() { + if !scope.contains_key(value.as_str()) + && is_param(original_query, value.as_str()).is_none() + { generate_error!( ctx, original_query, @@ -773,7 +781,12 @@ pub(crate) fn infer_expr_type<'a>( } let label = GenRef::Literal(ty.clone()); - let vector_in_schema = match ctx.output.vectors.iter().find(|v| v.name == ty.as_str()) { + let vector_in_schema = match ctx + .output + .vectors + .iter() + .find(|v| v.name == ty.as_str()) + { Some(vector) => vector.clone(), None => { generate_error!(ctx, original_query, add.loc.clone(), E103, ty.as_str()); @@ -897,16 +910,19 @@ pub(crate) fn infer_expr_type<'a>( match value { ValueType::Literal { value, loc } => { match ctx.vector_fields.get(ty.as_str()) { - Some(fields) => match fields.get(field_name.as_str()) + Some(fields) => match fields + 
.get(field_name.as_str()) { Some(field) => { match field.field_type == FieldType::Date { true => match Date::new(value) { - Ok(date) => GeneratedValue::Literal( - GenRef::Literal( - date.to_rfc3339(), - ), - ), + Ok(date) => { + GeneratedValue::Literal( + GenRef::Literal( + date.to_rfc3339(), + ), + ) + } Err(_) => { generate_error!( ctx, @@ -971,15 +987,13 @@ pub(crate) fn infer_expr_type<'a>( properties } - None => { - default_properties.into_iter().fold( - HashMap::new(), - |mut acc, (field_name, default_value)| { - acc.insert(field_name, default_value); - acc - }, - ) - } + None => default_properties.into_iter().fold( + HashMap::new(), + |mut acc, (field_name, default_value)| { + acc.insert(field_name, default_value); + acc + }, + ), }; if let Some(vec_data) = &add.data { let vec = match vec_data { @@ -1114,8 +1128,7 @@ pub(crate) fn infer_expr_type<'a>( if let Some(var_type) = type_in_scope(ctx, original_query, sv.loc.clone(), scope, i.as_str()) { - let expected_type = - Type::Array(Box::new(Type::Scalar(FieldType::F64))); + let expected_type = Type::Array(Box::new(Type::Scalar(FieldType::F64))); if var_type != expected_type { generate_error!( ctx, @@ -1433,7 +1446,9 @@ pub(crate) fn infer_expr_type<'a>( }; } SourceStep::Anonymous => { - tr.traversal_type = TraversalType::FromSingle(GenRef::Std(DEFAULT_VAR_NAME.to_string())); + tr.traversal_type = TraversalType::FromSingle(GenRef::Std( + DEFAULT_VAR_NAME.to_string(), + )); } _ => { // For AddN, AddV, AddE, SearchVector, etc., leave traversal_type unchanged (Ref) @@ -1453,7 +1468,7 @@ pub(crate) fn infer_expr_type<'a>( // Math function calls always return f64 // TODO: Add proper type inference and validation for math function arguments ( - Type::Scalar(FieldType::F64), + Type::Scalar(FieldType::F32), None, // Will be handled by generator ) } diff --git a/helix-db/src/helixc/analyzer/methods/migration_validation.rs b/helix-db/src/helixc/analyzer/methods/migration_validation.rs index 02aa49cc..7b6d8729 100644 --- a/helix-db/src/helixc/analyzer/methods/migration_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/migration_validation.rs @@ -8,9 +8,7 @@ use crate::{ }, utils::{GenRef, GeneratedValue, Separator}, }, - parser::types::{ - FieldValueType, Migration, MigrationItem, MigrationPropertyMapping, - }, + parser::types::{FieldValueType, Migration, MigrationItem, MigrationPropertyMapping}, }, protocol::value::Value, }; @@ -26,7 +24,10 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, migration.from_version.0.clone(), ErrorCode::E108, - format!("Migration references non-existent schema version: {}", migration.from_version.1), + format!( + "Migration references non-existent schema version: {}", + migration.from_version.1 + ), Some("Ensure the schema version exists before referencing it in a migration".into()), ); return; @@ -41,7 +42,10 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, migration.to_version.0.clone(), ErrorCode::E108, - format!("Migration references non-existent schema version: {}", migration.to_version.1), + format!( + "Migration references non-existent schema version: {}", + migration.to_version.1 + ), Some("Ensure the schema version exists before referencing it in a migration".into()), ); return; @@ -80,8 +84,13 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, item.from_item.0.clone(), ErrorCode::E201, - format!("Migration item '{item_name}' does not exist in schema version {}", migration.from_version.1), - 
Some(format!("Ensure '{item_name}' is defined in the source schema")), + format!( + "Migration item '{item_name}' does not exist in schema version {}", + migration.from_version.1 + ), + Some(format!( + "Ensure '{item_name}' is defined in the source schema" + )), ); continue; } @@ -99,8 +108,13 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, item.to_item.0.clone(), ErrorCode::E201, - format!("Migration item '{item_name}' does not exist in schema version {}", migration.to_version.1), - Some(format!("Ensure '{item_name}' is defined in the target schema")), + format!( + "Migration item '{item_name}' does not exist in schema version {}", + migration.to_version.1 + ), + Some(format!( + "Ensure '{item_name}' is defined in the target schema" + )), ); continue; } @@ -113,8 +127,11 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, item.loc.clone(), ErrorCode::E205, - format!("Migration item types do not match: '{}' to '{}'", - item.from_item.1.inner(), item.to_item.1.inner()), + format!( + "Migration item types do not match: '{}' to '{}'", + item.from_item.1.inner(), + item.to_item.1.inner() + ), Some("Migration between different item types is not yet supported".into()), ); continue; @@ -143,9 +160,15 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, property_name.0.clone(), ErrorCode::E202, - format!("Property '{}' does not exist in target schema for '{}'", - property_name.1, item.to_item.1.inner()), - Some(format!("Ensure property '{}' is defined in the target schema", property_name.1)), + format!( + "Property '{}' does not exist in target schema for '{}'", + property_name.1, + item.to_item.1.inner() + ), + Some(format!( + "Ensure property '{}' is defined in the target schema", + property_name.1 + )), ); continue; } @@ -175,9 +198,14 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { ctx, property_value.loc.clone(), ErrorCode::E202, - format!("Identifier '{}' does not exist in source schema for '{}'", - identifier, item.from_item.1.inner()), - Some(format!("Ensure '{identifier}' is a valid field in the source schema")), + format!( + "Identifier '{}' does not exist in source schema for '{}'", + identifier, + item.from_item.1.inner() + ), + Some(format!( + "Ensure '{identifier}' is a valid field in the source schema" + )), ); continue; } @@ -188,7 +216,10 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { property_value.loc.clone(), ErrorCode::E206, "Unsupported property value type in migration".into(), - Some("Only literal values and identifiers are supported in migrations".into()), + Some( + "Only literal values and identifiers are supported in migrations" + .into(), + ), ); continue; } @@ -196,30 +227,42 @@ pub(crate) fn validate_migration(ctx: &mut Ctx, migration: &Migration) { // check default value is valid for the new field type if let Some(default) = &default - && to_property_field.field_type != *default { - push_schema_err( - ctx, - property_value.loc.clone(), - ErrorCode::E205, - format!("Default value type mismatch: expected '{}' but got '{:?}'", - to_property_field.field_type, default), - Some("Ensure the default value type matches the field type in the target schema".into()), - ); - continue; + && to_property_field.field_type != *default + { + push_schema_err( + ctx, + property_value.loc.clone(), + ErrorCode::E205, + format!( + "Default value type mismatch: expected '{}' but got '{:?}'", + to_property_field.field_type, default + ), + Some( + 
"Ensure the default value type matches the field type in the target schema" + .into(), + ), + ); + continue; } // check the cast is valid for the new field type if let Some(cast) = &cast - && to_property_field.field_type != cast.cast_to { - push_schema_err( - ctx, - cast.loc.clone(), - ErrorCode::E205, - format!("Cast target type mismatch: expected '{}' but got '{}'", - to_property_field.field_type, cast.cast_to), - Some("Ensure the cast target type matches the field type in the target schema".into()), - ); - continue; + && to_property_field.field_type != cast.cast_to + { + push_schema_err( + ctx, + cast.loc.clone(), + ErrorCode::E205, + format!( + "Cast target type mismatch: expected '{}' but got '{}'", + to_property_field.field_type, cast.cast_to + ), + Some( + "Ensure the cast target type matches the field type in the target schema" + .into(), + ), + ); + continue; } // // warnings if name is same diff --git a/helix-db/src/helixc/analyzer/methods/object_validation.rs b/helix-db/src/helixc/analyzer/methods/object_validation.rs index 9635b789..42f48606 100644 --- a/helix-db/src/helixc/analyzer/methods/object_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/object_validation.rs @@ -345,9 +345,11 @@ fn validate_property_access<'a>( ); // Check if this nested traversal ends with a Closure step - let own_closure_param = tr.steps.last() - .and_then(|step| match &step.step { - crate::helixc::parser::types::StepType::Closure(cl) => Some(cl.identifier.clone()), + let own_closure_param = + tr.steps.last().and_then(|step| match &step.step { + crate::helixc::parser::types::StepType::Closure(cl) => { + Some(cl.identifier.clone()) + } _ => None, }); @@ -386,9 +388,11 @@ fn validate_property_access<'a>( ); // Check if this nested traversal ends with a Closure step - let own_closure_param = tr.steps.last() - .and_then(|step| match &step.step { - crate::helixc::parser::types::StepType::Closure(cl) => Some(cl.identifier.clone()), + let own_closure_param = + tr.steps.last().and_then(|step| match &step.step { + crate::helixc::parser::types::StepType::Closure(cl) => { + Some(cl.identifier.clone()) + } _ => None, }); diff --git a/helix-db/src/helixc/analyzer/methods/query_validation.rs b/helix-db/src/helixc/analyzer/methods/query_validation.rs index c5502a07..4c95cc81 100644 --- a/helix-db/src/helixc/analyzer/methods/query_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/query_validation.rs @@ -14,8 +14,7 @@ use crate::helixc::{ generator::{ queries::{Parameter as GeneratedParameter, Query as GeneratedQuery}, return_values::{ - ReturnFieldInfo, ReturnFieldSource, ReturnFieldType, ReturnValue, - ReturnValueStruct, + ReturnFieldInfo, ReturnFieldSource, ReturnFieldType, ReturnValue, ReturnValueStruct, }, source_steps::SourceStep, statements::Statement as GeneratedStatement, @@ -167,13 +166,13 @@ fn build_return_fields( if should_add_field("data") { fields.push(ReturnFieldInfo::new_implicit( "data".to_string(), - "&'a [f64]".to_string(), + "&'a [f32]".to_string(), )); } if should_add_field("score") { fields.push(ReturnFieldInfo::new_implicit( "score".to_string(), - "f64".to_string(), + "f32".to_string(), )); } } @@ -235,8 +234,8 @@ fn build_return_fields( if is_implicit_field { let rust_type = match *field_name { - "data" => "&'a [f64]".to_string(), - "score" => "f64".to_string(), + "data" => "&'a [f32]".to_string(), + "score" => "f32".to_string(), _ => "&'a str".to_string(), }; fields.push(ReturnFieldInfo::new_implicit( @@ -305,8 +304,8 @@ fn build_return_fields( let rust_type = if is_implicit { 
// Use the appropriate type based on the implicit field match accessed_field.map(|s| s.as_str()) { - Some("data") => "&'a [f64]".to_string(), - Some("score") => "f64".to_string(), + Some("data") => "&'a [f32]".to_string(), + Some("score") => "f32".to_string(), Some("id") | Some("ID") | Some("label") | Some("Label") | Some("from_node") | Some("to_node") | None => "&'a str".to_string(), _ => "Option<&'a Value>".to_string(), @@ -434,13 +433,17 @@ fn process_object_literal<'a>( // Handle traversal like app::{name} // Extract variable name from start node let var_name = match &trav.start { - crate::helixc::parser::types::StartNode::Identifier(id) => id.clone(), + crate::helixc::parser::types::StartNode::Identifier(id) => { + id.clone() + } _ => "unknown".to_string(), }; // Check if there's an Object step to extract property name if let Some(step) = trav.steps.first() { - if let crate::helixc::parser::types::StepType::Object(obj) = &step.step { + if let crate::helixc::parser::types::StepType::Object(obj) = + &step.step + { // Extract the first field name from the object step if let Some(field) = obj.fields.first() { let prop_name = &field.key; @@ -475,9 +478,7 @@ fn process_object_literal<'a>( format!("json!({})", id) } } - _ => { - "serde_json::Value::Null".to_string() - } + _ => "serde_json::Value::Null".to_string(), } } ReturnType::Object(nested_obj) => { @@ -495,7 +496,11 @@ fn process_object_literal<'a>( ExpressionType::Identifier(id) => { // Look up the variable type and generate property extraction if let Some(var_info) = scope.get(id.as_str()) { - array_parts.push(build_identifier_json(ctx, id, &var_info.ty)); + array_parts.push(build_identifier_json( + ctx, + id, + &var_info.ty, + )); } else { // Fallback array_parts.push(format!("json!({})", id)); @@ -504,24 +509,37 @@ fn process_object_literal<'a>( ExpressionType::Traversal(trav) => { // Handle traversal in array let var_name = match &trav.start { - crate::helixc::parser::types::StartNode::Identifier(id) => id.clone(), + crate::helixc::parser::types::StartNode::Identifier( + id, + ) => id.clone(), _ => "unknown".to_string(), }; // Check for object step if let Some(step) = trav.steps.first() { - if let crate::helixc::parser::types::StepType::Object(obj) = &step.step { + if let crate::helixc::parser::types::StepType::Object( + obj, + ) = &step.step + { if let Some(field) = obj.fields.first() { let prop_name = &field.key; if prop_name == "id" { - array_parts.push(format!("uuid_str({}.id(), &arena)", var_name)); + array_parts.push(format!( + "uuid_str({}.id(), &arena)", + var_name + )); } else if prop_name == "label" { - array_parts.push(format!("{}.label()", var_name)); + array_parts + .push(format!("{}.label()", var_name)); } else { - array_parts.push(format!("{}.get_property(\"{}\")", var_name, prop_name)); + array_parts.push(format!( + "{}.get_property(\"{}\")", + var_name, prop_name + )); } } else { - array_parts.push(format!("json!({})", var_name)); + array_parts + .push(format!("json!({})", var_name)); } } else { array_parts.push(format!("json!({})", var_name)); @@ -571,13 +589,19 @@ fn process_object_literal<'a>( if *prop_name == "id" || *prop_name == "label" { continue; } - props.push(format!("\"{}\": {}.get_property(\"{}\")", prop_name, var_name, prop_name)); + props.push(format!( + "\"{}\": {}.get_property(\"{}\")", + prop_name, var_name, prop_name + )); } format!("json!({{\n {}\n }})", props.join(",\n ")) } else { // Fallback if schema not found - format!("json!({{\"id\": uuid_str({}.id(), &arena), \"label\": {}.label()}})", 
var_name, var_name)
+                        format!(
+                            "json!({{\"id\": uuid_str({}.id(), &arena), \"label\": {}.label()}})",
+                            var_name, var_name
+                        )
                     }
                 }
                 Type::Edge(Some(label)) => {
@@ -593,12 +617,18 @@
                         if *prop_name == "id" || *prop_name == "label" {
                             continue;
                         }
-                        props.push(format!("\"{}\": {}.get_property(\"{}\")", prop_name, var_name, prop_name));
+                        props.push(format!(
+                            "\"{}\": {}.get_property(\"{}\")",
+                            prop_name, var_name, prop_name
+                        ));
                     }
                     format!("json!({{\n {}\n }})", props.join(",\n "))
                 } else {
-                    format!("json!({{\"id\": uuid_str({}.id(), &arena), \"label\": {}.label()}})", var_name, var_name)
+                    format!(
+                        "json!({{\"id\": uuid_str({}.id(), &arena), \"label\": {}.label()}})",
+                        var_name, var_name
+                    )
                 }
             }
             _ => {
@@ -616,7 +646,10 @@
         ReturnValue {
             name: "serde_json::Value".to_string(),
             fields: vec![],
-            literal_value: Some(crate::helixc::generator::utils::GenRef::Std(format!("json!({})", json_code))),
+            literal_value: Some(crate::helixc::generator::utils::GenRef::Std(format!(
+                "json!({})",
+                json_code
+            ))),
         },
     ));
@@ -939,11 +972,20 @@ fn analyze_return_expr<'a>(
                 ShouldCollect::ToVec => {
                     // Collection - generate iteration code
                     let iter_code = if property_name == "id" {
-                        format!("{}.iter().map(|item| uuid_str(item.id(), &arena)).collect::<Vec<_>>()", field_name)
+                        format!(
+                            "{}.iter().map(|item| uuid_str(item.id(), &arena)).collect::<Vec<_>>()",
+                            field_name
+                        )
                     } else if property_name == "label" {
-                        format!("{}.iter().map(|item| item.label()).collect::<Vec<_>>()", field_name)
+                        format!(
+                            "{}.iter().map(|item| item.label()).collect::<Vec<_>>()",
+                            field_name
+                        )
                     } else {
-                        format!("{}.iter().map(|item| item.get_property(\"{}\")).collect::<Vec<_>>()", field_name, property_name)
+                        format!(
+                            "{}.iter().map(|item| item.get_property(\"{}\")).collect::<Vec<_>>()",
+                            field_name, property_name
+                        )
                     };
                     Some(GenRef::Std(iter_code))
                 }
@@ -1233,14 +1275,7 @@ fn analyze_return_expr<'a>(
         } else {
             // Complex nested object - use new object literal processing
             let struct_name = format!("{}ReturnType", capitalize_first(&query.name));
-            process_object_literal(
-                ctx,
-                original_query,
-                scope,
-                query,
-                values,
-                struct_name,
-            );
+            process_object_literal(ctx, original_query, scope, query, values, struct_name);

             // Note: process_object_literal adds to query.return_values
             // and sets use_struct_returns = false, so no need to push to return_structs
diff --git a/helix-db/src/helixc/analyzer/methods/schema_methods.rs b/helix-db/src/helixc/analyzer/methods/schema_methods.rs
index 0bae45dd..d3fbd25b 100644
--- a/helix-db/src/helixc/analyzer/methods/schema_methods.rs
+++ b/helix-db/src/helixc/analyzer/methods/schema_methods.rs
@@ -158,7 +158,7 @@ pub(crate) fn build_field_lookups<'a>(src: &'a Source) -> SchemaVersionMap<'a> {
             prefix: FieldPrefix::Empty,
             defaults: None,
             name: "data".to_string(),
-            field_type: FieldType::Array(Box::new(FieldType::F64)),
+            field_type: FieldType::Array(Box::new(FieldType::F32)),
             loc: Loc::empty(),
         }),
     );
@@ -168,7 +168,7 @@ pub(crate) fn build_field_lookups<'a>(src: &'a Source) -> SchemaVersionMap<'a> {
             prefix: FieldPrefix::Empty,
             defaults: None,
             name: "score".to_string(),
-            field_type: FieldType::F64,
+            field_type: FieldType::F32,
             loc: Loc::empty(),
         }),
    );
@@ -331,7 +331,8 @@ pub(crate) fn check_schema(ctx: &mut Ctx) -> Result<(), ParserError> {
             }
             if let Some(v) = edge.properties.as_ref() {
                 // Check for duplicate field names (case-insensitive)
-                let mut seen_fields: std::collections::HashSet<String> = std::collections::HashSet::new();
+                let mut seen_fields: std::collections::HashSet<String> =
+                    std::collections::HashSet::new();
                 for f in v {
                     let lower_name = f.name.to_lowercase();
                     if !seen_fields.insert(lower_name) {
@@ -779,7 +780,7 @@ mod tests {
             N::Person {
                 name: String,
                 age: U32,
-                score: F64,
+                score: F32,
                 active: Boolean,
                 user_id: ID,
                 created_at: Date
@@ -805,7 +806,7 @@
         let source = r#"
             N::Person {
                 tags: [String],
-                scores: [F64],
+                scores: [F32],
                 ids: [ID]
             }
diff --git a/helix-db/src/helixc/analyzer/methods/statement_validation.rs b/helix-db/src/helixc/analyzer/methods/statement_validation.rs
index 41e02239..4c50cd88 100644
--- a/helix-db/src/helixc/analyzer/methods/statement_validation.rs
+++ b/helix-db/src/helixc/analyzer/methods/statement_validation.rs
@@ -5,8 +5,11 @@ use crate::{
     generate_error,
     helixc::{
         analyzer::{
-            Ctx, errors::push_query_err, methods::infer_expr_type::infer_expr_type, types::Type,
-            utils::{is_valid_identifier, VariableInfo},
+            Ctx,
+            errors::push_query_err,
+            methods::infer_expr_type::infer_expr_type,
+            types::Type,
+            utils::{VariableInfo, is_valid_identifier},
         },
         generator::{
             queries::Query as GeneratedQuery,
@@ -63,14 +66,17 @@ pub(crate) fn validate_statements<'a>(
                 // Determine if the variable is single or collection based on type
                 let is_single = if let Some(GeneratedStatement::Traversal(ref tr)) = stmt {
                     // Check if should_collect is ToObj, or if the type is a single value
-                    matches!(tr.should_collect, ShouldCollect::ToObj) ||
-                    matches!(rhs_ty, Type::Node(_) | Type::Edge(_) | Type::Vector(_))
+                    matches!(tr.should_collect, ShouldCollect::ToObj)
+                        || matches!(rhs_ty, Type::Node(_) | Type::Edge(_) | Type::Vector(_))
                 } else {
                     // Non-traversal: check if type is single
                     matches!(rhs_ty, Type::Node(_) | Type::Edge(_) | Type::Vector(_))
                 };

-                scope.insert(assign.variable.as_str(), VariableInfo::new(rhs_ty, is_single));
+                scope.insert(
+                    assign.variable.as_str(),
+                    VariableInfo::new(rhs_ty, is_single),
+                );

                 stmt.as_ref()?;

@@ -91,7 +97,13 @@
                     tr.should_collect = ShouldCollect::No;
                     Some(GeneratedStatement::Drop(GeneratedDrop { expression: tr }))
                 } else {
-                    generate_error!(ctx, original_query, expr.loc.clone(), E628, &expr_ty.get_type_name());
+                    generate_error!(
+                        ctx,
+                        original_query,
+                        expr.loc.clone(),
+                        E628,
+                        &expr_ty.get_type_name()
+                    );
                     None
                 }
             }
@@ -201,8 +213,14 @@
                                 .unwrap()
                                 .clone(),
                         );
-                        body_scope.insert(field_name.as_str(), VariableInfo::new(field_type.clone(), true));
-                        scope.insert(field_name.as_str(), VariableInfo::new(field_type, true));
+                        body_scope.insert(
+                            field_name.as_str(),
+                            VariableInfo::new(field_type.clone(), true),
+                        );
+                        scope.insert(
+                            field_name.as_str(),
+                            VariableInfo::new(field_type, true),
+                        );
                     }
                     for_variable = ForVariable::ObjectDestructure(
                         fields
@@ -239,18 +257,25 @@
                             Type::Array(object_arr) => {
                                 match object_arr.as_ref() {
                                     Type::Object(object) => {
-                                        let mut obj_dest_fields = Vec::with_capacity(fields.len());
+                                        let mut obj_dest_fields =
+                                            Vec::with_capacity(fields.len());
                                         let object = object.clone();
                                         for (_, field_name) in fields {
                                             let name = field_name.as_str();
                                             // adds non-param fields to scope
                                             let field_type = object.get(name).unwrap().clone();
-                                            body_scope.insert(name, VariableInfo::new(field_type.clone(), true));
-                                            scope.insert(name, VariableInfo::new(field_type, true));
-                                            obj_dest_fields.push(GenRef::Std(name.to_string()));
-                                        }
-                                        for_variable =
-                                            ForVariable::ObjectDestructure(obj_dest_fields);
+                                            body_scope.insert(
+                                                name,
VariableInfo::new(field_type.clone(), true), + ); + scope.insert( + name, + VariableInfo::new(field_type, true), + ); + obj_dest_fields.push(GenRef::Std(name.to_string())); + } + for_variable = + ForVariable::ObjectDestructure(obj_dest_fields); } _ => { generate_error!( @@ -274,7 +299,7 @@ pub(crate) fn validate_statements<'a>( [&fl.in_variable.1] ); } - } + }, _ => { generate_error!( ctx, @@ -312,7 +337,7 @@ pub(crate) fn validate_statements<'a>( #[cfg(test)] mod tests { use super::*; - use crate::helixc::parser::{write_to_temp_file, HelixParser}; + use crate::helixc::parser::{HelixParser, write_to_temp_file}; // ============================================================================ // Assignment Validation Tests @@ -336,7 +361,11 @@ mod tests { assert!(result.is_ok()); let (diagnostics, _) = result.unwrap(); assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E302)); - assert!(diagnostics.iter().any(|d| d.message.contains("previously declared"))); + assert!( + diagnostics + .iter() + .any(|d| d.message.contains("previously declared")) + ); } #[test] @@ -382,7 +411,11 @@ mod tests { assert!(result.is_ok()); let (diagnostics, _) = result.unwrap(); assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E301)); - assert!(diagnostics.iter().any(|d| d.message.contains("not in scope") && d.message.contains("unknownList"))); + assert!( + diagnostics + .iter() + .any(|d| d.message.contains("not in scope") && d.message.contains("unknownList")) + ); } #[test] @@ -426,7 +459,11 @@ mod tests { assert!(result.is_ok()); let (diagnostics, _) = result.unwrap(); assert!(diagnostics.iter().any(|d| d.error_code == ErrorCode::E651)); - assert!(diagnostics.iter().any(|d| d.message.contains("not iterable"))); + assert!( + diagnostics + .iter() + .any(|d| d.message.contains("not iterable")) + ); } #[test] @@ -515,7 +552,9 @@ mod tests { assert!(result.is_ok()); let (diagnostics, _) = result.unwrap(); // Expression statements should not produce errors - assert!(diagnostics.is_empty() || !diagnostics.iter().any(|d| d.error_code == ErrorCode::E301)); + assert!( + diagnostics.is_empty() || !diagnostics.iter().any(|d| d.error_code == ErrorCode::E301) + ); } #[test] @@ -587,7 +626,11 @@ mod tests { assert!(result.is_ok()); let (diagnostics, _) = result.unwrap(); - assert!(!diagnostics.iter().any(|d| d.error_code == ErrorCode::E301 || d.error_code == ErrorCode::E302)); + assert!( + !diagnostics + .iter() + .any(|d| d.error_code == ErrorCode::E301 || d.error_code == ErrorCode::E302) + ); } #[test] @@ -610,6 +653,10 @@ mod tests { assert!(result.is_ok()); let (diagnostics, _) = result.unwrap(); - assert!(!diagnostics.iter().any(|d| d.error_code == ErrorCode::E301 || d.error_code == ErrorCode::E302)); + assert!( + !diagnostics + .iter() + .any(|d| d.error_code == ErrorCode::E301 || d.error_code == ErrorCode::E302) + ); } } diff --git a/helix-db/src/helixc/analyzer/methods/traversal_validation.rs b/helix-db/src/helixc/analyzer/methods/traversal_validation.rs index 949f10ef..813ef550 100644 --- a/helix-db/src/helixc/analyzer/methods/traversal_validation.rs +++ b/helix-db/src/helixc/analyzer/methods/traversal_validation.rs @@ -77,7 +77,7 @@ fn get_reserved_property_type(prop_name: &str, item_type: &Type) -> Option { // Only valid for vectors match item_type { - Type::Vector(_) | Type::Vectors(_) => Some(FieldType::F64), + Type::Vector(_) | Type::Vectors(_) => Some(FieldType::F32), _ => None, } } @@ -85,7 +85,7 @@ fn get_reserved_property_type(prop_name: &str, item_type: &Type) -> Option { - 
Some(FieldType::Array(Box::new(FieldType::F64))) + Some(FieldType::Array(Box::new(FieldType::F32))) } _ => None, } @@ -1136,7 +1136,13 @@ pub(crate) fn validate_traversal<'a>( expr.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param(original_query, i.as_str(), false, true) } _ => unreachable!("Cannot reach here"), @@ -1161,7 +1167,13 @@ pub(crate) fn validate_traversal<'a>( expr.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param(original_query, i.as_str(), false, true) } _ => unreachable!("Cannot reach here"), @@ -1186,7 +1198,13 @@ pub(crate) fn validate_traversal<'a>( expr.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param(original_query, i.as_str(), false, true) } _ => unreachable!("Cannot reach here"), @@ -1211,7 +1229,13 @@ pub(crate) fn validate_traversal<'a>( expr.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param(original_query, i.as_str(), false, true) } _ => unreachable!("Cannot reach here"), @@ -1248,32 +1272,38 @@ pub(crate) fn validate_traversal<'a>( } } else { let v = match &expr.expr { - ExpressionType::BooleanLiteral(b) => { - GeneratedValue::Primitive(GenRef::Std(b.to_string())) - } - ExpressionType::IntegerLiteral(i) => { - GeneratedValue::Primitive(GenRef::Std(i.to_string())) - } - ExpressionType::FloatLiteral(f) => { - GeneratedValue::Primitive(GenRef::Std(f.to_string())) - } - ExpressionType::StringLiteral(s) => { - GeneratedValue::Primitive(GenRef::Literal(s.to_string())) - } - ExpressionType::Identifier(i) => { - is_valid_identifier( - ctx, - original_query, - expr.loc.clone(), - i.as_str(), - ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); - gen_identifier_or_param(original_query, i.as_str(), false, true) - } - _ => { - unreachable!("Cannot reach here"); - } - }; + ExpressionType::BooleanLiteral(b) => { + GeneratedValue::Primitive(GenRef::Std(b.to_string())) + } + ExpressionType::IntegerLiteral(i) => { + GeneratedValue::Primitive(GenRef::Std(i.to_string())) + } + ExpressionType::FloatLiteral(f) => { + GeneratedValue::Primitive(GenRef::Std(f.to_string())) + } + ExpressionType::StringLiteral(s) => { + GeneratedValue::Primitive(GenRef::Literal(s.to_string())) + } + ExpressionType::Identifier(i) => { + is_valid_identifier( + ctx, + original_query, + expr.loc.clone(), + i.as_str(), + ); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); + gen_identifier_or_param(original_query, i.as_str(), false, true) + } + _ => { + unreachable!("Cannot reach here"); + } + }; BoolOp::Eq(Eq { left: GeneratedValue::Primitive(GenRef::Std("*v".to_string())), right: v, @@ -1307,30 +1337,36 @@ pub(crate) fn validate_traversal<'a>( } } else { let v = match &expr.expr { - ExpressionType::BooleanLiteral(b) => { - GeneratedValue::Primitive(GenRef::Std(b.to_string())) - } - ExpressionType::IntegerLiteral(i) => { - GeneratedValue::Primitive(GenRef::Std(i.to_string())) - } - 
ExpressionType::FloatLiteral(f) => { - GeneratedValue::Primitive(GenRef::Std(f.to_string())) - } - ExpressionType::StringLiteral(s) => { - GeneratedValue::Primitive(GenRef::Literal(s.to_string())) - } - ExpressionType::Identifier(i) => { - is_valid_identifier( - ctx, - original_query, - expr.loc.clone(), - i.as_str(), - ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); - gen_identifier_or_param(original_query, i.as_str(), false, true) - } - _ => unreachable!("Cannot reach here"), - }; + ExpressionType::BooleanLiteral(b) => { + GeneratedValue::Primitive(GenRef::Std(b.to_string())) + } + ExpressionType::IntegerLiteral(i) => { + GeneratedValue::Primitive(GenRef::Std(i.to_string())) + } + ExpressionType::FloatLiteral(f) => { + GeneratedValue::Primitive(GenRef::Std(f.to_string())) + } + ExpressionType::StringLiteral(s) => { + GeneratedValue::Primitive(GenRef::Literal(s.to_string())) + } + ExpressionType::Identifier(i) => { + is_valid_identifier( + ctx, + original_query, + expr.loc.clone(), + i.as_str(), + ); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); + gen_identifier_or_param(original_query, i.as_str(), false, true) + } + _ => unreachable!("Cannot reach here"), + }; BoolOp::Neq(Neq { left: GeneratedValue::Primitive(GenRef::Std("*v".to_string())), right: v, @@ -1346,7 +1382,13 @@ pub(crate) fn validate_traversal<'a>( expr.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param(original_query, i.as_str(), true, false) } ExpressionType::BooleanLiteral(b) => { @@ -1374,7 +1416,13 @@ pub(crate) fn validate_traversal<'a>( expr.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, expr.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + expr.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param(original_query, i.as_str(), true, false) } ExpressionType::ArrayLiteral(a) => GeneratedValue::Array(GenRef::Std( @@ -1519,7 +1567,13 @@ pub(crate) fn validate_traversal<'a>( field.value.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, field.value.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + field.value.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param( original_query, i.as_str(), @@ -1543,7 +1597,13 @@ pub(crate) fn validate_traversal<'a>( e.loc.clone(), i.as_str(), ); - type_in_scope(ctx, original_query, e.loc.clone(), scope, i.as_str()); + type_in_scope( + ctx, + original_query, + e.loc.clone(), + scope, + i.as_str(), + ); gen_identifier_or_param( original_query, i.as_str(), @@ -1565,12 +1625,24 @@ pub(crate) fn validate_traversal<'a>( GeneratedValue::Primitive(GenRef::Std(i.to_string())) } other => { - generate_error!(ctx, original_query, e.loc.clone(), E206, &format!("{:?}", other)); + generate_error!( + ctx, + original_query, + e.loc.clone(), + E206, + &format!("{:?}", other) + ); GeneratedValue::Unknown } }, other => { - generate_error!(ctx, original_query, field.value.loc.clone(), E206, &format!("{:?}", other)); + generate_error!( + ctx, + original_query, + field.value.loc.clone(), + E206, + &format!("{:?}", other) + ); GeneratedValue::Unknown } }, @@ -1916,7 +1988,13 @@ pub(crate) fn validate_traversal<'a>( // Generate distance parameter if provided let distance = if let Some(MMRDistance::Identifier(id)) = &rerank_mmr.distance { is_valid_identifier(ctx, 
original_query, rerank_mmr.loc.clone(), id.as_str()); - type_in_scope(ctx, original_query, rerank_mmr.loc.clone(), scope, id.as_str()); + type_in_scope( + ctx, + original_query, + rerank_mmr.loc.clone(), + scope, + id.as_str(), + ); Some( crate::helixc::generator::traversal_steps::MMRDistanceMethod::Identifier( id.clone(), diff --git a/helix-db/src/helixc/analyzer/mod.rs b/helix-db/src/helixc/analyzer/mod.rs index f03806c5..418ca24b 100644 --- a/helix-db/src/helixc/analyzer/mod.rs +++ b/helix-db/src/helixc/analyzer/mod.rs @@ -12,12 +12,15 @@ use crate::helixc::{ methods::{ migration_validation::validate_migration, query_validation::validate_query, - schema_methods::{build_field_lookups, check_schema, SchemaVersionMap}, + schema_methods::{SchemaVersionMap, build_field_lookups, check_schema}, }, types::Type, }, generator::Source as GeneratedSource, - parser::{errors::ParserError, types::{EdgeSchema, ExpressionType, Field, Query, ReturnType, Source}}, + parser::{ + errors::ParserError, + types::{EdgeSchema, ExpressionType, Field, Query, ReturnType, Source}, + }, }; use itertools::Itertools; use serde::Serialize; @@ -43,7 +46,6 @@ pub mod pretty; pub mod types; pub mod utils; - /// Internal working context shared by all passes. pub(crate) struct Ctx<'a> { pub(super) src: &'a Source, diff --git a/helix-db/src/helixc/analyzer/types.rs b/helix-db/src/helixc/analyzer/types.rs index da2b9233..8a2f9d07 100644 --- a/helix-db/src/helixc/analyzer/types.rs +++ b/helix-db/src/helixc/analyzer/types.rs @@ -229,10 +229,10 @@ impl From for GeneratedValue { /// Metadata for GROUPBY and AGGREGATE_BY operations #[derive(Debug, Clone)] pub struct AggregateInfo { - pub source_type: Box, // Original type being aggregated (Node, Edge, Vector) - pub properties: Vec, // Properties being grouped by - pub is_count: bool, // true for COUNT mode - pub is_group_by: bool, // true for GROUP_BY, false for AGGREGATE_BY + pub source_type: Box, // Original type being aggregated (Node, Edge, Vector) + pub properties: Vec, // Properties being grouped by + pub is_count: bool, // true for COUNT mode + pub is_group_by: bool, // true for GROUP_BY, false for AGGREGATE_BY } #[derive(Debug, Clone)] @@ -408,7 +408,11 @@ impl From<&FieldType> for Type { String | Boolean | F32 | F64 | I8 | I16 | I32 | I64 | U8 | U16 | U32 | U64 | U128 | Uuid | Date => Type::Scalar(ft.clone()), Array(inner_ft) => Type::Array(Box::new(Type::from(*inner_ft.clone()))), - Object(obj) => Type::Object(obj.iter().map(|(k, v)| (k.clone(), Type::from(v))).collect()), + Object(obj) => Type::Object( + obj.iter() + .map(|(k, v)| (k.clone(), Type::from(v))) + .collect(), + ), Identifier(id) => Type::Scalar(FieldType::Identifier(id.clone())), } } diff --git a/helix-db/src/helixc/analyzer/utils.rs b/helix-db/src/helixc/analyzer/utils.rs index a6bbdc01..5fd15041 100644 --- a/helix-db/src/helixc/analyzer/utils.rs +++ b/helix-db/src/helixc/analyzer/utils.rs @@ -377,8 +377,8 @@ impl FieldLookup for Type { .map(|fields| match key { "id" | "ID" => Some(FieldType::Uuid), "label" => Some(FieldType::String), - "data" => Some(FieldType::Array(Box::new(FieldType::F64))), - "score" => Some(FieldType::F64), + "data" => Some(FieldType::Array(Box::new(FieldType::F32))), + "score" => Some(FieldType::F32), _ => fields .get(key) .map(|field| Some(field.field_type.clone())) diff --git a/helix-db/src/helixc/generator/bool_ops.rs b/helix-db/src/helixc/generator/bool_ops.rs index 93a0a42a..9fe798be 100644 --- a/helix-db/src/helixc/generator/bool_ops.rs +++ 
b/helix-db/src/helixc/generator/bool_ops.rs @@ -111,7 +111,11 @@ pub struct PropertyEq { } impl Display for PropertyEq { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}.get_property(\"{}\").map_or(false, |w| w == v)", self.var, self.property) + write!( + f, + "{}.get_property(\"{}\").map_or(false, |w| w == v)", + self.var, self.property + ) } } @@ -122,7 +126,11 @@ pub struct PropertyNeq { } impl Display for PropertyNeq { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}.get_property(\"{}\").map_or(false, |w| w != v)", self.var, self.property) + write!( + f, + "{}.get_property(\"{}\").map_or(false, |w| w != v)", + self.var, self.property + ) } } diff --git a/helix-db/src/helixc/generator/math_functions.rs b/helix-db/src/helixc/generator/math_functions.rs index c1e086d6..3aade720 100644 --- a/helix-db/src/helixc/generator/math_functions.rs +++ b/helix-db/src/helixc/generator/math_functions.rs @@ -31,7 +31,7 @@ pub struct MathFunctionCallGen { #[derive(Debug, Clone)] pub struct NumericLiteral { - pub value: f64, + pub value: f32, } #[derive(Debug, Clone)] @@ -54,10 +54,10 @@ impl Display for MathExpr { impl Display for NumericLiteral { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Handle special formatting for cleaner output - if self.value.fract() == 0.0 && self.value.abs() < i64::MAX as f64 { - write!(f, "{}_f64", self.value as i64) + if self.value.fract() == 0.0 && self.value.abs() < i64::MAX as f32 { + write!(f, "{}_f32", self.value as i64) } else { - write!(f, "{}_f64", self.value) + write!(f, "{}_f32", self.value) } } } @@ -66,16 +66,32 @@ impl Display for PropertyAccess { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.context { PropertyContext::Edge => { - write!(f, "(edge.get_property({}).ok_or(GraphError::Default)?.as_f64())", self.property) + write!( + f, + "(edge.get_property({}).ok_or(GraphError::Default)?.as_f32())", + self.property + ) } PropertyContext::SourceNode => { - write!(f, "(src_node.get_property({}).ok_or(GraphError::Default)?.as_f64())", self.property) + write!( + f, + "(src_node.get_property({}).ok_or(GraphError::Default)?.as_f32())", + self.property + ) } PropertyContext::TargetNode => { - write!(f, "(dst_node.get_property({}).ok_or(GraphError::Default)?.as_f64())", self.property) + write!( + f, + "(dst_node.get_property({}).ok_or(GraphError::Default)?.as_f32())", + self.property + ) } PropertyContext::Current => { - write!(f, "(v.get_property({}).ok_or(GraphError::Default)?.as_f64())", self.property) + write!( + f, + "(v.get_property({}).ok_or(GraphError::Default)?.as_f32())", + self.property + ) } } } @@ -223,8 +239,8 @@ impl Display for MathFunctionCallGen { } // Constants (nullary) - MathFunction::Pi => write!(f, "std::f64::consts::PI"), - MathFunction::E => write!(f, "std::f64::consts::E"), + MathFunction::Pi => write!(f, "std::f32::consts::PI"), + MathFunction::E => write!(f, "std::f32::consts::E"), // Aggregates (special handling needed) MathFunction::Min @@ -270,7 +286,7 @@ pub fn generate_math_expr( })) } ExpressionType::IntegerLiteral(i) => Ok(MathExpr::NumericLiteral(NumericLiteral { - value: *i as f64, + value: *i as f32, })), ExpressionType::FloatLiteral(f) => { Ok(MathExpr::NumericLiteral(NumericLiteral { value: *f })) @@ -307,22 +323,35 @@ fn parse_property_access_from_traversal( } else if traversal.steps.len() == 2 { // Check if first step is FromN or ToN match &traversal.steps[0].step { - StepType::Node(graph_step) => { - match &graph_step.step { - 
GraphStepType::FromN => (PropertyContext::SourceNode, 1), - GraphStepType::ToN => (PropertyContext::TargetNode, 1), - _ => return Err(format!("Unexpected node step type in property access: {:?}", graph_step.step)), + StepType::Node(graph_step) => match &graph_step.step { + GraphStepType::FromN => (PropertyContext::SourceNode, 1), + GraphStepType::ToN => (PropertyContext::TargetNode, 1), + _ => { + return Err(format!( + "Unexpected node step type in property access: {:?}", + graph_step.step + )); } + }, + _ => { + return Err(format!( + "Expected FromN or ToN step, got: {:?}", + traversal.steps[0].step + )); } - _ => return Err(format!("Expected FromN or ToN step, got: {:?}", traversal.steps[0].step)), } } else { - return Err(format!("Invalid traversal length for property access: {}", traversal.steps.len())); + return Err(format!( + "Invalid traversal length for property access: {}", + traversal.steps.len() + )); }; // Extract property name from the Object step if let StepType::Object(obj) = &traversal.steps[property_step_idx].step - && obj.fields.len() == 1 && !obj.should_spread { + && obj.fields.len() == 1 + && !obj.should_spread + { let property_name = obj.fields[0].key.clone(); // Override context if specified by ExpressionContext @@ -347,13 +376,13 @@ mod tests { #[test] fn test_numeric_literal_integer() { let lit = NumericLiteral { value: 5.0 }; - assert_eq!(lit.to_string(), "5_f64"); + assert_eq!(lit.to_string(), "5_f32"); } #[test] fn test_numeric_literal_float() { let lit = NumericLiteral { value: 3.14 }; - assert_eq!(lit.to_string(), "3.14_f64"); + assert_eq!(lit.to_string(), "3.14_f32"); } #[test] @@ -365,7 +394,7 @@ mod tests { MathExpr::NumericLiteral(NumericLiteral { value: 3.0 }), ], }; - assert_eq!(add.to_string(), "(5_f64 + 3_f64)"); + assert_eq!(add.to_string(), "(5_f32 + 3_f32)"); } #[test] @@ -377,7 +406,7 @@ mod tests { MathExpr::NumericLiteral(NumericLiteral { value: 30.0 }), ], }; - assert_eq!(pow.to_string(), "(0.95_f64).powf(30_f64)"); + assert_eq!(pow.to_string(), "(0.95_f32).powf(30_f32)"); } #[test] @@ -395,7 +424,7 @@ mod tests { }), ], }; - assert_eq!(nested.to_string(), "(0.95_f64).powf((10_f64 / 30_f64))"); + assert_eq!(nested.to_string(), "(0.95_f32).powf((10_f32 / 30_f32))"); } #[test] @@ -404,7 +433,7 @@ mod tests { function: MathFunction::Sqrt, args: vec![MathExpr::NumericLiteral(NumericLiteral { value: 16.0 })], }; - assert_eq!(sqrt.to_string(), "(16_f64).sqrt()"); + assert_eq!(sqrt.to_string(), "(16_f32).sqrt()"); } #[test] @@ -413,7 +442,7 @@ mod tests { function: MathFunction::Sin, args: vec![MathExpr::NumericLiteral(NumericLiteral { value: 1.57 })], }; - assert_eq!(sin.to_string(), "(1.57_f64).sin()"); + assert_eq!(sin.to_string(), "(1.57_f32).sin()"); } #[test] @@ -422,13 +451,13 @@ mod tests { function: MathFunction::Pi, args: vec![], }; - assert_eq!(pi.to_string(), "std::f64::consts::PI"); + assert_eq!(pi.to_string(), "std::f32::consts::PI"); let e = MathFunctionCallGen { function: MathFunction::E, args: vec![], }; - assert_eq!(e.to_string(), "std::f64::consts::E"); + assert_eq!(e.to_string(), "std::f32::consts::E"); } #[test] @@ -440,7 +469,7 @@ mod tests { }; assert_eq!( edge_prop.to_string(), - "(edge.get_property(\"distance\").ok_or(GraphError::Default)?.as_f64())" + "(edge.get_property(\"distance\").ok_or(GraphError::Default)?.as_f32())" ); // Test SourceNode context @@ -450,7 +479,7 @@ mod tests { }; assert_eq!( src_prop.to_string(), - "(src_node.get_property(\"traffic_factor\").ok_or(GraphError::Default)?.as_f64())" + 
"(src_node.get_property(\"traffic_factor\").ok_or(GraphError::Default)?.as_f32())" ); // Test TargetNode context @@ -460,14 +489,14 @@ mod tests { }; assert_eq!( dst_prop.to_string(), - "(dst_node.get_property(\"popularity\").ok_or(GraphError::Default)?.as_f64())" + "(dst_node.get_property(\"popularity\").ok_or(GraphError::Default)?.as_f32())" ); } #[test] fn test_complex_weight_expression() { // Test: MUL(_::{distance}, POW(0.95, DIV(_::{days}, 30))) - // Should generate: ((edge.get_property("distance").ok_or(GraphError::Default)?.as_f64()) * (0.95_f64).powf(((edge.get_property("days").ok_or(GraphError::Default)?.as_f64()) / 30_f64))) + // Should generate: ((edge.get_property("distance").ok_or(GraphError::Default)?.as_f32()) * (0.95_f32).powf(((edge.get_property("days").ok_or(GraphError::Default)?.as_f32()) / 30_f32))) let expr = MathFunctionCallGen { function: MathFunction::Mul, args: vec![ @@ -496,14 +525,14 @@ mod tests { assert_eq!( expr.to_string(), - "((edge.get_property(\"distance\").ok_or(GraphError::Default)?.as_f64()) * (0.95_f64).powf(((edge.get_property(\"days\").ok_or(GraphError::Default)?.as_f64()) / 30_f64)))" + "((edge.get_property(\"distance\").ok_or(GraphError::Default)?.as_f32()) * (0.95_f32).powf(((edge.get_property(\"days\").ok_or(GraphError::Default)?.as_f32()) / 30_f32)))" ); } #[test] fn test_multi_context_expression() { // Test: MUL(_::{distance}, _::From::{traffic_factor}) - // Should generate: ((edge.get_property("distance").ok_or(GraphError::Default)?.as_f64()) * (src_node.get_property("traffic_factor").ok_or(GraphError::Default)?.as_f64())) + // Should generate: ((edge.get_property("distance").ok_or(GraphError::Default)?.as_f32()) * (src_node.get_property("traffic_factor").ok_or(GraphError::Default)?.as_f32())) let expr = MathFunctionCallGen { function: MathFunction::Mul, args: vec![ @@ -520,7 +549,7 @@ mod tests { assert_eq!( expr.to_string(), - "((edge.get_property(\"distance\").ok_or(GraphError::Default)?.as_f64()) * (src_node.get_property(\"traffic_factor\").ok_or(GraphError::Default)?.as_f64()))" + "((edge.get_property(\"distance\").ok_or(GraphError::Default)?.as_f32()) * (src_node.get_property(\"traffic_factor\").ok_or(GraphError::Default)?.as_f32()))" ); } } diff --git a/helix-db/src/helixc/generator/migrations.rs b/helix-db/src/helixc/generator/migrations.rs index b1a7b522..d5ffcd28 100644 --- a/helix-db/src/helixc/generator/migrations.rs +++ b/helix-db/src/helixc/generator/migrations.rs @@ -153,7 +153,9 @@ mod tests { remappings: vec![Separator::Semicolon( GeneratedMigrationPropertyMapping::FieldAdditionFromOldField { old_field: GeneratedValue::Literal(GenRef::Literal("name".to_string())), - new_field: GeneratedValue::Literal(GenRef::Literal("full_name".to_string())), + new_field: GeneratedValue::Literal(GenRef::Literal( + "full_name".to_string(), + )), }, )], should_spread: false, @@ -203,7 +205,9 @@ mod tests { ), Separator::Semicolon( GeneratedMigrationPropertyMapping::FieldAdditionFromValue { - new_field_name: GeneratedValue::Literal(GenRef::Literal("c".to_string())), + new_field_name: GeneratedValue::Literal(GenRef::Literal( + "c".to_string(), + )), new_field_type: FieldType::Boolean, value: GeneratedValue::Primitive(GenRef::Std("true".to_string())), }, diff --git a/helix-db/src/helixc/generator/queries.rs b/helix-db/src/helixc/generator/queries.rs index c89f3732..876e73f4 100644 --- a/helix-db/src/helixc/generator/queries.rs +++ b/helix-db/src/helixc/generator/queries.rs @@ -90,7 +90,7 @@ impl Query { for (i, _) in 
self.hoisted_embedding_calls.iter().enumerate() {
                 let name = EmbedData::name_from_index(i);
-                writeln!(f, "let {name}: Vec<f64> = {name}?;")?;
+                writeln!(f, "let {name}: Vec<f32> = {name}?;")?;
             }
         }
         Ok(())
     }
@@ -172,8 +172,7 @@ impl Query {
                 writeln!(
                     f,
                     " \"{}\": {}",
-                    struct_def.source_variable,
-                    struct_def.source_variable
+                    struct_def.source_variable, struct_def.source_variable
                 )?;
             } else if struct_def.source_variable.is_empty() {
                 // Object literal - construct from multiple sources
@@ -895,8 +894,7 @@ impl Query {
                 writeln!(
                     f,
                     " \"{}\": {}",
-                    struct_def.source_variable,
-                    struct_def.source_variable
+                    struct_def.source_variable, struct_def.source_variable
                 )?;
             } else if struct_def.is_collection {
                 // Collection - generate mapping code
diff --git a/helix-db/src/helixc/generator/schemas.rs b/helix-db/src/helixc/generator/schemas.rs
index a6c72059..7e5ce766 100644
--- a/helix-db/src/helixc/generator/schemas.rs
+++ b/helix-db/src/helixc/generator/schemas.rs
@@ -218,7 +218,7 @@ mod tests {
             },
             SchemaProperty {
                 name: "score".to_string(),
-                field_type: GeneratedType::RustType(RustType::F64),
+                field_type: GeneratedType::RustType(RustType::F32),
                 default_value: None,
                 is_index: FieldPrefix::Empty,
             },
@@ -227,7 +227,7 @@
         let output = format!("{}", schema);
         assert!(output.contains("pub count: i32,"));
-        assert!(output.contains("pub score: f64,"));
+        assert!(output.contains("pub score: f32,"));
     }

     // ============================================================================
diff --git a/helix-db/src/helixc/generator/source_steps.rs b/helix-db/src/helixc/generator/source_steps.rs
index 07ce8e95..859eebfd 100644
--- a/helix-db/src/helixc/generator/source_steps.rs
+++ b/helix-db/src/helixc/generator/source_steps.rs
@@ -141,7 +141,7 @@ impl Display for AddV {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         write!(
             f,
-            "insert_v::<fn(&HVector) -> bool>({}, {}, {})",
+            "insert_v({}, {}, {})",
             self.vec,
             self.label,
             write_properties(&self.properties)
diff --git a/helix-db/src/helixc/generator/statements.rs b/helix-db/src/helixc/generator/statements.rs
index fdfda778..828bfa48 100644
--- a/helix-db/src/helixc/generator/statements.rs
+++ b/helix-db/src/helixc/generator/statements.rs
@@ -3,8 +3,6 @@ use std::fmt::Display;

 use crate::helixc::generator::{bool_ops::BoExp, traversal_steps::Traversal, utils::GenRef};

-
-
 #[derive(Clone)]
 pub enum Statement {
     Assignment(Assignment),
@@ -27,13 +25,20 @@ impl Display for Statement {
             Statement::Literal(literal) => write!(f, "{literal}"),
             Statement::Identifier(identifier) => write!(f, "{identifier}"),
             Statement::BoExp(bo) => write!(f, "{bo}"),
-            Statement::Array(array) => write!(f, "[{}]", array.iter().map(|s| s.to_string()).collect::<Vec<_>>().join(", ")),
+            Statement::Array(array) => write!(
+                f,
+                "[{}]",
+                array
+                    .iter()
+                    .map(|s| s.to_string())
+                    .collect::<Vec<_>>()
+                    .join(", ")
+            ),
             Statement::Empty => write!(f, ""),
         }
     }
 }

-
 #[derive(Clone)]
 pub enum IdentifierType {
     Primitive,
@@ -197,7 +202,9 @@ mod tests {
     fn test_assignment_statement() {
         let assignment = Statement::Assignment(Assignment {
             variable: GenRef::Std("result".to_string()),
-            value: Box::new(Statement::Identifier(GenRef::Std("computation".to_string()))),
+            value: Box::new(Statement::Identifier(GenRef::Std(
+                "computation".to_string(),
+            ))),
         });
         let output = format!("{}", assignment);
         assert!(output.contains("let result = computation"));
@@ -227,4 +234,3 @@
         assert_eq!(var.inner(), "");
     }
 }
-
diff --git a/helix-db/src/helixc/generator/traversal_steps.rs b/helix-db/src/helixc/generator/traversal_steps.rs
index e60ff4a8..0c60aefa 100644
--- a/helix-db/src/helixc/generator/traversal_steps.rs
+++ b/helix-db/src/helixc/generator/traversal_steps.rs
@@ -266,7 +266,9 @@ impl Display for Step {
             Step::ToV(to_v) => write!(f, "{to_v}"),
             Step::PropertyFetch(property) => write!(f, "get_property({property})"),
             Step::ReservedPropertyAccess(prop) => match prop {
-                ReservedProp::Id => write!(f, "map(|item| Ok(Value::from(uuid_str(item.id, &arena))))"),
+                ReservedProp::Id => {
+                    write!(f, "map(|item| Ok(Value::from(uuid_str(item.id, &arena))))")
+                }
                 ReservedProp::Label => write!(f, "map(|item| Ok(Value::from(item.label())))"),
                 // ReservedProp::Version => write!(f, "map(|item| Ok(Value::from(item.version)))"),
                 // ReservedProp::FromNode => write!(f, "map(|item| Ok(Value::from(uuid_str(item.from_node, &arena))))"),
@@ -453,7 +455,9 @@ impl Display for WhereRef {
                 | Separator::Empty(Step::PropertyFetch(p)) => prop = Some(p),
                 Separator::Period(Step::ReservedPropertyAccess(rp))
                 | Separator::Newline(Step::ReservedPropertyAccess(rp))
-                | Separator::Empty(Step::ReservedPropertyAccess(rp)) => reserved_prop = Some(rp),
+                | Separator::Empty(Step::ReservedPropertyAccess(rp)) => {
+                    reserved_prop = Some(rp)
+                }
                 Separator::Period(Step::BoolOp(op))
                 | Separator::Newline(Step::BoolOp(op))
                 | Separator::Empty(Step::BoolOp(op)) => bool_op = Some(op),
@@ -477,7 +481,9 @@
                 BoolOp::Contains(contains) => format!("{}{}", value_expr, contains),
                 BoolOp::IsIn(is_in) => format!("{}{}", value_expr, is_in),
                 BoolOp::PropertyEq(_) | BoolOp::PropertyNeq(_) => {
-                    unreachable!("PropertyEq/PropertyNeq should not be used with reserved properties")
+                    unreachable!(
+                        "PropertyEq/PropertyNeq should not be used with reserved properties"
+                    )
                 }
             };
             return write!(
@@ -725,19 +731,22 @@ impl Display for ShortestPathDijkstras {
             WeightCalculation::Property(prop) => {
                 write!(
                     f,
-                    "|edge, _src_node, _dst_node| -> Result<f64, GraphError> {{ Ok(edge.get_property({})?.as_f64()?) }}",
+                    "|edge, _src_node, _dst_node| -> Result<f32, GraphError> {{ Ok(edge.get_property({})?.as_f32()?) }}",
                     prop
                 )?;
             }
             WeightCalculation::Expression(expr) => {
                 write!(
                     f,
-                    "|edge, src_node, dst_node| -> Result<f64, GraphError> {{ Ok({}) }}",
+                    "|edge, src_node, dst_node| -> Result<f32, GraphError> {{ Ok({}) }}",
                     expr
                 )?;
             }
             WeightCalculation::Default => {
-                write!(f, "helix_db::helix_engine::traversal_core::ops::util::paths::default_weight_fn")?;
+                write!(
+                    f,
+                    "helix_db::helix_engine::traversal_core::ops::util::paths::default_weight_fn"
+                )?;
             }
         }

@@ -784,19 +793,22 @@ impl Display for ShortestPathAStar {
             WeightCalculation::Property(prop) => {
                 write!(
                     f,
-                    "|edge, _src_node, _dst_node| -> Result<f64, GraphError> {{ Ok(edge.get_property({})?.as_f64()?) }}, ",
+                    "|edge, _src_node, _dst_node| -> Result<f32, GraphError> {{ Ok(edge.get_property({})?.as_f32()?) }}, ",
                    prop
                 )?;
             }
             WeightCalculation::Expression(expr) => {
                 write!(
                     f,
-                    "|edge, src_node, dst_node| -> Result<f64, GraphError> {{ Ok({}) }}, ",
+                    "|edge, src_node, dst_node| -> Result<f32, GraphError> {{ Ok({}) }}, ",
                     expr
                 )?;
             }
             WeightCalculation::Default => {
-                write!(f, "helix_db::helix_engine::traversal_core::ops::util::paths::default_weight_fn, ")?;
+                write!(
+                    f,
+                    "helix_db::helix_engine::traversal_core::ops::util::paths::default_weight_fn, "
+                )?;
             }
         }

@@ -829,7 +841,7 @@ pub struct RerankRRF {
 impl Display for RerankRRF {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match &self.k {
-            Some(k) => write!(f, "rerank(RRFReranker::with_k({k} as f64).unwrap(), None)"),
+            Some(k) => write!(f, "rerank(RRFReranker::with_k({k} as f32).unwrap(), None)"),
             None => write!(f, "rerank(RRFReranker::new(), None)"),
         }
     }
@@ -848,7 +860,10 @@ impl Display for MMRDistanceMethod {
             MMRDistanceMethod::Cosine => write!(f, "DistanceMethod::Cosine"),
             MMRDistanceMethod::Euclidean => write!(f, "DistanceMethod::Euclidean"),
             MMRDistanceMethod::DotProduct => write!(f, "DistanceMethod::DotProduct"),
-            MMRDistanceMethod::Identifier(id) => write!(f, "match {id}.as_str() {{ \"cosine\" => DistanceMethod::Cosine, \"euclidean\" => DistanceMethod::Euclidean, \"dotproduct\" => DistanceMethod::DotProduct, _ => DistanceMethod::Cosine }}"),
+            MMRDistanceMethod::Identifier(id) => write!(
+                f,
+                "match {id}.as_str() {{ \"cosine\" => DistanceMethod::Cosine, \"euclidean\" => DistanceMethod::Euclidean, \"dotproduct\" => DistanceMethod::DotProduct, _ => DistanceMethod::Cosine }}"
+            ),
         }
     }
 }
@@ -860,9 +875,15 @@ pub struct RerankMMR {
 }
 impl Display for RerankMMR {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let lambda = self.lambda.as_ref().map_or_else(|| "0.7".to_string(), |l| l.to_string());
+        let lambda = self
+            .lambda
+            .as_ref()
+            .map_or_else(|| "0.7".to_string(), |l| l.to_string());
         match &self.distance {
-            Some(dist) => write!(f, "rerank(MMRReranker::with_distance({lambda}, {dist}).unwrap(), None)"),
+            Some(dist) => write!(
+                f,
+                "rerank(MMRReranker::with_distance({lambda}, {dist}).unwrap(), None)"
+            ),
             None => write!(f, "rerank(MMRReranker::new({lambda}).unwrap(), None)"),
         }
     }
diff --git a/helix-db/src/helixc/generator/utils.rs b/helix-db/src/helixc/generator/utils.rs
index 0710649f..6084f0aa 100644
--- a/helix-db/src/helixc/generator/utils.rs
+++ b/helix-db/src/helixc/generator/utils.rs
@@ -467,7 +467,7 @@ use helix_db::{
         traversal_value::TraversalValue,
     },
     types::GraphError,
-    vector_core::vector::HVector,
+    vector_core::HVector,
 },
 helix_gateway::{
     embedding_providers::{EmbeddingModel, get_embedding_model},
diff --git a/helix-db/src/helixc/parser/creation_step_parse_methods.rs b/helix-db/src/helixc/parser/creation_step_parse_methods.rs
index 3775dd6b..6786d69c 100644
--- a/helix-db/src/helixc/parser/creation_step_parse_methods.rs
+++ b/helix-db/src/helixc/parser/creation_step_parse_methods.rs
@@ -152,7 +152,7 @@ impl HelixParser {

 #[cfg(test)]
 mod tests {
-    use crate::helixc::parser::{write_to_temp_file, HelixParser};
+    use crate::helixc::parser::{HelixParser, write_to_temp_file};

     // ============================================================================
     // AddNode Tests
diff --git a/helix-db/src/helixc/parser/errors.rs b/helix-db/src/helixc/parser/errors.rs
index 7e019b0c..7a462e1e 100644
--- a/helix-db/src/helixc/parser/errors.rs
+++ b/helix-db/src/helixc/parser/errors.rs
@@ -48,4 +48,4 @@ impl std::fmt::Debug for ParserError {
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git
a/helix-db/src/helixc/parser/expression_parse_methods.rs b/helix-db/src/helixc/parser/expression_parse_methods.rs index c5750918..c754e550 100644 --- a/helix-db/src/helixc/parser/expression_parse_methods.rs +++ b/helix-db/src/helixc/parser/expression_parse_methods.rs @@ -69,10 +69,12 @@ impl HelixParser { Rule::anonymous_traversal => self.parse_anon_traversal(traversal)?, Rule::id_traversal => self.parse_traversal(traversal)?, Rule::traversal => self.parse_traversal(traversal)?, - other => return Err(ParserError::from(format!( - "Unexpected rule in exists expression: {:?}", - other - ))), + other => { + return Err(ParserError::from(format!( + "Unexpected rule in exists expression: {:?}", + other + ))); + } }; let expr = ExpressionType::Exists(ExistsExpression { loc: loc.clone(), @@ -223,10 +225,12 @@ impl HelixParser { Rule::anonymous_traversal => self.parse_anon_traversal(traversal)?, Rule::id_traversal => self.parse_traversal(traversal)?, Rule::traversal => self.parse_traversal(traversal)?, - other => return Err(ParserError::from(format!( - "Unexpected rule in and_or_expression exists: {:?}", - other - ))), + other => { + return Err(ParserError::from(format!( + "Unexpected rule in and_or_expression exists: {:?}", + other + ))); + } }; let expr = ExpressionType::Exists(ExistsExpression { loc: loc.clone(), @@ -281,10 +285,12 @@ impl HelixParser { Rule::evaluates_to_bool => { expressions.push(self.parse_boolean_expression(p)?); } - other => return Err(ParserError::from(format!( - "Unexpected rule in parse_expression_vec: {:?}", - other - ))), + other => { + return Err(ParserError::from(format!( + "Unexpected rule in parse_expression_vec: {:?}", + other + ))); + } } } Ok(expressions) @@ -555,7 +561,7 @@ impl HelixParser { return Err(ParserError::from(format!( "Unknown mathematical function: {}", function_name - ))) + ))); } }; @@ -604,7 +610,9 @@ impl HelixParser { match inner_inner.as_rule() { Rule::math_function_call => Ok(Expression { loc: inner_inner.loc(), - expr: ExpressionType::MathFunctionCall(self.parse_math_function_call(inner_inner)?), + expr: ExpressionType::MathFunctionCall( + self.parse_math_function_call(inner_inner)?, + ), }), Rule::float => inner_inner .as_str() @@ -628,11 +636,15 @@ impl HelixParser { }), Rule::traversal => Ok(Expression { loc: inner_inner.loc(), - expr: ExpressionType::Traversal(Box::new(self.parse_traversal(inner_inner)?)), + expr: ExpressionType::Traversal(Box::new( + self.parse_traversal(inner_inner)?, + )), }), Rule::id_traversal => Ok(Expression { loc: inner_inner.loc(), - expr: ExpressionType::Traversal(Box::new(self.parse_traversal(inner_inner)?)), + expr: ExpressionType::Traversal(Box::new( + self.parse_traversal(inner_inner)?, + )), }), _ => Err(ParserError::from(format!( "Unexpected evaluates_to_number type: {:?}", @@ -682,7 +694,7 @@ impl HelixParser { #[cfg(test)] mod tests { - use crate::helixc::parser::{write_to_temp_file, HelixParser}; + use crate::helixc::parser::{HelixParser, write_to_temp_file}; // ============================================================================ // Literal Expression Tests diff --git a/helix-db/src/helixc/parser/graph_step_parse_methods.rs b/helix-db/src/helixc/parser/graph_step_parse_methods.rs index dae9bc53..25937800 100644 --- a/helix-db/src/helixc/parser/graph_step_parse_methods.rs +++ b/helix-db/src/helixc/parser/graph_step_parse_methods.rs @@ -2,10 +2,10 @@ use crate::helixc::parser::{ HelixParser, ParserError, Rule, location::HasLoc, types::{ - Aggregate, BooleanOp, BooleanOpType, Closure, Exclude, 
Expression, ExpressionType, FieldAddition, - FieldValue, FieldValueType, GraphStep, GraphStepType, GroupBy, IdType, MMRDistance, Object, OrderBy, - OrderByType, RerankMMR, RerankRRF, ShortestPath, ShortestPathAStar, ShortestPathBFS, - ShortestPathDijkstras, Step, StepType, Update, + Aggregate, BooleanOp, BooleanOpType, Closure, Exclude, Expression, ExpressionType, + FieldAddition, FieldValue, FieldValueType, GraphStep, GraphStepType, GroupBy, IdType, + MMRDistance, Object, OrderBy, OrderByType, RerankMMR, RerankRRF, ShortestPath, + ShortestPathAStar, ShortestPathBFS, ShortestPathDijkstras, Step, StepType, Update, }, utils::{PairTools, PairsTools}, }; @@ -24,10 +24,12 @@ impl HelixParser { let order_by_type = match order_by_rule.as_rule() { Rule::asc => OrderByType::Asc, Rule::desc => OrderByType::Desc, - other => return Err(ParserError::from(format!( - "Unexpected rule in parse_order_by: {:?}", - other - ))), + other => { + return Err(ParserError::from(format!( + "Unexpected rule in parse_order_by: {:?}", + other + ))); + } }; let expression = self.parse_expression(inner.try_next()?)?; Ok(OrderBy { @@ -455,13 +457,8 @@ impl HelixParser { Rule::math_expression => { // Parse the math_expression into an Expression let expr = self.parse_math_expression(p)?; - Ok(( - type_arg, - Some(expr), - from, - to, - )) - }, + Ok((type_arg, Some(expr), from, to)) + } Rule::to_from => match p.into_inner().next() { Some(p) => match p.as_rule() { Rule::to => Ok(( @@ -486,9 +483,7 @@ impl HelixParser { _ => Ok((type_arg, weight_expr, from, to)), }, ) { - Ok((type_arg, weight_expr, from, to)) => { - (type_arg, weight_expr, from, to) - } + Ok((type_arg, weight_expr, from, to)) => (type_arg, weight_expr, from, to), Err(e) => return Err(e), }; @@ -499,18 +494,25 @@ impl HelixParser { ExpressionType::Traversal(_trav) => { // For now, keep the traversal and create a Property weight expression // TODO: Extract property name from traversal for simple cases - Some(crate::helixc::parser::types::WeightExpression::Expression(Box::new(expr.clone()))) + Some(crate::helixc::parser::types::WeightExpression::Expression( + Box::new(expr.clone()), + )) } ExpressionType::MathFunctionCall(_) => { - Some(crate::helixc::parser::types::WeightExpression::Expression(Box::new(expr.clone()))) - } - _ => { - Some(crate::helixc::parser::types::WeightExpression::Expression(Box::new(expr.clone()))) + Some(crate::helixc::parser::types::WeightExpression::Expression( + Box::new(expr.clone()), + )) } + _ => Some(crate::helixc::parser::types::WeightExpression::Expression( + Box::new(expr.clone()), + )), }; (None, weight_type) } else { - (None, Some(crate::helixc::parser::types::WeightExpression::Default)) + ( + None, + Some(crate::helixc::parser::types::WeightExpression::Default), + ) }; GraphStep { @@ -592,7 +594,8 @@ impl HelixParser { for inner_pair in pair.clone().into_inner() { match inner_pair.as_rule() { Rule::type_args => { - type_arg = Some(inner_pair.into_inner().next().unwrap().as_str().to_string()); + type_arg = + Some(inner_pair.into_inner().next().unwrap().as_str().to_string()); } Rule::math_expression => { weight_expression = Some(self.parse_expression(inner_pair)?); @@ -603,15 +606,21 @@ impl HelixParser { heuristic_property = Some(literal[1..literal.len() - 1].to_string()); } Rule::to_from => { - if let Some(p) = inner_pair.into_inner().next() { match p.as_rule() { - Rule::to => { - to = Some(p.into_inner().next().unwrap().as_str().to_string()); - } - Rule::from => { - from = 
Some(p.into_inner().next().unwrap().as_str().to_string()); + if let Some(p) = inner_pair.into_inner().next() { + match p.as_rule() { + Rule::to => { + to = Some( + p.into_inner().next().unwrap().as_str().to_string(), + ); + } + Rule::from => { + from = Some( + p.into_inner().next().unwrap().as_str().to_string(), + ); + } + _ => {} } - _ => {} - } } + } } _ => {} } @@ -621,18 +630,25 @@ impl HelixParser { let (inner_traversal, weight_expr_typed) = if let Some(expr) = weight_expression { let weight_type = match &expr.expr { ExpressionType::Traversal(_trav) => { - Some(crate::helixc::parser::types::WeightExpression::Expression(Box::new(expr.clone()))) + Some(crate::helixc::parser::types::WeightExpression::Expression( + Box::new(expr.clone()), + )) } ExpressionType::MathFunctionCall(_) => { - Some(crate::helixc::parser::types::WeightExpression::Expression(Box::new(expr.clone()))) - } - _ => { - Some(crate::helixc::parser::types::WeightExpression::Expression(Box::new(expr.clone()))) + Some(crate::helixc::parser::types::WeightExpression::Expression( + Box::new(expr.clone()), + )) } + _ => Some(crate::helixc::parser::types::WeightExpression::Expression( + Box::new(expr.clone()), + )), }; (None, weight_type) } else { - (None, Some(crate::helixc::parser::types::WeightExpression::Default)) + ( + None, + Some(crate::helixc::parser::types::WeightExpression::Default), + ) }; GraphStep { @@ -730,8 +746,13 @@ impl HelixParser { }); } - let lambda = lambda.ok_or_else(|| ParserError::from("lambda parameter required for RerankMMR"))?; + let lambda = + lambda.ok_or_else(|| ParserError::from("lambda parameter required for RerankMMR"))?; - Ok(RerankMMR { loc, lambda, distance }) + Ok(RerankMMR { + loc, + lambda, + distance, + }) } } diff --git a/helix-db/src/helixc/parser/object_parse_methods.rs b/helix-db/src/helixc/parser/object_parse_methods.rs index 297b1e74..705c427b 100644 --- a/helix-db/src/helixc/parser/object_parse_methods.rs +++ b/helix-db/src/helixc/parser/object_parse_methods.rs @@ -36,7 +36,7 @@ impl HelixParser { Rule::float => value_pair .as_str() .parse() - .map(|f| ValueType::new(Value::F64(f), value_pair.loc())) + .map(|f| ValueType::new(Value::F32(f), value_pair.loc())) .map_err(|_| ParserError::from("Invalid float value")), Rule::boolean => Ok(ValueType::new( Value::Boolean(value_pair.as_str() == "true"), @@ -101,7 +101,7 @@ impl HelixParser { }, Rule::float => FieldValue { loc: value_pair.loc(), - value: FieldValueType::Literal(Value::F64( + value: FieldValueType::Literal(Value::F32( value_pair .as_str() .parse() @@ -193,7 +193,7 @@ impl HelixParser { }, Rule::float => FieldValue { loc: value_pair.loc(), - value: FieldValueType::Literal(Value::F64( + value: FieldValueType::Literal(Value::F32( value_pair .as_str() .parse() diff --git a/helix-db/src/helixc/parser/query_parse_methods.rs b/helix-db/src/helixc/parser/query_parse_methods.rs index 30af8771..8767aba9 100644 --- a/helix-db/src/helixc/parser/query_parse_methods.rs +++ b/helix-db/src/helixc/parser/query_parse_methods.rs @@ -1,7 +1,6 @@ use crate::helixc::parser::{ - HelixParser, Rule, + HelixParser, ParserError, Rule, location::HasLoc, - ParserError, types::{BuiltInMacro, Parameter, Query, Statement, StatementType}, }; use pest::iterators::Pair; @@ -20,14 +19,14 @@ impl HelixParser { let built_in_macro = match pair.into_inner().next() { Some(pair) => match pair.as_rule() { Rule::mcp_macro => Some(BuiltInMacro::MCP), - Rule::model_macro => { - match pair.into_inner().next() { - Some(model_name) => Some(BuiltInMacro::Model( - 
diff --git a/helix-db/src/helixc/parser/query_parse_methods.rs b/helix-db/src/helixc/parser/query_parse_methods.rs
index 30af8771..8767aba9 100644
--- a/helix-db/src/helixc/parser/query_parse_methods.rs
+++ b/helix-db/src/helixc/parser/query_parse_methods.rs
@@ -1,7 +1,6 @@
 use crate::helixc::parser::{
-    HelixParser, Rule,
+    HelixParser, ParserError, Rule,
     location::HasLoc,
-    ParserError,
     types::{BuiltInMacro, Parameter, Query, Statement, StatementType},
 };
 use pest::iterators::Pair;
@@ -20,14 +19,14 @@ impl HelixParser {
         let built_in_macro = match pair.into_inner().next() {
             Some(pair) => match pair.as_rule() {
                 Rule::mcp_macro => Some(BuiltInMacro::MCP),
-                Rule::model_macro => {
-                    match pair.into_inner().next() {
-                        Some(model_name) => Some(BuiltInMacro::Model(
-                            model_name.as_str().to_string(),
-                        )),
-                        None => return Err(ParserError::from("Model macro missing model name")),
+                Rule::model_macro => match pair.into_inner().next() {
+                    Some(model_name) => {
+                        Some(BuiltInMacro::Model(model_name.as_str().to_string()))
                     }
-                }
+                    None => {
+                        return Err(ParserError::from("Model macro missing model name"));
+                    }
+                },
                 _ => None,
             },
             _ => None,
@@ -37,17 +36,24 @@ impl HelixParser {
         }
         _ => None,
     };
-    let name = pairs.next()
+    let name = pairs
+        .next()
         .ok_or_else(|| ParserError::from("Expected query name"))?
-        .as_str().to_string();
+        .as_str()
+        .to_string();
     let parameters = self.parse_parameters(
-        pairs.next().ok_or_else(|| ParserError::from("Expected parameters block"))?
+        pairs
+            .next()
+            .ok_or_else(|| ParserError::from("Expected parameters block"))?,
     )?;
-    let body = pairs.next()
+    let body = pairs
+        .next()
         .ok_or_else(|| ParserError::from("Expected query body"))?;
     let statements = self.parse_query_body(body)?;
     let return_values = self.parse_return_statement(
-        pairs.next().ok_or_else(|| ParserError::from("Expected return statement"))?
+        pairs
+            .next()
+            .ok_or_else(|| ParserError::from("Expected return statement"))?,
    )?;

     Ok(Query {
@@ -68,7 +74,8 @@ impl HelixParser {
             .map(|p: Pair<'_, Rule>| -> Result<Parameter, ParserError> {
                 let mut inner = p.into_inner();
                 let name = {
-                    let pair = inner.next()
+                    let pair = inner
+                        .next()
                         .ok_or_else(|| ParserError::from("Expected parameter name"))?;
                     (pair.loc(), pair.as_str().to_string())
                 };
@@ -136,7 +143,9 @@ impl HelixParser {
             }),
             Rule::drop => {
-                let inner = p.into_inner().next()
+                let inner = p
+                    .into_inner()
+                    .next()
                     .ok_or_else(|| ParserError::from("Drop statement missing expression"))?;
                 Ok(Statement {
                     loc: inner.loc(),
@@ -160,7 +169,7 @@ impl HelixParser {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::helixc::parser::{write_to_temp_file, HelixParser};
+    use crate::helixc::parser::{HelixParser, write_to_temp_file};

     // ============================================================================
     // Basic Query Parsing Tests
diff --git a/helix-db/src/helixc/parser/return_value_parse_methods.rs b/helix-db/src/helixc/parser/return_value_parse_methods.rs
index 13cbd449..b4a51acb 100644
--- a/helix-db/src/helixc/parser/return_value_parse_methods.rs
+++ b/helix-db/src/helixc/parser/return_value_parse_methods.rs
@@ -1,9 +1,8 @@
 use std::collections::HashMap;

 use crate::helixc::parser::{
-    HelixParser, Rule,
+    HelixParser, ParserError, Rule,
     location::HasLoc,
-    ParserError,
     types::{Expression, ExpressionType, ReturnType},
 };
 use pest::iterators::Pair;
diff --git a/helix-db/src/helixc/parser/schema_parse_methods.rs b/helix-db/src/helixc/parser/schema_parse_methods.rs
index 4da559fe..d8dc3041 100644
--- a/helix-db/src/helixc/parser/schema_parse_methods.rs
+++ b/helix-db/src/helixc/parser/schema_parse_methods.rs
@@ -231,110 +231,110 @@ impl HelixParser {
         let default_value = match pair.into_inner().next() {
             Some(pair) => match pair.as_rule() {
                 Rule::string_literal => DefaultValue::String(pair.as_str().to_string()),
-                Rule::float => {
-                    match field_type {
-                        FieldType::F32 => DefaultValue::F32(
-                            pair.as_str().parse::<f32>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid float value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::F64 => DefaultValue::F64(
-                            pair.as_str().parse::<f64>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid float value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        other => return Err(ParserError::from(format!(
+                Rule::float => match field_type {
+                    FieldType::F32 => DefaultValue::F32(
+                        pair.as_str().parse::<f32>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid float value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::F64 => DefaultValue::F64(
+                        pair.as_str().parse::<f64>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid float value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    other => {
+                        return Err(ParserError::from(format!(
                             "Float default value not valid for field type {:?}",
                             other
-                        ))),
+                        )));
                     }
-                }
-                Rule::integer => {
-                    match field_type {
-                        FieldType::I8 => DefaultValue::I8(
-                            pair.as_str().parse::<i8>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::I16 => DefaultValue::I16(
-                            pair.as_str().parse::<i16>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::I32 => DefaultValue::I32(
-                            pair.as_str().parse::<i32>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::I64 => DefaultValue::I64(
-                            pair.as_str().parse::<i64>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::U8 => DefaultValue::U8(
-                            pair.as_str().parse::<u8>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::U16 => DefaultValue::U16(
-                            pair.as_str().parse::<u16>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::U32 => DefaultValue::U32(
-                            pair.as_str().parse::<u32>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::U64 => DefaultValue::U64(
-                            pair.as_str().parse::<u64>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        FieldType::U128 => DefaultValue::U128(
-                            pair.as_str().parse::<u128>().map_err(|e| {
-                                ParserError::from(format!(
-                                    "Invalid integer value '{}': {e}",
-                                    pair.as_str()
-                                ))
-                            })?,
-                        ),
-                        other => return Err(ParserError::from(format!(
+                },
+                Rule::integer => match field_type {
+                    FieldType::I8 => {
+                        DefaultValue::I8(pair.as_str().parse::<i8>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?)
+                    }
+                    FieldType::I16 => DefaultValue::I16(
+                        pair.as_str().parse::<i16>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::I32 => DefaultValue::I32(
+                        pair.as_str().parse::<i32>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::I64 => DefaultValue::I64(
+                        pair.as_str().parse::<i64>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::U8 => {
+                        DefaultValue::U8(pair.as_str().parse::<u8>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?)
+                    }
+                    FieldType::U16 => DefaultValue::U16(
+                        pair.as_str().parse::<u16>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::U32 => DefaultValue::U32(
+                        pair.as_str().parse::<u32>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::U64 => DefaultValue::U64(
+                        pair.as_str().parse::<u64>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    FieldType::U128 => DefaultValue::U128(
+                        pair.as_str().parse::<u128>().map_err(|e| {
+                            ParserError::from(format!(
+                                "Invalid integer value '{}': {e}",
+                                pair.as_str()
+                            ))
+                        })?,
+                    ),
+                    other => {
+                        return Err(ParserError::from(format!(
                             "Integer default value not valid for field type {:?}",
                             other
-                        ))),
+                        )));
                     }
-                }
+                },
                 Rule::now => DefaultValue::Now,
                 Rule::boolean => DefaultValue::Boolean(
                     pair.as_str().parse::<bool>().map_err(|e| {
@@ -344,10 +344,12 @@ impl HelixParser {
                         ))
                     })?,
                 ),
-                other => return Err(ParserError::from(format!(
-                    "Unexpected rule for default value: {:?}",
-                    other
-                ))),
+                other => {
+                    return Err(ParserError::from(format!(
+                        "Unexpected rule for default value: {:?}",
+                        other
+                    )));
+                }
             },
             None => DefaultValue::Empty,
         };
@@ -414,10 +416,7 @@ impl HelixParser {
             "U32" => Ok(FieldType::U32),
             "U64" => Ok(FieldType::U64),
             "U128" => Ok(FieldType::U128),
-            other => Err(ParserError::from(format!(
-                "Unknown named type: {}",
-                other
-            ))),
+            other => Err(ParserError::from(format!("Unknown named type: {}", other))),
         }
     }
     Rule::array => {
@@ -537,7 +536,7 @@ impl HelixParser {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::helixc::parser::{write_to_temp_file, HelixParser};
+    use crate::helixc::parser::{HelixParser, write_to_temp_file};

     // ============================================================================
     // Node Definition Tests
@@ -581,8 +580,14 @@ mod tests {
         let parsed = result.unwrap();
         let schema = parsed.schema.get(&1).unwrap();

-        assert!(matches!(schema.node_schemas[0].fields[0].prefix, FieldPrefix::Index));
-        assert!(matches!(schema.node_schemas[0].fields[1].prefix, FieldPrefix::Empty));
+        assert!(matches!(
+            schema.node_schemas[0].fields[0].prefix,
+            FieldPrefix::Index
+        ));
+        assert!(matches!(
+            schema.node_schemas[0].fields[1].prefix,
+            FieldPrefix::Empty
+        ));
     }

     #[test]
@@ -654,8 +659,14 @@ mod tests {
         let parsed = result.unwrap();
         let schema = parsed.schema.get(&1).unwrap();
         assert_eq!(schema.node_schemas[0].fields.len(), 2);
-        assert!(matches!(schema.node_schemas[0].fields[0].field_type, FieldType::Array(_)));
-        assert!(matches!(schema.node_schemas[0].fields[1].field_type, FieldType::Array(_)));
+        assert!(matches!(
+            schema.node_schemas[0].fields[0].field_type,
+            FieldType::Array(_)
+        ));
+        assert!(matches!(
+            schema.node_schemas[0].fields[1].field_type,
+            FieldType::Array(_)
+        ));
     }

     #[test]
@@ -673,7 +684,10 @@ mod tests {
         let parsed = result.unwrap();
         let schema = parsed.schema.get(&1).unwrap();
         assert_eq!(schema.node_schemas[0].fields.len(), 1);
-        assert!(matches!(schema.node_schemas[0].fields[0].field_type, FieldType::Object(_)));
+        assert!(matches!(
+            schema.node_schemas[0].fields[0].field_type,
+            FieldType::Object(_)
+        ));
     }

     #[test]
@@ -980,7 +994,10 @@ mod tests {
         let parsed = result.unwrap();
         let schema = parsed.schema.get(&1).unwrap();

-        assert!(matches!(schema.node_schemas[0].fields[0].field_type, FieldType::Array(_)));
+        assert!(matches!(
+            schema.node_schemas[0].fields[0].field_type,
+            FieldType::Array(_)
+        ));
     }

     #[test]
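The schema hunks above dispatch on the declared field type so a literal like "1.5" becomes a correctly typed default. A hedged sketch of the same pattern with simplified stand-in types (FieldType and DefaultValue here are illustrative, not the crate's definitions):

    #[derive(Debug)]
    enum FieldType { F32, F64 }
    #[derive(Debug)]
    enum DefaultValue { F32(f32), F64(f64) }

    // Parse a raw literal into a default whose width matches the field type.
    fn parse_default(raw: &str, ty: &FieldType) -> Result<DefaultValue, String> {
        match ty {
            FieldType::F32 => raw
                .parse::<f32>()
                .map(DefaultValue::F32)
                .map_err(|e| format!("Invalid float value '{raw}': {e}")),
            FieldType::F64 => raw
                .parse::<f64>()
                .map(DefaultValue::F64)
                .map_err(|e| format!("Invalid float value '{raw}': {e}")),
        }
    }

    fn main() {
        println!("{:?}", parse_default("1.5", &FieldType::F32).unwrap());
    }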
diff --git a/helix-db/src/helixc/parser/traversal_parse_methods.rs b/helix-db/src/helixc/parser/traversal_parse_methods.rs
index f2793e4d..a1d50cf0 100644
--- a/helix-db/src/helixc/parser/traversal_parse_methods.rs
+++ b/helix-db/src/helixc/parser/traversal_parse_methods.rs
@@ -117,7 +117,7 @@ impl HelixParser {
                 },
                 Rule::float => ValueType::Literal {
                     value: Value::from(
-                        val.as_str().parse::<f64>().map_err(|_| {
+                        val.as_str().parse::<f32>().map_err(|_| {
                             ParserError::from("Invalid float value")
                         })?,
                     ),
@@ -137,10 +137,12 @@ impl HelixParser {
                         ));
                     }
                 },
-                other => return Err(ParserError::from(format!(
-                    "Unexpected rule in start_node by_index: {:?}",
-                    other
-                ))),
+                other => {
+                    return Err(ParserError::from(format!(
+                        "Unexpected rule in start_node by_index: {:?}",
+                        other
+                    )));
+                }
             };
             vec![IdType::ByIndex {
                 index: Box::new(index),
@@ -149,10 +151,12 @@ impl HelixParser {
             }]
         })
     }
-            other => return Err(ParserError::from(format!(
-                "Unexpected rule in start_node: {:?}",
-                other
-            ))),
+            other => {
+                return Err(ParserError::from(format!(
+                    "Unexpected rule in start_node: {:?}",
+                    other
+                )));
+            }
         }
     }
     Ok(StartNode::Node { node_type, ids })
@@ -191,10 +195,12 @@ impl HelixParser {
             }
             ids = Some(new_ids);
         }
-        other => return Err(ParserError::from(format!(
-            "Unexpected rule in start_edge: {:?}",
-            other
-        ))),
+        other => {
+            return Err(ParserError::from(format!(
+                "Unexpected rule in start_edge: {:?}",
+                other
+            )));
+        }
     }
 }
 Ok(StartNode::Edge { edge_type, ids })
@@ -273,7 +279,7 @@ impl HelixParser {
             },
             Rule::float => ValueType::Literal {
                 value: Value::from(
-                    value_inner.as_str().parse::<f64>().map_err(|_| {
+                    value_inner.as_str().parse::<f32>().map_err(|_| {
                         ParserError::from("Invalid float value")
                     })?,
                 ),
@@ -301,10 +307,12 @@ impl HelixParser {
             }
             ids = Some(new_ids);
         }
-        other => return Err(ParserError::from(format!(
-            "Unexpected rule in start_vector: {:?}",
-            other
-        ))),
+        other => {
+            return Err(ParserError::from(format!(
+                "Unexpected rule in start_vector: {:?}",
+                other
+            )));
+        }
     }
 }
 Ok(StartNode::Vector { vector_type, ids })
diff --git a/helix-db/src/helixc/parser/types.rs b/helix-db/src/helixc/parser/types.rs
index fc408847..9ff18f50 100644
--- a/helix-db/src/helixc/parser/types.rs
+++ b/helix-db/src/helixc/parser/types.rs
@@ -1,5 +1,8 @@
 use super::location::Loc;
-use crate::{helixc::parser::{errors::ParserError, HelixParser}, protocol::value::Value};
+use crate::{
+    helixc::parser::{HelixParser, errors::ParserError},
+    protocol::value::Value,
+};
 use chrono::{DateTime, NaiveDate, Utc};
 use itertools::Itertools;
 use serde::{Deserialize, Serialize};
@@ -477,7 +480,7 @@ pub enum MathFunction {
     Sqrt,
     Ln,
     Log10,
-    Log, // Binary: LOG(x, base)
+    Log,   // Binary: LOG(x, base)
     Exp,
     Ceil,
     Floor,
@@ -490,7 +493,7 @@ pub enum MathFunction {
     Asin,
     Acos,
     Atan,
-    Atan2, // Binary: ATAN2(y, x)
+    Atan2, // Binary: ATAN2(y, x)

     // Constants (nullary)
     Pi,
@@ -509,16 +512,33 @@ impl MathFunction {
     pub fn arity(&self) -> usize {
         match self {
             MathFunction::Pi | MathFunction::E => 0,
-            MathFunction::Abs | MathFunction::Sqrt | MathFunction::Ln |
-            MathFunction::Log10 | MathFunction::Exp | MathFunction::Ceil |
-            MathFunction::Floor | MathFunction::Round | MathFunction::Sin |
-            MathFunction::Cos | MathFunction::Tan | MathFunction::Asin |
-            MathFunction::Acos | MathFunction::Atan | MathFunction::Min |
-            MathFunction::Max | MathFunction::Sum | MathFunction::Avg |
-            MathFunction::Count => 1,
-            MathFunction::Add | MathFunction::Sub | MathFunction::Mul |
-            MathFunction::Div | MathFunction::Pow | MathFunction::Mod |
-            MathFunction::Atan2 | MathFunction::Log => 2,
+            MathFunction::Abs
+            | MathFunction::Sqrt
+            | MathFunction::Ln
+            | MathFunction::Log10
+            | MathFunction::Exp
+            | MathFunction::Ceil
+            | MathFunction::Floor
+            | MathFunction::Round
+            | MathFunction::Sin
+            | MathFunction::Cos
+            | MathFunction::Tan
+            | MathFunction::Asin
+            | MathFunction::Acos
+            | MathFunction::Atan
+            | MathFunction::Min
+            | MathFunction::Max
+            | MathFunction::Sum
+            | MathFunction::Avg
+            | MathFunction::Count => 1,
+            MathFunction::Add
+            | MathFunction::Sub
+            | MathFunction::Mul
+            | MathFunction::Div
+            | MathFunction::Pow
+            | MathFunction::Mod
+            | MathFunction::Atan2
+            | MathFunction::Log => 2,
         }
     }

@@ -572,7 +592,7 @@ pub enum ExpressionType {
     Identifier(String),
     StringLiteral(String),
     IntegerLiteral(i32),
-    FloatLiteral(f64),
+    FloatLiteral(f32),
     BooleanLiteral(bool),
     ArrayLiteral(Vec<Expression>),
     Exists(ExistsExpression),
@@ -639,7 +659,9 @@ impl Display for ExpressionType {
             ExpressionType::Or(exprs) => write!(f, "Or({exprs:?})"),
             ExpressionType::SearchVector(sv) => write!(f, "SearchVector({sv:?})"),
             ExpressionType::BM25Search(bm25) => write!(f, "BM25Search({bm25:?})"),
-            ExpressionType::MathFunctionCall(mfc) => write!(f, "{}({:?})", mfc.function.name(), mfc.args),
+            ExpressionType::MathFunctionCall(mfc) => {
+                write!(f, "{}({:?})", mfc.function.name(), mfc.args)
+            }
             ExpressionType::Empty => write!(f, "Empty"),
         }
     }
@@ -701,13 +723,13 @@ pub struct OrderBy {
 #[derive(Debug, Clone)]
 pub struct Aggregate {
     pub loc: Loc,
-    pub properties: Vec<String>
+    pub properties: Vec<String>,
 }

 #[derive(Debug, Clone)]
 pub struct GroupBy {
     pub loc: Loc,
-    pub properties: Vec<String>
+    pub properties: Vec<String>,
 }

 #[derive(Debug, Clone)]
@@ -911,7 +933,7 @@ pub enum BooleanOpType {

 #[derive(Debug, Clone)]
 pub enum VectorData {
-    Vector(Vec<f64>),
+    Vector(Vec<f32>),
     Identifier(String),
     Embed(Embed),
 }
@@ -1069,6 +1091,10 @@ impl From<Value> for ValueType {
             value: Value::I32(i),
             loc: Loc::empty(),
         },
+        Value::F32(f) => ValueType::Literal {
+            value: Value::F32(f),
+            loc: Loc::empty(),
+        },
         Value::F64(f) => ValueType::Literal {
             value: Value::F64(f),
             loc: Loc::empty(),
diff --git a/helix-db/src/helixc/parser/utils.rs b/helix-db/src/helixc/parser/utils.rs
index efb64d1b..f2088ed2 100644
--- a/helix-db/src/helixc/parser/utils.rs
+++ b/helix-db/src/helixc/parser/utils.rs
@@ -26,13 +26,13 @@ impl HelixParser {
             ))),
         }
     }
-    pub(super) fn parse_vec_literal(&self, pair: Pair<Rule>) -> Result<Vec<f64>, ParserError> {
+    pub(super) fn parse_vec_literal(&self, pair: Pair<Rule>) -> Result<Vec<f32>, ParserError> {
         let pairs = pair.into_inner();
         let mut vec = Vec::new();
         for p in pairs {
             vec.push(
                 p.as_str()
-                    .parse::<f64>()
+                    .parse::<f32>()
                     .map_err(|_| ParserError::from("Invalid float value"))?,
             );
         }
@@ -72,10 +72,12 @@ impl HelixParser {
             Rule::to => {
                 to_id = self.parse_id_args(p.into_inner().next().unwrap())?;
             }
-            _ => return Err(ParserError::from(format!(
-                "Unexpected rule in parse_to_from: {:?}",
-                p.as_rule()
-            ))),
+            _ => {
+                return Err(ParserError::from(format!(
+                    "Unexpected rule in parse_to_from: {:?}",
+                    p.as_rule()
+                )));
+            }
         }
     }
     Ok(EdgeConnection {
diff --git a/helix-db/src/lib.rs b/helix-db/src/lib.rs
index 317a9376..32e76458 100644
--- a/helix-db/src/lib.rs
+++ b/helix-db/src/lib.rs
@@ -8,4 +8,4 @@ pub mod utils;
 use mimalloc::MiMalloc;

 #[global_allocator]
-static GLOBAL: MiMalloc = MiMalloc;
\ No newline at end of file
+static GLOBAL: MiMalloc = MiMalloc;
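types.rs above keeps a per-variant arity table, with Atan2 and Log as the binary cases. A small sketch of how such a table can validate call sites at parse time; the enum here is a trimmed stand-in, not the crate's full MathFunction:

    #[derive(Debug, Clone, Copy)]
    enum MathFunction { Abs, Atan2, Log, Pi }

    impl MathFunction {
        // Number of arguments each function expects.
        fn arity(self) -> usize {
            match self {
                MathFunction::Pi => 0,
                MathFunction::Abs => 1,
                MathFunction::Atan2 | MathFunction::Log => 2,
            }
        }
    }

    fn check_call(f: MathFunction, args: &[f32]) -> Result<(), String> {
        if args.len() == f.arity() {
            Ok(())
        } else {
            Err(format!("{f:?} expects {} args, got {}", f.arity(), args.len()))
        }
    }

    fn main() {
        assert!(check_call(MathFunction::Atan2, &[1.0, 2.0]).is_ok());
        assert!(check_call(MathFunction::Log, &[8.0]).is_err());
    }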
diff --git a/helix-db/src/protocol/custom_serde/compatibility_tests.rs b/helix-db/src/protocol/custom_serde/compatibility_tests.rs
index 8944628d..7ccc4ba5 100644
--- a/helix-db/src/protocol/custom_serde/compatibility_tests.rs
+++ b/helix-db/src/protocol/custom_serde/compatibility_tests.rs
@@ -10,7 +10,7 @@
 #[cfg(test)]
 mod compatibility_tests {
     use super::super::test_utils::*;
-    use crate::helix_engine::vector_core::vector::HVector;
+    use crate::helix_engine::vector_core::HVector;
     use crate::protocol::value::Value;
     use crate::utils::items::{Edge, Node};
     use bumpalo::Bump;
@@ -115,7 +115,10 @@ mod compatibility_tests {

         let props: Vec<(&str, Value)> = (0..50)
             .map(|i| {
-                (Box::leak(format!("key_{}", i).into_boxed_str()) as &str, Value::I32(i))
+                (
+                    Box::leak(format!("key_{}", i).into_boxed_str()) as &str,
+                    Value::I32(i),
+                )
             })
             .collect();

@@ -184,22 +187,23 @@ mod compatibility_tests {
     fn test_old_edge_with_nested_values() {
         let id = 77777u128;

-        let props = vec![
-            (
-                "metadata",
-                Value::Object(
-                    vec![
-                        ("created".to_string(), Value::I64(1234567890)),
-                        ("tags".to_string(), Value::Array(vec![
+        let props = vec![(
+            "metadata",
+            Value::Object(
+                vec![
+                    ("created".to_string(), Value::I64(1234567890)),
+                    (
+                        "tags".to_string(),
+                        Value::Array(vec![
                             Value::String("tag1".to_string()),
                             Value::String("tag2".to_string()),
-                        ])),
-                    ]
-                    .into_iter()
-                    .collect(),
-                ),
+                        ]),
+                    ),
+                ]
+                .into_iter()
+                .collect(),
             ),
-        ];
+        )];

         let old_edge = create_old_edge(id, "NestedEdge", 0, 10, 20, props);
         let old_bytes = bincode::serialize(&old_edge).unwrap();
@@ -245,12 +249,8 @@ mod compatibility_tests {
         let data_bytes = create_vector_bytes(&data);

         let arena = Bump::new();
-        let new_vector = HVector::from_bincode_bytes(
-            &arena,
-            Some(&old_bytes),
-            &data_bytes,
-            id,
-        );
+        let new_vector =
+            HVector::from_bincode_bytes(&arena, Some(&old_bytes), &data_bytes, id, true);

         assert!(new_vector.is_ok(), "Should deserialize old vector format");
         let restored = new_vector.unwrap();
@@ -258,7 +258,7 @@ mod compatibility_tests {
         assert_eq!(restored.id, id);
         assert_eq!(restored.label, "LegacyVector");
         assert_eq!(restored.version, 1);
-        assert_eq!(restored.deleted, false);
+        assert!(!restored.deleted);
     }

     #[test]
@@ -271,14 +271,10 @@ mod compatibility_tests {
         let data_bytes = create_vector_bytes(&[0.0]);

         let arena = Bump::new();
-        let new_vector = HVector::from_bincode_bytes(
-            &arena,
-            Some(&old_bytes),
-            &data_bytes,
-            id,
-        ).unwrap();
+        let new_vector =
+            HVector::from_bincode_bytes(&arena, Some(&old_bytes), &data_bytes, id, true).unwrap();

-        assert_eq!(new_vector.deleted, true);
+        assert!(new_vector.deleted);
     }

     #[test]
@@ -296,12 +292,8 @@ mod compatibility_tests {
         let data_bytes = create_vector_bytes(&vec![0.0; 1536]);

         let arena = Bump::new();
-        let new_vector = HVector::from_bincode_bytes(
-            &arena,
-            Some(&old_bytes),
-            &data_bytes,
-            id,
-        ).unwrap();
+        let new_vector =
+            HVector::from_bincode_bytes(&arena, Some(&old_bytes), &data_bytes, id, true).unwrap();

         assert!(new_vector.properties.is_some());
         let props = new_vector.properties.unwrap();
@@ -363,26 +355,18 @@ mod compatibility_tests {
         let data = vec![1.0, 2.0];

         // Different vector versions
-        let vec_v1 = create_arena_vector(&arena, id, "V1", 1, false, 0, &data, vec![]);
-        let vec_v2 = create_arena_vector(&arena, id, "V2", 2, false, 0, &data, vec![]);
+        let vec_v1 = create_arena_vector(&arena, id, "V1", 1, false, &data, vec![]);
+        let vec_v2 = create_arena_vector(&arena, id, "V2", 2, false, &data, vec![]);

         let props_v1 = bincode::serialize(&vec_v1).unwrap();
         let props_v2 = bincode::serialize(&vec_v2).unwrap();
         let data_bytes = create_vector_bytes(&data);

         let arena2 = Bump::new();
-        let restored_v1 = HVector::from_bincode_bytes(
-            &arena2,
-            Some(&props_v1),
-            &data_bytes,
-            id,
-        ).unwrap();
-        let restored_v2 = HVector::from_bincode_bytes(
-            &arena2,
-            Some(&props_v2),
-            &data_bytes,
-            id,
-        ).unwrap();
+        let restored_v1 =
+            HVector::from_bincode_bytes(&arena2, Some(&props_v1), &data_bytes, id, true).unwrap();
+        let restored_v2 =
+            HVector::from_bincode_bytes(&arena2, Some(&props_v2), &data_bytes, id, true).unwrap();

         assert_eq!(restored_v1.version, 1);
         assert_eq!(restored_v2.version, 2);
diff --git a/helix-db/src/protocol/custom_serde/edge_case_tests.rs b/helix-db/src/protocol/custom_serde/edge_case_tests.rs
index 85463e6d..3bd17ad1 100644
--- a/helix-db/src/protocol/custom_serde/edge_case_tests.rs
+++ b/helix-db/src/protocol/custom_serde/edge_case_tests.rs
@@ -13,7 +13,7 @@
 #[cfg(test)]
 mod edge_case_tests {
     use super::super::test_utils::*;
-    use crate::helix_engine::vector_core::vector::HVector;
+    use crate::helix_engine::vector_core::HVector;
     use crate::protocol::value::Value;
     use crate::utils::items::{Edge, Node};
     use bumpalo::Bump;
@@ -79,12 +79,12 @@ mod edge_case_tests {
             })
             .collect();

-        let vector = create_arena_vector(&arena, id, "many_props", 1, false, 0, &data, props);
+        let vector = create_arena_vector(&arena, id, "many_props", 1, false, &data, props);
         let props_bytes = bincode::serialize(&vector).unwrap();
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         assert!(result.is_ok());
         assert_eq!(result.unwrap().properties.unwrap().len(), 500);
     }
@@ -156,7 +156,7 @@ mod edge_case_tests {
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         assert!(result.is_ok());
         assert!(result.unwrap().label.len() > 2000);
     }
@@ -240,12 +240,12 @@ mod edge_case_tests {
             ("ΠšΠ»ΡŽΡ‡", Value::String("Π—Π½Π°Ρ‡Π΅Π½ΠΈΠ΅".to_string())),
         ];

-        let vector = create_arena_vector(&arena, id, "unicode", 1, false, 0, &data, props);
+        let vector = create_arena_vector(&arena, id, "unicode", 1, false, &data, props);
         let props_bytes = bincode::serialize(&vector).unwrap();
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         assert!(result.is_ok());
     }

@@ -354,19 +354,22 @@ mod edge_case_tests {
             Value::I32(1),
             Value::Object({
                 let mut inner = HashMap::new();
-                inner.insert("inner_key".to_string(), Value::String("inner_value".to_string()));
+                inner.insert(
+                    "inner_key".to_string(),
+                    Value::String("inner_value".to_string()),
+                );
                 inner
             }),
         ]),
     );

     let props = vec![("complex", Value::Object(map))];
-    let vector = create_arena_vector(&arena, id, "complex", 1, false, 0, &data, props);
+    let vector = create_arena_vector(&arena, id, "complex", 1, false, &data, props);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }

@@ -446,12 +449,12 @@ mod edge_case_tests {
     let data = vec![1.0];
     let props = vec![("empty_obj", Value::Object(HashMap::new()))];

-    let vector = create_arena_vector(&arena, id, "test", 1, false, 0, &data, props);
+    let vector = create_arena_vector(&arena, id, "test", 1, false, &data, props);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }

@@ -516,19 +519,14 @@ mod edge_case_tests {
     let id = 800800u128;

     // Subnormal (denormalized) numbers
-    let data = vec![
-        f64::MIN_POSITIVE,
-        f64::MIN_POSITIVE / 2.0,
-        1e-308,
-        1e-320,
-    ];
+    let data = vec![f32::MIN_POSITIVE, f32::MIN_POSITIVE / 2.0, 1e-39, 1e-44];

     let vector = create_simple_vector(&arena, id, "subnormal", &data);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }

@@ -543,7 +541,7 @@ mod edge_case_tests {
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }

@@ -578,9 +576,7 @@ mod edge_case_tests {
     let long_key = "property_key_".repeat(100); // ~1.3KB key
     let key_ref: &str = arena.alloc_str(&long_key);

-    let props = vec![
-        (key_ref, Value::String("value".to_string())),
-    ];
+    let props = vec![(key_ref, Value::String("value".to_string()))];

     let edge = create_arena_edge(&arena, id, "test", 0, 1, 2, props);
     let bytes = bincode::serialize(&edge).unwrap();
@@ -602,12 +598,12 @@ mod edge_case_tests {
         ("123", Value::String("one-two-three".to_string())),
     ];

-    let vector = create_arena_vector(&arena, id, "numeric_keys", 1, false, 0, &data, props);
+    let vector = create_arena_vector(&arena, id, "numeric_keys", 1, false, &data, props);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }

@@ -620,9 +616,7 @@ mod edge_case_tests {
     let arena = Bump::new();
     let id = 404404u128;

-    let large_array = Value::Array(
-        (0..1000).map(|i| Value::I32(i)).collect()
-    );
+    let large_array = Value::Array((0..1000).map(Value::I32).collect());
     let props = vec![("big_array", large_array)];

     let node = create_arena_node(&arena, id, "test", 0, props);
@@ -641,7 +635,7 @@ mod edge_case_tests {
     let string_array = Value::Array(
         (0..100)
             .map(|i| Value::String(format!("string_{}", i)))
-            .collect()
+            .collect(),
     );
     let props = vec![("strings", string_array)];

@@ -669,12 +663,12 @@ mod edge_case_tests {
     ]);
     let props = vec![("mixed", mixed_array)];

-    let vector = create_arena_vector(&arena, id, "test", 1, false, 0, &data, props);
+    let vector = create_arena_vector(&arena, id, "test", 1, false, &data, props);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }

@@ -686,16 +680,16 @@ mod edge_case_tests {
 fn test_vector_with_8192_dimensions() {
     let arena = Bump::new();
     let id = 707707u128;
-    let data: Vec<f64> = (0..8192).map(|i| (i as f64) * 0.0001).collect();
+    let data: Vec<f32> = (0..8192).map(|i| (i as f32) * 0.0001).collect();

     let vector = create_simple_vector(&arena, id, "8k_dims", &data);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
-    assert_eq!(result.unwrap().data.len(), 8192);
+    assert_eq!(result.unwrap().len(), 8192);
 }

 #[test]
@@ -709,10 +703,10 @@ mod edge_case_tests {
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
     let deserialized = result.unwrap();
-    assert!(deserialized.data.iter().all(|&v| v == 0.0));
+    assert!(deserialized.data_borrowed().iter().all(|&v| v == 0.0));
 }

 #[test]
@@ -726,10 +720,15 @@ mod edge_case_tests {
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
     let deserialized = result.unwrap();
-    assert!(deserialized.data.iter().all(|&v| (v - 42.42).abs() < 1e-10));
+    assert!(
+        deserialized
+            .data_borrowed()
+            .iter()
+            .all(|&v| (v - 42.42).abs() < 1e-10)
+    );
 }

 // ========================================================================
@@ -788,7 +787,7 @@ mod edge_case_tests {
 fn test_vector_max_complexity() {
     let arena = Bump::new();
     let id = u128::MAX;
-    let data: Vec<f64> = (0..2048).map(|i| (i as f64).sin()).collect();
+    let data: Vec<f32> = (0..2048).map(|i| (i as f32).sin()).collect();

     let props: Vec<(&str, Value)> = (0..200)
         .map(|i| {
@@ -797,12 +796,12 @@ mod edge_case_tests {
         })
         .collect();

-    let vector = create_arena_vector(&arena, id, &"Vec".repeat(200), 255, true, 0, &data, props);
+    let vector = create_arena_vector(&arena, id, &"Vec".repeat(200), 255, true, &data, props);
     let props_bytes = bincode::serialize(&vector).unwrap();
     let data_bytes = vector.vector_data_to_bytes().unwrap();

     let arena2 = Bump::new();
-    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+    let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
     assert!(result.is_ok());
 }
}
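These edge-case tests assume the new f32 layout: vector_data_to_bytes should yield exactly 4 bytes per dimension. A minimal sketch of that assumption using the bytemuck crate (the same cast the test helpers use):

    fn main() {
        let data: Vec<f32> = (0..8).map(|i| i as f32 * 0.5).collect();
        // f32 slice -> raw bytes via a plain cast: 4 bytes per dimension.
        let bytes: Vec<u8> = bytemuck::cast_slice(&data).to_vec();
        assert_eq!(bytes.len(), data.len() * std::mem::size_of::<f32>());
        // Decode chunk by chunk to avoid any alignment concerns.
        let back: Vec<f32> = bytes
            .chunks_exact(4)
            .map(|c| f32::from_ne_bytes([c[0], c[1], c[2], c[3]]))
            .collect();
        assert_eq!(back, data);
    }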
diff --git a/helix-db/src/protocol/custom_serde/edge_serde.rs b/helix-db/src/protocol/custom_serde/edge_serde.rs
index 024d5b1f..b222b65a 100644
--- a/helix-db/src/protocol/custom_serde/edge_serde.rs
+++ b/helix-db/src/protocol/custom_serde/edge_serde.rs
@@ -2,8 +2,8 @@ use crate::utils::{
     items::Edge,
     properties::{ImmutablePropertiesMap, ImmutablePropertiesMapDeSeed},
 };
-use std::fmt;
 use serde::de::{DeserializeSeed, Visitor};
+use std::fmt;

 /// Helper DeserializeSeed for Option<ImmutablePropertiesMap>
 struct OptionPropertiesMapDeSeed<'arena> {
diff --git a/helix-db/src/protocol/custom_serde/error_handling_tests.rs b/helix-db/src/protocol/custom_serde/error_handling_tests.rs
index fda9616e..064ce9cd 100644
--- a/helix-db/src/protocol/custom_serde/error_handling_tests.rs
+++ b/helix-db/src/protocol/custom_serde/error_handling_tests.rs
@@ -13,7 +13,7 @@
 #[cfg(test)]
 mod error_handling_tests {
     use super::super::test_utils::*;
-    use crate::helix_engine::vector_core::vector::HVector;
+    use crate::helix_engine::vector_core::HVector;
     use crate::protocol::value::Value;
     use crate::utils::items::{Edge, Node};
     use bumpalo::Bump;
@@ -204,7 +204,7 @@ mod error_handling_tests {
         let valid_data = vec![1.0, 2.0, 3.0];
         let data_bytes = create_vector_bytes(&valid_data);

-        let result = HVector::from_bincode_bytes(&arena, Some(empty_bytes), &data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena, Some(empty_bytes), &data_bytes, id, true);
         assert!(result.is_err(), "Should fail on empty property bytes");
     }

@@ -218,16 +218,16 @@ mod error_handling_tests {
         let empty_data: &[u8] = &[];

         let arena2 = Bump::new();
-        let _result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), empty_data, id);
-        // Should panic due to assertion in cast_raw_vector_data
+        let _result =
+            HVector::from_bincode_bytes(&arena2, Some(&props_bytes), empty_data, id, true).unwrap();
     }

     #[test]
-    #[should_panic(expected = "raw_vector_data.len() == 0")]
+    #[should_panic]
     fn test_vector_cast_empty_raw_data_panics() {
         let arena = Bump::new();
         let empty_data: &[u8] = &[];
-        HVector::cast_raw_vector_data(&arena, empty_data);
+        HVector::raw_vector_data_to_vec(empty_data, &arena).unwrap();
     }

     #[test]
@@ -235,14 +235,15 @@ mod error_handling_tests {
         let arena = Bump::new();
         let id = 666777u128;
         let props = vec![("key", Value::String("value".to_string()))];
-        let vector = create_arena_vector(&arena, id, "test", 1, false, 0, &[1.0], props);
+        let vector = create_arena_vector(&arena, id, "test", 1, false, &[1.0], props);
         let props_bytes = bincode::serialize(&vector).unwrap();
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let truncated_props = &props_bytes[..props_bytes.len() / 2];

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(truncated_props), data_bytes, id);
+        let result =
+            HVector::from_bincode_bytes(&arena2, Some(truncated_props), data_bytes, id, true);
         assert!(result.is_err(), "Should fail on truncated properties");
     }

@@ -253,17 +254,17 @@ mod error_handling_tests {
         let garbage: Vec<u8> = vec![0xFF; 50];
         let data_bytes = create_vector_bytes(&[1.0, 2.0, 3.0]);

-        let result = HVector::from_bincode_bytes(&arena, Some(&garbage), &data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena, Some(&garbage), &data_bytes, id, true);
         assert!(result.is_err(), "Should fail on garbage property bytes");
     }

     #[test]
-    #[should_panic(expected = "is not a multiple of size_of::<f64>()")]
+    #[should_panic]
     fn test_vector_misaligned_data_bytes_panics() {
         let arena = Bump::new();
-        // 7 bytes is not a multiple of 8 (size of f64)
+        // 7 bytes is not a multiple of 4 (size of f32)
         let misaligned: &[u8] = &[0, 1, 2, 3, 4, 5, 6];
-        HVector::cast_raw_vector_data(&arena, misaligned);
+        HVector::raw_vector_data_to_vec(misaligned, &arena).unwrap();
     }

     #[test]
@@ -384,7 +385,8 @@ mod error_handling_tests {
         }

         let arena2 = Bump::new();
-        let _result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let _result =
+            HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         // Should fail on UTF-8 validation
     }

@@ -465,14 +467,14 @@ mod error_handling_tests {
     #[test]
     fn test_vector_extreme_version_value() {
         let arena = Bump::new();
-        let id = 012012u128;
+        let id = 12012u128;

-        let vector = create_arena_vector(&arena, id, "test", 255, false, 0, &[1.0], vec![]);
+        let vector = create_arena_vector(&arena, id, "test", 255, false, &[1.0], vec![]);
         let props_bytes = bincode::serialize(&vector).unwrap();
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         assert!(result.is_ok(), "Should handle u8::MAX version");
         assert_eq!(result.unwrap().version, 255);
     }
@@ -519,7 +521,7 @@ mod error_handling_tests {
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         assert!(result.is_ok(), "Should handle u128::MAX ID");
         assert_eq!(result.unwrap().id, u128::MAX);
     }
@@ -582,14 +584,14 @@ mod error_handling_tests {
         let id = 987654u128;

         // Vector with NaN, infinity, and other special values
-        let data = vec![f64::NAN, f64::INFINITY, f64::NEG_INFINITY, 0.0, -0.0];
+        let data = vec![f32::NAN, f32::INFINITY, f32::NEG_INFINITY, 0.0, -0.0];

         let vector = create_simple_vector(&arena, id, "special", &data);
         let props_bytes = bincode::serialize(&vector).unwrap();
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id);
+        let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true);
         assert!(
             result.is_ok(),
             "Should handle special float values in vector data"
         )
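The #[should_panic] tests above exercise length validation on raw vector bytes. A hedged sketch of the invariant they rely on; raw_to_f32s is a hypothetical stand-in for the crate's raw_vector_data_to_vec, and little-endian decoding is an assumption of this sketch:

    fn raw_to_f32s(raw: &[u8]) -> Result<Vec<f32>, String> {
        // Raw bytes must be a non-empty whole number of f32s (4 bytes each).
        if raw.is_empty() || raw.len() % std::mem::size_of::<f32>() != 0 {
            return Err(format!("bad raw vector length: {}", raw.len()));
        }
        Ok(raw
            .chunks_exact(4)
            .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]]))
            .collect())
    }

    fn main() {
        assert!(raw_to_f32s(&[0, 1, 2, 3, 4, 5, 6]).is_err()); // 7 % 4 != 0
        assert_eq!(raw_to_f32s(&1.0f32.to_le_bytes()).unwrap(), vec![1.0]);
    }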
diff --git a/helix-db/src/protocol/custom_serde/integration_tests.rs b/helix-db/src/protocol/custom_serde/integration_tests.rs
index 8f84fae0..58b82b4f 100644
--- a/helix-db/src/protocol/custom_serde/integration_tests.rs
+++ b/helix-db/src/protocol/custom_serde/integration_tests.rs
@@ -11,7 +11,7 @@
 #[cfg(test)]
 mod integration_tests {
     use super::super::test_utils::*;
-    use crate::helix_engine::vector_core::vector::HVector;
+    use crate::helix_engine::vector_core::HVector;
     use crate::protocol::value::Value;
     use crate::utils::items::{Edge, Node};
     use bincode::Options;
@@ -203,9 +203,7 @@ mod integration_tests {
         let arena = Bump::new();

         let edges: Vec<Edge> = (0..20)
-            .map(|i| {
-                create_simple_edge(&arena, i as u128, "LINK", i as u128, (i + 1) as u128)
-            })
+            .map(|i| create_simple_edge(&arena, i as u128, "LINK", i as u128, (i + 1) as u128))
             .collect();

         let serialized: Vec<Vec<u8>> = edges
@@ -251,12 +249,8 @@ mod integration_tests {
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let deserialized = HVector::from_bincode_bytes(
-            &arena2,
-            Some(&props_bytes),
-            data_bytes,
-            id,
-        ).unwrap();
+        let deserialized =
+            HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap();

         assert_vectors_semantically_equal(&vector, &deserialized);
     }
@@ -272,17 +266,13 @@ mod integration_tests {
             ("dimensions", Value::I32(3)),
         ];

-        let vector = create_arena_vector(&arena, id, "doc_vector", 1, false, 0, &data, props);
+        let vector =
+            create_arena_vector(&arena, id, "doc_vector", 1, false, &data, props);
         let props_bytes = bincode::serialize(&vector).unwrap();
         let data_bytes = vector.vector_data_to_bytes().unwrap();

         let arena2 = Bump::new();
-        let deserialized = HVector::from_bincode_bytes(
-            &arena2,
-            Some(&props_bytes),
-            data_bytes,
-            id,
-        ).unwrap();
+        let deserialized =
+            HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap();

         assert_vectors_semantically_equal(&vector, &deserialized);
     }
@@ -314,23 +304,17 @@ mod integration_tests {
         let props_bytes1 = bincode::serialize(&vector).unwrap();
         let data_bytes1 = vector.vector_data_to_bytes().unwrap();
         let arena2 = Bump::new();
-        let vector2 = HVector::from_bincode_bytes(
-            &arena2,
-            Some(&props_bytes1),
-            data_bytes1,
-            id,
-        ).unwrap();
+        let vector2 =
+            HVector::from_bincode_bytes(&arena2, Some(&props_bytes1), data_bytes1, id, true)
+                .unwrap();

         // Second roundtrip
         let props_bytes2 = bincode::serialize(&vector2).unwrap();
         let data_bytes2 = vector2.vector_data_to_bytes().unwrap();
         let arena3 = Bump::new();
-        let vector3 = HVector::from_bincode_bytes(
-            &arena3,
-            Some(&props_bytes2),
-            data_bytes2,
-            id,
-        ).unwrap();
+        let vector3 =
+            HVector::from_bincode_bytes(&arena3, Some(&props_bytes2), data_bytes2, id, true)
+                .unwrap();

         assert_vectors_semantically_equal(&vector, &vector2);
         assert_vectors_semantically_equal(&vector2, &vector3);
@@ -344,7 +328,7 @@ mod integration_tests {

         let vectors: Vec<HVector> = (0..15)
             .map(|i| {
-                let data = vec![i as f64, (i + 1) as f64, (i + 2) as f64];
+                let data = vec![i as f32, (i + 1) as f32, (i + 2) as f32];
                 create_simple_vector(&arena, i as u128, &format!("vec_{}", i), &data)
             })
             .collect();
@@ -367,6 +351,7 @@ mod integration_tests {
                 Some(props_bytes),
                 data_bytes,
                 i as u128,
+                true,
             );
             assert!(result.is_ok());
         }
@@ -428,6 +413,7 @@ mod integration_tests {
             Some(&vector_props_bytes),
             vector_data_bytes,
             3,
+            true,
         );
         assert!(node_restored.is_ok());
@@ -483,9 +469,7 @@ mod integration_tests {
         let restored: Vec<Node> = serialized
             .iter()
             .enumerate()
-            .map(|(i, bytes)| {
-                Node::from_bincode_bytes(i as u128, bytes, &shared_arena).unwrap()
-            })
+            .map(|(i, bytes)| Node::from_bincode_bytes(i as u128, bytes, &shared_arena).unwrap())
             .collect();

         assert_eq!(restored.len(), 100);
@@ -613,7 +597,11 @@ mod integration_tests {
         let bytes = bincode::serialize(&node).unwrap();

         // Should be relatively small (label + version + empty props indicator)
-        assert!(bytes.len() < 100, "Empty node should be small, got {} bytes", bytes.len());
+        assert!(
+            bytes.len() < 100,
+            "Empty node should be small, got {} bytes",
+            bytes.len()
+        );
     }

     #[test]
@@ -647,7 +635,7 @@ mod integration_tests {
         let vector = create_simple_vector(&arena, id, "test", &data);
         let data_bytes = vector.vector_data_to_bytes().unwrap();

-        // Should be exactly 128 * 8 bytes (128 f64 values)
-        assert_eq!(data_bytes.len(), 128 * 8);
+        // Should be exactly 128 * 4 bytes (128 f32 values)
+        assert_eq!(data_bytes.len(), 128 * 4);
     }
 }
diff --git a/helix-db/src/protocol/custom_serde/node_serde.rs b/helix-db/src/protocol/custom_serde/node_serde.rs
index 9af6bec9..54e5533f 100644
--- a/helix-db/src/protocol/custom_serde/node_serde.rs
+++ b/helix-db/src/protocol/custom_serde/node_serde.rs
@@ -1,9 +1,9 @@
-use std::fmt;
-use serde::de::{DeserializeSeed, Visitor};
 use crate::utils::{
     items::Node,
     properties::{ImmutablePropertiesMap, ImmutablePropertiesMapDeSeed},
 };
+use serde::de::{DeserializeSeed, Visitor};
+use std::fmt;

 /// Helper DeserializeSeed for Option<ImmutablePropertiesMap>
 /// This is needed because we can't use next_element::<Option<ImmutablePropertiesMap>>() with custom DeserializeSeed
@@ -84,7 +84,9 @@ impl<'de, 'arena> serde::de::DeserializeSeed<'de> for NodeDeSeed<'arena> {
                     .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
                 let label = self.arena.alloc_str(label_string);

-                let version: u8 = seq.next_element()?.ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
+                let version: u8 = seq
+                    .next_element()?
+                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;

                 // Bincode serializes Option as ONE field: 0x00 (None) or 0x01+data (Some)
                 // Use our custom DeserializeSeed that handles the Option wrapper
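node_serde's comment relies on bincode's Option encoding: a one-byte tag (0x00 for None, 0x01 for Some) followed by the payload. A quick standalone check of that layout with bincode 1.x's default options:

    fn main() {
        let none: Option<u32> = None;
        let some: Option<u32> = Some(7);
        // None is just the tag byte.
        assert_eq!(bincode::serialize(&none).unwrap(), vec![0u8]);
        // Some is the tag byte followed by the 4-byte u32 payload.
        let bytes = bincode::serialize(&some).unwrap();
        assert_eq!(bytes[0], 1);
        assert_eq!(bytes.len(), 1 + 4);
    }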
diff --git a/helix-db/src/protocol/custom_serde/property_based_tests.rs b/helix-db/src/protocol/custom_serde/property_based_tests.rs
index 81f5c7d6..0304a03a 100644
--- a/helix-db/src/protocol/custom_serde/property_based_tests.rs
+++ b/helix-db/src/protocol/custom_serde/property_based_tests.rs
@@ -9,7 +9,7 @@
 #[cfg(test)]
 mod property_based_tests {
     use super::super::test_utils::*;
-    use crate::helix_engine::vector_core::vector::HVector;
+    use crate::helix_engine::vector_core::HVector;
     use crate::protocol::value::Value;
     use crate::utils::items::{Edge, Node};
     use bumpalo::Bump;
@@ -36,7 +36,9 @@ mod property_based_tests {
             any::<i64>().prop_map(Value::I64),
             any::<u32>().prop_map(Value::U32),
             any::<u64>().prop_map(Value::U64),
-            any::<f64>().prop_filter("Not NaN", |f| !f.is_nan()).prop_map(Value::F64),
+            any::<f64>()
+                .prop_filter("Not NaN", |f| !f.is_nan())
+                .prop_map(Value::F64),
             any::<bool>().prop_map(Value::Boolean),
             arb_long_string().prop_map(Value::String),
             Just(Value::Empty),
@@ -52,9 +54,9 @@ mod property_based_tests {
     }

     // Strategy for generating vector data
-    fn arb_vector_data() -> impl Strategy<Value = Vec<f64>> {
+    fn arb_vector_data() -> impl Strategy<Value = Vec<f32>> {
         prop::collection::vec(
-            any::<f64>().prop_filter("Not NaN", |f| !f.is_nan()),
+            any::<f32>().prop_filter("Not NaN", |f| !f.is_nan()),
             1..128, // 1 to 128 dimensions
         )
     }
@@ -288,15 +290,16 @@ mod property_based_tests {
                 Some(&props_bytes),
                 data_bytes,
                 id,
+                true,
             ).unwrap();

             prop_assert_eq!(deserialized.label, label.as_str());
             prop_assert_eq!(deserialized.id, id);
-            prop_assert_eq!(deserialized.data.len(), data.len());
+            prop_assert_eq!(deserialized.len(), data.len());

             // Check each data point (with floating point tolerance)
-            for (i, (&orig, &deser)) in data.iter().zip(deserialized.data.iter()).enumerate() {
-                let diff = (orig - deser).abs();
+            for (i, (&orig, &deser)) in data.iter().zip(deserialized.data_borrowed().iter()).enumerate() {
+                let diff = (orig as f64 - deser as f64).abs();
                 prop_assert!(diff < 1e-10, "Data mismatch at index {}: {} vs {}", i, orig, deser);
             }
         }
@@ -315,7 +318,7 @@ mod property_based_tests {
                 .map(|(k, v)| (k.as_str(), v.clone()))
                 .collect();

-            let vector = create_arena_vector(&arena, id, &label, 1, deleted, 0, &data, props_refs);
+            let vector = create_arena_vector(&arena, id, &label, 1, deleted, &data, props_refs);
             let props_bytes = bincode::serialize(&vector).unwrap();
             let data_bytes = vector.vector_data_to_bytes().unwrap();

@@ -326,10 +329,11 @@ mod property_based_tests {
                 Some(&props_bytes),
                 data_bytes,
                 id,
+                true,
             ).unwrap();

             prop_assert_eq!(deserialized.deleted, deleted);
-            prop_assert_eq!(deserialized.data.len(), data.len());
+            prop_assert_eq!(deserialized.len(), data.len());
         }

         #[test]
@@ -340,12 +344,12 @@ mod property_based_tests {

             // Convert to bytes and back
             let bytes = create_vector_bytes(&data);
-            let restored = HVector::cast_raw_vector_data(&arena, &bytes);
+            let restored = HVector::raw_vector_data_to_vec(&bytes, &arena).unwrap();

             prop_assert_eq!(restored.len(), data.len());

             for (i, (&orig, &rest)) in data.iter().zip(restored.iter()).enumerate() {
-                let diff = (orig - rest).abs();
+                let diff = (orig as f64 - rest as f64).abs();
                 prop_assert!(diff < 1e-10, "Data mismatch at index {}: {} vs {}", i, orig, rest);
             }
         }
@@ -387,6 +391,7 @@ mod property_based_tests {
                 Some(&props_bytes1),
                 data_bytes1,
                 id,
+                true,
             ).unwrap();

             // Second roundtrip
@@ -436,6 +441,7 @@ mod property_based_tests {
                 Some(&props_bytes),
                 data_bytes,
                 id,
+                true,
             ).unwrap();
             prop_assert_eq!(vector_restored.id, id);
         }
@@ -472,6 +478,7 @@ mod property_based_tests {
                 Some(&props_bytes),
                 data_bytes,
                 id,
+                true,
             ).unwrap();
             prop_assert_eq!(vector_restored.label, label.as_str());
         }
diff --git a/helix-db/src/protocol/custom_serde/test_utils.rs b/helix-db/src/protocol/custom_serde/test_utils.rs
index 4197744a..fc3faa55 100644
--- a/helix-db/src/protocol/custom_serde/test_utils.rs
+++ b/helix-db/src/protocol/custom_serde/test_utils.rs
@@ -5,7 +5,9 @@

 #![cfg(test)]

-use crate::helix_engine::vector_core::vector::HVector;
+use crate::helix_engine::vector_core::HVector;
+use crate::helix_engine::vector_core::distance::Cosine;
+use crate::helix_engine::vector_core::node::Item;
 use crate::protocol::value::Value;
 use crate::utils::items::{Edge, Node};
 use crate::utils::properties::ImmutablePropertiesMap;
@@ -101,12 +103,7 @@ pub fn create_simple_node<'arena>(arena: &'arena Bump, id: u128, label: &str) -> Node<'arena> {
 }

 /// Creates an old-style Node for compatibility testing
-pub fn create_old_node(
-    id: u128,
-    label: &str,
-    version: u8,
-    props: Vec<(&str, Value)>,
-) -> OldNode {
+pub fn create_old_node(id: u128, label: &str, version: u8, props: Vec<(&str, Value)>) -> OldNode {
     if props.is_empty() {
         OldNode {
             id,
@@ -230,12 +227,13 @@ pub fn create_arena_vector<'arena>(
     label: &str,
     version: u8,
     deleted: bool,
-    level: usize,
-    data: &[f64],
+    data: &[f32],
     props: Vec<(&str, Value)>,
 ) -> HVector<'arena> {
     let label_ref = arena.alloc_str(label);
-    let data_ref = arena.alloc_slice_copy(data);
+
+    let mut bump_vec = bumpalo::collections::Vec::new_in(arena);
+    bump_vec.extend_from_slice(data);

     if props.is_empty() {
         HVector {
@@ -243,10 +241,10 @@ pub fn create_arena_vector<'arena>(
             label: label_ref,
             version,
             deleted,
-            level,
             distance: None,
-            data: data_ref,
+            data: Some(Item::<Cosine>::from_vec(bump_vec)),
             properties: None,
+            level: None,
         }
     } else {
         let len = props.len();
@@ -261,10 +259,10 @@ pub fn create_arena_vector<'arena>(
             label: label_ref,
             version,
             deleted,
-            level,
             distance: None,
-            data: data_ref,
+            data: Some(Item::<Cosine>::from_vec(bump_vec)),
             properties: Some(props_map),
+            level: None,
         }
     }
 }
@@ -274,13 +272,13 @@ pub fn create_simple_vector<'arena>(
     arena: &'arena Bump,
     id: u128,
     label: &str,
-    data: &[f64],
+    data: &[f32],
 ) -> HVector<'arena> {
-    create_arena_vector(arena, id, label, 1, false, 0, data, vec![])
+    create_arena_vector(arena, id, label, 1, false, data, vec![])
 }

 /// Creates vector data as raw bytes
-pub fn create_vector_bytes(data: &[f64]) -> Vec<u8> {
+pub fn create_vector_bytes(data: &[f32]) -> Vec<u8> {
     bytemuck::cast_slice(data).to_vec()
 }
@@ -334,7 +332,10 @@ pub fn all_value_types_props() -> Vec<(&'static str, Value)> {
     ("u16_val", Value::U16(65000)),
     ("u32_val", Value::U32(4000000)),
     ("u64_val", Value::U64(18000000000)),
-    ("u128_val", Value::U128(340282366920938463463374607431768211455)),
+    (
+        "u128_val",
+        Value::U128(340282366920938463463374607431768211455),
+    ),
     ("bool_val", Value::Boolean(true)),
     ("empty_val", Value::Empty),
 ]
@@ -345,17 +346,16 @@ pub fn nested_value_props() -> Vec<(&'static str, Value)> {
     vec![
         (
             "array_val",
-            Value::Array(vec![
-                Value::I32(1),
-                Value::I32(2),
-                Value::I32(3),
-            ]),
+            Value::Array(vec![Value::I32(1), Value::I32(2), Value::I32(3)]),
         ),
         (
             "object_val",
             Value::Object(
                 vec![
-                    ("nested_key".to_string(), Value::String("nested_value".to_string())),
+                    (
+                        "nested_key".to_string(),
+                        Value::String("nested_value".to_string()),
+                    ),
                     ("nested_num".to_string(), Value::I32(42)),
                 ]
                 .into_iter()
@@ -364,15 +364,14 @@ pub fn nested_value_props() -> Vec<(&'static str, Value)> {
         ),
         (
             "deeply_nested",
-            Value::Array(vec![
-                Value::Object(
-                    vec![
-                        ("inner".to_string(), Value::Array(vec![Value::I32(1), Value::I32(2)])),
-                    ]
-                    .into_iter()
-                    .collect(),
-                ),
-            ]),
+            Value::Array(vec![Value::Object(
+                vec![(
+                    "inner".to_string(),
+                    Value::Array(vec![Value::I32(1), Value::I32(2)]),
+                )]
+                .into_iter()
+                .collect(),
+            )]),
         ),
     ]
 }
@@ -398,11 +397,7 @@ pub fn assert_nodes_semantically_equal(node1: &Node, node2: &Node) {
     match (&node1.properties, &node2.properties) {
         (None, None) => {}
         (Some(props1), Some(props2)) => {
-            assert_eq!(
-                props1.len(),
-                props2.len(),
-                "Node property counts differ"
-            );
+            assert_eq!(props1.len(), props2.len(), "Node property counts differ");
             // Check each property exists and has the same value
             for (key1, val1) in props1.iter() {
                 if let Some(val2) = props2.get(key1) {
@@ -427,11 +422,7 @@ pub fn assert_edges_semantically_equal(edge1: &Edge, edge2: &Edge) {
     match (&edge1.properties, &edge2.properties) {
         (None, None) => {}
         (Some(props1), Some(props2)) => {
-            assert_eq!(
-                props1.len(),
-                props2.len(),
-                "Edge property counts differ"
-            );
+            assert_eq!(props1.len(), props2.len(), "Edge property counts differ");
             for (key1, val1) in props1.iter() {
                 if let Some(val2) = props2.get(key1) {
                     assert_eq!(val1, val2, "Property value differs for key: {}", key1);
@@ -450,10 +441,15 @@ pub fn assert_vectors_semantically_equal(vec1: &HVector, vec2: &HVector) {
     assert_eq!(vec1.label, vec2.label, "Vector labels differ");
     assert_eq!(vec1.version, vec2.version, "Vector versions differ");
     assert_eq!(vec1.deleted, vec2.deleted, "Vector deleted flags differ");
-    assert_eq!(vec1.data.len(), vec2.data.len(), "Vector dimensions differ");
+    assert_eq!(vec1.len(), vec2.len(), "Vector dimensions differ");

     // Compare vector data with floating point tolerance
-    for (i, (v1, v2)) in vec1.data.iter().zip(vec2.data.iter()).enumerate() {
+    for (i, (v1, v2)) in vec1
+        .data_borrowed()
+        .iter()
+        .zip(vec2.data_borrowed().iter())
+        .enumerate()
+    {
         assert!(
             (v1 - v2).abs() < 1e-10,
             "Vector data differs at index {}: {} vs {}",
@@ -466,11 +462,7 @@ pub fn assert_vectors_semantically_equal(vec1: &HVector, vec2: &HVector) {
     match (&vec1.properties, &vec2.properties) {
         (None, None) => {}
         (Some(props1), Some(props2)) => {
-            assert_eq!(
-                props1.len(),
-                props2.len(),
-                "Vector property counts differ"
-            );
+            assert_eq!(props1.len(), props2.len(), "Vector property counts differ");
             for (key1, val1) in props1.iter() {
                 if let Some(val2) = props2.get(key1) {
                     assert_eq!(val1, val2, "Property value differs for key: {}", key1);
@@ -502,7 +494,10 @@ pub fn print_byte_comparison(label: &str, bytes1: &[u8], bytes2: &[u8]) {
     let min_len = bytes1.len().min(bytes2.len());
     for (i, (b1, b2)) in bytes1.iter().zip(bytes2.iter()).take(min_len).enumerate() {
         if b1 != b2 {
-            println!("  Index {}: bytes1={:02x} ({}), bytes2={:02x} ({})", i, b1, b1, b2, b2);
+            println!(
+                "  Index {}: bytes1={:02x} ({}), bytes2={:02x} ({})",
+                i, b1, b1, b2, b2
+            );
         }
     }
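create_arena_vector above now builds the vector data in a bumpalo Vec tied to the arena before handing it to Item::<Cosine>. A minimal sketch of that arena pattern, assuming bumpalo's collections feature (which these helpers already use); the Item wrapper itself is omitted:

    use bumpalo::Bump;

    fn main() {
        let arena = Bump::new();
        // Both the label and the data live in (and die with) the arena.
        let label: &str = arena.alloc_str("doc_vector");
        let mut data = bumpalo::collections::Vec::new_in(&arena);
        data.extend_from_slice(&[1.0f32, 2.0, 3.0]);
        assert_eq!(label, "doc_vector");
        assert_eq!(data.len(), 3);
    }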
@@ -561,7 +556,9 @@ pub fn random_utf8_string(len: usize) -> String {
 pub fn random_f64_vector(dimensions: usize) -> Vec<f64> {
     use rand::Rng;
     let mut rng = rand::rng();
-    (0..dimensions).map(|_| rng.random_range(-1.0..1.0)).collect()
+    (0..dimensions)
+        .map(|_| rng.random_range(-1.0..1.0))
+        .collect()
 }

 /// Generates a random Value for property testing
diff --git a/helix-db/src/protocol/custom_serde/tests.rs b/helix-db/src/protocol/custom_serde/tests.rs
index 50eb59d9..8c18dd9c 100644
--- a/helix-db/src/protocol/custom_serde/tests.rs
+++ b/helix-db/src/protocol/custom_serde/tests.rs
@@ -60,11 +60,7 @@ mod node_serialization_tests {
     }

     /// Helper to create an old node with properties
-    fn create_old_node_with_props(
-        id: u128,
-        label: &str,
-        props: Vec<(&str, Value)>,
-    ) -> OldNode {
+    fn create_old_node_with_props(id: u128, label: &str, props: Vec<(&str, Value)>) -> OldNode {
         if props.is_empty() {
             OldNode {
                 id,
@@ -119,8 +115,10 @@ mod node_serialization_tests {
         println!("\nByte-by-byte comparison:");
         for (i, (old_byte, new_byte)) in old_bytes.iter().zip(new_bytes.iter()).enumerate() {
             if old_byte != new_byte {
-                println!("  Index {}: old={:02x} ({}), new={:02x} ({})",
-                    i, old_byte, old_byte, new_byte, new_byte);
+                println!(
+                    "  Index {}: old={:02x} ({}), new={:02x} ({})",
+                    i, old_byte, old_byte, new_byte, new_byte
+                );
             }
         }

@@ -137,7 +135,11 @@ mod node_serialization_tests {
         if let Err(e) = &deserialized {
             println!("Deserialization error: {:?}", e);
         }
-        assert!(deserialized.is_ok(), "Failed to deserialize new format: {:?}", deserialized.err());
+        assert!(
+            deserialized.is_ok(),
+            "Failed to deserialize new format: {:?}",
+            deserialized.err()
+        );

         // Test that new format can deserialize old format
         println!("Attempting to deserialize old_bytes...");
@@ -146,7 +148,11 @@ mod node_serialization_tests {
         if let Err(e) = &old_deserialized {
             println!("Deserialization error from old format: {:?}", e);
         }
-        assert!(old_deserialized.is_ok(), "Failed to deserialize old format: {:?}", old_deserialized.err());
+        assert!(
+            old_deserialized.is_ok(),
+            "Failed to deserialize old format: {:?}",
+            old_deserialized.err()
+        );
     }

     #[test]
@@ -313,7 +319,10 @@ mod node_serialization_tests {
         ("u16_val", Value::U16(65535)),
         ("u32_val", Value::U32(4294967295)),
         ("u64_val", Value::U64(18446744073709551615)),
-        ("u128_val", Value::U128(340282366920938463463374607431768211455)),
+        (
+            "u128_val",
+            Value::U128(340282366920938463463374607431768211455),
+        ),
         ("f32_val", Value::F32(3.14159)),
         ("f64_val", Value::F64(2.71828)),
         ("bool_val", Value::Boolean(true)),
@@ -328,7 +337,10 @@ mod node_serialization_tests {
         let props = deserialized.properties.unwrap();
         assert_eq!(props.len(), 13);

-        assert_eq!(props.get("string_val"), Some(&Value::String("test".to_string())));
+        assert_eq!(
+            props.get("string_val"),
+            Some(&Value::String("test".to_string()))
+        );
         assert_eq!(props.get("i8_val"), Some(&Value::I8(-42)));
         assert_eq!(props.get("i16_val"), Some(&Value::I16(1000)));
         assert_eq!(props.get("i32_val"), Some(&Value::I32(100000)));
@@ -336,8 +348,14 @@ mod node_serialization_tests {
         assert_eq!(props.get("u8_val"), Some(&Value::U8(255)));
         assert_eq!(props.get("u16_val"), Some(&Value::U16(65535)));
         assert_eq!(props.get("u32_val"), Some(&Value::U32(4294967295)));
-        assert_eq!(props.get("u64_val"), Some(&Value::U64(18446744073709551615)));
-        assert_eq!(props.get("u128_val"), Some(&Value::U128(340282366920938463463374607431768211455)));
+        assert_eq!(
+            props.get("u64_val"),
+            Some(&Value::U64(18446744073709551615))
+        );
+        assert_eq!(
+            props.get("u128_val"),
+            Some(&Value::U128(340282366920938463463374607431768211455))
+        );
         assert_eq!(props.get("f32_val"), Some(&Value::F32(3.14159)));
         assert_eq!(props.get("f64_val"), Some(&Value::F64(2.71828)));
         assert_eq!(props.get("bool_val"), Some(&Value::Boolean(true)));
@@ -349,14 +367,16 @@ mod node_serialization_tests {
         let id = 22222u128;

         let props = vec![
-            ("array", Value::Array(vec![
-                Value::I32(1),
-                Value::I32(2),
-                Value::I32(3),
-            ])),
+            (
+                "array",
+                Value::Array(vec![Value::I32(1), Value::I32(2), Value::I32(3)]),
+            ),
             ("nested_obj", {
                 let mut map = HashMap::new();
-                map.insert("inner_key".to_string(), Value::String("inner_value".to_string()));
+                map.insert(
+                    "inner_key".to_string(),
+                    Value::String("inner_value".to_string()),
+                );
                 Value::Object(map)
             }),
         ];
@@ -386,9 +406,17 @@ mod node_serialization_tests {

         // Check that both have the same keys and values (regardless of order)
         for (key, old_value) in old_props.iter() {
-            let new_value = new_props.get(key).expect(&format!("Missing key: {}", key));
+            let new_value = new_props
+                .get(key)
+                .unwrap_or_else(|| panic!("Missing key: {}", key));
             // For nested objects, we need to compare recursively since HashMap order may differ
-            assert!(values_equal(old_value, new_value), "Value mismatch for key {}: {:?} != {:?}", key, old_value, new_value);
+            assert!(
+                values_equal(old_value, new_value),
+                "Value mismatch for key {}: {:?} != {:?}",
+                key,
+                old_value,
+                new_value
+            );
         }
     }

@@ -413,7 +441,9 @@ mod node_serialization_tests {
             a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
         }
         (Value::Object(a), Value::Object(b)) => {
-            a.len() == b.len() && a.iter().all(|(k, v)| b.get(k).map_or(false, |bv| values_equal(v, bv)))
+            a.len() == b.len()
+                && a.iter()
+                    .all(|(k, v)| b.get(k).is_some_and(|bv| values_equal(v, bv)))
         }
         (Value::Date(a), Value::Date(b)) => a == b,
         (Value::Id(a), Value::Id(b)) => a == b,
@@ -476,7 +506,10 @@ mod node_serialization_tests {

         let props = deserialized_node.properties.unwrap();
         assert_eq!(props.len(), 2);
-        assert_eq!(props.get("name"), Some(&Value::String("Charlie".to_string())));
+        assert_eq!(
+            props.get("name"),
+            Some(&Value::String("Charlie".to_string()))
+        );
         assert_eq!(props.get("count"), Some(&Value::U64(42)));
     }

@@ -602,14 +635,7 @@ mod node_serialization_tests {
     fn test_node_serialization_utf8_labels() {
         let arena = Bump::new();

-        let utf8_labels = vec![
-            "Hello",
-            "δΈ–η•Œ",
-            "πŸš€πŸŒŸ",
-            "ΠŸΡ€ΠΈΠ²Π΅Ρ‚",
-            "Ω…Ψ±Ψ­Ψ¨Ψ§",
-            "Γ‘oΓ±o",
-        ];
+        let utf8_labels = ["Hello", "δΈ–η•Œ", "πŸš€πŸŒŸ", "ΠŸΡ€ΠΈΠ²Π΅Ρ‚", "Ω…Ψ±Ψ­Ψ¨Ψ§", "Γ‘oΓ±o"];

         for (idx, label) in utf8_labels.iter().enumerate() {
             let id = idx as u128;
@@ -621,7 +647,8 @@ mod node_serialization_tests {
             assert_eq!(
                 old_bytes, new_bytes,
-                "UTF-8 label '{}' serialization differs", label
+                "UTF-8 label '{}' serialization differs",
+                label
             );
         }
     }
@@ -648,7 +675,10 @@ mod node_serialization_tests {
         assert_eq!(props.len(), 3);
         assert_eq!(props.get("名前"), Some(&Value::String("ε€ͺιƒŽ".to_string())));
         assert_eq!(props.get("возраст"), Some(&Value::I32(25)));
-        assert_eq!(props.get("emoji_key_πŸŽ‰"), Some(&Value::String("party_🎊".to_string())));
+        assert_eq!(
+            props.get("emoji_key_πŸŽ‰"),
+            Some(&Value::String("party_🎊".to_string()))
+        );
     }

     #[test]
@@ -677,7 +707,12 @@ mod node_serialization_tests {
         // Verify all properties are present with correct values
         for i in 0..50 {
             let key = format!("key_{}", i);
-            assert_eq!(props.get(&key), Some(&Value::I32(i)), "Missing or incorrect value for {}", key);
+            assert_eq!(
+                props.get(&key),
+                Some(&Value::I32(i)),
+                "Missing or incorrect value for {}",
+                key
+            );
         }
     }

@@ -712,10 +747,7 @@ mod node_serialization_tests {
         let arena = Bump::new();
         let id = 13131u128;

-        let props = vec![
-            ("empty_val", Value::Empty),
-            ("normal_val", Value::I32(42)),
-        ];
+        let props = vec![("empty_val", Value::Empty), ("normal_val", Value::I32(42))];

         let new_node = create_arena_node_with_props(&arena, id, "EmptyValue", props);
         let new_bytes = bincode::serialize(&new_node).unwrap();
@@ -843,7 +875,9 @@ mod edge_serialization_tests {
             a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| values_equal(x, y))
         }
         (Value::Object(a), Value::Object(b)) => {
-            a.len() == b.len() && a.iter().all(|(k, v)| b.get(k).map_or(false, |bv| values_equal(v, bv)))
+            a.len() == b.len()
+                && a.iter()
+                    .all(|(k, v)| b.get(k).is_some_and(|bv| values_equal(v, bv)))
         }
         (Value::Date(a), Value::Date(b)) => a == b,
         (Value::Id(a), Value::Id(b)) => a == b,
@@ -938,8 +972,14 @@ mod edge_serialization_tests {

         // Check semantic equality (order may differ)
         for (key, old_value) in old_props.iter() {
-            let new_value = new_props.get(key).expect(&format!("Missing key: {}", key));
-            assert!(values_equal(old_value, new_value), "Value mismatch for key {}", key);
+            let new_value = new_props
+                .get(key)
+                .unwrap_or_else(|| panic!("Missing key: {}", key));
+            assert!(
+                values_equal(old_value, new_value),
+                "Value mismatch for key {}",
+                key
+            );
         }
     }

@@ -955,7 +995,8 @@ mod edge_serialization_tests {
             ("verified", Value::Boolean(true)),
         ];

-        let original = create_arena_edge_with_props(&arena, id, "RELATED_TO", from_node, to_node, props);
+        let original =
+            create_arena_edge_with_props(&arena, id, "RELATED_TO", from_node, to_node, props);

         let bytes = bincode::serialize(&original).unwrap();
         let arena2 = Bump::new();
@@ -1003,7 +1044,10 @@ mod edge_serialization_tests {
         let props = deserialized.properties.unwrap();
         assert_eq!(props.len(), 2);
         assert_eq!(props.get("strength"), Some(&Value::I32(5)));
-        assert_eq!(props.get("label_text"), Some(&Value::String("connection".to_string())));
+        assert_eq!(
+            props.get("label_text"),
+            Some(&Value::String("connection".to_string()))
+        );
     }

     #[test]
@@ -1016,18 +1060,25 @@ mod edge_serialization_tests {
         let props = vec![
             ("metadata", {
                 let mut map = HashMap::new();
-                map.insert("created_by".to_string(), Value::String("system".to_string()));
+                map.insert(
+                    "created_by".to_string(),
+                    Value::String("system".to_string()),
+                );
                 map.insert("timestamp".to_string(), Value::I64(1234567890));
                 Value::Object(map)
             }),
-            ("tags", Value::Array(vec![
-                Value::String("important".to_string()),
-                Value::String("verified".to_string()),
-            ])),
+            (
+                "tags",
+                Value::Array(vec![
+                    Value::String("important".to_string()),
+                    Value::String("verified".to_string()),
+                ]),
+            ),
         ];

         let old_edge = create_old_edge_with_props(id, "HAS_TAG", from_node, to_node, props.clone());
-        let new_edge = create_arena_edge_with_props(&arena, id, "HAS_TAG", from_node, to_node, props);
+        let new_edge =
+            create_arena_edge_with_props(&arena, id, "HAS_TAG", from_node, to_node, props);

         let old_bytes = bincode::serialize(&old_edge).unwrap();
         let new_bytes = bincode::serialize(&new_edge).unwrap();
@@ -1045,8 +1096,16 @@ mod edge_serialization_tests {

         // Compare nested values
         for (key, old_value) in old_props.iter() {
-            let new_value = new_props.get(key).expect(&format!("Missing key: {}", key));
-            assert!(values_equal(old_value, new_value), "Value mismatch for key {}: {:?} != {:?}", key, old_value, new_value);
+            let new_value = new_props
+
.get(key) .unwrap_or_else(|| panic!("Missing key: {}", key)); + assert!( + values_equal(old_value, new_value), + "Value mismatch for key {}: {:?} != {:?}", + key, + old_value, + new_value + ); } } @@ -1079,7 +1138,12 @@ mod edge_serialization_tests { // Verify all properties are present for i in 0..20 { let key = format!("prop_{}", i); - assert_eq!(props.get(&key), Some(&Value::I32(i)), "Property {} mismatch", key); + assert_eq!( + props.get(&key), + Some(&Value::I32(i)), + "Property {} mismatch", + key + ); } } @@ -1119,8 +1183,10 @@ mod edge_serialization_tests { println!("\nByte-by-byte comparison:"); for (i, (old_byte, new_byte)) in old_bytes.iter().zip(new_bytes.iter()).enumerate() { if old_byte != new_byte { - println!(" Index {}: old={:02x} ({}), new={:02x} ({})", - i, old_byte, old_byte, new_byte, new_byte); + println!( + " Index {}: old={:02x} ({}), new={:02x} ({})", + i, old_byte, old_byte, new_byte, new_byte + ); } } @@ -1143,7 +1209,8 @@ mod edge_serialization_tests { ("emoji", Value::String("πŸ”—".to_string())), ]; - let new_edge = create_arena_edge_with_props(&arena, id, "ηΉ‹γŒγ‚Š", from_node, to_node, props); + let new_edge = + create_arena_edge_with_props(&arena, id, "ηΉ‹γŒγ‚Š", from_node, to_node, props); let bytes = bincode::serialize(&new_edge).unwrap(); let arena2 = Bump::new(); diff --git a/helix-db/src/protocol/custom_serde/vector_serde.rs b/helix-db/src/protocol/custom_serde/vector_serde.rs index 000a9e89..68a88eb1 100644 --- a/helix-db/src/protocol/custom_serde/vector_serde.rs +++ b/helix-db/src/protocol/custom_serde/vector_serde.rs @@ -1,13 +1,13 @@ use crate::{ - helix_engine::vector_core::{vector::HVector, vector_without_data::VectorWithoutData}, + helix_engine::vector_core::{HVector, distance::Cosine, node::Item}, utils::properties::{ImmutablePropertiesMap, ImmutablePropertiesMapDeSeed}, }; use serde::de::{DeserializeSeed, Visitor}; use std::fmt; /// Helper DeserializeSeed for Option<ImmutablePropertiesMap> -struct OptionPropertiesMapDeSeed<'arena> { - arena: &'arena bumpalo::Bump, +pub struct OptionPropertiesMapDeSeed<'arena> { + pub arena: &'arena bumpalo::Bump, } impl<'de, 'arena> DeserializeSeed<'de> for OptionPropertiesMapDeSeed<'arena> { @@ -94,17 +94,18 @@ impl<'de, 'txn, 'arena> serde::de::DeserializeSeed<'de> for VectorDeSeed<'txn, ' .next_element_seed(OptionPropertiesMapDeSeed { arena: self.arena })?
.ok_or_else(|| serde::de::Error::custom("Expected properties field"))?; - let data = HVector::cast_raw_vector_data(self.arena, self.raw_vector_data); + let data = HVector::raw_vector_data_to_vec(self.raw_vector_data, self.arena) + .map_err(serde::de::Error::custom)?; Ok(HVector { id: self.id, label, deleted, version, - level: 0, distance: None, - data, + data: Some(Item::<Cosine>::from_vec(data)), properties, + level: None, }) } } @@ -128,7 +129,7 @@ pub struct VectoWithoutDataDeSeed<'arena> { } impl<'de, 'arena> serde::de::DeserializeSeed<'de> for VectoWithoutDataDeSeed<'arena> { - type Value = VectorWithoutData<'arena>; + type Value = HVector<'arena>; fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> where @@ -140,7 +141,7 @@ impl<'de, 'arena> serde::de::DeserializeSeed<'de> for VectoWithoutDataDeSeed<'ar impl<'de, 'arena> serde::de::Visitor<'de> for VectorVisitor<'arena> { - type Value = VectorWithoutData<'arena>; + type Value = HVector<'arena>; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("struct VectorWithoutData") @@ -164,13 +165,15 @@ impl<'de, 'arena> serde::de::DeserializeSeed<'de> for VectoWithoutDataDeSeed<'ar .next_element_seed(OptionPropertiesMapDeSeed { arena: self.arena })? .ok_or_else(|| serde::de::Error::custom("Expected properties field"))?; - Ok(VectorWithoutData { + Ok(HVector { id: self.id, label, version, deleted, - level: 0, properties, + distance: None, + level: None, + data: None, }) } } diff --git a/helix-db/src/protocol/custom_serde/vector_serde_tests.rs b/helix-db/src/protocol/custom_serde/vector_serde_tests.rs index a1c6c0dc..706c67fe 100644 --- a/helix-db/src/protocol/custom_serde/vector_serde_tests.rs +++ b/helix-db/src/protocol/custom_serde/vector_serde_tests.rs @@ -13,10 +13,9 @@ #[cfg(test)] mod vector_serialization_tests { use super::super::test_utils::*; - use crate::helix_engine::vector_core::vector::HVector; - use crate::helix_engine::vector_core::vector_without_data::VectorWithoutData; + use crate::helix_engine::vector_core::HVector; use crate::protocol::value::Value; - + use bumpalo::Bump; // ======================================================================== @@ -39,12 +38,8 @@ mod vector_serialization_tests { // Deserialize let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes( - &arena2, - Some(&props_bytes), - data_bytes, - id, - ).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_vectors_semantically_equal(&vector, &deserialized); } @@ -56,18 +51,14 @@ mod vector_serialization_tests { let data = vec![0.5, -0.5, 1.5, -1.5]; let props = vec![("name", Value::String("test".to_string()))]; - let vector = create_arena_vector(&arena, id, "labeled_vector", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, "labeled_vector", 1, false, &data, props); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes( - &arena2, - Some(&props_bytes), - data_bytes, - id, - ).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_vectors_semantically_equal(&vector, &deserialized); } @@ -83,18 +74,14 @@ mod vector_serialization_tests { let data = vec![0.1, 0.2, 0.3, 0.4, 0.5]; let props = vec![ ("name", Value::String("Test Vector".to_string())), ("score", Value::F64(0.95)), ]; - let vector = create_arena_vector(&arena, id, "vector_label", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, 
"vector_label", 1, false, &data, props); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes( - &arena2, - Some(&props_bytes), - data_bytes, - id, - ).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_vectors_semantically_equal(&vector, &deserialized); } @@ -106,18 +93,14 @@ mod vector_serialization_tests { let data = vec![0.0; 128]; // Standard embedding dimension let props = all_value_types_props(); - let vector = create_arena_vector(&arena, id, "all_types", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, "all_types", 1, false, &data, props); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes( - &arena2, - Some(&props_bytes), - data_bytes, - id, - ).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_vectors_semantically_equal(&vector, &deserialized); } @@ -129,23 +112,19 @@ mod vector_serialization_tests { let data = vec![1.0, 2.0, 3.0]; let props = nested_value_props(); - let vector = create_arena_vector(&arena, id, "nested", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, "nested", 1, false, &data, props); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes( - &arena2, - Some(&props_bytes), - data_bytes, - id, - ).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); // Just verify basic structure instead of deep equality due to HashMap ordering assert_eq!(deserialized.id, id); assert_eq!(deserialized.label, "nested"); - assert_eq!(deserialized.data.len(), 3); + assert_eq!(deserialized.data_borrowed().len(), 3); assert!(deserialized.properties.is_some()); assert_eq!(deserialized.properties.unwrap().len(), 3); } @@ -158,45 +137,45 @@ mod vector_serialization_tests { fn test_vector_data_to_bytes_128d() { let arena = Bump::new(); let id = 111111u128; - let data: Vec = (0..128).map(|i| i as f64 * 0.1).collect(); + let data: Vec = (0..128).map(|i| i as f32 * 0.1).collect(); let vector = create_simple_vector(&arena, id, "vector_128", &data); let bytes = vector.vector_data_to_bytes().unwrap(); - assert_eq!(bytes.len(), 128 * 8); // 128 dimensions * 8 bytes per f64 + assert_eq!(bytes.len(), 128 * 4); // 128 dimensions * 4 bytes per f32 } #[test] fn test_vector_data_to_bytes_384d() { let arena = Bump::new(); let id = 222222u128; - let data: Vec = (0..384).map(|i| i as f64 * 0.01).collect(); + let data: Vec = (0..384).map(|i| i as f32 * 0.01).collect(); let vector = create_simple_vector(&arena, id, "vector_384", &data); let bytes = vector.vector_data_to_bytes().unwrap(); - assert_eq!(bytes.len(), 384 * 8); + assert_eq!(bytes.len(), 384 * 4); } #[test] fn test_vector_data_to_bytes_1536d() { let arena = Bump::new(); let id = 333333u128; - let data: Vec = (0..1536).map(|i| (i as f64).sin()).collect(); + let data: Vec = (0..1536).map(|i| (i as f32).sin()).collect(); let vector = create_simple_vector(&arena, id, "vector_1536", &data); let bytes = vector.vector_data_to_bytes().unwrap(); - assert_eq!(bytes.len(), 1536 * 8); + 
assert_eq!(bytes.len(), 1536 * 4); } #[test] fn test_cast_raw_vector_data_128d() { let arena = Bump::new(); - let original_data: Vec<f64> = (0..128).map(|i| i as f64).collect(); + let original_data: Vec<f32> = (0..128).map(|i| i as f32).collect(); let raw_bytes = create_vector_bytes(&original_data); - let casted_data = HVector::cast_raw_vector_data(&arena, &raw_bytes); + let casted_data = HVector::raw_vector_data_to_vec(&raw_bytes, &arena).unwrap(); assert_eq!(casted_data.len(), 128); for (i, &val) in casted_data.iter().enumerate() { @@ -210,7 +189,7 @@ mod vector_serialization_tests { let original_data = vec![3.14159, 2.71828, 1.41421, 1.73205]; let raw_bytes = create_vector_bytes(&original_data); - let casted_data = HVector::cast_raw_vector_data(&arena, &raw_bytes); + let casted_data = HVector::raw_vector_data_to_vec(&raw_bytes, &arena).unwrap(); assert_eq!(casted_data.len(), original_data.len()); for (orig, casted) in original_data.iter().zip(casted_data.iter()) { @@ -226,14 +205,13 @@ mod vector_serialization_tests { let data = vec![1.0, 2.0, 3.0, 4.0]; let raw_bytes = create_vector_bytes(&data); - let vector = HVector::from_raw_vector_data(&arena, &raw_bytes, label, id).unwrap(); + let vector = HVector::from_raw_vector_data(id, label, &raw_bytes).unwrap(); assert_eq!(vector.id, id); assert_eq!(vector.label, label); - assert_eq!(vector.data.len(), 4); + assert_eq!(vector.len(), 4); assert_eq!(vector.version, 1); - assert_eq!(vector.deleted, false); - assert_eq!(vector.level, 0); + assert!(!vector.deleted); assert!(vector.properties.is_none()); } @@ -255,7 +233,7 @@ mod vector_serialization_tests { // Deserialize combining both let arena2 = Bump::new(); - let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id); + let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true); assert!(result.is_ok()); let deserialized = result.unwrap(); @@ -272,13 +250,13 @@ mod vector_serialization_tests { ("dimension", Value::I32(4)), ]; - let vector = create_arena_vector(&arena, id, "embedding", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, "embedding", 1, false, &data, props); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id); + let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true); assert!(result.is_ok()); let deserialized = result.unwrap(); @@ -297,77 +275,13 @@ mod vector_serialization_tests { // Deserialize with serialized empty properties let arena2 = Bump::new(); - let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id); + let result = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true); assert!(result.is_ok()); let deserialized = result.unwrap(); assert_eq!(deserialized.id, id); assert_eq!(deserialized.label, "no_props"); - assert_eq!(deserialized.data.len(), 3); - assert!(deserialized.properties.is_none()); - } - - // ======================================================================== - // VECTOR WITHOUT DATA TESTS - // ======================================================================== - - #[test] - fn test_vector_without_data_serialization() { - let arena = Bump::new(); - let id = 999000u128; - let label = arena.alloc_str("metadata_only"); - let props = vec![("type", Value::String("embedding".to_string()))]; - let len = props.len(); - 
let props_iter = props.into_iter().map(|(k, v)| { - let key: &str = arena.alloc_str(k); - (key, v) - }); - let props_map = crate::utils::properties::ImmutablePropertiesMap::new(len, props_iter, &arena); - - let vector_without_data = VectorWithoutData { - id, - label, - version: 1, - deleted: false, - level: 0, - properties: Some(props_map), - }; - - // Serialize and deserialize - let bytes = bincode::serialize(&vector_without_data).unwrap(); - let arena2 = Bump::new(); - let result = VectorWithoutData::from_bincode_bytes(&arena2, &bytes, id); - println!("{:?}", result); - assert!(result.is_ok()); - let deserialized = result.unwrap(); - assert_eq!(deserialized.id, id); - assert_eq!(deserialized.label, label); - assert_eq!(deserialized.version, 1); - assert_eq!(deserialized.deleted, false); - } - - #[test] - fn test_vector_without_data_empty_properties() { - let arena = Bump::new(); - let id = 111000u128; - let label = arena.alloc_str("empty_meta"); - - let vector_without_data = VectorWithoutData { - id, - label, - version: 1, - deleted: false, - level: 0, - properties: None, - }; - - let bytes = bincode::serialize(&vector_without_data).unwrap(); - let arena2 = Bump::new(); - let result = VectorWithoutData::from_bincode_bytes(&arena2, &bytes, id); - - assert!(result.is_ok()); - let deserialized = result.unwrap(); - assert_eq!(deserialized.id, id); + assert_eq!(deserialized.len(), 3); assert!(deserialized.properties.is_none()); } @@ -381,13 +295,14 @@ mod vector_serialization_tests { let id = 123456u128; let data = vec![1.0, 2.0]; - let vector = create_arena_vector(&arena, id, "versioned", 5, false, 0, &data, vec![]); + let vector = create_arena_vector(&arena, id, "versioned", 5, false, &data, vec![]); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_eq!(deserialized.version, 5); } @@ -398,15 +313,16 @@ mod vector_serialization_tests { let id = 654321u128; let data = vec![0.0, 1.0]; - let vector = create_arena_vector(&arena, id, "deleted", 1, true, 0, &data, vec![]); + let vector = create_arena_vector(&arena, id, "deleted", 1, true, &data, vec![]); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); - assert_eq!(deserialized.deleted, true); + assert!(deserialized.deleted); } #[test] @@ -415,15 +331,16 @@ mod vector_serialization_tests { let id = 987654u128; let data = vec![1.0, 0.0]; - let vector = create_arena_vector(&arena, id, "active", 1, false, 0, &data, vec![]); + let vector = create_arena_vector(&arena, id, "active", 1, false, &data, vec![]); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); - assert_eq!(deserialized.deleted, false); + assert!(!deserialized.deleted); } // 
======================================================================== @@ -442,7 +359,8 @@ mod vector_serialization_tests { let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_eq!(deserialized.label, "向量桋试"); } @@ -459,7 +377,8 @@ mod vector_serialization_tests { let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_eq!(deserialized.label, "πŸš€πŸ”₯πŸ’―"); } @@ -476,7 +395,8 @@ mod vector_serialization_tests { let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_eq!(deserialized.label, ""); } @@ -494,7 +414,8 @@ mod vector_serialization_tests { let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_eq!(deserialized.label.len(), 1000); assert_eq!(deserialized.label, long_label); @@ -516,13 +437,14 @@ mod vector_serialization_tests { }) .collect(); - let vector = create_arena_vector(&arena, id, "many_props", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, "many_props", 1, false, &data, props); let props_bytes = bincode::serialize(&vector).unwrap(); let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); assert_eq!(deserialized.properties.unwrap().len(), 50); } @@ -543,17 +465,18 @@ mod vector_serialization_tests { let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); - assert_eq!(deserialized.data.len(), 1); - assert!((deserialized.data[0] - 42.0).abs() < 1e-10); + assert_eq!(deserialized.len(), 1); + assert!((deserialized.data_borrowed()[0] - 42.0).abs() < 1e-10); } #[test] fn test_vector_large_dimension_4096() { let arena = Bump::new(); let id = 951753u128; - let data: Vec<f64> = (0..4096).map(|i| i as f64 * 0.001).collect(); + let data: Vec<f32> = (0..4096).map(|i| i as f32 * 0.001).collect(); let vector = create_simple_vector(&arena, id, "4096d", &data); @@ -561,9 +484,10 @@ mod vector_serialization_tests { let data_bytes = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized = HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id).unwrap(); + let deserialized = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes), data_bytes, id, true).unwrap(); 
- assert_eq!(deserialized.data.len(), 4096); + assert_eq!(deserialized.len(), 4096); } // ======================================================================== @@ -577,14 +501,16 @@ mod vector_serialization_tests { let data = vec![1.1, 2.2, 3.3]; let props = vec![("test", Value::String("value".to_string()))]; - let vector = create_arena_vector(&arena, id, "byte_test", 1, false, 0, &data, props); + let vector = create_arena_vector(&arena, id, "byte_test", 1, false, &data, props); // First roundtrip let props_bytes1 = bincode::serialize(&vector).unwrap(); let data_bytes1 = vector.vector_data_to_bytes().unwrap(); let arena2 = Bump::new(); - let deserialized1 = HVector::from_bincode_bytes(&arena2, Some(&props_bytes1), data_bytes1, id).unwrap(); + let deserialized1 = + HVector::from_bincode_bytes(&arena2, Some(&props_bytes1), data_bytes1, id, true) + .unwrap(); // Second roundtrip let props_bytes2 = bincode::serialize(&deserialized1).unwrap(); diff --git a/helix-db/src/protocol/format.rs b/helix-db/src/protocol/format.rs index fe3c9612..e23f9798 100644 --- a/helix-db/src/protocol/format.rs +++ b/helix-db/src/protocol/format.rs @@ -201,7 +201,8 @@ mod tests { fn test_format_deserialize_invalid_json() { let invalid_json = b"not valid json {"; - let result: Result, GraphError> = Format::Json.deserialize(invalid_json); + let result: Result, GraphError> = + Format::Json.deserialize(invalid_json); assert!(result.is_err()); if let Err(GraphError::DecodeError(msg)) = result { diff --git a/helix-db/src/protocol/mod.rs b/helix-db/src/protocol/mod.rs index b1ee482b..22b127df 100644 --- a/helix-db/src/protocol/mod.rs +++ b/helix-db/src/protocol/mod.rs @@ -1,9 +1,9 @@ +pub mod custom_serde; pub mod date; pub mod error; pub mod format; pub mod request; pub mod response; -pub mod custom_serde; pub mod value; pub use error::HelixError; diff --git a/helix-db/src/protocol/request.rs b/helix-db/src/protocol/request.rs index 1fa3dc7a..265fc37f 100644 --- a/helix-db/src/protocol/request.rs +++ b/helix-db/src/protocol/request.rs @@ -199,7 +199,7 @@ mod tests { #[test] fn test_request_type_clone() { let rt1 = RequestType::MCP; - let rt2 = rt1.clone(); + let rt2 = rt1; assert!(matches!(rt1, RequestType::MCP)); assert!(matches!(rt2, RequestType::MCP)); diff --git a/helix-db/src/protocol/value.rs b/helix-db/src/protocol/value.rs index cdf2cac6..88f2ed1e 100644 --- a/helix-db/src/protocol/value.rs +++ b/helix-db/src/protocol/value.rs @@ -1634,6 +1634,11 @@ impl Value { pub fn as_f64(&self) -> f64 { *self.into_primitive() } + + #[inline(always)] + pub fn as_f32(&self) -> f32 { + *self.into_primitive() + } } #[cfg(test)] @@ -1730,8 +1735,8 @@ mod tests { assert_eq!(Value::F64(1.0), Value::F64(1.0)); assert_eq!(Value::I64(1), Value::U64(1)); assert_eq!(Value::U64(1), Value::I64(1)); - assert_eq!(Value::I32(1), 1 as i32); - assert_eq!(Value::U32(1), 1 as i32); + assert_eq!(Value::I32(1), 1_i32); + assert_eq!(Value::U32(1), 1_i32); } #[test] @@ -1991,7 +1996,7 @@ mod tests { let val = Value::Boolean(true); let b: bool = val.into(); - assert_eq!(b, true); + assert!(b); let val = Value::String("test".to_string()); let s: String = val.into(); @@ -2068,7 +2073,7 @@ mod tests { let val = Value::Boolean(true); let b: &bool = val.into_primitive(); - assert_eq!(*b, true); + assert!(*b); let val = Value::String("test".to_string()); let s = val.as_str(); diff --git a/helix-db/src/utils/id.rs b/helix-db/src/utils/id.rs index 4500911d..6f16de26 100644 --- a/helix-db/src/utils/id.rs +++ b/helix-db/src/utils/id.rs @@ -121,12 +121,12 @@ 
pub fn v6_uuid() -> u128 { uuid::Uuid::now_v6(&[1, 2, 3, 4, 5, 6]).as_u128() } -/// Converts a uuid to a string slice using a buffer created in the arena -/// +/// Converts a uuid to a string slice using a buffer created in the arena +/// /// This is more efficient that using the `to_string` on the created uuid /// as it avoids formatting and potential double buffering -/// -/// NOTE: This could be optimized further by reusing a slice at a set index within the arena +/// +/// NOTE: This could be optimized further by reusing a slice at a set index within the arena #[inline] pub fn uuid_str(id: u128, arena: &bumpalo::Bump) -> &str { let uuid = uuid::Uuid::from_u128(id); @@ -134,13 +134,17 @@ pub fn uuid_str(id: u128, arena: &bumpalo::Bump) -> &str { uuid.as_hyphenated().encode_lower(buffer) } -/// Converts a uuid to a string slice using a buffer -/// +/// Converts a uuid to a string slice using a buffer +/// /// This is more efficient that using the `to_string` on the created uuid /// as it avoids formatting and potential double buffering #[inline] pub fn uuid_str_from_buf(id: u128, buffer: &mut [u8]) -> &str { - assert_eq!(buffer.len(), 36, "length of hyphenated buffer should be 36 characters long"); + assert_eq!( + buffer.len(), + 36, + "length of hyphenated buffer should be 36 characters long" + ); let uuid = uuid::Uuid::from_u128(id); uuid.as_hyphenated().encode_lower(buffer) } @@ -274,7 +278,7 @@ mod tests { let id = ID::from(value); // Test Deref trait - let deref_value: &u128 = &*id; + let deref_value: &u128 = &id; assert_eq!(*deref_value, value); } @@ -301,7 +305,7 @@ mod tests { #[test] fn test_id_ordering() { - let mut ids = vec![ID::from(300u128), ID::from(100u128), ID::from(200u128)]; + let mut ids = [ID::from(300u128), ID::from(100u128), ID::from(200u128)]; ids.sort(); diff --git a/helix-db/src/utils/items.rs b/helix-db/src/utils/items.rs index 594b7d2b..29fed1da 100644 --- a/helix-db/src/utils/items.rs +++ b/helix-db/src/utils/items.rs @@ -47,11 +47,13 @@ impl<'arena> serde::Serialize for Node<'arena> { if serializer.is_human_readable() { // Include id for JSON serialization let mut buffer = [0u8; 36]; - let mut state = serializer.serialize_map(Some(3 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0)))?; + let mut state = serializer.serialize_map(Some( + 3 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0), + ))?; state.serialize_entry("id", uuid_str_from_buf(self.id, &mut buffer))?; - state.serialize_entry("label", self.label)?; + state.serialize_entry("label", self.label)?; state.serialize_entry("version", &self.version)?; - if let Some(properties ) = &self.properties { + if let Some(properties) = &self.properties { for (key, value) in properties.iter() { state.serialize_entry(key, value)?; } @@ -177,7 +179,9 @@ impl<'arena> serde::Serialize for Edge<'arena> { if serializer.is_human_readable() { // Include id for JSON serialization let mut buffer = [0u8; 36]; - let mut state = serializer.serialize_map(Some(5 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0)))?; + let mut state = serializer.serialize_map(Some( + 5 + self.properties.as_ref().map(|p| p.len()).unwrap_or(0), + ))?; state.serialize_entry("id", uuid_str_from_buf(self.id, &mut buffer))?; state.serialize_entry("label", self.label)?; state.serialize_entry("version", &self.version)?; diff --git a/helix-db/src/utils/label_hash.rs b/helix-db/src/utils/label_hash.rs index 8bd42f71..d93a080d 100644 --- a/helix-db/src/utils/label_hash.rs +++ b/helix-db/src/utils/label_hash.rs @@ -31,7 +31,10 @@ mod tests 
{ let hash_person = hash_label("person", None); let hash_company = hash_label("company", None); - assert_ne!(hash_person, hash_company, "Different labels should produce different hashes"); + assert_ne!( + hash_person, hash_company, + "Different labels should produce different hashes" + ); } #[test] @@ -42,22 +45,24 @@ mod tests { let hash_seed_42 = hash_label(label, Some(42)); // Same label with no seed vs seed 0 should be same - assert_eq!(hash_no_seed, hash_seed_0, "No seed should be equivalent to seed 0"); + assert_eq!( + hash_no_seed, hash_seed_0, + "No seed should be equivalent to seed 0" + ); // Different seed should produce different hash - assert_ne!(hash_no_seed, hash_seed_42, "Different seeds should produce different hashes"); + assert_ne!( + hash_no_seed, hash_seed_42, + "Different seeds should produce different hashes" + ); } #[test] fn test_hash_label_collision_rate() { // Test collision rate with 10,000 labels - let labels: Vec<String> = (0..10_000) - .map(|i| format!("label_{}", i)) - .collect(); + let labels: Vec<String> = (0..10_000).map(|i| format!("label_{}", i)).collect(); - let hashes: HashSet<[u8; 4]> = labels.iter() - .map(|l| hash_label(l, None)) - .collect(); + let hashes: HashSet<[u8; 4]> = labels.iter().map(|l| hash_label(l, None)).collect(); let collision_rate = 1.0 - (hashes.len() as f64 / labels.len() as f64); @@ -86,16 +91,14 @@ mod tests { // Test with UTF-8 characters let labels = vec![ "person", - "δΊΊ", // Chinese character - "πŸš€", // Emoji - "Γ‘oΓ±o", // Spanish with tildes - "ΠŸΡ€ΠΈΠ²Π΅Ρ‚", // Russian - "Ω…Ψ±Ψ­Ψ¨Ψ§", // Arabic + "δΊΊ", // Chinese character + "πŸš€", // Emoji + "Γ‘oΓ±o", // Spanish with tildes + "ΠŸΡ€ΠΈΠ²Π΅Ρ‚", // Russian + "Ω…Ψ±Ψ­Ψ¨Ψ§", // Arabic ]; - let hashes: Vec<[u8; 4]> = labels.iter() - .map(|l| hash_label(l, None)) - .collect(); + let hashes: Vec<[u8; 4]> = labels.iter().map(|l| hash_label(l, None)).collect(); // All should be different let unique_hashes: HashSet<_> = hashes.iter().collect(); @@ -149,17 +152,9 @@ mod tests { #[test] fn test_hash_label_similar_strings() { // Test labels that differ by only one character - let labels = vec![ - "person", - "persons", - "person1", - "person_", - "Person", - ]; + let labels = ["person", "persons", "person1", "person_", "Person"]; - let hashes: Vec<[u8; 4]> = labels.iter() - .map(|l| hash_label(l, None)) - .collect(); + let hashes: Vec<[u8; 4]> = labels.iter().map(|l| hash_label(l, None)).collect(); // All should be different let unique_hashes: HashSet<_> = hashes.iter().collect(); @@ -179,7 +174,10 @@ mod tests { // Should be big-endian bytes (we can convert back) let value = u32::from_be_bytes(hash); - assert!(value > 0, "Hash value should be non-zero for non-empty string"); + assert!( + value > 0, + "Hash value should be non-zero for non-empty string" + ); } #[test] @@ -218,9 +216,7 @@ mod tests { "created_by", ]; - let hashes: HashSet<[u8; 4]> = common_labels.iter() - .map(|l| hash_label(l, None)) - .collect(); + let hashes: HashSet<[u8; 4]> = common_labels.iter().map(|l| hash_label(l, None)).collect(); // All common labels should hash uniquely assert_eq!( diff --git a/helix-db/src/utils/properties.rs b/helix-db/src/utils/properties.rs index 843d170f..42f9800e 100644 --- a/helix-db/src/utils/properties.rs +++ b/helix-db/src/utils/properties.rs @@ -22,7 +22,7 @@ use crate::protocol::value::Value; /// - All required space is allocated in the arena upfront /// - Key lengths are stored packed for SIMD length check on get. 
/// - Small n means O(n) is faster than O(1) -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct ImmutablePropertiesMap<'arena> { len: usize, key_lengths: *const usize, diff --git a/helix-db/src/utils/tqdm.rs b/helix-db/src/utils/tqdm.rs index 8067309d..19954a99 100644 --- a/helix-db/src/utils/tqdm.rs +++ b/helix-db/src/utils/tqdm.rs @@ -1,6 +1,6 @@ use std::{ - io::{stdout, Write}, fmt, + io::{Write, stdout}, }; pub enum ProgChar { @@ -12,7 +12,8 @@ impl fmt::Display for ProgChar { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let c = match self { ProgChar::Block => 'β–ˆ', - ProgChar::Hash => '#', }; + ProgChar::Hash => '#', + }; write!(f, "{c}") } } diff --git a/helix-macros/src/lib.rs b/helix-macros/src/lib.rs index 3d592180..a42ba60c 100644 --- a/helix-macros/src/lib.rs +++ b/helix-macros/src/lib.rs @@ -5,7 +5,10 @@ extern crate syn; use proc_macro::TokenStream; use quote::quote; use syn::{ - parse::{Parse, ParseStream}, parse_macro_input, Data, DeriveInput, Expr, FnArg, Ident, ItemFn, ItemStruct, ItemTrait, LitInt, Pat, Stmt, Token, TraitItem + Data, DeriveInput, Expr, FnArg, Ident, ItemFn, ItemStruct, ItemTrait, LitInt, Pat, Stmt, Token, + TraitItem, + parse::{Parse, ParseStream}, + parse_macro_input, }; struct HandlerArgs { @@ -119,7 +122,6 @@ pub fn get_handler(_attr: TokenStream, item: TokenStream) -> TokenStream { expanded.into() } - #[proc_macro_attribute] pub fn tool_calls(_attr: TokenStream, input: TokenStream) -> TokenStream { let input_trait = parse_macro_input!(input as ItemTrait); @@ -391,20 +393,17 @@ pub fn traversable_derive(input: TokenStream) -> TokenStream { // Verify that the struct has an 'id' field let has_id_field = match &input.data { - Data::Struct(data) => { - data.fields.iter().any(|field| { - field.ident.as_ref().map(|i| i == "id").unwrap_or(false) - }) - } + Data::Struct(data) => data + .fields + .iter() + .any(|field| field.ident.as_ref().map(|i| i == "id").unwrap_or(false)), _ => false, }; if !has_id_field { - return TokenStream::from( - quote! { - compile_error!("Traversable can only be derived for structs with an 'id: &'a str' field"); - } - ); + return TokenStream::from(quote! 
{ + compile_error!("Traversable can only be derived for structs with an 'id: &'a str' field"); + }); } // Extract lifetime parameter if present diff --git a/hql-tests/run.sh b/hql-tests/run.sh old mode 100644 new mode 100755 diff --git a/hql-tests/src/main.rs b/hql-tests/src/main.rs index 9e26618c..adafa6ab 100644 --- a/hql-tests/src/main.rs +++ b/hql-tests/src/main.rs @@ -66,6 +66,7 @@ async fn check_issue_exists(github_config: &GitHubConfig, error_hash: &str) -> R } #[allow(unused)] +#[allow(clippy::too_many_arguments)] async fn create_github_issue( github_config: &GitHubConfig, error_type: &str, @@ -454,7 +455,9 @@ async fn main() -> Result<()> { ); } - println!("[SUCCESS] Finished processing batch {current_batch}/{total_batches} successfully"); + println!( + "[SUCCESS] Finished processing batch {current_batch}/{total_batches} successfully" + ); } else { // Process all test directories in parallel (default behavior) println!( @@ -525,7 +528,6 @@ async fn process_test_directory( return Ok(()); } - // Find the query file - could be queries.hx or file*.hx let mut query_file_path = None; let schema_hx_path = folder_path.join("schema.hx"); @@ -643,8 +645,9 @@ async fn process_test_directory( let stderr = String::from_utf8_lossy(&output.stderr); let stdout = String::from_utf8_lossy(&output.stdout); // For helix compilation, we'll show the raw output since it's not cargo format - let error_message = - format!("[FAILED] HELIX COMPILE FAILED for {test_name}\nStderr: {stderr}\nStdout: {stdout}"); + let error_message = format!( + "[FAILED] HELIX COMPILE FAILED for {test_name}\nStderr: {stderr}\nStdout: {stdout}" + ); // Create GitHub issue if configuration is available if let Some(config) = github_config { diff --git a/hql-tests/tests/basic_search_v/queries.hx b/hql-tests/tests/basic_search_v/queries.hx index c2e85387..65e3dac6 100644 --- a/hql-tests/tests/basic_search_v/queries.hx +++ b/hql-tests/tests/basic_search_v/queries.hx @@ -13,7 +13,7 @@ E::EdgeUser { } -QUERY user(vec: [F64]) => +QUERY user(vec: [F32]) => vecs <- SearchV(vec, 10) // pre_filter <- SearchV(vec, 10)::PREFILTER(_::{content}::EQ("hello")) RETURN "hello" @@ -32,4 +32,4 @@ V::Document { QUERY SearchText(query: String, limit: I64) => // Search for documents that are similar to the query results <- SearchV(Embed(query), limit) - RETURN results \ No newline at end of file + RETURN results diff --git a/hql-tests/tests/benchmarks/queries.hx b/hql-tests/tests/benchmarks/queries.hx index b0188dc5..2ecb036f 100644 --- a/hql-tests/tests/benchmarks/queries.hx +++ b/hql-tests/tests/benchmarks/queries.hx @@ -33,8 +33,8 @@ QUERY InsertUser(country: U8) => // Query 2: Insert an Item vector node // Creates a new Item record with embedding and category -// The embedding parameter is explicit as an array of F64 values -QUERY InsertItem(embedding: [F64], category: U16) => +// The embedding parameter is explicit as an array of F32 values +QUERY InsertItem(embedding: [F32], category: U16) => item <- AddV(embedding, { category: category }) @@ -87,10 +87,10 @@ QUERY OneHopFilter(user_id: ID, category: U16) => items <- N(user_id)::Out::WHERE(_::{category}::EQ(category)) RETURN items::{id, category} -QUERY Vector(vector: [F64], top_k: I64) => +QUERY Vector(vector: [F32], top_k: I64) => items <- SearchV(vector, top_k) RETURN items::{id, score, category} -QUERY VectorHopFilter(vector: [F64], top_k: I64, country: U8) => +QUERY VectorHopFilter(vector: [F32], top_k: I64, country: U8) => items <- SearchV(vector, 
top_k)::WHERE(EXISTS(_::In::WHERE(_::{country}::EQ(country)))) - RETURN items::{id, category} \ No newline at end of file + RETURN items::{id, category} diff --git a/hql-tests/tests/brute_force_search_v/queries.hx b/hql-tests/tests/brute_force_search_v/queries.hx index a31462ab..fa192444 100644 --- a/hql-tests/tests/brute_force_search_v/queries.hx +++ b/hql-tests/tests/brute_force_search_v/queries.hx @@ -17,13 +17,6 @@ E::Friend { To: User, } -QUERY search_vector(query: [f64], k: I64) => +QUERY search_vector(query: [f32], k: I64) => result <- N::Out::SearchV(query, k) RETURN result - - - - - - - diff --git a/hql-tests/tests/cognee/queries.hx b/hql-tests/tests/cognee/queries.hx index 026e4aeb..518630eb 100644 --- a/hql-tests/tests/cognee/queries.hx +++ b/hql-tests/tests/cognee/queries.hx @@ -8,7 +8,7 @@ QUERY CogneeHasCollection (collection_name: String) => RETURN {collection: collection} // Add multiple vectors to a collection with a given data points -QUERY CogneeCreateDataPoints (collection_name: String, data_points: [{vector: [F64], dp_id: String, payload: String, content: String}]) => +QUERY CogneeCreateDataPoints (collection_name: String, data_points: [{vector: [F32], dp_id: String, payload: String, content: String}]) => FOR {vector, dp_id, payload, content} IN data_points { AddV(vector, {collection_name: collection_name, data_point_id: dp_id, payload: payload, content: content}) } @@ -20,7 +20,7 @@ QUERY CogneeRetrieve (collection_name: String, dp_ids: [String]) => RETURN {documents: documents} // Perform a search in the specified collection using a vector. -QUERY CogneeSearch (collection_name: String, vector: [F64], limit: I64) => +QUERY CogneeSearch (collection_name: String, vector: [F32], limit: I64) => result <- SearchV(vector, limit)::WHERE(_::{collection_name}::EQ(collection_name)) RETURN {result: result} @@ -133,7 +133,7 @@ QUERY CogneeDeleteGraph () => // Get the target node and its entire neighborhood QUERY CogneeGetConnections (node_id: String) => main_node <- N({node_id: node_id}) - + in_nodes <- main_node::In in_edges <- main_node::InE diff --git a/hql-tests/tests/companies_graph/queries.hx b/hql-tests/tests/companies_graph/queries.hx index 8d251432..a8b7cbdc 100644 --- a/hql-tests/tests/companies_graph/queries.hx +++ b/hql-tests/tests/companies_graph/queries.hx @@ -9,7 +9,7 @@ QUERY GetCompany(company_number: String) => QUERY AddCompany(company_number: String, total_filings: I32) => company <- AddN({ - company_number: company_number, + company_number: company_number, total_filings: total_filings, ingested_filings: 0 }) @@ -29,7 +29,7 @@ QUERY DeleteCompany(company_number: String) => // ------------------------------ EDGE OPERATIONS -------------------------- -QUERY GetDocumentEdges(company_number: String) => +QUERY GetDocumentEdges(company_number: String) => c <- N({company_number: company_number}) edges <- c::OutE count <- c::Out::COUNT @@ -41,9 +41,9 @@ QUERY GetDocumentEdges(company_number: String) => // ─── filing / embedding helpers ─────────────────────────────── QUERY AddEmbeddingsToCompany( - company_number: String, + company_number: String, embeddings_data: [{ - vector: [F64], + vector: [F32], text: String, chunk_id: String, page_number: I32, @@ -90,18 +90,18 @@ QUERY GetAllCompanyEmbeddings(company_number: String) => // return vector data RETURN embeddings -QUERY CompanyEmbeddingSearch(company_number: String, query: [F64], k: I32) => +QUERY CompanyEmbeddingSearch(company_number: String, query: [F32], k: I32) => c <- N({company_number: 
company_number})::OutE::ToV embedding_search <- c::SearchV(query, k) RETURN embedding_search // ---------------------- FOR TESTING --------------------------------- // tmp function for testing helix -QUERY AddVector(vector: [F64], text: String, chunk_id: String, page_number: I32, reference: String) => +QUERY AddVector(vector: [F32], text: String, chunk_id: String, page_number: I32, reference: String) => embedding <- AddV(vector, {text: text, chunk_id: chunk_id, page_number: page_number, reference: reference}) RETURN embedding // tmp function for testing helix -QUERY SearchVector(query: [F64], k: I32) => +QUERY SearchVector(query: [F32], k: I32) => embedding_search <- SearchV(query, k) - RETURN embedding_search \ No newline at end of file + RETURN embedding_search diff --git a/hql-tests/tests/companies_graph_v2/queries.hx b/hql-tests/tests/companies_graph_v2/queries.hx index b5c17c1f..5e11f557 100644 --- a/hql-tests/tests/companies_graph_v2/queries.hx +++ b/hql-tests/tests/companies_graph_v2/queries.hx @@ -10,7 +10,7 @@ QUERY GetCompany(company_number: String) => QUERY CreateCompany(company_name: String, company_number: String, total_docs: I32) => company <- AddN({ company_name: company_name, - company_number: company_number, + company_number: company_number, total_docs: total_docs, ingested_docs: 0 }) @@ -30,7 +30,7 @@ QUERY DeleteCompany(company_number: String) => // ------------------------------ EDGE OPERATIONS -------------------------- -QUERY GetDocumentEdges(company_number: String) => +QUERY GetDocumentEdges(company_number: String) => c <- N({company_number: company_number}) edges <- c::OutE RETURN edges @@ -39,9 +39,9 @@ QUERY GetDocumentEdges(company_number: String) => // ─── filing / embedding helpers ─────────────────────────────── QUERY AddEmbeddingsToCompany( - company_number: String, + company_number: String, embeddings_data: [{ - vector: [F64], + vector: [F32], text: String, chunk_id: String, page_number: I32, @@ -85,19 +85,19 @@ QUERY GetAllCompanyEmbeddings(company_number: String) => embeddings <- c::Out RETURN embeddings -QUERY CompanyEmbeddingSearch(company_number: String, query: [F64], k: I32) => +QUERY CompanyEmbeddingSearch(company_number: String, query: [F32], k: I32) => c <- N({company_number: company_number})::OutE::ToV embedding_search <- c::SearchV(query, k) RETURN embedding_search // ---------------------- FOR TESTING --------------------------------- // tmp function for testing helix -QUERY AddVector(vector: [F64], text: String, chunk_id: String, page_number: I32, reference: String) => +QUERY AddVector(vector: [F32], text: String, chunk_id: String, page_number: I32, reference: String) => embedding <- AddV(vector, {text: text, chunk_id: chunk_id, page_number: page_number, reference: reference}) RETURN embedding // tmp function for testing helix -QUERY SearchVector(query: [F64], k: I32) => +QUERY SearchVector(query: [F32], k: I32) => embedding_search <- SearchV(query, k) RETURN embedding_search @@ -122,4 +122,4 @@ QUERY GetVectorsBySourceLinkAndPageRange(company_number: String, source_link: St _::{source_link}::EQ(source_link) ) ) - RETURN vectors \ No newline at end of file + RETURN vectors diff --git a/hql-tests/tests/complete_vector_addition_and_search/queries.hx b/hql-tests/tests/complete_vector_addition_and_search/queries.hx index 8d24a5d2..816b92a2 100644 --- a/hql-tests/tests/complete_vector_addition_and_search/queries.hx +++ b/hql-tests/tests/complete_vector_addition_and_search/queries.hx @@ -1,16 +1,16 @@ -QUERY addEmbedding(vec: [F64]) => +QUERY 
addEmbedding(vec: [F32]) => doc <- AddN({content: "Hello, content!", number: 1}) embedding <- AddV(vec, {chunk: "Hello, chunk!", chunk_id: 1, number: 1, reference: "Hello, reference!"}) AddE::From(doc)::To(embedding) RETURN embedding -QUERY getAllEmbedding() => +QUERY getAllEmbedding() => c <- N({number: 1}) embeddings <- c::Out RETURN embeddings -QUERY searchEmbedding(query: [F64]) => +QUERY searchEmbedding(query: [F32]) => c <- N({number: 1}) embedding_search <- SearchV(query, 10) RETURN embedding_search::{ diff --git a/hql-tests/tests/date_comparisons/queries.hx b/hql-tests/tests/date_comparisons/queries.hx index af5a6793..e42be9ea 100644 --- a/hql-tests/tests/date_comparisons/queries.hx +++ b/hql-tests/tests/date_comparisons/queries.hx @@ -1,8 +1,8 @@ -QUERY SearchRecentDocuments (vector: [F64], limit: I64, cutoff_date: Date) => +QUERY SearchRecentDocuments (vector: [F32], limit: I64, cutoff_date: Date) => documents <- SearchV(vector, limit)::WHERE(_::{created_at}::GTE(cutoff_date)) RETURN documents -QUERY InsertVector (vector: [F64], content: String, created_at: Date) => +QUERY InsertVector (vector: [F32], content: String, created_at: Date) => document <- AddV(vector, { content: content, created_at: created_at }) doc <- document::{content, created_at} - RETURN document \ No newline at end of file + RETURN document diff --git a/hql-tests/tests/dijkstra_custom_weights/schema.hx b/hql-tests/tests/dijkstra_custom_weights/schema.hx index 28ec1eb8..878cb49c 100644 --- a/hql-tests/tests/dijkstra_custom_weights/schema.hx +++ b/hql-tests/tests/dijkstra_custom_weights/schema.hx @@ -1,7 +1,7 @@ N::Location { name: String, - traffic_factor: F64, - popularity: F64 + traffic_factor: F32, + popularity: F32 } E::Route { diff --git a/hql-tests/tests/edge_from_node_to_vec/queries.hx b/hql-tests/tests/edge_from_node_to_vec/queries.hx index 94dac69b..dccde6e9 100644 --- a/hql-tests/tests/edge_from_node_to_vec/queries.hx +++ b/hql-tests/tests/edge_from_node_to_vec/queries.hx @@ -14,7 +14,7 @@ E::EmbeddingOf { } } -QUERY add(vec: [F64]) => +QUERY add(vec: [F32]) => user <- AddN({ name: "John Doe" }) @@ -24,11 +24,10 @@ QUERY add(vec: [F64]) => AddE({category: "test"})::From(user)::To(embedding) RETURN user -QUERY to_v(query: [F64], k: I32, data: String) => +QUERY to_v(query: [F32], k: I32, data: String) => user <- N({name: "John Doe"}) edges <- user::OutE filtered <- edges::WHERE(_::{category}::EQ(data)) vectors <- filtered::ToV searched <- vectors::SearchV(query, k) RETURN user, edges, filtered, vectors, searched - diff --git a/hql-tests/tests/graphiti/queries.hx b/hql-tests/tests/graphiti/queries.hx index 7fb8513d..bee842f2 100644 --- a/hql-tests/tests/graphiti/queries.hx +++ b/hql-tests/tests/graphiti/queries.hx @@ -2,13 +2,13 @@ // Entity // ######################################################### -QUERY createEntity (name: String, name_embedding: [F64], group_id: String, summary: String, created_at: Date, labels: [String], attributes: String) => +QUERY createEntity (name: String, name_embedding: [F32], group_id: String, summary: String, created_at: Date, labels: [String], attributes: String) => entity <- AddN({name: name, group_id: group_id, summary: summary, created_at: created_at, labels: labels, attributes: attributes}) embedding <- AddV(name_embedding, {name_embedding: name_embedding}) edge <- AddE({group_id: group_id})::From(entity)::To(embedding) RETURN entity -QUERY updateEntity (entity_id: ID, name: String, name_embedding: [F64], group_id: String, summary: String, created_at: Date, labels: 
[String], attributes: String) => +QUERY updateEntity (entity_id: ID, name: String, name_embedding: [F32], group_id: String, summary: String, created_at: Date, labels: [String], attributes: String) => entity <- N(entity_id)::UPDATE({name: name, group_id: group_id, summary: summary, created_at: created_at, labels: labels, attributes: attributes}) DROP N(entity_id)::Out embedding <- AddV(name_embedding, {name_embedding: name_embedding}) @@ -113,13 +113,13 @@ QUERY deleteEpisodeEdge (episodeEdge_id: ID) => // Community // ######################################################### -QUERY createCommunity (name: String, group_id: String, summary: String, created_at: Date, labels: [String], name_embedding: [F64]) => +QUERY createCommunity (name: String, group_id: String, summary: String, created_at: Date, labels: [String], name_embedding: [F32]) => community <- AddN({name: name, group_id: group_id, summary: summary, created_at: created_at, labels: labels}) embedding <- AddV(name_embedding, {name_embedding: name_embedding}) edge <- AddE({group_id: group_id})::From(community)::To(embedding) RETURN community -QUERY updateCommunity (community_id: ID, name: String, group_id: String, summary: String, created_at: Date, labels: [String], name_embedding: [F64]) => +QUERY updateCommunity (community_id: ID, name: String, group_id: String, summary: String, created_at: Date, labels: [String], name_embedding: [F32]) => community <- N(community_id)::UPDATE({name: name, group_id: group_id, summary: summary, created_at: created_at, labels: labels}) DROP N(community_id)::Out embedding <- AddV(name_embedding, {name_embedding: name_embedding}) diff --git a/hql-tests/tests/graphiti/schema.hx b/hql-tests/tests/graphiti/schema.hx index 3cfb1c54..d523ef9a 100644 --- a/hql-tests/tests/graphiti/schema.hx +++ b/hql-tests/tests/graphiti/schema.hx @@ -20,7 +20,7 @@ E::Entity_to_Embedding { } V::Entity_Embedding { - name_embedding: [F64], + name_embedding: [F32], } E::Entity_Fact { @@ -60,7 +60,7 @@ E::Fact_to_Embedding { } V::Fact_Embedding { - fact: [F64], + fact: [F32], } E::Fact_Entity { @@ -118,7 +118,7 @@ E::Community_to_Embedding { } V::Community_Embedding { - name_embedding: [F64], + name_embedding: [F32], } E::Community_Entity { @@ -146,4 +146,4 @@ E::Community_Fact { group_id: String, created_at: Date DEFAULT NOW } -} \ No newline at end of file +} diff --git a/hql-tests/tests/knowledge_graphs/queries.hx b/hql-tests/tests/knowledge_graphs/queries.hx index 3ec2d9c5..471eea31 100644 --- a/hql-tests/tests/knowledge_graphs/queries.hx +++ b/hql-tests/tests/knowledge_graphs/queries.hx @@ -138,7 +138,7 @@ QUERY insert_event_Cluster1 ( uuid: String, chunk_uuid: String, statement: String, - embedding: [F64], + embedding: [F32], triplets: [String], statement_type: String, temporal_type: String, @@ -189,7 +189,7 @@ QUERY update_event_Cluster1 ( uuid: String, chunk_uuid: String, statement: String, - embedding: [F64], + embedding: [F32], triplets: [String], statement_type: String, temporal_type: String, @@ -414,7 +414,7 @@ QUERY remove_entity_Cluster1 ( // ######################################################### QUERY vector_search_events_Cluster1 ( - query_embedding: [F64], + query_embedding: [F32], k: I32 ) => matching_embeddings <- SearchV(query_embedding, k) @@ -440,7 +440,7 @@ QUERY get_stories_mentioning_entity_as_subject_Cluster1 ( stories <- chunks::In RETURN stories, chunks, events, triplets -// Find stories that mention a specific entity (as object) +// Find stories that mention a specific entity (as object) QUERY 
get_stories_mentioning_entity_as_object_Cluster1 ( entity_uuid: String ) =>
@@ -613,7 +613,7 @@ QUERY get_sub_comments_by_parent_uuid_Cluster2 (
 // Story Embedding operations
 QUERY add_story_embedding_Cluster2 (
     story_uuid: String,
-    embedding: [F64],
+    embedding: [F32],
     content: String
 ) =>
     story <- N({uuid: story_uuid})
@@ -624,7 +624,7 @@ QUERY add_story_embedding_Cluster2 (
 // Comment Embedding operations
 QUERY add_comment_embedding_Cluster2 (
     comment_uuid: String,
-    embedding: [F64],
+    embedding: [F32],
     content: String
 ) =>
     comment <- N({uuid: comment_uuid})
@@ -633,7 +633,7 @@ QUERY add_comment_embedding_Cluster2 (
     RETURN comment
 
 QUERY search_similar_stories_Cluster2 (
-    query_embedding: [F64],
+    query_embedding: [F32],
     k: I64
 ) =>
     matching_embeddings <- SearchV(query_embedding, k)
@@ -700,4 +700,4 @@ QUERY drop_all_Cluster2 () =>
     DROP N
     DROP N::Out
     DROP N
-    RETURN "Success"
\ No newline at end of file
+    RETURN "Success"
diff --git a/hql-tests/tests/knowledge_graphs/schema.hx b/hql-tests/tests/knowledge_graphs/schema.hx
index b4e5836f..ba523aee 100644
--- a/hql-tests/tests/knowledge_graphs/schema.hx
+++ b/hql-tests/tests/knowledge_graphs/schema.hx
@@ -77,7 +77,7 @@ E::Event_to_Embedding_Cluster1 {
 }
 
 V::EventEmbedding_Cluster1 {
-    embedding: [F64]
+    embedding: [F32]
 }
 
diff --git a/hql-tests/tests/model_macro/schema.hx b/hql-tests/tests/model_macro/schema.hx
index f20b9d60..3d97d6ae 100644
--- a/hql-tests/tests/model_macro/schema.hx
+++ b/hql-tests/tests/model_macro/schema.hx
@@ -1,6 +1,6 @@
 schema::1 {
     V::ClinicalNote {
-        vector: [F64],
+        vector: [F32],
         text: String,
     }
-}
\ No newline at end of file
+}
diff --git a/hql-tests/tests/multi_type_index_test/queries.hx b/hql-tests/tests/multi_type_index_test/queries.hx
index 35b1e42e..314bd8fe 100644
--- a/hql-tests/tests/multi_type_index_test/queries.hx
+++ b/hql-tests/tests/multi_type_index_test/queries.hx
@@ -9,11 +9,11 @@ QUERY testString(value: String) =>
 QUERY testI8(value: I8) =>
     node <- N({i8_field: value})
     RETURN node
-    
+
 QUERY testI32(value: I32) =>
     node <- N({i32_field: value})
     RETURN node
-    
+
 QUERY testI64(value: I64) =>
     node <- N({i64_field: value})
     RETURN node
@@ -22,11 +22,11 @@ QUERY testI64(value: I64) =>
 QUERY testU8(value: U8) =>
     node <- N({u8_field: value})
     RETURN node
-    
+
 QUERY testU32(value: U32) =>
     node <- N({u32_field: value})
     RETURN node
-    
+
 QUERY testU64(value: U64) =>
     node <- N({u64_field: value})
     RETURN node
@@ -35,7 +35,7 @@ QUERY testU64(value: U64) =>
 QUERY testF32(value: F32) =>
     node <- N({f32_field: value})
     RETURN node
-    
+
 QUERY testF64(value: F64) =>
     node <- N({f64_field: value})
     RETURN node
@@ -62,4 +62,4 @@ QUERY testMultipleConditions(name: String, age: U32, active: Boolean) =>
     nodes_by_name <- N({str_field: name})
     nodes_by_age <- N({u32_field: age})
     nodes_by_active <- N({bool_field: active})
-    RETURN nodes_by_name, nodes_by_age, nodes_by_active
\ No newline at end of file
+    RETURN nodes_by_name, nodes_by_age, nodes_by_active
diff --git a/hql-tests/tests/nested_for_loops/queries.hx b/hql-tests/tests/nested_for_loops/queries.hx
index 3e0e2901..9234c406 100644
--- a/hql-tests/tests/nested_for_loops/queries.hx
+++ b/hql-tests/tests/nested_for_loops/queries.hx
@@ -1,4 +1,4 @@
-QUERY loaddocs_rag(chapters: [{ id: I64, subchapters: [{ title: String, content: String, chunks: [{chunk: String, vector: [F64]}]}] }]) =>
+QUERY loaddocs_rag(chapters: [{ id: I64, subchapters: [{ title: String, content: String, chunks: [{chunk: String, vector: [F32]}]}] }]) =>
     FOR {id, subchapters} IN chapters {
         chapter_node <- AddN({ chapter_index: id })
         FOR {title, content, chunks} IN subchapters {
@@ -12,11 +12,11 @@ QUERY loaddocs_rag(chapters: [{ id: I64, subchapters: [{ title: String, content:
     }
     RETURN "Success"
 
-QUERY searchdocs_rag(query: [F64], k: I32) =>
+QUERY searchdocs_rag(query: [F32], k: I32) =>
     vecs <- SearchV(query, k)
     subchapters <- vecs::In
     RETURN subchapters::{title, content}
 
-QUERY edge_node(id: ID) => 
+QUERY edge_node(id: ID) =>
     e <- N::OutE
-    RETURN e
\ No newline at end of file
+    RETURN e
diff --git a/hql-tests/tests/putts_professor/queries.hx b/hql-tests/tests/putts_professor/queries.hx
index aae93f80..85657722 100644
--- a/hql-tests/tests/putts_professor/queries.hx
+++ b/hql-tests/tests/putts_professor/queries.hx
@@ -45,7 +45,7 @@ QUERY link_professor_to_lab(professor_id: ID, lab_id: ID) =>
     lab <- N(lab_id)
     edge <- AddE::From(professor)::To(lab)
     RETURN edge
-    
+
 // Link Professor to Research Area
 QUERY link_professor_to_research_area(professor_id: ID, research_area_id: ID) =>
     professor <- N(professor_id)
@@ -54,7 +54,7 @@ QUERY link_professor_to_research_area(professor_id: ID, research_area_id: ID) =>
     RETURN edge
 
 // Search Similar Professors based on Research Area + Description Embedding
-QUERY search_similar_professors_by_research_area_and_description(query_vector: [F64], k: I64) =>
+QUERY search_similar_professors_by_research_area_and_description(query_vector: [F32], k: I64) =>
     vecs <- SearchV(query_vector, k)
     professors <- vecs::In
     RETURN professors
@@ -64,14 +64,14 @@ QUERY get_professor_research_areas_with_descriptions(professor_id: ID) =>
     research_areas <- N(professor_id)::Out::{areas_and_descriptions: areas_and_descriptions}
     RETURN research_areas
 
-QUERY create_research_area_embedding(professor_id: ID, areas_and_descriptions: String, vector: [F64]) =>
+QUERY create_research_area_embedding(professor_id: ID, areas_and_descriptions: String, vector: [F32]) =>
     professor <- N(professor_id)
     research_area <- AddV(vector, { areas_and_descriptions: areas_and_descriptions })
     edge <- AddE::From(professor)::To(research_area)
     RETURN research_area
 
-// GET Queries // 
+// GET Queries //
 
 QUERY get_professors_by_university_name(university_name: String) =>
     professors <- N::Out::WHERE(_::{name}::EQ(university_name))
@@ -80,7 +80,7 @@ QUERY get_professors_by_university_name(university_name: String) =>
 QUERY get_professor_by_research_area_name(research_area_name: String) =>
     professors <- N::Out::WHERE(_::{research_area}::EQ(research_area_name))
     RETURN professors
-    
+
 QUERY get_professors_by_department_name(department_name: String) =>
     professors <- N::Out::WHERE(_::{name}::EQ(department_name))
-    RETURN professors
\ No newline at end of file
+    RETURN professors
diff --git a/hql-tests/tests/rerankers/queries.hx b/hql-tests/tests/rerankers/queries.hx
index a3caa8e7..70468441 100644
--- a/hql-tests/tests/rerankers/queries.hx
+++ b/hql-tests/tests/rerankers/queries.hx
@@ -9,42 +9,42 @@ N::Article {
 }
 
 // Test 1: RerankRRF with default k
-QUERY testRRFDefault(query_vec: [F64]) =>
+QUERY testRRFDefault(query_vec: [F32]) =>
     results <- SearchV(query_vec, 100)
         ::RerankRRF
         ::RANGE(0, 10)
     RETURN results
 
 // Test 2: RerankRRF with custom k parameter
-QUERY testRRFCustomK(query_vec: [F64], k_val: F64) =>
+QUERY testRRFCustomK(query_vec: [F32], k_val: F32) =>
     results <- SearchV(query_vec, 100)
         ::RerankRRF(k: k_val)
         ::RANGE(0, 10)
     RETURN results
 
 // Test 3: RerankMMR with default distance (cosine)
-QUERY testMMRDefault(query_vec: [F64]) =>
+QUERY testMMRDefault(query_vec: [F32]) =>
     results <- SearchV(query_vec, 100)
         ::RerankMMR(lambda: 0.7)
         ::RANGE(0, 10)
     RETURN results
 
 // Test 4: RerankMMR with euclidean distance
-QUERY testMMREuclidean(query_vec: [F64]) =>
+QUERY testMMREuclidean(query_vec: [F32]) =>
     results <- SearchV(query_vec, 100)
         ::RerankMMR(lambda: 0.5, distance: "euclidean")
         ::RANGE(0, 10)
     RETURN results
 
 // Test 5: RerankMMR with dot product distance
-QUERY testMMRDotProduct(query_vec: [F64]) =>
+QUERY testMMRDotProduct(query_vec: [F32]) =>
     results <- SearchV(query_vec, 100)
         ::RerankMMR(lambda: 0.6, distance: "dotproduct")
         ::RANGE(0, 10)
     RETURN results
 
 // Test 6: Chained rerankers (RRF then MMR)
-QUERY testChainedRerankers(query_vec: [F64]) =>
+QUERY testChainedRerankers(query_vec: [F32]) =>
     results <- SearchV(query_vec, 100)
         ::RerankRRF(k: 60)
         ::RerankMMR(lambda: 0.7)
@@ -52,14 +52,14 @@ QUERY testChainedRerankers(query_vec: [F64]) =>
     RETURN results
 
 // Test 7: MMR with variable lambda
-QUERY testMMRVariableLambda(query_vec: [F64], lambda_val: F64) =>
+QUERY testMMRVariableLambda(query_vec: [F32], lambda_val: F32) =>
     results <- SearchV(query_vec, 100)
         ::RerankMMR(lambda: lambda_val)
         ::RANGE(0, 10)
     RETURN results
 
 // Test 8: Multiple chained MMR rerankers
-QUERY testMultipleMMR(query_vec: [F64]) =>
+QUERY testMultipleMMR(query_vec: [F32]) =>
     results <- SearchV(query_vec, 100)
         ::RerankMMR(lambda: 0.9)
         ::RerankMMR(lambda: 0.5)
diff --git a/hql-tests/tests/search_v_as_assignment_and_expr/file8.hx b/hql-tests/tests/search_v_as_assignment_and_expr/file8.hx
index 32321d2e..be7c42cf 100644
--- a/hql-tests/tests/search_v_as_assignment_and_expr/file8.hx
+++ b/hql-tests/tests/search_v_as_assignment_and_expr/file8.hx
@@ -13,7 +13,7 @@ E::EdgeFile8 {
 }
 
 
-QUERY file8(vec: [F64]) =>
+QUERY file8(vec: [F32]) =>
     new_vec <- AddV(vec)
     AddV(vec)
     RETURN new_vec
diff --git a/hql-tests/tests/series/queries.hx b/hql-tests/tests/series/queries.hx
index ba8e7757..0319e3c4 100644
--- a/hql-tests/tests/series/queries.hx
+++ b/hql-tests/tests/series/queries.hx
@@ -109,7 +109,7 @@ QUERY addWarmConnect (user_id: ID, warm_connect_id: ID) =>
     metadata_to_warm_connect <- AddE()::From(metadata)::To(warm_connect)
     RETURN warm_connect
 
-QUERY createUserBio (user_id: ID, bio: [F64]) =>
+QUERY createUserBio (user_id: ID, bio: [F32]) =>
     user_bio <- AddV(bio)
     user <- N(user_id)
     user_user_bio <- AddE()::From(user)::To(user_bio)
@@ -156,7 +156,7 @@ QUERY getUsersByReferrer(referrer: String) =>
     RETURN users
 
 #[mcp]
-QUERY searchUsersByBio(bio_vector: [F64], k: I64) =>
+QUERY searchUsersByBio(bio_vector: [F32], k: I64) =>
     similar_bios <- SearchV(bio_vector, k)
     users <- similar_bios::In
     RETURN users
@@ -402,4 +402,4 @@ QUERY deleteUser (user_id: ID) =>
     DROP N(user_id)::Out
     DROP N(user_id)::OutE
     DROP N(user_id)
-    RETURN "success"
\ No newline at end of file
+    RETURN "success"
diff --git a/hql-tests/tests/series/schema.hx b/hql-tests/tests/series/schema.hx
index 7e22daf8..0e927fb1 100644
--- a/hql-tests/tests/series/schema.hx
+++ b/hql-tests/tests/series/schema.hx
@@ -191,6 +191,5 @@ E::LinkedinContent_to_CurrentCompany {
 }
 
 V::EmbeddedBio {
-    bio: [F64]
+    bio: [F32]
 }
-
diff --git a/hql-tests/tests/update_drop_then_add/file52.hx b/hql-tests/tests/update_drop_then_add/file52.hx
index c441fc89..5ded7ea8 100644
--- a/hql-tests/tests/update_drop_then_add/file52.hx
+++ b/hql-tests/tests/update_drop_then_add/file52.hx
@@ -1,7 +1,7 @@
-QUERY updateEntity (entity_id: ID, name: String, name_embedding: [F64], group_id: String, summary: String, created_at: Date, labels: [String], attributes: String) =>
+QUERY updateEntity (entity_id: ID, name: String, name_embedding: [F32], group_id: String, summary: String, created_at: Date, labels: [String], attributes: String) =>
     entity <- N(entity_id)::UPDATE({name: name, group_id: group_id, summary: summary, created_at: created_at, labels: labels, attributes: attributes})
     DROP N(entity_id)::Out
     DROP N(entity_id)::OutE
     embedding <- AddV(name_embedding, {name_embedding: name_embedding})
     edge <- AddE({group_id: group_id})::From(entity)::To(embedding)
-    RETURN entity
\ No newline at end of file
+    RETURN entity
diff --git a/hql-tests/tests/update_drop_then_add/schema.hx b/hql-tests/tests/update_drop_then_add/schema.hx
index b10e04f3..f4504290 100644
--- a/hql-tests/tests/update_drop_then_add/schema.hx
+++ b/hql-tests/tests/update_drop_then_add/schema.hx
@@ -16,5 +16,5 @@ E::Entity_to_Embedding {
 }
 
 V::Entity_Embedding {
-    name_embedding: [F64],
-}
\ No newline at end of file
+    name_embedding: [F32],
+}
diff --git a/hql-tests/tests/user_test_3/queries.hx b/hql-tests/tests/user_test_3/queries.hx
index 1ec7ec54..7683e8a3 100644
--- a/hql-tests/tests/user_test_3/queries.hx
+++ b/hql-tests/tests/user_test_3/queries.hx
@@ -28,7 +28,7 @@ QUERY add_chunk_with_metadata(
     edge <- AddE::From(doc)::To(chunk)
     RETURN chunk
 
-QUERY add_embedding(chunk_element_id: String, vec: [F64]) =>
+QUERY add_embedding(chunk_element_id: String, vec: [F32]) =>
    chunk <- N({element_id: chunk_element_id})
     embedding <- AddV(vec, {chunk_id: chunk_element_id})
     edge <- AddE::From(chunk)::To(embedding)
@@ -43,7 +43,7 @@ QUERY get_document_chunks(doc_filename: String) =>
     chunks <- doc::Out
     RETURN chunks
 
-QUERY search_similar_chunks(query_vec: [F64], limit: I64) =>
+QUERY search_similar_chunks(query_vec: [F32], limit: I64) =>
     embeddings <- SearchV(query_vec, limit)
     chunks <- embeddings::In
     RETURN chunks
diff --git a/hql-tests/tests/user_test_5/queries.hx b/hql-tests/tests/user_test_5/queries.hx
index d1217474..598ece5f 100644
--- a/hql-tests/tests/user_test_5/queries.hx
+++ b/hql-tests/tests/user_test_5/queries.hx
@@ -29,7 +29,7 @@ QUERY get_all_posts() =>
     posts <- N
     RETURN posts
 
-QUERY search_posts_vec(query: [F64], k: I32) =>
+QUERY search_posts_vec(query: [F32], k: I32) =>
     vecs <- SearchV(query, k)
     posts <- vecs::In
     RETURN posts::{subreddit, title, content, url}
diff --git a/metrics/src/events.rs b/metrics/src/events.rs
index 27cd145d..0d47203f 100644
--- a/metrics/src/events.rs
+++ b/metrics/src/events.rs
@@ -248,4 +248,4 @@ pub struct InvalidApiKeyEvent {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub cluster_id: Option,
     pub time_taken_usec: u32,
-}
\ No newline at end of file
+}
diff --git a/metrics/src/lib.rs b/metrics/src/lib.rs
index ad6039f4..067bd1a0 100644
--- a/metrics/src/lib.rs
+++ b/metrics/src/lib.rs
@@ -602,7 +602,6 @@ mod tests {
         // Channel should have fewer or equal batches
         let _final_count = METRICS_STATE.events_rx.len();
-
     }
 }
@@ -684,12 +683,11 @@ mod tests {
         // Should be able to serialize batch
         let json_bytes = sonic_rs::to_vec(&events).unwrap();
-        assert!(json_bytes.len() > 0);
+        assert!(!json_bytes.is_empty());
 
         // Should be valid JSON array
         let json_str = String::from_utf8(json_bytes).unwrap();
         assert!(json_str.starts_with('['));
         assert!(json_str.ends_with(']'));
     }
-
 }