Skip to content

Commit 7ae28f4

Browse files
committed
Add error on helix check for old vector datatype (F64)
1 parent f1fc167 commit 7ae28f4

File tree

4 files changed

+204
-12
lines changed

4 files changed

+204
-12
lines changed

Cargo.lock

Lines changed: 1 addition & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helix-cli/src/commands/check.rs

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use crate::utils::helixc_utils::{
99
};
1010
use crate::utils::{print_confirm, print_error, print_status, print_success, print_warning};
1111
use eyre::Result;
12+
use helix_db::helixc::parser::types::FieldType;
1213
use std::fs;
1314
use std::path::Path;
1415
use std::process::Command;
@@ -49,6 +50,10 @@ async fn check_instance(
4950
validate_project_syntax(project)?;
5051
print_success("Syntax validation passed");
5152

53+
// Step 1.5: Validate vector data types
54+
print_status("VECTORS", "Validating vector data types...");
55+
validate_vector_data_types(project)?;
56+
5257
// Step 2: Ensure helix repo is cached (reuse from build.rs)
5358
build::ensure_helix_repo_cached().await?;
5459

@@ -128,6 +133,10 @@ async fn check_all_instances(
128133
));
129134
}
130135

136+
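// Validate vector data types up front so deprecated F64 usage fails fast before any instance is checked (check_instance repeats this validation per instance)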
137+
print_status("VECTORS", "Validating vector data types...");
138+
validate_vector_data_types(project)?;
139+
131140
// Check each instance
132141
for instance_name in &instances {
133142
check_instance(project, instance_name, metrics_sender).await?;
@@ -137,6 +146,63 @@ async fn check_all_instances(
137146
Ok(())
138147
}
139148

149+
/// Validate vector data types and warn about F64 usage
150+
fn validate_vector_data_types(project: &ProjectContext) -> Result<()> {
151+
// Collect all .hx files for validation
152+
let hx_files = collect_hx_files(&project.root, &project.config.project.queries)?;
153+
154+
// Generate content and parse
155+
let content = generate_content(&hx_files)?;
156+
let source = parse_content(&content)?;
157+
158+
let mut found_f64_vectors = false;
159+
let mut f64_vector_names = Vec::new();
160+
161+
// Check all vector schemas for F64 usage
162+
for schema in source.get_schemas_in_order() {
163+
for vector_schema in &schema.vector_schemas {
164+
for field in &vector_schema.fields {
165+
if contains_f64_type(&field.field_type) {
166+
found_f64_vectors = true;
167+
f64_vector_names.push(format!("V::{}.{}", vector_schema.name, field.name));
168+
}
169+
}
170+
}
171+
}
172+
173+
if found_f64_vectors {
174+
print_warning("Found F64 data types in vector fields");
175+
println!();
176+
println!(" Vector fields using F64:");
177+
for vector_name in &f64_vector_names {
178+
println!(" • {}", vector_name);
179+
}
180+
println!();
181+
println!(" ⚠️ F64 vectors are deprecated.");
182+
println!(
183+
" For vectors, use [F32] instead of [F64] for better performance and compatibility."
184+
);
185+
println!(" F32 provides sufficient precision for most vector similarity use cases.");
186+
return Err(eyre::eyre!(
187+
"Vectors with F64 data types are deprecated. Use F32 instead."
188+
));
189+
} else {
190+
print_success("Vector data types validation passed");
191+
}
192+
193+
Ok(())
194+
}
195+
196+
/// Recursively check if a FieldType contains F64
197+
fn contains_f64_type(field_type: &FieldType) -> bool {
198+
match field_type {
199+
FieldType::F64 => true,
200+
FieldType::Array(inner) => contains_f64_type(inner),
201+
FieldType::Object(obj) => obj.values().any(contains_f64_type),
202+
_ => false,
203+
}
204+
}
205+
140206
/// Validate project syntax by parsing queries and schema (similar to build.rs but without generating files)
141207
fn validate_project_syntax(project: &ProjectContext) -> Result<()> {
142208
// Collect all .hx files for validation
@@ -223,3 +289,138 @@ fn handle_cargo_check_failure(
223289

224290
Ok(())
225291
}
292+
293+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{ContainerRuntime, HelixConfig, ProjectConfig};
    use std::fs;
    use tempfile::tempdir;

    /// Build a throwaway project whose `db/schema.hx` contains `schema_content`.
    ///
    /// The temporary directory handle is returned alongside the context so the
    /// caller keeps it in scope while the project files are being read.
    fn project_with_schema(schema_content: &str) -> (tempfile::TempDir, ProjectContext) {
        let temp_dir = tempdir().unwrap();
        let project_root = temp_dir.path();

        // In-memory equivalent of a helix.toml config
        let config = HelixConfig {
            project: ProjectConfig {
                name: "test_project".to_string(),
                queries: std::path::PathBuf::from("./db/"),
                container_runtime: ContainerRuntime::Docker,
            },
            local: std::collections::HashMap::new(),
            cloud: std::collections::HashMap::new(),
        };

        // Create project context
        let project = ProjectContext {
            root: project_root.to_path_buf(),
            config,
            helix_dir: project_root.join(".helix"),
        };

        // Create the db directory and write the schema file into it
        let db_dir = project_root.join("db");
        fs::create_dir_all(&db_dir).unwrap();
        fs::write(db_dir.join("schema.hx"), schema_content).unwrap();

        (temp_dir, project)
    }

    #[test]
    fn test_validate_vector_data_types_with_f64() {
        // A .hx file with F64 vector fields
        let schema_content = r#"
V::Document {
content: String,
embedding: [F64],
scores: [F64]
}

QUERY test() =>
d <- V<Document>
RETURN d
"#;
        let (_temp_dir, project) = project_with_schema(schema_content);

        // validate_vector_data_types returns Err when any vector field uses F64,
        // so the validation must fail here.
        let result = validate_vector_data_types(&project);
        let err = result.expect_err("Validation should fail when vector fields use F64");
        assert!(
            err.to_string().contains("F64 data types are deprecated"),
            "Error should explain that F64 vectors are deprecated, got: {err}"
        );
    }

    #[test]
    fn test_validate_vector_data_types_with_f32() {
        // A .hx file with F32 vector fields (correct)
        let schema_content = r#"
V::Document {
content: String,
embedding: [F32],
scores: [F32]
}

QUERY test() =>
d <- V<Document>
RETURN d
"#;
        let (_temp_dir, project) = project_with_schema(schema_content);

        // Test validation - should pass without warnings
        let result = validate_vector_data_types(&project);
        assert!(result.is_ok(), "Validation should succeed with F32");
    }

    #[test]
    fn test_contains_f64_type() {
        use helix_db::helixc::parser::types::FieldType;

        // Test direct F64
        assert!(contains_f64_type(&FieldType::F64));

        // Test F32 (should be false)
        assert!(!contains_f64_type(&FieldType::F32));

        // Test Array of F64
        assert!(contains_f64_type(&FieldType::Array(Box::new(
            FieldType::F64
        ))));

        // Test Array of F32 (should be false)
        assert!(!contains_f64_type(&FieldType::Array(Box::new(
            FieldType::F32
        ))));

        // Test nested object with F64
        let mut obj = std::collections::HashMap::new();
        obj.insert("score".to_string(), FieldType::F64);
        assert!(contains_f64_type(&FieldType::Object(obj)));

        // Test other types
        assert!(!contains_f64_type(&FieldType::String));
        assert!(!contains_f64_type(&FieldType::Boolean));
    }
}

helix-db/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ async-trait = "0.1.88"
5555
thiserror = "2.0.12"
5656
polars = { version = "0.46.0", features = ["parquet", "lazy", "json"], optional = true }
5757
subtle = "2.6.1"
58-
sha_256 = "=0.1.1"
58+
sha2 = "0.10"
5959
byteorder = "1.5.0"
6060
roaring = "0.11.2"
6161
tinyvec = "1.10.0"

helix-db/src/protocol/request.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,7 @@ mod tests {
299299
out_fmt: Format::Json,
300300
};
301301

302-
assert_ne!(
303-
request1.api_key_hash.unwrap(),
304-
request2.api_key_hash.unwrap()
305-
);
302+
assert_ne!(request1.api_key.unwrap(), request2.api_key.unwrap());
306303
}
307304

308305
#[test]

0 commit comments

Comments
 (0)