Skip to content

Commit be3dba4

Browse files
authored
refactor: parse query str to should boolean query (#9)
Previously, a query string was converted to a Match query. Due to the limitations of tokenization, the search results could confuse our users[1]. This commit updates the parsing logic to **expand** the query string into a should boolean query, which should return more search results and is thus more intuitive for average users, based on this discussion[2]. [1]: infinilabs/pizza-searchbox#9 [2]: infinilabs/pizza-searchbox#9 (comment)
1 parent 6aa0e23 commit be3dba4

File tree

5 files changed

+152
-18
lines changed

5 files changed

+152
-18
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ cfg-if = "1.0.0"
3939
wee_alloc = { optional = true,version = "0.4.5" }
4040

4141
serde = { version = "1.0.197", features = ["derive"],default-features = false }
42-
serde_json = { version = "1.0.115",default-features = false }
42+
serde_json = { version = "1.0.115", default-features = false }
4343

4444
# We do not depend on uuid and getrandom directly, but we need to enable the corresponding
4545
# feature to fix the build on WASM

pkg/pizza_wasm_bg.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -330,8 +330,8 @@ export function __wbindgen_cb_drop(arg0) {
330330
return ret;
331331
};
332332

333-
export function __wbindgen_closure_wrapper804(arg0, arg1, arg2) {
334-
const ret = makeMutClosure(arg0, arg1, 129, __wbg_adapter_18);
333+
export function __wbindgen_closure_wrapper814(arg0, arg1, arg2) {
334+
const ret = makeMutClosure(arg0, arg1, 133, __wbg_adapter_18);
335335
return addHeapObject(ret);
336336
};
337337

pkg/pizza_wasm_bg.wasm

2.65 KB
Binary file not shown.

pkg/pizza_wasm_bg.wasm.gz

963 Bytes
Binary file not shown.

src/lib.rs

Lines changed: 149 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use alloc::boxed::Box;
1515
#[cfg(feature = "debug")]
1616
use alloc::format;
1717
use alloc::string::ToString;
18+
use alloc::vec;
1819
use alloc::vec::Vec;
1920
use cfg_if::cfg_if;
2021
use core::fmt::Display;
@@ -26,7 +27,14 @@ use pizza_engine::document::DraftDoc;
2627
use pizza_engine::document::FieldType;
2728
use pizza_engine::document::FieldValue;
2829
use pizza_engine::document::Property;
30+
use pizza_engine::search::query::BooleanQuery;
31+
use pizza_engine::search::query::MatchQuery;
32+
use pizza_engine::search::query::PrefixQuery;
33+
use pizza_engine::search::query::Query;
34+
use pizza_engine::search::query::Rewrite;
35+
use pizza_engine::search::query::Term;
2936
use pizza_engine::search::OriginalQuery;
37+
use pizza_engine::search::ParsedQuery;
3038
use pizza_engine::search::QueryContext;
3139
use pizza_engine::search::Searcher;
3240
use pizza_engine::store::MemoryStore;
@@ -64,8 +72,8 @@ pub struct Pizza {
6472
}
6573

6674
impl Pizza {
67-
pub fn search(&self, query_context: &QueryContext) -> JsValue {
68-
let result = self.searcher.parse_and_query(query_context, &());
75+
pub fn search(&self, query_context: &QueryContext, parsed_query: ParsedQuery) -> JsValue {
76+
let result = self.searcher.query(query_context, &parsed_query, &());
6977

7078
let result = match result {
7179
Ok(o) => o,
@@ -75,7 +83,11 @@ impl Pizza {
7583
wasm_bindgen::throw_str(&error_msg)
7684
}
7785
};
78-
web_sys::console::log_1(&JsValue::from_str("search completed"));
86+
let hits_len = result.hits.as_ref().map(|hits| hits.len()).unwrap_or(0);
87+
web_sys::console::log_1(&JsValue::from_str(&alloc::format!(
88+
"search completed: {}",
89+
hits_len
90+
)));
7991

8092
#[cfg(feature = "debug")]
8193
{
@@ -299,14 +311,20 @@ impl Pizza {
299311

300312
#[cfg(feature = "query_string")]
301313
pub fn search_by_query_string(&self, query_string: &str) -> JsValue {
302-
let original_query = OriginalQuery::QueryString(query_string.to_string());
314+
if query_string.is_empty() {
315+
return JsValue::null();
316+
}
317+
318+
let original_query = OriginalQuery::QueryString(query_string.into());
319+
let mut query_context = QueryContext::new(original_query, false);
303320

304-
let mut query_context = QueryContext::new(original_query, true);
305321
query_context.support_wildcard_in_field_name = true;
306322
query_context.default_operator = Operator::Or;
307-
query_context.default_field = "*".into();
308323

309-
self.search(&query_context)
324+
// Parse the query
325+
let parsed_query = query_string_to_parsed_query(&self.searcher, &query_context);
326+
327+
self.search(&query_context, parsed_query)
310328
}
311329

312330
#[cfg(feature = "query_string")]
@@ -319,21 +337,35 @@ impl Pizza {
319337
size: usize,
320338
explain: bool,
321339
) -> JsValue {
322-
let original_query = OriginalQuery::QueryString(query_string.to_string());
340+
if query_string.is_empty() {
341+
return JsValue::null();
342+
}
323343

344+
let original_query = OriginalQuery::QueryString(query_string.into());
324345
let mut query_context = QueryContext::new(original_query, explain);
325346
query_context.support_wildcard_in_field_name = true;
347+
// QueryContext::new sets these values to their default value, but we
348+
// should use the values passed by users.
326349
query_context.from = from;
327350
query_context.size = size;
328-
329-
if operator.trim().to_uppercase() == "AND" {
330-
query_context.default_operator = Operator::And;
331-
} else {
332-
query_context.default_operator = Operator::Or;
333-
}
334351
query_context.default_field = default_field.into();
352+
query_context.default_operator = {
353+
let uppercase = operator.trim().to_uppercase();
335354

336-
self.search(&query_context)
355+
if uppercase == "AND" {
356+
Operator::And
357+
} else if uppercase == "OR" {
358+
Operator::Or
359+
} else {
360+
// default to OR when it is invalid
361+
Operator::Or
362+
}
363+
};
364+
365+
// Parse the query
366+
let parsed_query = query_string_to_parsed_query(&self.searcher, &query_context);
367+
368+
self.search(&query_context, parsed_query)
337369
}
338370

339371
// #[cfg(feature = "query_dsl")]
@@ -355,3 +387,105 @@ impl Display for Pizza {
355387
Ok(())
356388
}
357389
}
390+
391+
/// Neutralise characters that must not reach the expanded sub-queries.
///
/// Every query-string syntax character listed in `ESCAPE_CHARS`, every
/// backslash and every control character is replaced by a single space, so the
/// result has the same number of characters as the input. All other characters
/// are copied unchanged.
///
/// Backslashes and control characters are included because the result is later
/// embedded in a JSON string literal, where a stray `\` or a raw control
/// character makes the document invalid.
fn escape_query_string(query_string: &str) -> alloc::string::String {
    const ESCAPE_CHARS: &[char] = &[
        '+', '-', '=', '>', '<', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':',
        '/', '\\',
    ];

    query_string
        .chars()
        .map(|c| {
            if ESCAPE_CHARS.contains(&c) || c.is_control() {
                ' '
            } else {
                c
            }
        })
        .collect()
}
402+
403+
/// Parse the query string stored in `query_context.original_query` to a boolean
404+
/// should query of `[QueryString, Prefix, Match, MatchPhrase]`.
405+
///
406+
/// `query_context` should contain a query string in its `original_query` field.
407+
fn query_string_to_parsed_query(
408+
searcher: &Searcher<MemoryStore>,
409+
query_context: &QueryContext,
410+
) -> ParsedQuery {
411+
use alloc::format;
412+
413+
// Parse query_string in case it is a structured query
414+
let mut parsed_query = match searcher
415+
.parse(query_context)
416+
.expect("query_context should contain original_query")
417+
{
418+
Ok(pq) => pq,
419+
Err(err) => {
420+
let error_message = format!("failed to parsing string: {}", err);
421+
422+
web_sys::console::log_1(&JsValue::from_str(&error_message));
423+
wasm_bindgen::throw_str(&error_message)
424+
}
425+
};
426+
427+
let raw_query_string = match query_context
428+
.original_query
429+
.as_ref()
430+
.expect("query_context should contain original_query")
431+
{
432+
OriginalQuery::QueryString(str) => str,
433+
_ => unreachable!("the input should be a query string"),
434+
};
435+
// Escape the character that should not appear in the query because we are
436+
// going to treat the whole query string as a single query
437+
let query_string = escape_query_string(raw_query_string);
438+
439+
let field = &query_context.default_field;
440+
441+
let prefix_query = Query::Prefix(PrefixQuery {
442+
field: field.clone(),
443+
value: Term::String(query_string.clone()),
444+
rewrite: Rewrite::default(),
445+
case_insensitive: false,
446+
});
447+
448+
let mut match_query_inner: MatchQuery = {
449+
let dsl = format!("{{ \"{field}\": \"{query_string}\" }}");
450+
serde_json::from_str(&dsl).unwrap_or_else(|e| {
451+
let error_message = format!("failed to parsing DSL: {}", e);
452+
453+
web_sys::console::log_1(&JsValue::from_str(&error_message));
454+
wasm_bindgen::throw_str(&error_message)
455+
})
456+
};
457+
// These 2 fields should default to the values specified in query_context,
458+
// not the value set by Serde.
459+
match_query_inner.operator = query_context.default_operator;
460+
match_query_inner.cross_fields_strategy = query_context.default_cross_fields_strategy.clone();
461+
let match_query = Query::Match(match_query_inner);
462+
463+
let match_phrase_query = Query::Phrase({
464+
let dsl = format!("{{ \"{field}\": \"{query_string}\" }}");
465+
serde_json::from_str(&dsl).unwrap_or_else(|e| {
466+
let error_message = format!("failed to parsing DSL: {}", e);
467+
468+
web_sys::console::log_1(&JsValue::from_str(&error_message));
469+
wasm_bindgen::throw_str(&error_message)
470+
})
471+
});
472+
473+
let query_string_query = parsed_query.query;
474+
475+
let boolean_query = Query::Boolean(BooleanQuery {
476+
minimum_should_match: 0,
477+
must: Vec::new(),
478+
filter: Vec::new(),
479+
must_not: Vec::new(),
480+
should: vec![
481+
query_string_query,
482+
prefix_query,
483+
match_query,
484+
match_phrase_query,
485+
],
486+
});
487+
488+
parsed_query.query = boolean_query;
489+
490+
parsed_query
491+
}

0 commit comments

Comments
 (0)