Skip to content

Commit d6f0fd2

Browse files
parallelize set operations
1 parent 41cf010 commit d6f0fd2

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed

benches/criterion.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ fn query(c: &mut Criterion) {
9292
fn read_nt_benchmarks(c: &mut Criterion) {
9393
let mut group = c.benchmark_group("dictionary_read_nt");
9494
group.sample_size(10);
95-
let test_file = "tests/resources/tax.nt";
95+
let test_file = "tests/resources/persondata_en.nt";
9696

9797
// Benchmark 1: N-Triples parsing
9898
group.bench_function("nt_parsing", |b| {

src/four_sect_dict.rs

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,29 @@ impl FourSectDict {
213213
block_size: usize,
214214
) -> Self {
215215
use log::warn;
216-
217216
if predicate_terms.is_empty() {
218217
warn!("no triples found in provided RDF");
219218
}
220219
let predicate_terms_ref: BTreeSet<&str> = predicate_terms.iter().map(std::ops::Deref::deref).collect();
221220

222-
let shared_terms: BTreeSet<&str> =
223-
subject_terms.intersection(object_terms).map(std::ops::Deref::deref).collect();
224-
let unique_subject_terms: BTreeSet<&str> =
225-
subject_terms.difference(object_terms).map(std::ops::Deref::deref).collect();
226-
let unique_object_terms: BTreeSet<&str> =
227-
object_terms.difference(subject_terms).map(std::ops::Deref::deref).collect();
221+
let [shared_terms, unique_subject_terms, unique_object_terms]: [BTreeSet<&str>; 3] =
222+
std::thread::scope(|s| {
223+
[
224+
s.spawn(|| subject_terms.intersection(object_terms).map(std::ops::Deref::deref).collect()),
225+
s.spawn(|| subject_terms.difference(object_terms).map(std::ops::Deref::deref).collect()),
226+
s.spawn(|| object_terms.difference(subject_terms).map(std::ops::Deref::deref).collect()),
227+
]
228+
.map(|t| t.join().unwrap())
229+
});
230+
231+
let (shared, subjects, predicates, objects) = (
232+
DictSectPFC::compress(&shared_terms, block_size),
233+
DictSectPFC::compress(&unique_subject_terms, block_size),
234+
DictSectPFC::compress(&predicate_terms_ref, block_size),
235+
DictSectPFC::compress(&unique_object_terms, block_size),
236+
);
228237

238+
/*
229239
// Parallelize dictionary compression using rayon
230240
let ((shared, predicates), (subjects, objects)) = rayon::join(
231241
|| {
@@ -241,6 +251,15 @@ impl FourSectDict {
241251
)
242252
},
243253
);
254+
*/
255+
/*
256+
use std::thread;
257+
let [shared, predicates, subjects, objects] = thread::scope(|s| {
258+
[&shared_terms, &predicate_terms_ref, &unique_subject_terms, &unique_object_terms]
259+
.map(|terms| s.spawn(|| DictSectPFC::compress(terms, block_size)))
260+
.map(|t| t.join().unwrap())
261+
});
262+
*/
244263

245264
FourSectDict { shared, subjects, predicates, objects }
246265
}

0 commit comments

Comments
 (0)