Skip to content

Commit fe9fbda

Browse files
authored
remove leftover info logs. update HDT type support comments. make it easier to write a cache file (#92)
1 parent f365753 commit fe9fbda

File tree

4 files changed

+8
-14
lines changed

4 files changed

+8
-14
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
A Rust library for the [Header Dictionary Triples](https://www.rdfhdt.org/) compressed RDF format, including:
1313

14-
* loading the HDT default format as created by [hdt-cpp](https://github.com/rdfhdt/hdt-cpp)
14+
* loading the HDT default format as created by this library or [hdt-cpp](https://github.com/rdfhdt/hdt-cpp)
1515
* converting N-Triples to HDT
1616
* efficient querying by triple patterns
1717
* serializing into other formats like RDF Turtle and N-Triples using the [Sophia](https://crates.io/crates/sophia) adapter

src/hdt.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl Hdt {
6767
}
6868

6969
/// Creates an immutable HDT instance containing the dictionary and triples from the given reader.
70-
/// The reader must point to the beginning of the data of an HDT file as produced by hdt-cpp.
70+
/// The reader must point to the beginning of the data of an HDT file.
7171
/// FourSectionDictionary with DictionarySectionPlainFrontCoding and SPO order is the only supported implementation.
7272
/// The format is specified at <https://www.rdfhdt.org/hdt-binary-format/>, however there are some deviations.
7373
/// The initial HDT specification at <http://www.w3.org/Submission/2011/03/> is outdated and not supported.
@@ -99,7 +99,7 @@ impl Hdt {
9999

100100
/// Creates an immutable HDT instance containing the dictionary and triples from the Path.
101101
/// Will utilize a custom cached TriplesBitmap file if one exists, or create one if it does not.
102-
/// The file path must point to the beginning of the data of an HDT file as produced by hdt-cpp.
102+
/// The file path must point to the beginning of the data of an HDT file.
103103
/// FourSectionDictionary with DictionarySectionPlainFrontCoding and SPO order is the only supported implementation.
104104
/// The format is specified at <https://www.rdfhdt.org/hdt-binary-format/>, however there are some deviations.
105105
/// The initial HDT specification at <http://www.w3.org/Submission/2011/03/> is outdated and not supported.
@@ -179,7 +179,9 @@ impl Hdt {
179179
}
180180

181181
#[cfg(feature = "cache")]
182-
fn write_cache(
182+
/// Writes a custom cache file to improve load times. This cache file is usable only by
183+
/// this library and is not intended to be used with hdt-cpp or hdt-java versions of the HDT tooling.
184+
pub fn write_cache(
183185
index_file_path: &std::path::PathBuf, triples: &TriplesBitmap, header_length: usize,
184186
) -> core::result::Result<(), Box<dyn std::error::Error>> {
185187
let new_index_file = File::create(index_file_path)?;

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
//!
1010
//! HDT is a loading and triple pattern querying library for the [Header Dictionary Triples](https://www.rdfhdt.org/) compressed binary RDF format.
1111
//!
12-
//! Currently this library only supports loading and querying existing HDT files as created by [hdt-cpp](https://github.com/rdfhdt/hdt-cpp).
12+
//! Currently this library only supports loading and querying existing HDT files as created by this library or [hdt-cpp](https://github.com/rdfhdt/hdt-cpp).
1313
//! For reference implementations of HDT in C++ and Java, which support conversion and serialization from and into HDT with different format options,
1414
//! and acknowledgement of all the original authors, please look at the <https://github.com/rdfhdt> organisation.
1515
//!

src/nt.rs

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,13 @@ use crate::{DictSectPFC, FourSectDict, IdKind};
66
use bitset_core::BitSet;
77
use bytesize::ByteSize;
88
use lasso::{Key, Spur, ThreadedRodeo};
9-
use log::{debug, error, info};
9+
use log::{debug, error};
1010
use oxttl::NTriplesParser;
1111
use rayon::prelude::*;
1212
use std::collections::BTreeSet;
1313
use std::path::Path;
1414
use std::sync::Arc;
1515
use std::thread;
16-
use std::time::Instant;
1716

1817
pub type Result<T> = std::io::Result<T>;
1918
type Simd = [u64; 4];
@@ -126,9 +125,7 @@ struct IndexPool {
126125
/// read N-Triples and convert them to a dictionary and triple IDs
127126
fn read_dict_triples(path: &Path, block_size: usize) -> Result<(FourSectDict, Vec<TripleId>)> {
128127
// 1. Parse N-Triples and collect terms using string interning
129-
let timer = Instant::now();
130128
let mut pool = parse_nt_terms(path)?;
131-
let parse_time = timer.elapsed();
132129

133130
// Sort and deduplicate triples in parallel with dictionary building
134131
let mut triples = std::mem::take(&mut pool.triples); // not needed anymore
@@ -139,12 +136,9 @@ fn read_dict_triples(path: &Path, block_size: usize) -> Result<(FourSectDict, Ve
139136
})?;
140137

141138
// 2. Build dictionary from term indices
142-
let timer = Instant::now();
143139
let dict = build_dict_from_terms(&pool, block_size);
144-
let dict_build_time = timer.elapsed();
145140

146141
// 3. Encode triples to IDs using dictionary
147-
let timer = Instant::now();
148142
let sorted_triple_indices = sorter.join().unwrap();
149143
let refs: &[[usize; 3]] = &sorted_triple_indices;
150144
let encoded_triples: Vec<TripleId> = refs
@@ -165,8 +159,6 @@ fn read_dict_triples(path: &Path, block_size: usize) -> Result<(FourSectDict, Ve
165159
})
166160
.collect();
167161

168-
info!("{parse_time:?},{dict_build_time:?},{:?}", timer.elapsed());
169-
170162
Ok((dict, encoded_triples))
171163
}
172164

0 commit comments

Comments
 (0)