Skip to content

Commit 47e2638

Browse files
authored
VER: Release 0.34.0
2 parents 3d2f268 + 2328653 commit 47e2638

File tree

6 files changed

+168
-42
lines changed

6 files changed

+168
-42
lines changed

CHANGELOG.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
# Changelog
22

3-
## 0.33.1 - TBD
3+
## 0.34.0 - 2025-09-23
4+
5+
### Enhancements
6+
- Added batch download retry, resumption, and checksum verification
7+
- Changed setter for `batch::DownloadParams` to accept any `impl ToString` for
8+
`filename_to_download`
9+
10+
### Breaking changes
11+
- Changed `sha2` and `hex` to required dependencies
12+
13+
## 0.33.1 - 2025-08-26
414

515
### Enhancements
616
- Upgraded DBN version to 0.41.0:

Cargo.toml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[package]
22
name = "databento"
33
authors = ["Databento <[email protected]>"]
4-
version = "0.33.1"
4+
version = "0.34.0"
55
edition = "2021"
66
repository = "https://github.com/databento/databento-rs"
77
description = "Official Databento client library"
@@ -28,35 +28,35 @@ historical = [
2828
"dep:serde_json",
2929
"tokio/fs"
3030
]
31-
live = ["dep:hex", "dep:sha2", "tokio/net"]
31+
live = ["tokio/net"]
3232

3333
[dependencies]
3434
dbn = { version = "0.41.0", features = ["async", "serde"] }
3535

3636
async-compression = { version = "0.4", features = ["tokio", "zstd"], optional = true }
3737
# Async stream trait
3838
futures = { version = "0.3", optional = true }
39-
# Used for Live authentication
40-
hex = { version = "0.4", optional = true }
39+
# Used for Live authentication and historical checksums
40+
hex = "0.4"
4141
reqwest = { version = "0.12", optional = true, features = ["json", "stream"], default-features = false }
4242
serde = { version = "1.0", optional = true, features = ["derive"] }
4343
serde_json = { version = "1.0", optional = true }
44-
# Used for Live authentication
45-
sha2 = { version = "0.10", optional = true }
44+
# Used for Live authentication and historical checksums
45+
sha2 = "0.10"
4646
thiserror = "2.0"
4747
time = { version = ">=0.3.35", features = ["macros", "parsing", "serde"] }
4848
tokio = { version = ">=1.38", features = ["io-util", "macros"] }
4949
# Stream utils
5050
tokio-util = { version = "0.7", features = ["io"], optional = true }
5151
tracing = "0.1"
52-
typed-builder = "0.21"
52+
typed-builder = "0.22"
5353

5454
[dev-dependencies]
55-
anyhow = "1.0.99"
55+
anyhow = "1.0.100"
5656
async-compression = { version = "0.4", features = ["tokio", "zstd"] }
57-
clap = { version = "4.5.44", features = ["derive"] }
57+
clap = { version = "4.5.48", features = ["derive"] }
5858
rstest = "0.26.1"
59-
tempfile = "3.20.0"
59+
tempfile = "3.23.0"
6060
tokio = { version = "1.47", features = ["full"] }
61-
tracing-subscriber = "0.3.19"
61+
tracing-subscriber = "0.3.20"
6262
wiremock = "0.6"

src/historical/batch.rs

Lines changed: 135 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
//! The historical batch download API.
22
3-
use core::fmt;
43
use std::{
4+
cmp::Ordering,
55
collections::HashMap,
6+
fmt,
67
fmt::Write,
78
num::NonZeroU64,
89
path::{Path, PathBuf},
@@ -11,11 +12,13 @@ use std::{
1112

1213
use dbn::{Compression, Encoding, SType, Schema};
1314
use futures::StreamExt;
15+
use hex::ToHex;
1416
use reqwest::RequestBuilder;
1517
use serde::{de, Deserialize, Deserializer};
18+
use sha2::{Digest, Sha256};
1619
use time::OffsetDateTime;
1720
use tokio::io::BufWriter;
18-
use tracing::info;
21+
use tracing::{debug, error, info, info_span, warn, Instrument};
1922
use typed_builder::TypedBuilder;
2023

2124
use crate::{historical::check_http_error, Error, Symbols};
@@ -144,10 +147,11 @@ impl BatchClient<'_> {
144147
.urls
145148
.get("https")
146149
.ok_or_else(|| Error::internal("Missing https URL for batch file"))?;
147-
self.download_file(https_url, &output_path).await?;
150+
self.download_file(https_url, &output_path, &file_desc.hash, file_desc.size)
151+
.await?;
148152
Ok(vec![output_path])
149153
} else {
150-
let mut paths = Vec::new();
154+
let mut paths = Vec::with_capacity(job_files.len());
151155
for file_desc in job_files.iter() {
152156
let output_path = params
153157
.output_dir
@@ -157,31 +161,136 @@ impl BatchClient<'_> {
157161
.urls
158162
.get("https")
159163
.ok_or_else(|| Error::internal("Missing https URL for batch file"))?;
160-
self.download_file(https_url, &output_path).await?;
164+
self.download_file(https_url, &output_path, &file_desc.hash, file_desc.size)
165+
.await?;
161166
paths.push(output_path);
162167
}
163168
Ok(paths)
164169
}
165170
}
166171

167-
async fn download_file(&mut self, url: &str, path: impl AsRef<Path>) -> crate::Result<()> {
172+
async fn download_file(
173+
&mut self,
174+
url: &str,
175+
path: &Path,
176+
hash: &str,
177+
exp_size: u64,
178+
) -> crate::Result<()> {
179+
const MAX_RETRIES: usize = 5;
168180
let url = reqwest::Url::parse(url)
169181
.map_err(|e| Error::internal(format!("Unable to parse URL: {e:?}")))?;
170-
let resp = self.inner.get_with_path(url.path())?.send().await?;
171-
let mut stream = check_http_error(resp).await?.bytes_stream();
172-
info!(%url, path=%path.as_ref().display(), "Downloading file");
173-
let mut output = BufWriter::new(
174-
tokio::fs::OpenOptions::new()
175-
.create(true)
176-
.truncate(true)
177-
.write(true)
178-
.open(path)
179-
.await?,
180-
);
181-
while let Some(chunk) = stream.next().await {
182-
tokio::io::copy(&mut chunk?.as_ref(), &mut output).await?;
182+
183+
let Some((hash_algo, exp_hash_hex)) = hash.split_once(':') else {
184+
return Err(Error::internal(format!("Unexpected hash string format {hash:?}")));
185+
};
186+
let mut hasher = if hash_algo == "sha256" {
187+
Some(Sha256::new())
188+
} else {
189+
warn!(
190+
hash_algo,
191+
"Skipping checksum with unsupported hash algorithm"
192+
);
193+
None
194+
};
195+
196+
let span = info_span!("BatchDownload", %url, path=%path.display());
197+
async move {
198+
let mut retries = 0;
199+
'retry: loop {
200+
let mut req = self.inner.get_with_path(url.path())?;
201+
match Self::check_if_exists(path, exp_size).await? {
202+
Header::Skip => {
203+
return Ok(());
204+
}
205+
Header::Range(Some((key, val))) => {
206+
req = req.header(key, val);
207+
}
208+
Header::Range(None) => {}
209+
}
210+
let resp = req.send().await?;
211+
let mut stream = check_http_error(resp).await?.bytes_stream();
212+
info!("Downloading file");
213+
let mut output = BufWriter::new(
214+
tokio::fs::OpenOptions::new()
215+
.create(true)
216+
.append(true)
217+
.write(true)
218+
.open(path)
219+
.await?,
220+
);
221+
while let Some(chunk) = stream.next().await {
222+
let chunk = match chunk {
223+
Ok(chunk) => chunk,
224+
Err(err) if retries < MAX_RETRIES => {
225+
retries += 1;
226+
error!(?err, retries, "Retrying download");
227+
continue 'retry;
228+
}
229+
Err(err) => {
230+
return Err(crate::Error::from(err));
231+
}
232+
};
233+
if retries > 0 {
234+
retries = 0;
235+
info!("Resumed download");
236+
}
237+
if let Some(hasher) = hasher.as_mut() {
238+
hasher.update(&chunk)
239+
}
240+
tokio::io::copy(&mut chunk.as_ref(), &mut output).await?;
241+
}
242+
debug!("Completed download");
243+
Self::verify_hash(hasher, exp_hash_hex).await;
244+
return Ok(());
245+
}
246+
}
247+
.instrument(span)
248+
.await
249+
}
250+
251+
/// Inspects any file already present at `path` and decides how the download
/// should proceed, based on its size relative to `exp_size`.
///
/// Returns:
/// - `Header::Range(None)` when the file's metadata cannot be read. Any
///   metadata error is treated this way, not only "file not found".
/// - `Header::Range(Some(("Range", "bytes=<len>-")))` when the existing file is
///   smaller than `exp_size`, so the request can ask for the remaining bytes.
/// - `Header::Skip` when the existing file is exactly `exp_size` bytes.
///
/// # Errors
/// Returns `Error::Io` when the existing file is larger than `exp_size`.
async fn check_if_exists(path: &Path, exp_size: u64) -> crate::Result<Header> {
252+
// No readable metadata: no range header is produced.
let Ok(metadata) = tokio::fs::metadata(path).await else {
253+
return Ok(Header::Range(None));
254+
};
255+
let actual_size = metadata.len();
256+
match actual_size.cmp(&exp_size) {
257+
// Partial file: log and fall through to build the range header below.
Ordering::Less => {
258+
debug!(
259+
prev_downloaded_bytes = actual_size,
260+
total_bytes = exp_size,
261+
"Found existing file, resuming download"
262+
);
263+
}
264+
// Only the size is compared here; file contents are not inspected.
Ordering::Equal => {
265+
debug!("Skipping download as file already exists and matches expected size");
266+
return Ok(Header::Skip);
267+
}
268+
Ordering::Greater => {
269+
return Err(crate::Error::Io(std::io::Error::other(format!(
270+
"Batch file {} already exists with size {actual_size} which is larger than expected size {exp_size}",
271+
// NOTE(review): `file_name()` returns `None` for paths ending in `..`, in
// which case this `unwrap` panics; presumably callers always pass a path
// with a final file component — confirm.
path.file_name().unwrap().display(),
272+
))));
273+
}
274+
}
275+
// Open-ended byte range starting at the current file length.
Ok(Header::Range(Some((
276+
"Range",
277+
format!("bytes={}-", metadata.len()),
278+
))))
279+
}
280+
281+
async fn verify_hash(hasher: Option<Sha256>, exp_hash_hex: &str) {
282+
let Some(hasher) = hasher else {
283+
return;
284+
};
285+
let hash_hex = hasher.finalize().encode_hex::<String>();
286+
if hash_hex != exp_hash_hex {
287+
warn!(
288+
hash_hex,
289+
exp_hash_hex, "Downloaded file failed checksum validation"
290+
);
291+
} else {
292+
debug!("Successfully verified checksum");
183293
}
184-
Ok(())
185294
}
186295

187296
const PATH_PREFIX: &'static str = "batch";
@@ -403,7 +512,7 @@ pub struct DownloadParams {
403512
#[builder(setter(transform = |dt: impl ToString| dt.to_string()))]
404513
pub job_id: String,
405514
/// `None` means all files associated with the job will be downloaded.
406-
#[builder(default, setter(strip_option))]
515+
#[builder(default, setter(transform = |filename: impl ToString| Some(filename.to_string())))]
407516
pub filename_to_download: Option<String>,
408517
}
409518

@@ -542,6 +651,11 @@ fn deserialize_compression<'de, D: serde::Deserializer<'de>>(
542651
Ok(opt.unwrap_or(Compression::None))
543652
}
544653

654+
/// Result of checking for an existing file before a batch file download
/// (returned by `check_if_exists` and matched on in `download_file`).
enum Header {
655+
/// The file is already present with the expected size; `download_file`
/// returns `Ok(())` without sending the request.
Skip,
656+
/// Proceed with the download. `Some((name, value))` is a header that
/// `download_file` adds to the request; `None` adds no header.
Range(Option<(&'static str, String)>),
657+
}
658+
545659
#[cfg(test)]
546660
mod tests {
547661
use reqwest::StatusCode;

src/historical/client.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ pub struct Client {
2626
key: ApiKey,
2727
base_url: Url,
2828
gateway: HistoricalGateway,
29-
uprade_policy: VersionUpgradePolicy,
29+
upgrade_policy: VersionUpgradePolicy,
3030
client: reqwest::Client,
3131
}
3232

@@ -110,7 +110,7 @@ impl Client {
110110
}
111111

112112
pub(crate) fn upgrade_policy(&self) -> VersionUpgradePolicy {
113-
self.uprade_policy
113+
self.upgrade_policy
114114
}
115115

116116
pub(crate) fn get(&mut self, slug: &str) -> crate::Result<RequestBuilder> {
@@ -319,7 +319,7 @@ impl ClientBuilder<ApiKey> {
319319
key: self.key,
320320
base_url,
321321
gateway: self.gateway,
322-
uprade_policy: self.upgrade_policy,
322+
upgrade_policy: self.upgrade_policy,
323323
client: reqwest::ClientBuilder::new()
324324
.user_agent(user_agent)
325325
.default_headers(headers)

src/historical/metadata.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ impl FromStr for FeedMode {
358358
"historical-streaming" => Ok(Self::HistoricalStreaming),
359359
"live" => Ok(Self::Live),
360360
_ => Err(crate::Error::internal(format_args!(
361-
"Unabled to convert {s} to FeedMode"
361+
"Unable to convert {s} to FeedMode"
362362
))),
363363
}
364364
}

src/live.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,17 @@ pub struct Subscription {
2626
/// The symbology type of the symbols in [`symbols`](Self::symbols).
2727
#[builder(default = SType::RawSymbol)]
2828
pub stype_in: SType,
29-
/// If specified, requests available data since that time (inclusive), based on
30-
/// [`ts_event`](dbn::RecordHeader::ts_event). When `None`, only real-time data is sent.
29+
/// The inclusive start of subscription replay.
30+
/// Pass [`OffsetDateTime::UNIX_EPOCH`](time::OffsetDateTime::UNIX_EPOCH) to request all available data.
31+
/// When `None`, only real-time data is sent.
3132
///
32-
/// Setting this field is not supported once the session has been started with
33-
/// [`LiveClient::start`](crate::LiveClient::start).
33+
/// Cannot be specified after the session is started with [`LiveClient::start`](crate::LiveClient::start).
34+
/// See [`Intraday Replay`](https://databento.com/docs/api-reference-live/basics/intraday-replay).
3435
#[builder(default, setter(strip_option))]
3536
pub start: Option<OffsetDateTime>,
3637
#[doc(hidden)]
37-
/// Request subscription with snapshot. Defaults to `false`. Conflicts with the `start` parameter.
38+
/// Request subscription with snapshot. Only supported with `Mbo` schema.
39+
/// Defaults to `false`. Conflicts with the `start` parameter.
3840
#[builder(setter(strip_bool))]
3941
pub use_snapshot: bool,
4042
/// The optional numerical identifier associated with this subscription.

0 commit comments

Comments
 (0)