Skip to content

Commit 80bf923

Browse files
notriddleweihanglo
andcommitted
feat: support for rustdoc mergeable cross-crate info
This is an unstable feature that we designed to fix several performance problems with the old system: 1. You couldn't easily build crate docs in hermetic environments. This doesn't matter for Cargo, but it was one of the original reasons to implement the feature. 2. We have to build all the doc resources in their final form at every step, instead of delaying slow parts (mostly the search index) until the end and only doing them once. 3. It requires rustdoc to take a lock at the end. This reduces available concurrency for generating docs. A nightly feature `-Zrustdoc-mergeable-info` is added. Co-authored-by: Michael Howell <[email protected]> Co-authored-by: Weihang Lo <[email protected]>
1 parent 81bad57 commit 80bf923

File tree

10 files changed

+933
-9
lines changed

10 files changed

+933
-9
lines changed

crates/cargo-test-support/src/compare.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ static E2E_LITERAL_REDACTIONS: &[(&str, &str)] = &[
338338
("[BLOCKING]", " Blocking"),
339339
("[GENERATED]", " Generated"),
340340
("[OPENING]", " Opening"),
341+
("[MERGING]", " Merging"),
341342
];
342343

343344
/// Checks that the given string contains the given contiguous lines

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ pub enum FileFlavor {
7979
DebugInfo,
8080
/// SBOM (Software Bill of Materials precursor) file (e.g. cargo-sbom.json).
8181
Sbom,
82+
/// Cross-crate info JSON files generated by rustdoc.
83+
DocParts,
8284
}
8385

8486
/// Type of each file generated by a Unit.

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
275275
self.layout(unit.kind).build_dir().deps(&dir)
276276
}
277277

278+
/// Returns the directories where Rust crate dependencies are found for the
279+
/// specified unit. (new layout)
280+
///
281+
/// New features should consider using this so we can avoid their migrations.
282+
pub fn deps_dir_new_layout(&self, unit: &Unit) -> PathBuf {
283+
let dir = self.pkg_dir(unit);
284+
self.layout(unit.kind).build_dir().deps_new_layout(&dir)
285+
}
286+
278287
/// Directory where the fingerprint for the given unit should go.
279288
pub fn fingerprint_dir(&self, unit: &Unit) -> PathBuf {
280289
let dir = self.pkg_dir(unit);
@@ -495,12 +504,27 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
495504
.join("index.html")
496505
};
497506

498-
vec![OutputFile {
507+
let mut outputs = vec![OutputFile {
499508
path,
500509
hardlink: None,
501510
export_path: None,
502511
flavor: FileFlavor::Normal,
503-
}]
512+
}];
513+
514+
if bcx.gctx.cli_unstable().rustdoc_mergeable_info {
515+
// `-Zrustdoc-mergeable-info` always uses the new layout.
516+
outputs.push(OutputFile {
517+
path: self
518+
.deps_dir_new_layout(unit)
519+
.join(unit.target.crate_name())
520+
.with_extension("json"),
521+
hardlink: None,
522+
export_path: None,
523+
flavor: FileFlavor::DocParts,
524+
})
525+
}
526+
527+
outputs
504528
}
505529
CompileMode::RunCustomBuild => {
506530
// At this time, this code path does not handle build script

src/cargo/core/compiler/build_runner/mod.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
//! [`BuildRunner`] is the mutable state used during the build process.
22
3-
use std::collections::{HashMap, HashSet};
3+
use std::collections::HashMap;
4+
use std::collections::HashSet;
45
use std::path::{Path, PathBuf};
56
use std::sync::{Arc, Mutex};
67

@@ -224,6 +225,8 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
224225
}
225226
}
226227

228+
self.collect_doc_merge_info()?;
229+
227230
// Collect the result of the build into `self.compilation`.
228231
for unit in &self.bcx.roots {
229232
self.collect_tests_and_executables(unit)?;
@@ -329,6 +332,77 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
329332
Ok(())
330333
}
331334

335+
fn collect_doc_merge_info(&mut self) -> CargoResult<()> {
336+
if !self.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
337+
return Ok(());
338+
}
339+
340+
if !self.bcx.build_config.intent.is_doc() {
341+
return Ok(());
342+
}
343+
344+
if self.bcx.build_config.intent.wants_doc_json_output() {
345+
// rustdoc JSON output doesn't support merge (yet?)
346+
return Ok(());
347+
}
348+
349+
let mut doc_parts_map: HashMap<_, Vec<_>> = HashMap::new();
350+
351+
let unit_iter = if self.bcx.build_config.intent.wants_deps_docs() {
352+
itertools::Either::Left(self.bcx.unit_graph.keys())
353+
} else {
354+
itertools::Either::Right(self.bcx.roots.iter())
355+
};
356+
357+
for unit in unit_iter {
358+
if !unit.mode.is_doc() {
359+
continue;
360+
}
361+
// Assumption: one `rustdoc` call generates only one cross-crate info JSON.
362+
let outputs = self.outputs(unit)?;
363+
364+
let Some(doc_parts) = outputs
365+
.iter()
366+
.find(|o| matches!(o.flavor, FileFlavor::DocParts))
367+
else {
368+
continue;
369+
};
370+
371+
doc_parts_map
372+
.entry(unit.kind)
373+
.or_default()
374+
.push(doc_parts.path.to_owned());
375+
}
376+
377+
self.compilation.doc_merge = Some(HashMap::from_iter(doc_parts_map.into_iter().map(
378+
|(kind, doc_parts)| {
379+
let out_dir = self
380+
.files()
381+
.layout(kind)
382+
.artifact_dir()
383+
.expect("artifact-dir was not locked")
384+
.doc()
385+
.to_path_buf();
386+
387+
let Some(fingerprint) = RustdocFingerprint::load(self, kind) else {
388+
let info = compiler::DocMergeInfo::new(doc_parts, out_dir, None);
389+
return (kind, compiler::DocMerge::Merge(info));
390+
};
391+
392+
let doc_merge = if fingerprint.is_outdated(self, &doc_parts) {
393+
let info = compiler::DocMergeInfo::new(doc_parts, out_dir, Some(fingerprint));
394+
compiler::DocMerge::Merge(info)
395+
} else {
396+
compiler::DocMerge::Fresh
397+
};
398+
399+
(kind, doc_merge)
400+
},
401+
)));
402+
403+
Ok(())
404+
}
405+
332406
/// Returns the executable for the specified unit (if any).
333407
pub fn get_executable(&mut self, unit: &Unit) -> CargoResult<Option<PathBuf>> {
334408
let is_binary = unit.target.is_executable();

src/cargo/core/compiler/compilation.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
33
use std::collections::{BTreeSet, HashMap};
44
use std::ffi::{OsStr, OsString};
5+
use std::path::Path;
56
use std::path::PathBuf;
67

78
use cargo_platform::CfgExpr;
89
use cargo_util::{ProcessBuilder, paths};
910

1011
use crate::core::Package;
1112
use crate::core::compiler::BuildContext;
13+
use crate::core::compiler::RustdocFingerprint;
1214
use crate::core::compiler::apply_env_config;
1315
use crate::core::compiler::{CompileKind, Unit, UnitHash};
1416
use crate::util::{CargoResult, GlobalContext, context};
@@ -106,6 +108,11 @@ pub struct Compilation<'gctx> {
106108
/// Libraries to test with rustdoc.
107109
pub to_doc_test: Vec<Doctest>,
108110

111+
/// Compilation information for running `rustdoc --merge=finalize`.
112+
///
113+
/// See `-Zrustdoc-mergeable-info` for more.
114+
pub doc_merge: Option<HashMap<CompileKind, DocMerge>>,
115+
109116
/// The target host triple.
110117
pub host: String,
111118

@@ -143,6 +150,7 @@ impl<'gctx> Compilation<'gctx> {
143150
root_crate_names: Vec::new(),
144151
extra_env: HashMap::new(),
145152
to_doc_test: Vec::new(),
153+
doc_merge: None,
146154
gctx: bcx.gctx,
147155
host: bcx.host_triple().to_string(),
148156
rustc_process,
@@ -383,6 +391,67 @@ impl<'gctx> Compilation<'gctx> {
383391
}
384392
}
385393

394+
/// Whether `rustdoc --merge=finalize` output is stale or fresh.
395+
pub enum DocMerge {
396+
/// Nothing is stale.
397+
Fresh,
398+
/// Doc merge is required.
399+
Merge(DocMergeInfo),
400+
}
401+
402+
/// Compilation information for running `rustdoc --merge=finalize`.
403+
pub struct DocMergeInfo {
404+
/// Cross-crate info JSON files for each rustdoc invocation during this `cargo doc` call.
405+
doc_parts: Vec<PathBuf>,
406+
/// Output directory for rustdoc final artifacts.
407+
out_dir: PathBuf,
408+
/// Rustdoc fingerprint file information, if existing.
409+
fingerprint: Option<RustdocFingerprint>,
410+
}
411+
412+
impl DocMergeInfo {
413+
pub fn new(
414+
doc_parts: Vec<PathBuf>,
415+
out_dir: PathBuf,
416+
fingerprint: Option<RustdocFingerprint>,
417+
) -> Self {
418+
Self {
419+
doc_parts,
420+
out_dir,
421+
fingerprint,
422+
}
423+
}
424+
425+
/// Provides arguments for rustdoc cross-crate info finalization.
426+
pub fn finalize<F>(&self, exec: F) -> CargoResult<()>
427+
where
428+
// 1. paths for `--include-parts-dir`
429+
// 2. path for `--out-dir`
430+
F: Fn(&[&Path], &Path) -> CargoResult<()>,
431+
{
432+
let mut doc_parts: Vec<_> = self
433+
.doc_parts
434+
.iter()
435+
.chain(self.fingerprint.iter().flat_map(|f| f.doc_parts().iter()))
436+
.cloned()
437+
.collect();
438+
439+
doc_parts.sort_unstable();
440+
doc_parts.dedup();
441+
442+
// rustdoc needs the directory holding doc parts files.
443+
let parts_dirs: Vec<_> = doc_parts.iter().map(|p| p.parent().unwrap()).collect();
444+
445+
exec(&parts_dirs, &self.out_dir)?;
446+
447+
if let Some(fingerprint) = &self.fingerprint {
448+
fingerprint.persist(doc_parts)?;
449+
}
450+
451+
Ok(())
452+
}
453+
}
454+
386455
/// Prepares a `rustc_tool` process with additional environment variables
387456
/// that are only relevant in a context that has a unit
388457
fn fill_rustc_tool_env(mut cmd: ProcessBuilder, unit: &Unit) -> ProcessBuilder {

src/cargo/core/compiler/fingerprint/rustdoc.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::path::PathBuf;
33

44
use anyhow::Context as _;
55
use cargo_util::paths;
6+
use filetime::FileTime;
67
use serde::Deserialize;
78
use serde::Serialize;
89

@@ -15,6 +16,10 @@ use crate::core::compiler::CompileKind;
1516
struct RustdocFingerprintJson {
1617
/// `rustc -vV` verbose version output.
1718
pub rustc_vv: String,
19+
20+
/// Path to cross crate info JSON files from previous `cargo doc` invocations.
21+
#[serde(default, skip_serializing_if = "Vec::is_empty")]
22+
pub doc_parts: Vec<PathBuf>,
1823
}
1924

2025
/// Structure used to deal with Rustdoc fingerprinting
@@ -29,7 +34,13 @@ struct RustdocFingerprintJson {
2934
/// they were compiled with the same Rustc version that we're currently using.
3035
/// Otherwise we must remove the `doc/` folder and compile again forcing a rebuild.
3136
#[derive(Debug)]
32-
pub struct RustdocFingerprint {}
37+
pub struct RustdocFingerprint {
38+
/// File modified time when loading this fingerprint.
39+
mtime: Option<FileTime>,
40+
/// Path to this fingerprint file.
41+
path: PathBuf,
42+
fingerprint: RustdocFingerprintJson,
43+
}
3344

3445
impl RustdocFingerprint {
3546
/// Checks whether the latest version of rustc used to compile this workspace's docs
@@ -58,6 +69,7 @@ impl RustdocFingerprint {
5869
}
5970
let new_fingerprint = RustdocFingerprintJson {
6071
rustc_vv: build_runner.bcx.rustc().verbose_version.clone(),
72+
doc_parts: Vec::new(),
6173
};
6274

6375
for kind in &build_runner.bcx.build_config.requested_kinds {
@@ -66,6 +78,74 @@ impl RustdocFingerprint {
6678

6779
Ok(())
6880
}
81+
82+
/// Returns the path to rustdoc fingerprint file for a given [`CompileKind`].
83+
pub fn load(build_runner: &BuildRunner<'_, '_>, kind: CompileKind) -> Option<Self> {
84+
let path = fingerprint_path(build_runner, kind);
85+
let fingerprint = match paths::read(&path) {
86+
Ok(data) => data,
87+
Err(e) => {
88+
tracing::debug!("failed to read rustdoc fingerprint at {path:?}: {e}");
89+
return None;
90+
}
91+
};
92+
93+
match serde_json::from_str::<RustdocFingerprintJson>(&fingerprint) {
94+
Ok(mut fingerprint) => {
95+
// Doc parts may be selectively cleaned via `cargo clean -p <doc>`.
96+
// We should stop caching those.
97+
fingerprint.doc_parts.retain(|p| p.exists());
98+
Some(Self {
99+
mtime: paths::mtime(&path).ok(),
100+
path,
101+
fingerprint,
102+
})
103+
}
104+
Err(e) => {
105+
tracing::debug!("could not deserialize {:?}: {}", path, e);
106+
None
107+
}
108+
}
109+
}
110+
111+
/// Checks if the fingerprint is outdated comparing against given doc parts file paths.
112+
pub fn is_outdated(&self, build_runner: &BuildRunner<'_, '_>, doc_parts: &[PathBuf]) -> bool {
113+
let Some(fingerprint_mtime) = self.mtime.as_ref() else {
114+
return true;
115+
};
116+
117+
if self.fingerprint.rustc_vv != build_runner.bcx.rustc().verbose_version {
118+
return true;
119+
}
120+
121+
for path in doc_parts {
122+
let parts_mtime = match paths::mtime(&path) {
123+
Ok(mtime) => mtime,
124+
Err(e) => {
125+
tracing::debug!("failed to read mtime of {}: {e}", path.display());
126+
return true;
127+
}
128+
};
129+
130+
if &parts_mtime > fingerprint_mtime {
131+
return true;
132+
}
133+
}
134+
135+
false
136+
}
137+
138+
pub fn persist(&self, doc_parts: Vec<PathBuf>) -> CargoResult<()> {
139+
let new_fingerprint = RustdocFingerprintJson {
140+
rustc_vv: self.fingerprint.rustc_vv.clone(),
141+
doc_parts,
142+
};
143+
paths::write(&self.path, serde_json::to_string(&new_fingerprint)?)
144+
}
145+
146+
pub fn doc_parts(&self) -> &[PathBuf] {
147+
&self.fingerprint.doc_parts
148+
}
69149
}
70150

71151
/// Returns the path to rustdoc fingerprint file for a given [`CompileKind`].

src/cargo/core/compiler/layout.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,17 @@ impl BuildDirLayout {
309309
/// Fetch the deps path.
310310
pub fn deps(&self, pkg_dir: &str) -> PathBuf {
311311
if self.is_new_layout {
312-
self.build_unit(pkg_dir).join("deps")
312+
self.deps_new_layout(pkg_dir)
313313
} else {
314314
self.legacy_deps().to_path_buf()
315315
}
316316
}
317+
/// Fetch the deps path. (new layout)
318+
///
319+
/// New features should consider using this so we can avoid their migrations.
320+
pub fn deps_new_layout(&self, pkg_dir: &str) -> PathBuf {
321+
self.build_unit(pkg_dir).join("deps")
322+
}
317323
/// Fetch the deps path. (old layout)
318324
pub fn legacy_deps(&self) -> &Path {
319325
&self.deps

0 commit comments

Comments
 (0)