Skip to content

Commit f2bd8dd

Browse files
notriddle and weihanglo
committed
feat: support for rustdoc mergeable cross-crate info
This is an unstable feature that we designed to fix several performance problems with the old system: 1. You couldn't easily build crate docs in hermetic environments. This doesn't matter for Cargo, but it was one of the original reasons to implement the feature. 2. We have to build all the doc resources in their final form at every step, instead of delaying slow parts (mostly the search index) until the end and only doing them once. 3. It requires rustdoc to take a lock at the end. This reduces available concurrency for generating docs. A nightly feature `-Zrustdoc-mergeable-info` is added. Co-authored-by: Michael Howell <[email protected]> Co-authored-by: Weihang Lo <[email protected]>
1 parent 665c82f commit f2bd8dd

File tree

10 files changed

+484
-43
lines changed

10 files changed

+484
-43
lines changed

crates/cargo-test-support/src/compare.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ static E2E_LITERAL_REDACTIONS: &[(&str, &str)] = &[
338338
("[BLOCKING]", " Blocking"),
339339
("[GENERATED]", " Generated"),
340340
("[OPENING]", " Opening"),
341+
("[MERGING]", " Merging"),
341342
];
342343

343344
/// Checks that the given string contains the given contiguous lines

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ pub enum FileFlavor {
7979
DebugInfo,
8080
/// SBOM (Software Bill of Materials precursor) file (e.g. cargo-sbom.json).
8181
Sbom,
82+
/// Cross-crate info JSON files generated by rustdoc.
83+
DocParts,
8284
}
8385

8486
/// Type of each file generated by a Unit.

src/cargo/core/compiler/build_runner/compilation_files.rs

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
275275
self.layout(unit.kind).build_dir().deps(&dir)
276276
}
277277

278+
/// Returns the directories where Rust crate dependencies are found for the
279+
/// specified unit. (new layout)
280+
///
281+
/// New features should consider using this so we can avoid their migrations.
282+
pub fn deps_dir_new_layout(&self, unit: &Unit) -> PathBuf {
283+
let dir = self.pkg_dir(unit);
284+
self.layout(unit.kind).build_dir().deps_new_layout(&dir)
285+
}
286+
278287
/// Directory where the fingerprint for the given unit should go.
279288
pub fn fingerprint_dir(&self, unit: &Unit) -> PathBuf {
280289
let dir = self.pkg_dir(unit);
@@ -495,12 +504,27 @@ impl<'a, 'gctx: 'a> CompilationFiles<'a, 'gctx> {
495504
.join("index.html")
496505
};
497506

498-
vec![OutputFile {
507+
let mut outputs = vec![OutputFile {
499508
path,
500509
hardlink: None,
501510
export_path: None,
502511
flavor: FileFlavor::Normal,
503-
}]
512+
}];
513+
514+
if bcx.gctx.cli_unstable().rustdoc_mergeable_info {
515+
// `-Zrustdoc-mergeable-info` always uses the new layout.
516+
outputs.push(OutputFile {
517+
path: self
518+
.deps_dir_new_layout(unit)
519+
.join(unit.target.crate_name())
520+
.with_extension("json"),
521+
hardlink: None,
522+
export_path: None,
523+
flavor: FileFlavor::DocParts,
524+
})
525+
}
526+
527+
outputs
504528
}
505529
CompileMode::RunCustomBuild => {
506530
// At this time, this code path does not handle build script

src/cargo/core/compiler/build_runner/mod.rs

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
224224
}
225225
}
226226

227+
self.collect_doc_merge_info()?;
228+
227229
// Collect the result of the build into `self.compilation`.
228230
for unit in &self.bcx.roots {
229231
self.collect_tests_and_executables(unit)?;
@@ -329,6 +331,58 @@ impl<'a, 'gctx> BuildRunner<'a, 'gctx> {
329331
Ok(())
330332
}
331333

334+
fn collect_doc_merge_info(&mut self) -> CargoResult<()> {
335+
if !self.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
336+
return Ok(());
337+
}
338+
339+
if !self.bcx.build_config.intent.is_doc() {
340+
return Ok(());
341+
}
342+
343+
if self.bcx.build_config.intent.wants_doc_json_output() {
344+
// rustdoc JSON output doesn't support merge (yet?)
345+
return Ok(());
346+
}
347+
348+
let mut doc_parts_map: HashMap<_, Vec<_>> = HashMap::new();
349+
350+
let unit_iter = if self.bcx.build_config.intent.wants_deps_docs() {
351+
itertools::Either::Left(self.bcx.unit_graph.keys())
352+
} else {
353+
itertools::Either::Right(self.bcx.roots.iter())
354+
};
355+
356+
for unit in unit_iter {
357+
if !unit.mode.is_doc() {
358+
continue;
359+
}
360+
// Assumption: one `rustdoc` call generates only one cross-crate info JSON.
361+
let outputs = self.outputs(unit)?;
362+
363+
let Some(doc_parts) = outputs
364+
.iter()
365+
.find(|o| matches!(o.flavor, FileFlavor::DocParts))
366+
else {
367+
continue;
368+
};
369+
370+
doc_parts_map
371+
.entry(unit.kind)
372+
.or_default()
373+
.push(doc_parts.path.to_owned());
374+
}
375+
376+
self.compilation.rustdoc_fingerprints = Some(
377+
doc_parts_map
378+
.into_iter()
379+
.map(|(kind, doc_parts)| (kind, RustdocFingerprint::new(self, kind, doc_parts)))
380+
.collect(),
381+
);
382+
383+
Ok(())
384+
}
385+
332386
/// Returns the executable for the specified unit (if any).
333387
pub fn get_executable(&mut self, unit: &Unit) -> CargoResult<Option<PathBuf>> {
334388
let is_binary = unit.target.is_executable();

src/cargo/core/compiler/compilation.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use cargo_util::{ProcessBuilder, paths};
99

1010
use crate::core::Package;
1111
use crate::core::compiler::BuildContext;
12+
use crate::core::compiler::RustdocFingerprint;
1213
use crate::core::compiler::apply_env_config;
1314
use crate::core::compiler::{CompileKind, Unit, UnitHash};
1415
use crate::util::{CargoResult, GlobalContext, context};
@@ -106,6 +107,11 @@ pub struct Compilation<'gctx> {
106107
/// Libraries to test with rustdoc.
107108
pub to_doc_test: Vec<Doctest>,
108109

110+
/// Rustdoc fingerprint files to determine whether we need to run `rustdoc --merge=finalize`.
111+
///
112+
/// See `-Zrustdoc-mergeable-info` for more.
113+
pub rustdoc_fingerprints: Option<HashMap<CompileKind, RustdocFingerprint>>,
114+
109115
/// The target host triple.
110116
pub host: String,
111117

@@ -143,6 +149,7 @@ impl<'gctx> Compilation<'gctx> {
143149
root_crate_names: Vec::new(),
144150
extra_env: HashMap::new(),
145151
to_doc_test: Vec::new(),
152+
rustdoc_fingerprints: None,
146153
gctx: bcx.gctx,
147154
host: bcx.host_triple().to_string(),
148155
rustc_process,

src/cargo/core/compiler/fingerprint/rustdoc.rs

Lines changed: 140 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::collections::HashMap;
12
use std::path::Path;
23
use std::path::PathBuf;
34

@@ -15,6 +16,10 @@ use crate::core::compiler::CompileKind;
1516
struct RustdocFingerprintJson {
1617
/// `rustc -vV` verbose version output.
1718
pub rustc_vv: String,
19+
20+
/// Relative paths to cross crate info JSON files from previous `cargo doc` invocations.
21+
#[serde(default, skip_serializing_if = "Vec::is_empty")]
22+
pub doc_parts: Vec<PathBuf>,
1823
}
1924

2025
/// Structure used to deal with Rustdoc fingerprinting
@@ -29,7 +34,16 @@ struct RustdocFingerprintJson {
2934
/// they were compiled with the same Rustc version that we're currently using.
3035
/// Otherwise we must remove the `doc/` folder and compile again forcing a rebuild.
3136
#[derive(Debug)]
32-
pub struct RustdocFingerprint {}
37+
pub struct RustdocFingerprint {
38+
/// Path to the fingerprint file.
39+
path: PathBuf,
40+
/// `rustc -vV` verbose version output for the current session.
41+
rustc_vv: String,
42+
/// Absolute paths to new cross crate info JSON files generated in the current session.
43+
doc_parts: Vec<PathBuf>,
44+
/// The fingerprint file on disk.
45+
on_disk: Option<RustdocFingerprintJson>,
46+
}
3347

3448
impl RustdocFingerprint {
3549
/// Checks whether the latest version of rustc used to compile this workspace's docs
@@ -58,6 +72,7 @@ impl RustdocFingerprint {
5872
}
5973
let new_fingerprint = RustdocFingerprintJson {
6074
rustc_vv: build_runner.bcx.rustc().verbose_version.clone(),
75+
doc_parts: Vec::new(),
6176
};
6277

6378
for kind in &build_runner.bcx.build_config.requested_kinds {
@@ -66,6 +81,111 @@ impl RustdocFingerprint {
6681

6782
Ok(())
6883
}
84+
85+
/// Creates a new fingerprint with given doc parts paths.
86+
pub fn new(
87+
build_runner: &BuildRunner<'_, '_>,
88+
kind: CompileKind,
89+
doc_parts: Vec<PathBuf>,
90+
) -> Self {
91+
let path = fingerprint_path(build_runner, kind);
92+
let rustc_vv = build_runner.bcx.rustc().verbose_version.clone();
93+
let on_disk = load_on_disk(&path);
94+
Self {
95+
path,
96+
rustc_vv,
97+
doc_parts,
98+
on_disk,
99+
}
100+
}
101+
102+
/// Persists the fingerprint.
103+
///
104+
/// The closure will run before persisting the fingerprint,
105+
/// and will be given a list of doc parts directories for passing to
106+
/// `rustdoc --include-parts-dir`.
107+
pub fn persist<F>(&self, exec: F) -> CargoResult<()>
108+
where
109+
// 1. paths for `--include-parts-dir`
110+
F: Fn(&[&Path]) -> CargoResult<()>,
111+
{
112+
// Dedupe crate with the same name by file stem (which is effectively crate name),
113+
// since rustdoc doesn't distinguish different crate versions.
114+
//
115+
// Rules applied here:
116+
//
117+
// * If name collides, favor the one selected via CLI over cached ones
118+
// (done by the insertion order)
119+
let base = self.path.parent().unwrap();
120+
let on_disk_doc_parts: Vec<_> = self
121+
.on_disk
122+
.iter()
123+
.flat_map(|on_disk| {
124+
on_disk
125+
.doc_parts
126+
.iter()
127+
// Make absolute so that we can pass to rustdoc
128+
.map(|p| base.join(p))
129+
// Doc parts may be selectively cleaned by `cargo clean -p <doc>`.
130+
// We should stop caching those no-exist.
131+
.filter(|p| p.exists())
132+
})
133+
.collect();
134+
let dedup_map = on_disk_doc_parts
135+
.iter()
136+
.chain(self.doc_parts.iter())
137+
.map(|p| (p.file_stem(), p))
138+
.collect::<HashMap<_, _>>();
139+
let mut doc_parts: Vec<_> = dedup_map.into_values().collect();
140+
doc_parts.sort_unstable();
141+
142+
// Prepare args for `rustdoc --include-parts-dir`
143+
let doc_parts_dirs: Vec<_> = doc_parts.iter().map(|p| p.parent().unwrap()).collect();
144+
exec(&doc_parts_dirs)?;
145+
146+
// Persist with relative paths to the directory where fingerprint file is at.
147+
let json = RustdocFingerprintJson {
148+
rustc_vv: self.rustc_vv.clone(),
149+
doc_parts: doc_parts
150+
.iter()
151+
.map(|p| p.strip_prefix(base).unwrap_or(p).to_owned())
152+
.collect(),
153+
};
154+
paths::write(&self.path, serde_json::to_string(&json)?)?;
155+
156+
Ok(())
157+
}
158+
159+
/// Checks if the fingerprint is outdated comparing against given doc parts file paths.
160+
pub fn is_dirty(&self) -> bool {
161+
let Some(on_disk) = self.on_disk.as_ref() else {
162+
return true;
163+
};
164+
165+
let Some(fingerprint_mtime) = paths::mtime(&self.path).ok() else {
166+
return true;
167+
};
168+
169+
if self.rustc_vv != on_disk.rustc_vv {
170+
return true;
171+
}
172+
173+
for path in &self.doc_parts {
174+
let parts_mtime = match paths::mtime(&path) {
175+
Ok(mtime) => mtime,
176+
Err(e) => {
177+
tracing::debug!("failed to read mtime of {}: {e}", path.display());
178+
return true;
179+
}
180+
};
181+
182+
if parts_mtime > fingerprint_mtime {
183+
return true;
184+
}
185+
}
186+
187+
false
188+
}
69189
}
70190

71191
/// Returns the path to rustdoc fingerprint file for a given [`CompileKind`].
@@ -134,6 +254,25 @@ fn check_fingerprint(
134254
Ok(())
135255
}
136256

257+
/// Loads an on-disk fingerprint JSON file.
258+
fn load_on_disk(path: &Path) -> Option<RustdocFingerprintJson> {
259+
let on_disk = match paths::read(path) {
260+
Ok(data) => data,
261+
Err(e) => {
262+
tracing::debug!("failed to read rustdoc fingerprint at {path:?}: {e}");
263+
return None;
264+
}
265+
};
266+
267+
match serde_json::from_str::<RustdocFingerprintJson>(&on_disk) {
268+
Ok(on_disk) => Some(on_disk),
269+
Err(e) => {
270+
tracing::debug!("could not deserialize {path:?}: {e}");
271+
None
272+
}
273+
}
274+
}
275+
137276
fn clean_doc(path: &Path) -> CargoResult<()> {
138277
let entries = path
139278
.read_dir()

src/cargo/core/compiler/layout.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,11 +309,17 @@ impl BuildDirLayout {
309309
/// Fetch the deps path.
310310
pub fn deps(&self, pkg_dir: &str) -> PathBuf {
311311
if self.is_new_layout {
312-
self.build_unit(pkg_dir).join("deps")
312+
self.deps_new_layout(pkg_dir)
313313
} else {
314314
self.legacy_deps().to_path_buf()
315315
}
316316
}
317+
/// Fetch the deps path. (new layout)
318+
///
319+
/// New features should consider using this so we can avoid their migrations.
320+
pub fn deps_new_layout(&self, pkg_dir: &str) -> PathBuf {
321+
self.build_unit(pkg_dir).join("deps")
322+
}
317323
/// Fetch the deps path. (old layout)
318324
pub fn legacy_deps(&self) -> &Path {
319325
&self.deps

src/cargo/core/compiler/mod.rs

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -833,8 +833,13 @@ fn prepare_rustdoc(build_runner: &BuildRunner<'_, '_>, unit: &Unit) -> CargoResu
833833
if build_runner.bcx.gctx.cli_unstable().rustdoc_depinfo {
834834
// toolchain-shared-resources is required for keeping the shared styling resources
835835
// invocation-specific is required for keeping the original rustdoc emission
836-
let mut arg =
837-
OsString::from("--emit=toolchain-shared-resources,invocation-specific,dep-info=");
836+
let mut arg = if build_runner.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
837+
// toolchain resources are written at the end, at the same time as merging
838+
OsString::from("--emit=invocation-specific,dep-info=")
839+
} else {
840+
// if not using mergeable CCI, everything is written every time
841+
OsString::from("--emit=toolchain-shared-resources,invocation-specific,dep-info=")
842+
};
838843
arg.push(rustdoc_dep_info_loc(build_runner, unit));
839844
rustdoc.arg(arg);
840845

@@ -843,6 +848,19 @@ fn prepare_rustdoc(build_runner: &BuildRunner<'_, '_>, unit: &Unit) -> CargoResu
843848
}
844849

845850
rustdoc.arg("-Zunstable-options");
851+
} else if build_runner.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
852+
// toolchain resources are written at the end, at the same time as merging
853+
rustdoc.arg("--emit=invocation-specific");
854+
rustdoc.arg("-Zunstable-options");
855+
}
856+
857+
if build_runner.bcx.gctx.cli_unstable().rustdoc_mergeable_info {
858+
// write out mergeable data to be imported
859+
rustdoc.arg("--merge=none");
860+
let mut arg = OsString::from("--parts-out-dir=");
861+
// `-Zrustdoc-mergeable-info` always uses the new layout.
862+
arg.push(build_runner.files().deps_dir_new_layout(unit));
863+
rustdoc.arg(arg);
846864
}
847865

848866
if let Some(trim_paths) = unit.profile.trim_paths.as_ref() {

0 commit comments

Comments
 (0)