diff --git a/crates/dropshot-api-manager-types/src/validation.rs b/crates/dropshot-api-manager-types/src/validation.rs index 2ef7fe4..dd9a91c 100644 --- a/crates/dropshot-api-manager-types/src/validation.rs +++ b/crates/dropshot-api-manager-types/src/validation.rs @@ -201,6 +201,57 @@ impl ApiSpecFileName { _ => self.clone(), } } + + /// Converts a `Versioned` to its `VersionedGitRef` equivalent. + /// + /// For `VersionedGitRef` files, returns a clone of self. + /// For `Lockstep` files, returns a clone of self (lockstep files are not + /// converted to git refs). + pub fn to_git_ref_filename(&self) -> ApiSpecFileName { + match &self.kind { + ApiSpecFileNameKind::Versioned { version, hash } => { + ApiSpecFileName::new( + self.ident.clone(), + ApiSpecFileNameKind::VersionedGitRef { + version: version.clone(), + hash: hash.clone(), + }, + ) + } + _ => self.clone(), + } + } + + /// Returns the basename for this file as a git ref. + /// + /// - If this is already a `VersionedGitRef`, returns `basename()` directly. + /// - If this is a `Versioned`, returns `basename() + ".gitref"`. + /// - For `Lockstep`, returns `basename()` (lockstep files are not converted + /// to git refs). + pub fn git_ref_basename(&self) -> String { + match &self.kind { + ApiSpecFileNameKind::VersionedGitRef { .. } => self.basename(), + ApiSpecFileNameKind::Versioned { .. } => { + format!("{}.gitref", self.basename()) + } + ApiSpecFileNameKind::Lockstep => self.basename(), + } + } + + /// Returns the basename for this file as a JSON file. + /// + /// - If this is a `VersionedGitRef`, returns the basename without the + /// `.gitref` suffix. + /// - Otherwise, returns `basename()` directly. + pub fn json_basename(&self) -> String { + match &self.kind { + ApiSpecFileNameKind::Versioned { .. } => self.basename(), + ApiSpecFileNameKind::VersionedGitRef { version, hash } => { + format!("{}-{}-{}.json", self.ident, version, hash) + } + ApiSpecFileNameKind::Lockstep => self.basename(), + } + } } /// Describes how a particular OpenAPI document is named. diff --git a/crates/dropshot-api-manager/src/git.rs b/crates/dropshot-api-manager/src/git.rs index 29ccca6..c623601 100644 --- a/crates/dropshot-api-manager/src/git.rs +++ b/crates/dropshot-api-manager/src/git.rs @@ -367,11 +367,55 @@ impl GitRef { ) -> anyhow::Result> { git_show_file(repo_root, &GitRevision::from(self.commit), &self.path) } + + /// Returns the canonical file contents for this git ref. + /// + /// The canonical format is `commit:path\n` where: + /// - The path uses forward slashes (even on Windows). + /// - The file ends with a single newline. + pub fn to_file_contents(&self) -> String { + format!("{}\n", self) + } + + /// Returns whether the given file contents representing a _valid_ Git ref + /// file need to be rewritten. + /// + /// File contents need rewriting if they don't match the canonical format: + /// + /// - Missing trailing newline. + /// - Contains backslashes in the path. + /// - Has extra whitespace. + /// + /// This static method is used to detect files that are valid but need + /// normalization. + pub fn needs_rewrite(contents: &str) -> bool { + // Check for missing trailing newline. + if !contents.ends_with('\n') { + return true; + } + + // Check for extra trailing newlines or other whitespace issues. + let trimmed = contents.trim(); + if trimmed.len() + 1 != contents.len() { + return true; + } + + // Check for backslashes in the path (after the colon). + if let Some((_commit, path)) = trimmed.split_once(':') { + if path.contains('\\') { + return true; + } + } + + false + } } impl fmt::Display for GitRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}:{}", self.commit, self.path) + // Always use forward slashes for paths, even on Windows. + let path = self.path.as_str().replace('\\', "/"); + write!(f, "{}:{}", self.commit, path) } } @@ -380,10 +424,19 @@ impl FromStr for GitRef { fn from_str(s: &str) -> Result { let s = s.trim(); + if s.is_empty() { + return Err(GitRefParseError::EmptyInput); + } let (commit, path) = s .split_once(':') .ok_or_else(|| GitRefParseError::InvalidFormat(s.to_owned()))?; let commit: GitCommitHash = commit.parse()?; + // Normalize backslashes to forward slashes for cross-platform + // compatibility. + let path = path.replace('\\', "/"); + if path.is_empty() { + return Err(GitRefParseError::EmptyPath); + } Ok(GitRef { commit, path: Utf8PathBuf::from(path) }) } } @@ -392,13 +445,24 @@ impl FromStr for GitRef { #[derive(Debug, Error)] #[non_exhaustive] pub enum GitRefParseError { + /// The input was empty or contained only whitespace. + #[error("git ref file is empty")] + EmptyInput, + /// The git ref string did not contain the expected 'commit:path' format. - #[error("invalid git ref format: expected 'commit:path', got {0}")] + #[error( + "invalid git ref format: expected 'commit:path', got {0:?} \ + (missing ':' separator)" + )] InvalidFormat(String), /// The commit hash in the git ref was invalid. - #[error("invalid commit hash")] + #[error("invalid commit hash in git ref")] InvalidCommitHash(#[from] CommitHashParseError), + + /// The path component was empty. + #[error("git ref has empty path (nothing after ':')")] + EmptyPath, } #[cfg(test)] @@ -515,4 +579,98 @@ mod tests { let parsed = s.parse::().unwrap(); assert_eq!(git_ref, parsed); } + + #[test] + fn test_git_ref_to_file_contents() { + let git_ref = GitRef { + commit: VALID_SHA1.parse().unwrap(), + path: Utf8PathBuf::from("path/to/file.json"), + }; + let contents = git_ref.to_file_contents(); + let expected = format!("{}:path/to/file.json\n", VALID_SHA1); + assert_eq!(contents, expected, "should have trailing newline"); + } + + #[test] + fn test_git_ref_display_forward_slashes() { + // Even if the path internally has backslashes, display should use + // forward slashes. + let git_ref = GitRef { + commit: VALID_SHA1.parse().unwrap(), + path: Utf8PathBuf::from("path\\to\\file.json"), + }; + let s = git_ref.to_string(); + assert!( + !s.contains('\\'), + "display should convert backslashes to forward slashes" + ); + assert!(s.contains("path/to/file.json")); + } + + #[test] + fn test_git_ref_parse_normalizes_backslashes() { + // Parsing should normalize backslashes to forward slashes. + let input = format!("{}:path\\to\\file.json", VALID_SHA1); + let git_ref = input.parse::().unwrap(); + assert_eq!( + git_ref.path.as_str(), + "path/to/file.json", + "backslashes should be normalized to forward slashes" + ); + } + + #[test] + fn test_git_ref_parse_error_variants() { + // Empty input. + let result = "".parse::(); + assert!(matches!(result, Err(GitRefParseError::EmptyInput))); + + // Whitespace-only input. + let result = " \n ".parse::(); + assert!(matches!(result, Err(GitRefParseError::EmptyInput))); + + // Empty path (valid commit hash but nothing after colon). + let input = format!("{}:", VALID_SHA1); + let result = input.parse::(); + assert!(matches!(result, Err(GitRefParseError::EmptyPath))); + } + + #[test] + fn test_git_ref_needs_rewrite() { + // Canonical format: forward slashes, single trailing newline. + let canonical = format!("{}:path/to/file.json\n", VALID_SHA1); + assert!( + !GitRef::needs_rewrite(&canonical), + "canonical format should not need rewrite" + ); + + // Missing trailing newline. + let missing_newline = format!("{}:path/to/file.json", VALID_SHA1); + assert!( + GitRef::needs_rewrite(&missing_newline), + "missing trailing newline should need rewrite" + ); + + // Extra trailing newlines. + let extra_newlines = format!("{}:path/to/file.json\n\n", VALID_SHA1); + assert!( + GitRef::needs_rewrite(&extra_newlines), + "extra trailing newlines should need rewrite" + ); + + // Leading whitespace. + let leading_whitespace = + format!(" {}:path/to/file.json\n", VALID_SHA1); + assert!( + GitRef::needs_rewrite(&leading_whitespace), + "leading whitespace should need rewrite" + ); + + // Backslashes in path. + let backslashes = format!("{}:path\\to\\file.json\n", VALID_SHA1); + assert!( + GitRef::needs_rewrite(&backslashes), + "backslashes in path should need rewrite" + ); + } } diff --git a/crates/dropshot-api-manager/src/resolved.rs b/crates/dropshot-api-manager/src/resolved.rs index ce80050..c04f5a0 100644 --- a/crates/dropshot-api-manager/src/resolved.rs +++ b/crates/dropshot-api-manager/src/resolved.rs @@ -563,27 +563,25 @@ impl Fix<'_> { Fix::UpdateSymlink { .. } => {} Fix::ConvertToGitRef { local_file, .. } => { // Writes to the .gitref path, not the JSON path. - let json_path = local_file.spec_file_name().path(); - paths - .insert(Utf8PathBuf::from(format!("{}.gitref", json_path))); + paths.insert( + local_file.spec_file_name().to_git_ref_filename().path(), + ); } Fix::ConvertToJson { local_file, .. } => { - // Writes to the JSON path (removing .gitref suffix). - let git_ref_path = local_file.spec_file_name().path(); - if let Some(json_path) = - git_ref_path.as_str().strip_suffix(".gitref") - { - paths.insert(Utf8PathBuf::from(json_path)); - } + // Writes to the JSON path. + paths.insert( + local_file.spec_file_name().to_json_filename().path(), + ); } Fix::RegenerateFromBlessed { local_file, git_ref, .. } => { if git_ref.is_some() { - // When regenerating as git ref, writes to a .gitref file. - let json_path = local_file.spec_file_name().path(); - paths.insert(Utf8PathBuf::from(format!( - "{}.gitref", - json_path - ))); + // Writes to a .gitref file. + paths.insert( + local_file + .spec_file_name() + .to_git_ref_filename() + .path(), + ); } else { // Overwrites the corrupted local file. paths.insert(local_file.spec_file_name().path().to_owned()); @@ -667,21 +665,18 @@ impl Fix<'_> { Fix::ConvertToGitRef { local_file, git_ref } => { let json_path = root.join(local_file.spec_file_name().path()); - let git_ref_basename = format!( - "{}.gitref", - local_file.spec_file_name().basename() - ); + let git_ref_basename = + local_file.spec_file_name().git_ref_basename(); let git_ref_path = json_path .parent() .ok_or_else(|| anyhow!("cannot get parent directory"))? .join(&git_ref_basename); - // Write the git ref file. Add a trailing newline so diffs don't - // have the "\ No newline at end of file" message. Otherwise, - // the extra newline has no impact on usability or correctness. + // Write the git ref file in canonical format (forward slashes, + // trailing newline). let overwrite_status = overwrite_file( &git_ref_path, - format!("{}\n", git_ref).as_bytes(), + git_ref.to_file_contents().as_bytes(), )?; // Remove the original JSON file. @@ -700,17 +695,7 @@ impl Fix<'_> { // valid. let contents = blessed.contents(); - // Compute the JSON file path by removing the .gitref suffix. - let git_ref_basename = local_file.spec_file_name().basename(); - let json_basename = git_ref_basename - .strip_suffix(".gitref") - .ok_or_else(|| { - anyhow!( - "expected git ref file to end with .gitref: {}", - git_ref_basename - ) - })?; - + let json_basename = local_file.spec_file_name().json_basename(); let json_path = git_ref_path .parent() .ok_or_else(|| anyhow!("cannot get parent directory"))? @@ -737,19 +722,17 @@ impl Fix<'_> { if let Some(git_ref) = git_ref { // Write as a git ref file. - let git_ref_basename = format!( - "{}.gitref", - local_file.spec_file_name().basename() - ); + let git_ref_basename = + local_file.spec_file_name().git_ref_basename(); let git_ref_path = local_path .parent() .ok_or_else(|| anyhow!("cannot get parent directory"))? .join(&git_ref_basename); - // Add a trailing newline for clean diffs. + // Write in canonical format (forward slashes, trailing newline). let overwrite_status = overwrite_file( &git_ref_path, - format!("{}\n", git_ref).as_bytes(), + git_ref.to_file_contents().as_bytes(), )?; Ok(vec![ diff --git a/crates/dropshot-api-manager/src/spec_files_generic.rs b/crates/dropshot-api-manager/src/spec_files_generic.rs index 075eff5..1fdccfa 100644 --- a/crates/dropshot-api-manager/src/spec_files_generic.rs +++ b/crates/dropshot-api-manager/src/spec_files_generic.rs @@ -759,6 +759,63 @@ impl<'a, T: ApiLoad + AsRawFiles> ApiSpecFilesBuilder<'a, T> { } } + /// Record a file as unparseable without attempting to parse it. + /// + /// This is used for files that are known to be invalid before attempting + /// JSON parsing (e.g., git ref files with invalid format). The `reason` + /// error is recorded as a warning. + /// + /// For contexts where unparseable files are allowed (local files), this + /// tracks the file so it can be cleaned up during generate. For other + /// contexts, the error is recorded. + pub fn load_unparseable( + &mut self, + file_name: ApiSpecFileName, + contents: Vec, + reason: anyhow::Error, + ) { + match T::make_unparseable(file_name.clone(), contents) { + Some(unparseable) => { + // For local files, track the unparseable file so it can be + // cleaned up during generate. Record a warning so the user + // knows about it. + self.load_warning(reason.context("skipping unparseable file")); + + // Can the file be associated with a version? + if let Some(version) = file_name.version() { + let ident = file_name.ident().clone(); + let entry = self + .spec_files + .entry(ident) + .or_insert_with(ApiFiles::new) + .spec_files + .entry(version.clone()); + + match entry { + Entry::Vacant(vacant_entry) => { + vacant_entry + .insert(T::unparseable_into_self(unparseable)); + } + Entry::Occupied(mut occupied_entry) => { + occupied_entry + .get_mut() + .extend_unparseable(unparseable); + } + } + } else { + // No version info, fall back to old behavior. + self.record_unparseable_file( + file_name.ident().clone(), + UnparseableFile { path: file_name.path().to_owned() }, + ); + } + } + None => { + self.load_error(reason); + } + } + } + /// Record an unparseable file for later cleanup. fn record_unparseable_file( &mut self, diff --git a/crates/dropshot-api-manager/src/spec_files_local.rs b/crates/dropshot-api-manager/src/spec_files_local.rs index b29daad..de881b8 100644 --- a/crates/dropshot-api-manager/src/spec_files_local.rs +++ b/crates/dropshot-api-manager/src/spec_files_local.rs @@ -129,7 +129,7 @@ impl AsRawFiles for Vec { } } -/// Container for OpenAPI documents found in the local working tree +/// Container for OpenAPI documents found in the local working tree. /// /// **Be sure to check for load errors and warnings before using this /// structure.** @@ -350,17 +350,38 @@ fn load_versioned_directory( } }; + // Parse the git ref. If parsing fails or the file needs rewriting, + // mark it as unparseable so it will be deleted and regenerated. let git_ref = match git_ref_contents.parse::() { Ok(git_ref) => git_ref, Err(error) => { - api_files.load_error(anyhow!(error).context(format!( - "failed to parse git ref file {:?}", - entry.path() - ))); + api_files.load_unparseable( + spec_file_name, + git_ref_contents.into_bytes(), + anyhow!(error).context(format!( + "git ref file {:?} could not be parsed", + entry.path() + )), + ); continue; } }; + // Check if the file needs to be rewritten to canonical format. + // If so, treat it as unparseable so it gets deleted and regenerated. + if GitRef::needs_rewrite(&git_ref_contents) { + api_files.load_unparseable( + spec_file_name, + git_ref_contents.into_bytes(), + anyhow!( + "git ref file {:?} needs to be rewritten to canonical \ + format (forward slashes, trailing newline)", + entry.path() + ), + ); + continue; + } + let contents = match git_ref.read_contents(repo_root) { Ok(contents) => contents, Err(error) => { diff --git a/crates/integration-tests/tests/integration/git_ref.rs b/crates/integration-tests/tests/integration/git_ref.rs index 18ebc34..b70d394 100644 --- a/crates/integration-tests/tests/integration/git_ref.rs +++ b/crates/integration-tests/tests/integration/git_ref.rs @@ -1694,3 +1694,220 @@ fn rename_conflict_blessed_verify( Ok(()) } + +/// Test that unparseable git ref files (e.g., with conflict markers) are +/// detected and regenerated. +#[test] +fn test_unparseable_git_ref_regenerated() -> Result<()> { + let env = TestEnvironment::new()?; + + let v1_v2_apis = versioned_health_reduced_git_ref_apis()?; + env.generate_documents(&v1_v2_apis)?; + env.commit_documents()?; + let v1_v2_commit = env.get_current_commit_hash_full()?; + + env.make_unrelated_commit("intermediate")?; + + let v1_v2_v3_apis = versioned_health_git_ref_apis()?; + env.generate_documents(&v1_v2_v3_apis)?; + env.commit_documents()?; + + assert!( + env.versioned_git_ref_exists("versioned-health", "1.0.0")?, + "v1 should be a git ref" + ); + + let v1_git_ref_path = env + .find_versioned_git_ref_path("versioned-health", "1.0.0")? + .expect("v1 git ref should exist"); + let corrupted_content = "<<<<<<< HEAD\n\ +abc123:documents/versioned-health/old.json\n\ +=======\n\ +def456:documents/versioned-health/new.json\n\ +>>>>>>> branch\n"; + env.create_file(&v1_git_ref_path, corrupted_content)?; + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!( + result, + CheckResult::NeedsUpdate, + "check should report needs update for unparseable git ref" + ); + + env.generate_documents(&v1_v2_v3_apis)?; + + assert!( + env.versioned_git_ref_exists("versioned-health", "1.0.0")?, + "v1 git ref should be regenerated" + ); + + let v1_git_ref = env.read_versioned_git_ref("versioned-health", "1.0.0")?; + let v1_commit = v1_git_ref.trim().split(':').next().unwrap(); + assert_eq!( + v1_commit, v1_v2_commit, + "regenerated v1 git ref should point to the original commit" + ); + + let v1_content = env.read_git_ref_content("versioned-health", "1.0.0")?; + assert!( + v1_content.contains("\"openapi\""), + "regenerated git ref content should be valid OpenAPI" + ); + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!(result, CheckResult::Success); + + Ok(()) +} + +/// Test that git ref files with non-canonical format (backslashes, missing +/// trailing newline) are regenerated. +#[test] +fn test_non_canonical_git_ref_regenerated() -> Result<()> { + let env = TestEnvironment::new()?; + + let v1_v2_apis = versioned_health_reduced_git_ref_apis()?; + env.generate_documents(&v1_v2_apis)?; + env.commit_documents()?; + let v1_v2_commit = env.get_current_commit_hash_full()?; + + env.make_unrelated_commit("intermediate")?; + + let v1_v2_v3_apis = versioned_health_git_ref_apis()?; + env.generate_documents(&v1_v2_v3_apis)?; + env.commit_documents()?; + + assert!( + env.versioned_git_ref_exists("versioned-health", "1.0.0")?, + "v1 should be a git ref" + ); + + let v1_git_ref_path = env + .find_versioned_git_ref_path("versioned-health", "1.0.0")? + .expect("v1 git ref should exist"); + let original_content = env.read_file(&v1_git_ref_path)?; + let original_content = original_content.trim(); + + let (commit, path) = original_content + .split_once(':') + .expect("git ref should have commit:path format"); + let non_canonical_path = path.replace('/', "\\"); + let non_canonical_content = format!("{}:{}", commit, non_canonical_path); + env.create_file(&v1_git_ref_path, &non_canonical_content)?; + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!( + result, + CheckResult::NeedsUpdate, + "check should report needs update for non-canonical git ref" + ); + + env.generate_documents(&v1_v2_v3_apis)?; + + let v1_git_ref = env.read_versioned_git_ref("versioned-health", "1.0.0")?; + assert!( + v1_git_ref.ends_with('\n'), + "regenerated git ref should have trailing newline" + ); + assert!( + !v1_git_ref.contains('\\'), + "regenerated git ref should use forward slashes" + ); + + let v1_commit = v1_git_ref.trim().split(':').next().unwrap(); + assert_eq!( + v1_commit, v1_v2_commit, + "regenerated v1 git ref should point to the original commit" + ); + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!(result, CheckResult::Success); + + Ok(()) +} + +/// Test that an empty git ref file is detected and regenerated. +#[test] +fn test_empty_git_ref_regenerated() -> Result<()> { + let env = TestEnvironment::new()?; + + let v1_v2_apis = versioned_health_reduced_git_ref_apis()?; + env.generate_documents(&v1_v2_apis)?; + env.commit_documents()?; + + env.make_unrelated_commit("intermediate")?; + + let v1_v2_v3_apis = versioned_health_git_ref_apis()?; + env.generate_documents(&v1_v2_v3_apis)?; + env.commit_documents()?; + + let v1_git_ref_path = env + .find_versioned_git_ref_path("versioned-health", "1.0.0")? + .expect("v1 git ref should exist"); + env.create_file(&v1_git_ref_path, "")?; + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!( + result, + CheckResult::NeedsUpdate, + "check should report needs update for empty git ref" + ); + + env.generate_documents(&v1_v2_v3_apis)?; + + let v1_git_ref = env.read_versioned_git_ref("versioned-health", "1.0.0")?; + assert!( + v1_git_ref.contains(':'), + "regenerated git ref should have commit:path format" + ); + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!(result, CheckResult::Success); + + Ok(()) +} + +/// Test that a git ref file with an invalid commit hash is regenerated. +#[test] +fn test_invalid_commit_hash_git_ref_regenerated() -> Result<()> { + let env = TestEnvironment::new()?; + + let v1_v2_apis = versioned_health_reduced_git_ref_apis()?; + env.generate_documents(&v1_v2_apis)?; + env.commit_documents()?; + + env.make_unrelated_commit("intermediate")?; + + let v1_v2_v3_apis = versioned_health_git_ref_apis()?; + env.generate_documents(&v1_v2_v3_apis)?; + env.commit_documents()?; + + let v1_git_ref_path = env + .find_versioned_git_ref_path("versioned-health", "1.0.0")? + .expect("v1 git ref should exist"); + let invalid_content = + "not-a-valid-hash:documents/versioned-health/file.json\n"; + env.create_file(&v1_git_ref_path, invalid_content)?; + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!( + result, + CheckResult::NeedsUpdate, + "check should report needs update for invalid commit hash" + ); + + env.generate_documents(&v1_v2_v3_apis)?; + + let v1_git_ref = env.read_versioned_git_ref("versioned-health", "1.0.0")?; + let v1_commit = v1_git_ref.trim().split(':').next().unwrap(); + assert_eq!( + v1_commit.len(), + 40, + "regenerated git ref should have valid SHA-1 commit hash" + ); + + let result = check_apis_up_to_date(env.environment(), &v1_v2_v3_apis)?; + assert_eq!(result, CheckResult::Success); + + Ok(()) +}