From 5ea8e2629f4cd5b6f53a75a4083ca6c28460ab16 Mon Sep 17 00:00:00 2001
From: Matt Brown
Date: Fri, 24 Oct 2025 12:23:12 -0400
Subject: [PATCH] add native cpio extractor

Device nodes take their numbers from c_rdevmajor/c_rdevminor, and entry
data is carved using the unpadded c_filesize.
---
 src/extractors.rs      |   1 +
 src/extractors/cpio.rs | 145 +++++++++++++++++++++++++++++++++++++++++
 src/magic.rs           |   2 +-
 src/structures/cpio.rs | 138 ++++++++++++++++++++++++++++++---------
 4 files changed, 253 insertions(+), 33 deletions(-)
 create mode 100644 src/extractors/cpio.rs

diff --git a/src/extractors.rs b/src/extractors.rs
index 04a121260..3f80b746b 100644
--- a/src/extractors.rs
+++ b/src/extractors.rs
@@ -146,6 +146,7 @@ pub mod bmp;
 pub mod bzip2;
 pub mod cab;
 pub mod common;
+pub mod cpio;
 pub mod csman;
 pub mod dahua_zip;
 pub mod dmg;
diff --git a/src/extractors/cpio.rs b/src/extractors/cpio.rs
new file mode 100644
index 000000000..3689991d3
--- /dev/null
+++ b/src/extractors/cpio.rs
@@ -0,0 +1,145 @@
+use crate::common::is_offset_safe;
+use crate::extractors::common::{Chroot, ExtractionResult, Extractor, ExtractorType};
+use crate::structures::cpio::{is_executable, parse_cpio_entry_header, CPIOFileType};
+use log::warn;
+
+/// Name of the entry that marks the end of a CPIO archive
+const EOF_MARKER: &str = "TRAILER!!!";
+
+/// Defines the internal extractor function for CPIO archives
+pub fn cpio_extractor() -> Extractor {
+    Extractor {
+        utility: ExtractorType::Internal(extract_cpio),
+        ..Default::default()
+    }
+}
+
+/// Internal extractor for CPIO archives.
+///
+/// Walks the entry headers starting at `offset` until the `TRAILER!!!` entry is found; only
+/// then are `success` and `size` set. If `output_directory` is given, every collected entry
+/// is extracted and `success` is cleared again when none of them could be written.
+pub fn extract_cpio(
+    file_data: &[u8],
+    offset: usize,
+    output_directory: Option<&str>,
+) -> ExtractionResult {
+    let mut result = ExtractionResult {
+        ..Default::default()
+    };
+
+    let available_data = file_data.len();
+    let mut next_offset = offset;
+    let mut previous_offset = None;
+    let mut total_size: usize = 0;
+    let mut entries: Vec<CPIOEntry> = vec![];
+
+    while is_offset_safe(available_data, next_offset, previous_offset) {
+        let header = match file_data.get(next_offset..).map(parse_cpio_entry_header) {
+            Some(Ok(header)) => header,
+            _ => break,
+        };
+
+        // Both sizes already include their alignment padding
+        let entry_total_size = header.header_size + header.data_size;
+        total_size += entry_total_size;
+
+        if header.file_name == EOF_MARKER {
+            result.success = true;
+            result.size = Some(total_size);
+            break;
+        }
+
+        entries.push(CPIOEntry {
+            data_offset: next_offset + header.header_size,
+            // Unpadded size, so alignment bytes are never written into extracted files
+            data_size: header.file_size,
+            name: header.file_name,
+            file_type: header.file_type,
+            mode: header.mode,
+            dev_major: header.dev_major,
+            dev_minor: header.dev_minor,
+        });
+
+        previous_offset = Some(next_offset);
+        next_offset += entry_total_size;
+    }
+
+    if result.success && output_directory.is_some() {
+        let chroot = Chroot::new(output_directory);
+
+        let extracted_count = entries
+            .iter()
+            .filter(|entry| extract_cpio_entry(file_data, entry, &chroot))
+            .count();
+
+        if extracted_count == 0 {
+            result.success = false;
+        }
+    }
+
+    result
+}
+
+/// Metadata needed to recreate one archive entry on disk
+#[derive(Debug, Clone)]
+struct CPIOEntry {
+    name: String,
+    file_type: CPIOFileType,
+    mode: usize,
+    /// Offset of the entry's data, relative to the start of the file data
+    data_offset: usize,
+    /// Size of the entry's data, excluding alignment padding
+    data_size: usize,
+    dev_major: usize,
+    dev_minor: usize,
+}
+
+/// Recreates a single CPIO entry inside the chroot; returns true on success
+fn extract_cpio_entry(file_data: &[u8], entry: &CPIOEntry, chroot: &Chroot) -> bool {
+    let file_path = &entry.name;
+
+    let extraction_success = match entry.file_type {
+        CPIOFileType::Directory => chroot.create_directory(file_path),
+        CPIOFileType::Regular => {
+            chroot.carve_file(file_path, file_data, entry.data_offset, entry.data_size)
+        }
+        CPIOFileType::Symlink => {
+            // The entry's data is the link target; cut it at the first NUL byte, if any
+            let target = file_data
+                .get(entry.data_offset..entry.data_offset + entry.data_size)
+                .map(|raw| raw.iter().copied().take_while(|&b| b != 0).collect::<Vec<u8>>())
+                .and_then(|bytes| String::from_utf8(bytes).ok());
+
+            match target {
+                Some(target) => chroot.create_symlink(file_path, target),
+                None => {
+                    warn!("Failed to parse symlink target for {}", file_path);
+                    false
+                }
+            }
+        }
+        CPIOFileType::Fifo => chroot.create_fifo(file_path),
+        CPIOFileType::Socket => chroot.create_socket(file_path),
+        CPIOFileType::BlockDevice => {
+            chroot.create_block_device(file_path, entry.dev_major, entry.dev_minor)
+        }
+        CPIOFileType::CharDevice => {
+            chroot.create_character_device(file_path, entry.dev_major, entry.dev_minor)
+        }
+        CPIOFileType::Unknown => {
+            warn!("Unknown file type for {}", file_path);
+            false
+        }
+    };
+
+    if extraction_success {
+        if entry.file_type == CPIOFileType::Regular && is_executable(entry.mode) {
+            chroot.make_executable(file_path);
+        }
+    } else {
+        warn!("Failed to extract CPIO entry: {}", file_path);
+    }
+
+    extraction_success
+}
diff --git a/src/magic.rs b/src/magic.rs
index 6f129ccef..543396446 100644
--- a/src/magic.rs
+++ b/src/magic.rs
@@ -168,7 +168,7 @@ pub fn patterns() -> Vec {
             magic: signatures::cpio::cpio_magic(),
             parser: signatures::cpio::cpio_parser,
             description: signatures::cpio::DESCRIPTION.to_string(),
-            extractor: Some(extractors::sevenzip::sevenzip_extractor()),
+            extractor: Some(extractors::cpio::cpio_extractor()),
         },
         // iso9660 primary volume
         signatures::common::Signature {
diff --git a/src/structures/cpio.rs b/src/structures/cpio.rs
index 070431472..03d23f649 100644
--- a/src/structures/cpio.rs
+++ b/src/structures/cpio.rs
@@ -10,14 +10,44 @@ pub struct CPIOEntryHeader {
     pub data_size: usize,
     pub file_name: String,
     pub header_size: usize,
+    /// Size of the entry's data without alignment padding (`data_size` includes it)
+    pub file_size: usize,
+    pub mode: usize,
+    pub file_type: CPIOFileType,
+    pub dev_major: usize,
+    pub dev_minor: usize,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CPIOFileType {
+    Regular,
+    Directory,
+    Symlink,
+    BlockDevice,
+    CharDevice,
+    Fifo,
+    Socket,
+    Unknown,
+}
+
+impl Default for CPIOFileType {
+    fn default() -> Self {
+        CPIOFileType::Unknown
+    }
 }

 /// Parses a CPIO entry header
 pub fn parse_cpio_entry_header(cpio_data: &[u8]) -> Result<CPIOEntryHeader, StructureError> {
-    // Some expected constants
     const NULL_BYTE_SIZE: usize = 1;
     const CPIO_MAGIC_START: usize = 0;
     const CPIO_MAGIC_END: usize = 6;
+    const MODE_START: usize = 14;
+    const MODE_END: usize = 22;
+    // c_rdevmajor/c_rdevminor describe the device node itself; bytes 22..38 are c_uid/c_gid
+    const DEV_MAJOR_START: usize = 78;
+    const DEV_MAJOR_END: usize = 86;
+    const DEV_MINOR_START: usize = 86;
+    const DEV_MINOR_END: usize = 94;
     const FILE_SIZE_START: usize = 54;
     const FILE_SIZE_END: usize = 62;
     const FILE_NAME_SIZE_START: usize = 94;
@@ -25,42 +55,56 @@ pub fn parse_cpio_entry_header(cpio_data: &[u8]) -> Result CPIO_HEADER_SIZE {
-        // Grab the CPIO header magic bytes
         let header_magic = cpio_data[CPIO_MAGIC_START..CPIO_MAGIC_END].to_vec();

-        // Get the ASCII hex string representing the file's data size
-        if let Ok(file_data_size_str) =
-            String::from_utf8(cpio_data[FILE_SIZE_START..FILE_SIZE_END].to_vec())
-        {
-            // Convert the file data size from ASCII hex to an integer
-            if let Ok(file_data_size) = usize::from_str_radix(&file_data_size_str, 16) {
-                // Get the ASCII hex string representing the file name's size
-                if let Ok(file_name_size_str) =
-                    String::from_utf8(cpio_data[FILE_NAME_SIZE_START..FILE_NAME_SIZE_END].to_vec())
+        if let Ok(mode_str) = String::from_utf8(cpio_data[MODE_START..MODE_END].to_vec()) {
+            if let Ok(mode) = usize::from_str_radix(&mode_str, 16) {
+                if let Ok(dev_major_str) =
+                    String::from_utf8(cpio_data[DEV_MAJOR_START..DEV_MAJOR_END].to_vec())
                 {
-                    // Convert the file name size from ASCII hex to an integer
-                    if let Ok(file_name_size) = usize::from_str_radix(&file_name_size_str, 16) {
-                        // The file name immediately follows the fixed-length header data.
-                        let file_name_start: usize = CPIO_HEADER_SIZE;
-                        let file_name_end: usize =
-                            file_name_start + file_name_size - NULL_BYTE_SIZE;
-
-                        // Get the file name
-                        if let Some(file_name_raw_bytes) =
-                            cpio_data.get(file_name_start..file_name_end)
+                    if let Ok(dev_major) = usize::from_str_radix(&dev_major_str, 16) {
+                        if let Ok(dev_minor_str) =
+                            String::from_utf8(cpio_data[DEV_MINOR_START..DEV_MINOR_END].to_vec())
                         {
-                            if let Ok(file_name) = String::from_utf8(file_name_raw_bytes.to_vec()) {
-                                let header_total_size = CPIO_HEADER_SIZE + file_name_size;
+                            if let Ok(dev_minor) = usize::from_str_radix(&dev_minor_str, 16) {
+                                if let Ok(file_data_size_str) =
+                                    String::from_utf8(cpio_data[FILE_SIZE_START..FILE_SIZE_END].to_vec())
+                                {
+                                    if let Ok(file_data_size) = usize::from_str_radix(&file_data_size_str, 16) {
+                                        if let Ok(file_name_size_str) =
+                                            String::from_utf8(cpio_data[FILE_NAME_SIZE_START..FILE_NAME_SIZE_END].to_vec())
+                                        {
+                                            if let Ok(file_name_size) = usize::from_str_radix(&file_name_size_str, 16) {
+                                                let file_name_start: usize = CPIO_HEADER_SIZE;
+                                                let file_name_end: usize =
+                                                    file_name_start + file_name_size - NULL_BYTE_SIZE;
+
+                                                if let Some(file_name_raw_bytes) =
+                                                    cpio_data.get(file_name_start..file_name_end)
+                                                {
+                                                    if let Ok(file_name) = String::from_utf8(file_name_raw_bytes.to_vec()) {
+                                                        let header_total_size = CPIO_HEADER_SIZE + file_name_size;
+                                                        let file_type = parse_file_type(mode);

-                                return Ok(CPIOEntryHeader {
-                                    magic: header_magic.clone(),
-                                    file_name: file_name.clone(),
-                                    data_size: file_data_size + byte_padding(file_data_size),
-                                    header_size: header_total_size
-                                        + byte_padding(header_total_size),
-                                });
+                                                        return Ok(CPIOEntryHeader {
+                                                            magic: header_magic.clone(),
+                                                            file_name: file_name.clone(),
+                                                            data_size: file_data_size + byte_padding(file_data_size),
+                                                            file_size: file_data_size,
+                                                            header_size: header_total_size
+                                                                + byte_padding(header_total_size),
+                                                            mode,
+                                                            file_type,
+                                                            dev_major,
+                                                            dev_minor,
+                                                        });
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
                             }
                         }
                     }
@@ -72,8 +116,38 @@ pub fn parse_cpio_entry_header(cpio_data: &[u8]) -> Result usize {
     let modulus: usize = n % 4;
     if modulus == 0 { 0 } else { 4 - modulus }
 }
+
+/// Maps the file type bits of a CPIO mode field to a CPIOFileType
+fn parse_file_type(mode: usize) -> CPIOFileType {
+    const S_IFMT: usize = 0o170000;
+    const S_IFREG: usize = 0o100000;
+    const S_IFDIR: usize = 0o040000;
+    const S_IFLNK: usize = 0o120000;
+    const S_IFBLK: usize = 0o060000;
+    const S_IFCHR: usize = 0o020000;
+    const S_IFIFO: usize = 0o010000;
+    const S_IFSOCK: usize = 0o140000;
+
+    match mode & S_IFMT {
+        S_IFREG => CPIOFileType::Regular,
+        S_IFDIR => CPIOFileType::Directory,
+        S_IFLNK => CPIOFileType::Symlink,
+        S_IFBLK => CPIOFileType::BlockDevice,
+        S_IFCHR => CPIOFileType::CharDevice,
+        S_IFIFO => CPIOFileType::Fifo,
+        S_IFSOCK => CPIOFileType::Socket,
+        _ => CPIOFileType::Unknown,
+    }
+}
+
+/// Returns true if any of the user/group/other execute bits is set in mode
+pub fn is_executable(mode: usize) -> bool {
+    const S_IXUSR: usize = 0o100;
+    const S_IXGRP: usize = 0o010;
+    const S_IXOTH: usize = 0o001;
+    (mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0
+}