diff --git a/sdk/src/assertions/bmff_hash.rs b/sdk/src/assertions/bmff_hash.rs index 71a15ea7f..2c1007c69 100644 --- a/sdk/src/assertions/bmff_hash.rs +++ b/sdk/src/assertions/bmff_hash.rs @@ -261,9 +261,6 @@ pub struct BmffHash { } impl BmffHash { - /// Label prefix for a BMFF hash assertion. - /// - /// See . pub const LABEL: &'static str = labels::BMFF_HASH; pub fn new(name: &str, alg: &str, url: Option) -> Self { @@ -379,7 +376,7 @@ impl BmffHash { ) -> crate::error::Result<()> { let mut reader = Cursor::new(data); - self.verify_stream(&mut reader, alg) + self.verify_stream_hash(&mut reader, alg) } // The BMFFMerklMaps are stored contiguous in the file. Break this Vec into groups based on @@ -429,7 +426,7 @@ impl BmffHash { pub fn verify_hash(&self, asset_path: &Path, alg: Option<&str>) -> crate::error::Result<()> { let mut data = fs::File::open(asset_path)?; - self.verify_stream(&mut data, alg) + self.verify_stream_hash(&mut data, alg) } /* Verifies BMFF hashes from a single file asset. The following variants are handled @@ -439,7 +436,7 @@ impl BmffHash { Untimed media (Merkle hashes over iloc locations) A single BMFF asset containing all fragments (Merkle hashes over moof ranges). */ - pub fn verify_stream( + pub fn verify_stream_hash( &self, reader: &mut dyn CAIRead, alg: Option<&str>, @@ -922,7 +919,7 @@ pub mod tests { // get the bmff hashes let claim = store.provenance_claim().unwrap(); - for dh_assertion in claim.data_hash_assertions() { + for dh_assertion in claim.hash_assertions() { if dh_assertion.label_root() == BmffHash::LABEL { let bmff_hash = BmffHash::from_assertion(dh_assertion).unwrap(); diff --git a/sdk/src/assertions/box_hash.rs b/sdk/src/assertions/box_hash.rs new file mode 100644 index 000000000..9cbb8ab0d --- /dev/null +++ b/sdk/src/assertions/box_hash.rs @@ -0,0 +1,430 @@ +// Copyright 2023 Adobe. All rights reserved. 
+// This file is licensed to you under the Apache License, +// Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +// or the MIT license (http://opensource.org/licenses/MIT), +// at your option. + +// Unless required by applicable law or agreed to in writing, +// this software is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR REPRESENTATIONS OF ANY KIND, either express or +// implied. See the LICENSE-MIT and LICENSE-APACHE files for the +// specific language governing permissions and limitations under +// each license. + +use std::{fs::File, io::Cursor, path::*}; + +use serde::{Deserialize, Serialize}; +use serde_bytes::ByteBuf; + +use crate::{ + assertion::{Assertion, AssertionBase, AssertionCbor}, + assertions::labels, + asset_io::{AssetBoxHash, CAIRead}, + error::{Error, Result}, + utils::hash_utils::{hash_stream_by_alg, verify_stream_by_alg, HashRange}, + AssertionJson, +}; + +const ASSERTION_CREATION_VERSION: usize = 1; + +pub const C2PA_BOXHASH: &str = "C2PA"; + +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct BoxMap { + pub names: Vec, + + #[serde(skip_serializing_if = "Option::is_none")] + pub alg: Option, + + pub hash: ByteBuf, + pub pad: ByteBuf, + + #[serde(skip)] + pub range_start: usize, + + #[serde(skip)] + pub range_len: usize, +} + +/// Helper class to create BoxHash assertion +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] +pub struct BoxHash { + boxes: Vec, +} + +impl BoxHash { + pub const LABEL: &'static str = labels::BOX_HASH; + + pub fn verify_hash( + &self, + asset_path: &Path, + alg: Option<&str>, + bhp: &dyn AssetBoxHash, + ) -> Result<()> { + let mut file = File::open(asset_path)?; + + self.verify_stream_hash(&mut file, alg, bhp) + } + + pub fn verify_in_memory_hash( + &self, + data: &[u8], + alg: Option<&str>, + bhp: &dyn AssetBoxHash, + ) -> Result<()> { + let mut reader = Cursor::new(data); + + self.verify_stream_hash(&mut reader, alg, bhp) + } + + pub fn verify_stream_hash( + &self, + 
reader: &mut dyn CAIRead, + alg: Option<&str>, + bhp: &dyn AssetBoxHash, + ) -> Result<()> { + // it is a failure if no hashes are listed + if self.boxes.is_empty() { + return Err(Error::HashMismatch("No box hash found".to_string())); + } + + // get source box list + let source_bms = bhp.get_box_map(reader)?; + let mut source_index = 0; + + // check to see we source index starts at PNGh and skip if not included in the hash list + if let Some(first_expected_bms) = source_bms.get(source_index) { + if first_expected_bms.names[0] == "PNGh" && self.boxes[0].names[0] != "PNGh" { + source_index += 1; + } + } else { + return Err(Error::HashMismatch("No data boxes found".to_string())); + } + + for bm in &self.boxes { + let mut inclusions = Vec::new(); + + // build up current inclusion, consuming all names in this BoxMap + let mut skip_c2pa = false; + let mut inclusion = HashRange::new(0, 0); + for name in &bm.names { + match source_bms.get(source_index) { + Some(next_source_bm) => { + if name == &next_source_bm.names[0] { + if inclusion.length() == 0 { + // this is a new item + inclusion.set_start(next_source_bm.range_start); + inclusion.set_length(next_source_bm.range_len); + + if name == C2PA_BOXHASH { + // there should only be 1 collapsed C2PA range + if bm.names.len() != 1 { + return Err(Error::HashMismatch( + "Malformed C2PA box hash".to_owned(), + )); + } + skip_c2pa = true; + } + } else { + // update item + inclusion.set_length(inclusion.length() + next_source_bm.range_len); + } + } else { + return Err(Error::HashMismatch( + "Box hash name out of order".to_owned(), + )); + } + } + None => return Err(Error::HashMismatch("Box hash name not found".to_owned())), + } + source_index += 1; + } + + // C2PA chunks are skipped for hashing purposes + if skip_c2pa { + continue; + } + + inclusions.push(inclusion); + + let curr_alg = match &bm.alg { + Some(a) => a.clone(), + None => match alg { + Some(a) => a.to_owned(), + None => return Err(Error::HashMismatch("No algorithm 
specified".to_string())), + }, + }; + + if !verify_stream_by_alg(&curr_alg, &bm.hash, reader, Some(inclusions), false) { + return Err(Error::HashMismatch("Hashes do not match".to_owned())); + } + } + + Ok(()) + } + + #[allow(dead_code)] + pub fn generate_box_hash_from_stream( + &mut self, + reader: &mut dyn CAIRead, + alg: &str, + bhp: &dyn AssetBoxHash, + minimal_form: bool, + ) -> Result<()> { + // get source box list + let source_bms = bhp.get_box_map(reader)?; + + if minimal_form { + let mut before_c2pa = BoxMap { + names: Vec::new(), + alg: Some(alg.to_string()), + hash: ByteBuf::from(vec![]), + pad: ByteBuf::from(vec![]), + range_start: 0, + range_len: 0, + }; + + let mut c2pa_box = BoxMap { + names: Vec::new(), + alg: Some(alg.to_string()), + hash: ByteBuf::from(vec![]), + pad: ByteBuf::from(vec![]), + range_start: 0, + range_len: 0, + }; + + let mut after_c2pa = BoxMap { + names: Vec::new(), + alg: Some(alg.to_string()), + hash: ByteBuf::from(vec![]), + pad: ByteBuf::from(vec![]), + range_start: 0, + range_len: 0, + }; + + let mut is_before_c2pa = true; + + // collapse map list to minimal set + for bm in source_bms.into_iter() { + if bm.names[0] == "C2PA" { + // there should only be 1 collapsed C2PA range + if bm.names.len() != 1 { + return Err(Error::HashMismatch("Malformed C2PA box hash".to_owned())); + } + + c2pa_box = bm; + is_before_c2pa = false; + continue; + } + + if is_before_c2pa { + before_c2pa.names.extend(bm.names); + if before_c2pa.range_len == 0 { + before_c2pa.range_start = bm.range_start; + before_c2pa.range_len = bm.range_len; + } else { + before_c2pa.range_len += bm.range_len; + } + } else { + after_c2pa.names.extend(bm.names); + if after_c2pa.range_len == 0 { + after_c2pa.range_start = bm.range_start; + after_c2pa.range_len = bm.range_len; + } else { + after_c2pa.range_len += bm.range_len; + } + } + } + + self.boxes = vec![before_c2pa, c2pa_box, after_c2pa]; + + // compute the hashes + for bm in self.boxes.iter_mut() { + // skip c2pa box 
+ if bm.names[0] == C2PA_BOXHASH { + continue; + } + + let mut inclusions = Vec::new(); + + let inclusion = HashRange::new(bm.range_start, bm.range_len); + inclusions.push(inclusion); + + bm.hash = ByteBuf::from(hash_stream_by_alg(alg, reader, Some(inclusions), false)?); + } + } else { + for mut bm in source_bms { + if bm.names[0] == "C2PA" { + // there should only be 1 collapsed C2PA range + if bm.names.len() != 1 { + return Err(Error::HashMismatch("Malformed C2PA box hash".to_owned())); + } + bm.hash = ByteBuf::from(vec![0]); + bm.pad = ByteBuf::from(vec![]); + self.boxes.push(bm); + continue; + } + + // this is a new item + let mut inclusions = Vec::new(); + + let inclusion = HashRange::new(bm.range_start, bm.range_len); + inclusions.push(inclusion); + + bm.alg = Some(alg.to_string()); + bm.hash = ByteBuf::from(hash_stream_by_alg(alg, reader, Some(inclusions), false)?); + bm.pad = ByteBuf::from(vec![]); + + self.boxes.push(bm); + } + } + + Ok(()) + } +} + +impl AssertionCbor for BoxHash {} + +impl AssertionJson for BoxHash {} + +impl AssertionBase for BoxHash { + const LABEL: &'static str = Self::LABEL; + const VERSION: Option = Some(ASSERTION_CREATION_VERSION); + + // todo: this mechanism needs to change since a struct could support different versions + + fn to_assertion(&self) -> crate::error::Result { + Self::to_cbor_assertion(self) + } + + fn from_assertion(assertion: &Assertion) -> crate::error::Result { + Self::from_cbor_assertion(assertion) + } +} + +#[cfg(test)] +mod tests { + #![allow(clippy::unwrap_used)] + + use super::*; + use crate::{jumbf_io::get_assetio_handler_from_path, utils::test::fixture_path}; + + #[test] + fn test_hash_verify_jpg() { + let ap = fixture_path("CA.jpg"); + + let bhp = get_assetio_handler_from_path(&ap) + .unwrap() + .asset_box_hash_ref() + .unwrap(); + + let mut input = File::open(&ap).unwrap(); + + let mut bh = BoxHash { boxes: Vec::new() }; + + // generate box hashes + bh.generate_box_hash_from_stream(&mut input, "sha256", 
bhp, false) + .unwrap(); + + // see if they match reading + bh.verify_stream_hash(&mut input, Some("sha256"), bhp) + .unwrap(); + } + + #[test] + fn test_hash_verify_jpg_reduced() { + let ap = fixture_path("CA.jpg"); + + let bhp = get_assetio_handler_from_path(&ap) + .unwrap() + .asset_box_hash_ref() + .unwrap(); + + let mut input = File::open(&ap).unwrap(); + + let mut bh = BoxHash { boxes: Vec::new() }; + + // generate box hashes + bh.generate_box_hash_from_stream(&mut input, "sha256", bhp, true) + .unwrap(); + + // see if they match reading + bh.verify_stream_hash(&mut input, Some("sha256"), bhp) + .unwrap(); + } + + #[test] + fn test_hash_verify_png() { + let ap = fixture_path("libpng-test.png"); + + let bhp = get_assetio_handler_from_path(&ap) + .unwrap() + .asset_box_hash_ref() + .unwrap(); + + let mut input = File::open(&ap).unwrap(); + + let mut bh = BoxHash { boxes: Vec::new() }; + + // generate box hashes + bh.generate_box_hash_from_stream(&mut input, "sha256", bhp, false) + .unwrap(); + + // see if they match reading + bh.verify_stream_hash(&mut input, Some("sha256"), bhp) + .unwrap(); + } + + #[test] + fn test_hash_verify_no_pngh() { + let ap = fixture_path("libpng-test.png"); + + let bhp = get_assetio_handler_from_path(&ap) + .unwrap() + .asset_box_hash_ref() + .unwrap(); + + let mut input = File::open(&ap).unwrap(); + + let mut bh = BoxHash { boxes: Vec::new() }; + + // generate box hashes + bh.generate_box_hash_from_stream(&mut input, "sha256", bhp, false) + .unwrap(); + + bh.boxes.remove(0); // remove PNGh + + // see if they match reading + bh.verify_stream_hash(&mut input, Some("sha256"), bhp) + .unwrap(); + } + + #[test] + fn test_json_round_trop() { + let ap = fixture_path("CA.jpg"); + + let bhp = get_assetio_handler_from_path(&ap) + .unwrap() + .asset_box_hash_ref() + .unwrap(); + + let mut input = File::open(&ap).unwrap(); + + let mut bh = BoxHash { boxes: Vec::new() }; + + // generate box hashes + bh.generate_box_hash_from_stream(&mut input, 
"sha256", bhp, true) + .unwrap(); + + // save and reload JSOH + let bh_json_assertion = bh.to_json_assertion().unwrap(); + println!("Box hash json: {:?}", bh_json_assertion.decode_data()); + + let reloaded_bh = BoxHash::from_json_assertion(&bh_json_assertion).unwrap(); + + // see if they match reading + reloaded_bh + .verify_stream_hash(&mut input, Some("sha256"), bhp) + .unwrap(); + } +} diff --git a/sdk/src/assertions/data_hash.rs b/sdk/src/assertions/data_hash.rs index bb261a2ba..5473ced6f 100644 --- a/sdk/src/assertions/data_hash.rs +++ b/sdk/src/assertions/data_hash.rs @@ -61,13 +61,10 @@ pub struct DataHash { } impl DataHash { - /// Label prefix for a data hash assertion. - /// - /// See . pub const LABEL: &'static str = labels::DATA_HASH; /// Create new DataHash instance - pub fn new(name: &str, alg: &str, url: Option) -> Self { + pub fn new(name: &str, alg: &str) -> Self { DataHash { exclusions: None, name: Some(name.to_string()), @@ -75,7 +72,7 @@ impl DataHash { hash: Vec::new(), pad: Vec::new(), pad2: None, - url, + url: None, //deprecated path: PathBuf::new(), } } @@ -297,7 +294,7 @@ pub mod tests { #[test] fn test_build_assertion() { // try json based assertion - let mut data_hash = DataHash::new("Some data", "sha256", None); + let mut data_hash = DataHash::new("Some data", "sha256"); data_hash.add_exclusion(HashRange::new(0, 1234)); data_hash.hash = vec![1, 2, 3]; @@ -338,7 +335,7 @@ pub mod tests { #[test] fn test_binary_round_trip() { - let mut data_hash = DataHash::new("Some data", "sha256", None); + let mut data_hash = DataHash::new("Some data", "sha256"); data_hash.add_exclusion(HashRange::new(0x2000, 0x1000)); data_hash.add_exclusion(HashRange::new(0x4000, 0x1000)); diff --git a/sdk/src/assertions/labels.rs b/sdk/src/assertions/labels.rs index 41b9d8900..c4c5990cb 100644 --- a/sdk/src/assertions/labels.rs +++ b/sdk/src/assertions/labels.rs @@ -34,6 +34,11 @@ pub const ASSERTION_METADATA: &str = "c2pa.assertion.metadata"; /// See . 
pub const DATA_HASH: &str = "c2pa.hash.data"; +/// Label prefix for a box hash assertion. +/// +/// See . +pub const BOX_HASH: &str = "c2pa.hash.boxes"; + /// Label prefix for a BMFF-based hash assertion. /// /// See . diff --git a/sdk/src/assertions/mod.rs b/sdk/src/assertions/mod.rs index 025ba1056..ee2a373d9 100644 --- a/sdk/src/assertions/mod.rs +++ b/sdk/src/assertions/mod.rs @@ -19,6 +19,9 @@ pub use actions::{c2pa_action, Action, Actions, SoftwareAgent}; mod bmff_hash; pub use bmff_hash::{BmffHash, BmffMerkleMap, DataMap, ExclusionsMap, SubsetMap}; +mod box_hash; +pub(crate) use box_hash::{BoxHash, BoxMap, C2PA_BOXHASH}; + #[allow(dead_code)] // will become public later mod data_hash; pub(crate) use data_hash::DataHash; diff --git a/sdk/src/asset_handlers/jpeg_io.rs b/sdk/src/asset_handlers/jpeg_io.rs index 9c0944f7e..eb71c7a0f 100644 --- a/sdk/src/asset_handlers/jpeg_io.rs +++ b/sdk/src/asset_handlers/jpeg_io.rs @@ -12,22 +12,28 @@ // each license. use std::{ + collections::HashMap, convert::{From, TryFrom}, fs::File, - io::Cursor, + io::{BufReader, Cursor}, path::*, }; use byteorder::{BigEndian, ReadBytesExt}; use img_parts::{ - jpeg::{markers, Jpeg, JpegSegment}, + jpeg::{ + markers::{self, P, RST0, RST7, Z}, + Jpeg, JpegSegment, + }, Bytes, DynImage, }; +use serde_bytes::ByteBuf; use tempfile::Builder; use crate::{ + assertions::{BoxMap, C2PA_BOXHASH}, asset_io::{ - AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashBlockObjectType, + AssetBoxHash, AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashBlockObjectType, HashObjectPositions, RemoteRefEmbed, RemoteRefEmbedType, }, error::{Error, Result}, @@ -197,7 +203,7 @@ impl CAIReader for JpegIO { if cai_seg_cnt > 0 && is_cai_continuation { // make sure this is a cai segment for additional segments, if z <= cai_seg_cnt { - // this a non contiguous segment with same "en"" so a bad set of data + // this a non contiguous segment with same "en" so a bad set of data // reset and continue to search 
cai_en = Vec::new(); continue; @@ -531,6 +537,10 @@ impl AssetIO for JpegIO { Some(self) } + fn asset_box_hash_ref(&self) -> Option<&dyn AssetBoxHash> { + Some(self) + } + fn supported_types(&self) -> &[&str] { &SUPPORTED_TYPES } @@ -571,6 +581,257 @@ impl RemoteRefEmbed for JpegIO { } } +fn in_entropy(marker: u8) -> bool { + matches!(marker, RST0..=RST7 | Z) +} + +// img-parts does not correctly return the true size of the SOS segment. This utility +// finds the correct break point for single image JPEGs. We will need a new JPEG decoder +// to handle those. Also this function can be removed if img-parts ever addresses this issue +fn get_entropy_size(input_stream: &mut dyn CAIRead, projected_len: usize) -> Result { + // Search the entropy data looking for non entropy segment marker. The first valid seg marker before we hit + // end of the file. + + let mut buf_reader = BufReader::new(input_stream); + + let mut size = 0; + + loop { + match buf_reader.read_u8() { + Ok(curr_byte) => { + if curr_byte == P { + let next_byte = buf_reader.read_u8()?; + + if !in_entropy(next_byte) { + break; + } else { + size += 1; + } + } + size += 1; + } + Err(e) => return Err(Error::IoError(e)), + } + if size > projected_len { + break; + } + } + + Ok(size) +} + +impl AssetBoxHash for JpegIO { + fn get_box_map(&self, input_stream: &mut dyn CAIRead) -> Result> { + let segment_names = HashMap::from([ + (0xe0u8, "APP0"), + (0xe1u8, "APP1"), + (0xe2u8, "APP2"), + (0xe3u8, "APP3"), + (0xe4u8, "APP4"), + (0xe5u8, "APP5"), + (0xe6u8, "APP6"), + (0xe7u8, "APP7"), + (0xe8u8, "APP8"), + (0xe9u8, "APP9"), + (0xeau8, "APP10"), + (0xebu8, "APP11"), + (0xecu8, "APP12"), + (0xedu8, "APP13"), + (0xeeu8, "APP14"), + (0xefu8, "APP15"), + (0xfeu8, "COM"), + (0xc4u8, "DHT"), + (0xdbu8, "DQT"), + (0xddu8, "DRI"), + (0xd9u8, "EOI"), + (0xd0u8, "RST0"), + (0xd1u8, "RST1"), + (0xd2u8, "RST2"), + (0xd3u8, "RST3"), + (0xd4u8, "RST4"), + (0xd5u8, "RST5"), + (0xd6u8, "RST6"), + (0xd7u8, "RST7"), + (0xc0u8, "SOF0"), 
+ (0xc2u8, "SOF2"), + (0xd8u8, "SOI"), + (0xdau8, "SOS"), + (0xf0u8, "JPG0"), + (0xf1u8, "JPG1"), + (0xf2u8, "JPG2"), + (0xf3u8, "JPG3"), + (0xf4u8, "JPG4"), + (0xf5u8, "JPG5"), + (0xf6u8, "JPG6"), + (0xf7u8, "JPG7"), + (0xf8u8, "JPG8"), + (0xf9u8, "JPG9"), + (0xfau8, "JPG10"), + (0xfbu8, "JPG11"), + (0xfcu8, "JPG12"), + (0xfdu8, "JPG13"), + ]); + + let mut cai_en: Vec = Vec::new(); + let mut cai_seg_cnt: u32 = 0; + + let mut box_maps = Vec::new(); + let mut curr_offset = 2; // start after JPEG marker + + // load the bytes + let mut buf: Vec = Vec::new(); + input_stream.rewind()?; + input_stream.read_to_end(&mut buf).map_err(Error::IoError)?; + + // add first Map object contain SOI + let soi_bm = BoxMap { + names: vec!["SOI".to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: 0, + range_len: 2, + }; + box_maps.push(soi_bm); + + let mut c2pa_bm_index = 0; + + let dimg = DynImage::from_bytes(buf.into()) + .map_err(|e| Error::OtherError(Box::new(e)))? 
+ .ok_or(Error::UnsupportedType)?; + + match dimg { + DynImage::Jpeg(jpeg) => { + for seg in jpeg.segments() { + match seg.marker() { + markers::APP11 => { + // JUMBF marker + let raw_bytes = seg.contents(); + + if raw_bytes.len() > 16 { + // we need at least 16 bytes in each segment for CAI + let mut raw_vec = raw_bytes.to_vec(); + let _ci = raw_vec.as_mut_slice()[0..2].to_vec(); + let en = raw_vec.as_mut_slice()[2..4].to_vec(); + + let is_cai_continuation = vec_compare(&cai_en, &en); + + if cai_seg_cnt > 0 && is_cai_continuation { + cai_seg_cnt += 1; + + let c2pa_bm = box_maps.get_mut(c2pa_bm_index).ok_or( + Error::InvalidAsset("Invalid C2PA segment".to_owned()), + )?; + + // update c2pa box map + c2pa_bm.range_len += seg.len_with_entropy(); + } else { + // check if this is a CAI JUMBF block + let jumb_type = raw_vec.as_mut_slice()[24..28].to_vec(); + let is_cai = vec_compare(&C2PA_MARKER, &jumb_type); + if is_cai { + cai_seg_cnt = 1; + cai_en = en.clone(); // store the identifier + + let c2pa_bm = BoxMap { + names: vec![C2PA_BOXHASH.to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: curr_offset, + range_len: seg.len_with_entropy(), + }; + + box_maps.push(c2pa_bm); + + c2pa_bm_index = box_maps.len() - 1; + } else { + let name = segment_names.get(&seg.marker()).ok_or( + Error::InvalidAsset( + "Unknown segment marker".to_owned(), + ), + )?; + + let bm = BoxMap { + names: vec![name.to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: curr_offset, + range_len: seg.len_with_entropy(), + }; + + box_maps.push(bm); + } + } + } + } + markers::SOS => { + // workaround for img-parts returning wrong segment len when entropy is present + + // move pointer to beginning of segment + input_stream + .seek(std::io::SeekFrom::Start((curr_offset + seg.len()) as u64))?; + + let size = + get_entropy_size(input_stream, seg.len_with_entropy() - seg.len())? 
+ + seg.len(); + + let name = segment_names + .get(&seg.marker()) + .ok_or(Error::InvalidAsset("Unknown segment marker".to_owned()))?; + + let bm = BoxMap { + names: vec![name.to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: curr_offset, + range_len: size, + }; + + box_maps.push(bm); + curr_offset += size; + continue; + } + _ => { + let name = segment_names + .get(&seg.marker()) + .ok_or(Error::InvalidAsset("Unknown segment marker".to_owned()))?; + + let bm = BoxMap { + names: vec![name.to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: curr_offset, + range_len: seg.len_with_entropy(), + }; + + box_maps.push(bm); + } + } + curr_offset += seg.len_with_entropy(); + } + } + _ => return Err(Error::InvalidAsset("Unknown image format".to_owned())), + } + + // add last segment + let eoi_bm = BoxMap { + names: vec!["EOI".to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: curr_offset, + range_len: 2, + }; + box_maps.push(eoi_bm); + + Ok(box_maps) + } +} + #[cfg(test)] pub mod tests { #![allow(clippy::unwrap_used)] diff --git a/sdk/src/asset_handlers/png_io.rs b/sdk/src/asset_handlers/png_io.rs index 7a08e48bc..fc77c82a3 100644 --- a/sdk/src/asset_handlers/png_io.rs +++ b/sdk/src/asset_handlers/png_io.rs @@ -19,10 +19,12 @@ use std::{ use byteorder::{BigEndian, ReadBytesExt}; use conv::ValueFrom; +use serde_bytes::ByteBuf; use crate::{ + assertions::{BoxMap, C2PA_BOXHASH}, asset_io::{ - AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashBlockObjectType, + AssetBoxHash, AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashBlockObjectType, HashObjectPositions, RemoteRefEmbed, RemoteRefEmbedType, }, error::{Error, Result}, @@ -559,6 +561,10 @@ impl AssetIO for PngIO { Some(self) } + fn asset_box_hash_ref(&self) -> Option<&dyn AssetBoxHash> { + Some(self) + } + fn supported_types(&self) 
-> &[&str] { &SUPPORTED_TYPES } @@ -599,6 +605,55 @@ impl RemoteRefEmbed for PngIO { } } +impl AssetBoxHash for PngIO { + fn get_box_map(&self, input_stream: &mut dyn CAIRead) -> Result> { + let ps = get_png_chunk_positions(input_stream)?; + + let mut box_maps = Vec::new(); + + // add PNGh header + let pngh_bm = BoxMap { + names: vec!["PNGh".to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: 0, + range_len: 8, + }; + box_maps.push(pngh_bm); + + // add the other boxes + for pc in ps.into_iter() { + // add special C2PA box + if pc.name == CAI_CHUNK { + let c2pa_bm = BoxMap { + names: vec![C2PA_BOXHASH.to_string()], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: pc.start as usize, + range_len: (pc.length + 12) as usize, // length(4) + name(4) + crc(4) + }; + box_maps.push(c2pa_bm); + continue; + } + + // all other chunks + let c2pa_bm = BoxMap { + names: vec![pc.name_str], + alg: None, + hash: ByteBuf::from(Vec::new()), + pad: ByteBuf::from(Vec::new()), + range_start: pc.start as usize, + range_len: (pc.length + 12) as usize, // length(4) + name(4) + crc(4) + }; + box_maps.push(c2pa_bm); + } + + Ok(box_maps) + } +} + #[cfg(test)] pub mod tests { #![allow(clippy::panic)] diff --git a/sdk/src/asset_io.rs b/sdk/src/asset_io.rs index 43b0b1b2b..4bdf0bcc5 100644 --- a/sdk/src/asset_io.rs +++ b/sdk/src/asset_io.rs @@ -19,7 +19,7 @@ use std::{ use tempfile::NamedTempFile; -use crate::error::Result; +use crate::{assertions::BoxMap, error::Result}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum HashBlockObjectType { @@ -187,6 +187,11 @@ pub trait AssetIO: Sync + Send { fn remote_ref_writer_ref(&self) -> Option<&dyn RemoteRefEmbed> { None } + + // Returns [`AssetBoxHash`] trait if this I/O handler supports box hashing. 
+ fn asset_box_hash_ref(&self) -> Option<&dyn AssetBoxHash> { + None + } } // `AssetPatch` optimizes output generation for asset_io handlers that @@ -200,6 +205,16 @@ pub trait AssetPatch { fn patch_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()>; } +// `AssetBoxHash` provides interfaces needed to support C2PA BoxHash functionality. +// This trait is only implemented for supported types +pub trait AssetBoxHash { + // Returns Vec containing all BoxMap level objects in the asset in the order + // they occur in the asset. The hashes do not need to be calculated, only the + // name and the positional information. The list should be flat with each BoxMap + // representing a single entry. + fn get_box_map(&self, input_stream: &mut dyn CAIRead) -> Result>; +} + // Type of remote reference to embed. Some of the listed // emums are for future uses and experiments. #[allow(unused_variables)] diff --git a/sdk/src/claim.rs b/sdk/src/claim.rs index f449c97ae..fa42d41b2 100644 --- a/sdk/src/claim.rs +++ b/sdk/src/claim.rs @@ -26,7 +26,7 @@ use crate::{ assertions::{ self, labels::{self, CLAIM}, - AssetType, BmffHash, DataBox, DataHash, + AssetType, BmffHash, BoxHash, DataBox, DataHash, }, asset_io::CAIRead, cose_validator::{get_signing_info, verify_cose, verify_cose_async}, @@ -42,6 +42,7 @@ use crate::{ DATABOX, DATABOXES, SIGNATURE, }, }, + jumbf_io::{get_assetio_handler, get_assetio_handler_from_path}, salt::{DefaultSalt, SaltGenerator, NO_SALT}, status_tracker::{log_item, OneShotStatusTracker, StatusTracker}, utils::hash_utils::{hash_by_alg, vec_compare, verify_by_alg}, @@ -63,9 +64,9 @@ const GH_UA: &str = "Sec-CH-UA"; // used to handle different data types. 
pub enum ClaimAssetData<'a> { Path(&'a Path), - Bytes(&'a [u8]), - Stream(&'a mut dyn CAIRead), - StreamFragment(&'a mut dyn CAIRead, &'a mut dyn CAIRead), + Bytes(&'a [u8], &'a str), + Stream(&'a mut dyn CAIRead, &'a str), + StreamFragment(&'a mut dyn CAIRead, &'a mut dyn CAIRead, &'a str), } // helper struct to allow arbitrary order for assertions stored in jumbf. The instance is @@ -1236,7 +1237,7 @@ impl Claim { // verify data hashes for provenance claims if is_provenance { // must have at least one hard binding for normal manifests - if claim.data_hash_assertions().is_empty() && !claim.update_manifest() { + if claim.hash_assertions().is_empty() && !claim.update_manifest() { let log_item = log_item!( &claim.uri(), "claim missing data binding", @@ -1248,7 +1249,7 @@ impl Claim { } // update manifests cannot have data hashes - if !claim.data_hash_assertions().is_empty() && claim.update_manifest() { + if !claim.hash_assertions().is_empty() && claim.update_manifest() { let log_item = log_item!( &claim.uri(), "update manifests cannot contain data hash assertions", @@ -1259,9 +1260,9 @@ impl Claim { validation_log.log(log_item, Some(Error::UpdateManifestInvalid))?; } - for dh_assertion in claim.data_hash_assertions() { - if dh_assertion.label_root() == DataHash::LABEL { - let dh = DataHash::from_assertion(dh_assertion)?; + for hash_binding_assertion in claim.hash_assertions() { + if hash_binding_assertion.label_root() == DataHash::LABEL { + let dh = DataHash::from_assertion(hash_binding_assertion)?; let name = dh.name.as_ref().map_or(UNNAMED.to_string(), default_str); if !dh.is_remote_hash() { // only verify local hashes here @@ -1269,10 +1270,10 @@ impl Claim { ClaimAssetData::Path(asset_path) => { dh.verify_hash(asset_path, Some(claim.alg())) } - ClaimAssetData::Bytes(asset_bytes) => { + ClaimAssetData::Bytes(asset_bytes, _) => { dh.verify_in_memory_hash(asset_bytes, Some(claim.alg())) } - ClaimAssetData::Stream(stream_data) => { + 
ClaimAssetData::Stream(stream_data, _) => { dh.verify_stream_hash(*stream_data, Some(claim.alg())) } _ => return Err(Error::UnsupportedType), /* this should never happen (coding error) */ @@ -1281,7 +1282,7 @@ impl Claim { match hash_result { Ok(_a) => { let log_item = log_item!( - claim.assertion_uri(&dh_assertion.label()), + claim.assertion_uri(&hash_binding_assertion.label()), "data hash valid", "verify_internal" ) @@ -1292,7 +1293,7 @@ impl Claim { } Err(e) => { let log_item = log_item!( - claim.assertion_uri(&dh_assertion.label()), + claim.assertion_uri(&hash_binding_assertion.label()), format!("asset hash error, name: {name}, error: {e}"), "verify_internal" ) @@ -1306,9 +1307,9 @@ impl Claim { } } } - } else if dh_assertion.label_root() == BmffHash::LABEL { + } else if hash_binding_assertion.label_root() == BmffHash::LABEL { // handle BMFF data hashes - let dh = BmffHash::from_assertion(dh_assertion)?; + let dh = BmffHash::from_assertion(hash_binding_assertion)?; let name = dh.name().map_or("unnamed".to_string(), default_str); @@ -1316,13 +1317,13 @@ impl Claim { ClaimAssetData::Path(asset_path) => { dh.verify_hash(asset_path, Some(claim.alg())) } - ClaimAssetData::Bytes(asset_bytes) => { + ClaimAssetData::Bytes(asset_bytes, _) => { dh.verify_in_memory_hash(asset_bytes, Some(claim.alg())) } - ClaimAssetData::Stream(stream_data) => { - dh.verify_stream(*stream_data, Some(claim.alg())) + ClaimAssetData::Stream(stream_data, _) => { + dh.verify_stream_hash(*stream_data, Some(claim.alg())) } - ClaimAssetData::StreamFragment(initseg_data, fragment_data) => dh + ClaimAssetData::StreamFragment(initseg_data, fragment_data, _) => dh .verify_stream_segment( *initseg_data, *fragment_data, @@ -1333,7 +1334,7 @@ impl Claim { match hash_result { Ok(_a) => { let log_item = log_item!( - claim.assertion_uri(&dh_assertion.label()), + claim.assertion_uri(&hash_binding_assertion.label()), "data hash valid", "verify_internal" ) @@ -1344,7 +1345,7 @@ impl Claim { } Err(e) => { let 
log_item = log_item!( - claim.assertion_uri(&dh_assertion.label()), + claim.assertion_uri(&hash_binding_assertion.label()), format!("asset hash error, name: {name}, error: {e}"), "verify_internal" ) @@ -1357,9 +1358,78 @@ )?; } } - } else { + } else if hash_binding_assertion.label_root() == BoxHash::LABEL { // box hash case - return Err(Error::UnsupportedType); // implementation to come + // handle box hashes + let bh = BoxHash::from_assertion(hash_binding_assertion)?; + + let hash_result = match asset_data { + ClaimAssetData::Path(asset_path) => { + let box_hash_processor = get_assetio_handler_from_path(asset_path) + .ok_or(Error::UnsupportedType)? + .asset_box_hash_ref() + .ok_or(Error::HashMismatch("Box hash not supported".to_string()))?; + + bh.verify_hash(asset_path, Some(claim.alg()), box_hash_processor) + } + ClaimAssetData::Bytes(asset_bytes, asset_type) => { + let box_hash_processor = get_assetio_handler(asset_type) + .ok_or(Error::UnsupportedType)? + .asset_box_hash_ref() + .ok_or(Error::HashMismatch(format!( + "Box hash not supported for: {asset_type}" + )))?; + + bh.verify_in_memory_hash( + asset_bytes, + Some(claim.alg()), + box_hash_processor, + ) + } + ClaimAssetData::Stream(stream_data, asset_type) => { + let box_hash_processor = get_assetio_handler(asset_type) + .ok_or(Error::UnsupportedType)? 
+ .asset_box_hash_ref() + .ok_or(Error::HashMismatch(format!( + "Box hash not supported for: {asset_type}" + )))?; + + bh.verify_stream_hash( + *stream_data, + Some(claim.alg()), + box_hash_processor, + ) + } + _ => return Err(Error::UnsupportedType), + }; + + match hash_result { + Ok(_a) => { + let log_item = log_item!( + claim.assertion_uri(&hash_binding_assertion.label()), + "data hash valid", + "verify_internal" + ) + .validation_status(validation_status::ASSERTION_DATAHASH_MATCH); + validation_log.log_silent(log_item); + + continue; + } + Err(e) => { + let log_item = log_item!( + claim.assertion_uri(&hash_binding_assertion.label()), + format!("asset hash error: {e}"), + "verify_internal" + ) + .error(Error::HashMismatch(format!("Asset hash failure: {e}"))) + .validation_status(validation_status::ASSERTION_DATAHASH_MISMATCH); + + validation_log.log( + log_item, + Some(Error::HashMismatch(format!("Asset hash failure: {e}"))), + )?; + } + } } } } @@ -1380,7 +1450,7 @@ impl Claim { } /// Return list of data hash assertions - pub fn data_hash_assertions(&self) -> Vec<&Assertion> { + pub fn hash_assertions(&self) -> Vec<&Assertion> { let dummy_data = AssertionData::Cbor(Vec::new()); let dummy_hash = Assertion::new(DataHash::LABEL, None, dummy_data); let mut data_hashes = self.assertions_by_type(&dummy_hash); @@ -1390,6 +1460,11 @@ impl Claim { let dummy_bmff_hash = Assertion::new(assertions::labels::BMFF_HASH, None, dummy_bmff_data); data_hashes.append(&mut self.assertions_by_type(&dummy_bmff_hash)); + // add in an box hashes + let dummy_box_data = AssertionData::Cbor(Vec::new()); + let dummy_box_hash = Assertion::new(assertions::labels::BOX_HASH, None, dummy_box_data); + data_hashes.append(&mut self.assertions_by_type(&dummy_box_hash)); + data_hashes } @@ -1400,8 +1475,15 @@ impl Claim { self.assertions_by_type(&dummy_bmff_hash) } + pub fn box_hash_assertions(&self) -> Vec<&Assertion> { + // add in an BMFF hashes + let dummy_box_data = 
AssertionData::Cbor(Vec::new()); + let dummy_box_hash = Assertion::new(assertions::labels::BOX_HASH, None, dummy_box_data); + self.assertions_by_type(&dummy_box_hash) + } + /// Return list of ingredient assertions. This function - /// is only useful on commited or loaded claims since ingredients + /// is only useful on committed or loaded claims since ingredients /// are resolved at commit time. pub fn ingredient_assertions(&self) -> Vec<&Assertion> { let dummy_data = AssertionData::Cbor(Vec::new()); diff --git a/sdk/src/ingredient.rs b/sdk/src/ingredient.rs index 6f7f14c02..e54655f1e 100644 --- a/sdk/src/ingredient.rs +++ b/sdk/src/ingredient.rs @@ -846,7 +846,7 @@ impl Ingredient { // verify the store Store::verify_store_async( &store, - &mut ClaimAssetData::Stream(stream), + &mut ClaimAssetData::Stream(stream, format), &mut validation_log, ) .await @@ -1190,7 +1190,7 @@ impl Ingredient { stream.rewind()?; Store::verify_store_async( &store, - &mut ClaimAssetData::Stream(stream), + &mut ClaimAssetData::Stream(stream, format), &mut validation_log, ) .await diff --git a/sdk/src/jumbf_io.rs b/sdk/src/jumbf_io.rs index 3edd1c0b8..652013450 100644 --- a/sdk/src/jumbf_io.rs +++ b/sdk/src/jumbf_io.rs @@ -136,6 +136,12 @@ pub fn save_jumbf_to_memory(asset_type: &str, data: &[u8], store_bytes: &[u8]) - Ok(output_stream.into_inner()) } +pub fn get_assetio_handler_from_path(asset_path: &Path) -> Option<&dyn AssetIO> { + let ext = get_file_extension(asset_path)?; + + ASSET_HANDLERS.get(&ext).map(|h| h.as_ref()) +} + pub fn get_assetio_handler(ext: &str) -> Option<&dyn AssetIO> { let ext = ext.to_lowercase(); diff --git a/sdk/src/manifest_store.rs b/sdk/src/manifest_store.rs index 1a8643ee0..a12c9b5cb 100644 --- a/sdk/src/manifest_store.rs +++ b/sdk/src/manifest_store.rs @@ -255,7 +255,7 @@ impl ManifestStore { /// let asset_bytes = include_bytes!("../tests/fixtures/cloud.jpg"); /// let manifest_bytes = include_bytes!("../tests/fixtures/cloud_manifest.c2pa"); /// - /// let 
manifest_store = ManifestStore::from_manifest_and_asset_bytes_async(manifest_bytes, asset_bytes) + /// let manifest_store = ManifestStore::from_manifest_and_asset_bytes_async(manifest_bytes, "image/jpg", asset_bytes) /// .await /// .unwrap(); /// @@ -267,6 +267,7 @@ impl ManifestStore { /// ``` pub async fn from_manifest_and_asset_bytes_async( manifest_bytes: &[u8], + format: &str, asset_bytes: &[u8], ) -> Result { let mut validation_log = DetailedStatusTracker::new(); @@ -274,7 +275,7 @@ impl ManifestStore { Store::verify_store_async( &store, - &mut ClaimAssetData::Bytes(asset_bytes), + &mut ClaimAssetData::Bytes(asset_bytes, format), &mut validation_log, ) .await?; @@ -431,10 +432,13 @@ mod tests { let asset_bytes = include_bytes!("../tests/fixtures/cloud.jpg"); let manifest_bytes = include_bytes!("../tests/fixtures/cloud_manifest.c2pa"); - let manifest_store = - ManifestStore::from_manifest_and_asset_bytes_async(manifest_bytes, asset_bytes) - .await - .unwrap(); + let manifest_store = ManifestStore::from_manifest_and_asset_bytes_async( + manifest_bytes, + "image/jpg", + asset_bytes, + ) + .await + .unwrap(); assert!(!manifest_store.manifests().is_empty()); assert!(manifest_store.validation_status().is_none()); println!("{manifest_store}"); diff --git a/sdk/src/store.rs b/sdk/src/store.rs index c743f572a..3a1df1984 100644 --- a/sdk/src/store.rs +++ b/sdk/src/store.rs @@ -1451,7 +1451,7 @@ impl Store { if found_jumbf { // add exclusion hash for bytes before and after jumbf - let mut dh = DataHash::new("jumbf manifest", alg, None); + let mut dh = DataHash::new("jumbf manifest", alg); if block_end > block_start { dh.add_exclusion(HashRange::new(block_start, block_end - block_start)); } @@ -1619,6 +1619,43 @@ impl Store { Ok(()) } + /// Returns a manifest suitible for direct embedding by a client. The + /// manfiest are only supported for cases when the client has provided + /// a content hash binding. 
Note, will not work for cases like BMFF where + /// the position of the content is also encoded. + pub fn get_embeddable_manifest(&mut self, signer: &dyn Signer) -> Result> { + let mut jumbf_bytes = self.to_jumbf_internal(signer.reserve_size())?; + + let pc = self.provenance_claim().ok_or(Error::ClaimEncoding)?; + + // make sure there are data hashes present before generating + if pc.hash_assertions().is_empty() { + return Err(Error::BadParam( + "Claim must have hash binding assertion".to_string(), + )); + } + + // don't allow BMFF assertions to be present + if !pc.bmff_hash_assertions().is_empty() { + return Err(Error::BadParam( + "BMFF assertions not supported in embeddable manifests".to_string(), + )); + } + + // sign contents + let sig = self.sign_claim(pc, signer, signer.reserve_size())?; + let sig_placeholder = Store::sign_claim_placeholder(pc, signer.reserve_size()); + + if sig_placeholder.len() != sig.len() { + return Err(Error::CoseSigboxTooSmall); + } + + patch_bytes(&mut jumbf_bytes, &sig_placeholder, &sig) + .map_err(|_| Error::JumbfCreationError)?; + + Ok(jumbf_bytes) + } + /// Embed the claims store as jumbf into a stream. Updates XMP with provenance record. /// When called, the stream should contain an asset matching format. /// on return, the stream will contain the new manifest signed with signer @@ -1922,26 +1959,29 @@ impl Store { } } - // 2) Get hash ranges if needed, do not generate for update manifests - let mut hash_ranges = object_locations_from_stream(format, &mut intermediate_stream)?; - let hashes: Vec = if pc.update_manifest() { - Vec::new() - } else { - Store::generate_data_hashes_for_stream( - &mut intermediate_stream, - pc.alg(), - &mut hash_ranges, - false, - )? 
- }; + // we will not do automatic hashing if we detect a box hash present + if pc.box_hash_assertions().is_empty() { + // 2) Get hash ranges if needed, do not generate for update manifests + let mut hash_ranges = object_locations_from_stream(format, &mut intermediate_stream)?; + let hashes: Vec = if pc.update_manifest() { + Vec::new() + } else { + Store::generate_data_hashes_for_stream( + &mut intermediate_stream, + pc.alg(), + &mut hash_ranges, + false, + )? + }; - // add the placeholder data hashes to provenance claim so that the required space is reserved - for mut hash in hashes { - // add padding to account for possible cbor expansion of final DataHash - let padding: Vec = vec![0x0; 10]; - hash.add_padding(padding); + // add the placeholder data hashes to provenance claim so that the required space is reserved + for mut hash in hashes { + // add padding to account for possible cbor expansion of final DataHash + let padding: Vec = vec![0x0; 10]; + hash.add_padding(padding); - pc.add_assertion(&hash)?; + pc.add_assertion(&hash)?; + } } // 3) Generate in memory CAI jumbf block @@ -1957,31 +1997,35 @@ impl Store { // replace the source with correct asset hashes so that the claim hash will be correct let pc = self.provenance_claim_mut().ok_or(Error::ClaimEncoding)?; - // get the final hash ranges, but not for update manifests - intermediate_stream.rewind()?; - output_stream.rewind()?; - std::io::copy(output_stream, &mut intermediate_stream)?; // can remove this once we can get a CAIReader from CAIReadWrite safely - let mut new_hash_ranges = object_locations_from_stream(format, &mut intermediate_stream)?; - let updated_hashes = if pc.update_manifest() { - Vec::new() - } else { - Store::generate_data_hashes_for_stream( - &mut intermediate_stream, - pc.alg(), - &mut new_hash_ranges, - true, - )? 
- }; + // we will not do automatic hashing if we detect a box hash present + if pc.box_hash_assertions().is_empty() { + // get the final hash ranges, but not for update manifests + intermediate_stream.rewind()?; + output_stream.rewind()?; + std::io::copy(output_stream, &mut intermediate_stream)?; // can remove this once we can get a CAIReader from CAIReadWrite safely + let mut new_hash_ranges = + object_locations_from_stream(format, &mut intermediate_stream)?; + let updated_hashes = if pc.update_manifest() { + Vec::new() + } else { + Store::generate_data_hashes_for_stream( + &mut intermediate_stream, + pc.alg(), + &mut new_hash_ranges, + true, + )? + }; - // patch existing claim hash with updated data - for hash in updated_hashes { - pc.update_data_hash(hash)?; - } + // patch existing claim hash with updated data + for hash in updated_hashes { + pc.update_data_hash(hash)?; + } - // regenerate the jumbf because the cbor changed - data = self.to_jumbf_internal(reserve_size)?; - if jumbf_size != data.len() { - return Err(Error::JumbfCreationError); + // regenerate the jumbf because the cbor changed + data = self.to_jumbf_internal(reserve_size)?; + if jumbf_size != data.len() { + return Err(Error::JumbfCreationError); + } } Ok(data) // return JUMBF data @@ -2140,21 +2184,24 @@ impl Store { } } } else { - // 2) Get hash ranges if needed, do not generate for update manifests - let mut hash_ranges = object_locations(&output_path)?; - let hashes: Vec = if pc.update_manifest() { - Vec::new() - } else { - Store::generate_data_hashes(dest_path, pc.alg(), &mut hash_ranges, false)? - }; + // we will not do automatic hashing if we detect a box hash present + if pc.box_hash_assertions().is_empty() { + // 2) Get hash ranges if needed, do not generate for update manifests + let mut hash_ranges = object_locations(&output_path)?; + let hashes: Vec = if pc.update_manifest() { + Vec::new() + } else { + Store::generate_data_hashes(dest_path, pc.alg(), &mut hash_ranges, false)? 
+ }; - // add the placeholder data hashes to provenance claim so that the required space is reserved - for mut hash in hashes { - // add padding to account for possible cbor expansion of final DataHash - let padding: Vec = vec![0x0; 10]; - hash.add_padding(padding); + // add the placeholder data hashes to provenance claim so that the required space is reserved + for mut hash in hashes { + // add padding to account for possible cbor expansion of final DataHash + let padding: Vec = vec![0x0; 10]; + hash.add_padding(padding); - pc.add_assertion(&hash)?; + pc.add_assertion(&hash)?; + } } // 3) Generate in memory CAI jumbf block @@ -2166,19 +2213,21 @@ impl Store { // 4) determine final object locations and patch the asset hashes with correct offset // replace the source with correct asset hashes so that the claim hash will be correct + // If box hash is present we don't do any other let pc = self.provenance_claim_mut().ok_or(Error::ClaimEncoding)?; + if pc.box_hash_assertions().is_empty() { + // get the final hash ranges, but not for update manifests + let mut new_hash_ranges = object_locations(&output_path)?; + let updated_hashes = if pc.update_manifest() { + Vec::new() + } else { + Store::generate_data_hashes(dest_path, pc.alg(), &mut new_hash_ranges, true)? + }; - // get the final hash ranges, but not for update manifests - let mut new_hash_ranges = object_locations(&output_path)?; - let updated_hashes = if pc.update_manifest() { - Vec::new() - } else { - Store::generate_data_hashes(dest_path, pc.alg(), &mut new_hash_ranges, true)? 
- }; - - // patch existing claim hash with updated data - for hash in updated_hashes { - pc.update_data_hash(hash)?; + // patch existing claim hash with updated data + for hash in updated_hashes { + pc.update_data_hash(hash)?; + } } } @@ -2228,20 +2277,28 @@ impl Store { pub fn verify_from_buffer( &mut self, buf: &[u8], - _asset_type: &str, + asset_type: &str, validation_log: &mut impl StatusTracker, ) -> Result<()> { - Store::verify_store(self, &mut ClaimAssetData::Bytes(buf), validation_log) + Store::verify_store( + self, + &mut ClaimAssetData::Bytes(buf, asset_type), + validation_log, + ) } // verify from a buffer without file i/o pub fn verify_from_stream( &mut self, reader: &mut dyn CAIRead, - _asset_type: &str, + asset_type: &str, validation_log: &mut impl StatusTracker, ) -> Result<()> { - Store::verify_store(self, &mut ClaimAssetData::Stream(reader), validation_log) + Store::verify_store( + self, + &mut ClaimAssetData::Stream(reader, asset_type), + validation_log, + ) } // fetch remote manifest if possible @@ -2502,7 +2559,11 @@ impl Store { // verify the store if verify { // verify store and claims - Store::verify_store(&store, &mut ClaimAssetData::Bytes(data), validation_log)?; + Store::verify_store( + &store, + &mut ClaimAssetData::Bytes(data, asset_type), + validation_log, + )?; } Ok(store) @@ -2525,8 +2586,12 @@ impl Store { // verify the store if verify { // verify store and claims - Store::verify_store_async(&store, &mut ClaimAssetData::Bytes(data), validation_log) - .await?; + Store::verify_store_async( + &store, + &mut ClaimAssetData::Bytes(data, asset_type), + validation_log, + ) + .await?; } Ok(store) @@ -2556,6 +2621,7 @@ impl Store { &mut ClaimAssetData::StreamFragment( &mut init_segment_stream, &mut fragment_stream, + asset_type, ), validation_log, )?; @@ -2588,7 +2654,11 @@ impl Store { // verify store and claims Store::verify_store_async( &store, - &mut ClaimAssetData::StreamFragment(&mut init_segment_stream, &mut fragment_stream), + &mut 
ClaimAssetData::StreamFragment( + &mut init_segment_stream, + &mut fragment_stream, + asset_type, + ), validation_log, ) .await?; @@ -2694,9 +2764,12 @@ pub mod tests { use super::*; use crate::{ - assertions::{Action, Actions, Ingredient, Uuid}, + assertions::{labels::BOX_HASH, Action, Actions, BoxHash, Ingredient, Uuid}, claim::{AssertionStoreJsonFormat, Claim}, - jumbf_io::{load_jumbf_from_file, save_jumbf_to_file, update_file_jumbf}, + jumbf_io::{ + get_assetio_handler_from_path, load_jumbf_from_file, save_jumbf_to_file, + update_file_jumbf, + }, status_tracker::*, utils::{ patch::patch_file, @@ -2704,7 +2777,7 @@ pub mod tests { create_test_claim, fixture_path, temp_dir_path, temp_fixture_path, temp_signer, }, }, - SigningAlg, + AssertionJson, SigningAlg, }; fn create_editing_claim(claim: &mut Claim) -> Result<&mut Claim> { @@ -4291,4 +4364,45 @@ pub mod tests { } } } + + #[test] + fn test_embeddable_manifest() { + // test adding to actual image + let ap = fixture_path("CA.jpg"); + let box_hash_path = fixture_path("boxhash.json"); + + // Create claims store. + let mut store = Store::new(); + + // Create a new claim. + let mut claim = create_test_claim().unwrap(); + + // add box hash for CA.jpg + let box_hash_data = std::fs::read(box_hash_path).unwrap(); + let assertion = Assertion::from_data_json(BOX_HASH, &box_hash_data).unwrap(); + let box_hash = BoxHash::from_json_assertion(&assertion).unwrap(); + claim.add_assertion(&box_hash).unwrap(); + + store.commit_claim(claim).unwrap(); + + // Do we generate JUMBF? 
+ let signer = temp_signer(); + + // get the embeddable manifest + let em = store.get_embeddable_manifest(signer.as_ref()).unwrap(); + + let mut report = DetailedStatusTracker::new(); + let new_store = Store::from_jumbf(&em, &mut report).unwrap(); + + let pc = new_store.provenance_claim().unwrap(); + let bhp = get_assetio_handler_from_path(&ap) + .unwrap() + .asset_box_hash_ref() + .unwrap(); + + for h in pc.box_hash_assertions() { + let bh = BoxHash::from_assertion(h).unwrap(); + bh.verify_hash(&ap, None, bhp).unwrap(); + } + } } diff --git a/sdk/src/utils/hash_utils.rs b/sdk/src/utils/hash_utils.rs index 9b425a567..73bcef79e 100644 --- a/sdk/src/utils/hash_utils.rs +++ b/sdk/src/utils/hash_utils.rs @@ -66,6 +66,10 @@ impl HashRange { self.length } + pub fn set_length(&mut self, length: usize) { + self.length = length; + } + // set offset for BMFF_V2 to be hashed in addition to data pub fn set_bmff_offset(&mut self, offset: u64) { self.bmff_offset = Some(offset); diff --git a/sdk/tests/fixtures/boxhash.json b/sdk/tests/fixtures/boxhash.json new file mode 100644 index 000000000..194df57fe --- /dev/null +++ b/sdk/tests/fixtures/boxhash.json @@ -0,0 +1 @@ +{"boxes":[{"names":["SOI","APP0"],"alg":"sha256","hash":[232,85,235,19,143,50,76,109,17,102,178,127,94,143,135,135,119,227,37,45,238,62,154,50,90,128,141,90,154,151,204,39],"pad":[]},{"names":["C2PA"],"hash":[],"pad":[]},{"names":["APP1","APP13","SOF0","DQT","DQT","DHT","DHT","DHT","DHT","SOS","EOI"],"alg":"sha256","hash":[213,99,66,179,112,116,10,48,156,242,80,116,80,15,225,208,161,109,173,142,119,188,148,157,123,161,29,106,34,105,73,237],"pad":[]}]} \ No newline at end of file