diff --git a/packer/src/main.rs b/packer/src/main.rs index 439a1e18f..0cc9d70e8 100644 --- a/packer/src/main.rs +++ b/packer/src/main.rs @@ -5,13 +5,7 @@ * LICENSE file in the root directory of this source tree. */ -#![warn( - clippy::all, - clippy::restriction, - clippy::pedantic, - clippy::nursery, - clippy::cargo -)] +#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)] mod error; mod tarsum; @@ -22,9 +16,9 @@ use clap::value_t_or_exit; use rayon::prelude::{IntoParallelIterator, ParallelIterator}; use std::collections::BTreeMap; use std::fs::File; -use std::io::{BufReader, BufWriter, Read, Write}; +use std::io::{self, BufReader, BufWriter, Write}; use std::path; -use types::{PackType, Platform}; +use types::{HashSum, PackType, Platform}; const DEFAULT_PACKLIST: &str = include_str!("packlist.yaml"); // This is to ensure that all progress bar prefixes are aligned. @@ -36,11 +30,16 @@ type PackListPlatform = BTreeMap>; struct PackList(pub BTreeMap); #[derive(Debug, serde::Serialize)] -struct HashSum(String); +struct PackFile { + file_name: String, + intrinsic_checksum: HashSum, + extrinsic_checksum: HashSum, + file_bytes: u64, +} #[derive(Debug, serde::Serialize)] struct PackManifest { - files: BTreeMap, + files: BTreeMap, } fn default_progress_bar(len: u64) -> indicatif::ProgressBar { @@ -126,7 +125,8 @@ fn pack_platform( Ok(()) } -fn sha256_digest(mut reader: R) -> Result { +/// Calculate the sha256 checksum of a file represented by a Reader. +fn sha256_digest(mut reader: &mut R) -> Result { use sha2::{Digest, Sha256}; let mut sha256 = Sha256::new(); @@ -188,11 +188,11 @@ fn main() -> Result<(), anyhow::Error> { })?; let archive_paths = pack(&platform, &dist_dir, &pack_list, output_directory)?; let compressed_archive_paths = if compress { - compress_paths(&archive_paths)? + Some(compress_paths(&archive_paths)?) } else { - archive_paths + None }; - manifest(&compressed_archive_paths, &output_directory)?; + manifest(&archive_paths, &compressed_archive_paths, &output_directory)?; Ok(()) } @@ -238,10 +238,11 @@ fn compress_paths( fn manifest( archive_paths: &[(PackType, path::PathBuf)], + compressed_archive_paths: &Option>, output_directory: &path::PathBuf, ) -> Result { - let archive_manifest = gen_manifest(&archive_paths)?; - write_manifest(&output_directory, &archive_manifest) + let archive_manifest = gen_manifest(archive_paths, compressed_archive_paths)?; + write_manifest(output_directory, &archive_manifest) } fn write_manifest( @@ -255,35 +256,70 @@ fn write_manifest( Ok(path) } -fn gen_manifest(archive_paths: &[(PackType, path::PathBuf)]) -> Result { +fn gen_manifest( + archive_paths: &[(PackType, path::PathBuf)], + compressed_archive_paths: &Option>, +) -> Result { Ok(PackManifest { - files: gen_manifest_files(archive_paths)?, + files: gen_manifest_files(archive_paths, compressed_archive_paths)?, }) } fn gen_manifest_files( archive_paths: &[(PackType, path::PathBuf)], -) -> Result> { - let pb = default_progress_bar(archive_paths.len() as u64 - 1); + compressed_archive_paths: &Option>, +) -> Result> { + use std::iter; + let pb = default_progress_bar((archive_paths.len() as u64 - 1) * 2); pb.set_prefix(&format!( "{:width$}", "Computing manifest", width = PROGRESS_PREFIX_LEN )); + // This looks like a lot but we're just creating an iterator that either returns the + // values of `compressed_archive_paths` if it is `Some(_)` or an infinite repetition + // of `None`. This allows us to zip it below and avoid having to rely on index + // arithmetic. The `as _` is necessary to tell rustc to perform the casts from + // something like a `std::iter::Map` to the `Iterator` trait. + let compressed_iter: Box>> = + compressed_archive_paths.as_ref().map_or_else( + || Box::new(iter::repeat(None)) as _, + |inner| Box::new(inner.iter().map(Some)) as _, + ); + let res = archive_paths + .iter() + .zip(compressed_iter) + .collect::>() .into_par_iter() - .map(|(pack_type, path)| { - let reader = BufReader::new(File::open(path)?); - let hash = sha256_digest(reader)?; + .map(|((pack_type, uncompressed_path), compressed)| { + // If we have a compressed path, use that one, otherwise fall back to uncompressed. + let path = compressed.map_or(uncompressed_path, |(_, p)| p); + let file_bytes = File::open(path)?.metadata()?.len(); + let uncompressed_reader = BufReader::new(File::open(uncompressed_path)?); + let intrinsic_checksum = tarsum::tarsum(uncompressed_reader)?; pb.inc(1); - Ok((*pack_type, hash)) + let extrinsic_checksum = sha256_digest(&mut BufReader::new(File::open(path)?))?; + pb.inc(1); + Ok(( + *pack_type, + PackFile { + file_name: path + .file_name() + // The file name is only indicative and must serialize well, so the lossy approximation is fine. + .map_or_else(|| "".to_string(), |v| v.to_string_lossy().to_string()), + intrinsic_checksum, + extrinsic_checksum, + file_bytes, + }, + )) }) .collect::>>()? .into_iter() .fold( BTreeMap::new(), - |mut acc: BTreeMap, (pack_type, hash_sum)| { - acc.insert(pack_type, hash_sum); + |mut acc: BTreeMap<_, _>, (pack_type, pack_file)| { + acc.insert(pack_type, pack_file); acc }, ); @@ -304,19 +340,20 @@ mod test { #[test] fn test_manifest() -> anyhow::Result<()> { + let artifact_path = path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("src") + .join("__fixtures__") + .join("archive_a.tar"); let tmp_dir = tempdir::TempDir::new("manifest_test")?; - let artifact_path = path::PathBuf::from(tmp_dir.path()).join("core.tar"); - let mut artifact = File::create(&artifact_path)?; - artifact.write_all("Hello World.".as_bytes())?; let archive_paths = &[(PackType::Core, artifact_path)]; - let path = manifest(archive_paths, &tmp_dir.path().to_path_buf())?; + let path = manifest(archive_paths, &None, &tmp_dir.path().to_path_buf())?; let manifest_content = std::fs::read_to_string(&path)?; assert_eq!( manifest_content, - "{\n \"files\": {\n \"core\": \"f4bb1975bf1f81f76ce824f7536c1e101a8060a632a52289d530a6f600d52c92\"\n }\n}" + "{\n \"files\": {\n \"core\": {\n \"file_name\": \"archive_a.tar\",\n \"intrinsic_checksum\": \"f360fae5e433bd5c0ac0e00dbdad22ec51691139b9ec1e6d0dbbe16e0bb4c568\",\n \"extrinsic_checksum\": \"8de80c3904d85115d1595d48c215022e5db225c920811d4d2eee80586e6390c8\",\n \"file_bytes\": 3072\n }\n }\n}" ); Ok(()) diff --git a/packer/src/tarsum.rs b/packer/src/tarsum.rs index 5a711eb2e..b9852da7e 100644 --- a/packer/src/tarsum.rs +++ b/packer/src/tarsum.rs @@ -7,15 +7,13 @@ //! Intrinsic hash for a tarball. +use crate::types; use anyhow::Result; use std::collections; use std::io; -#[derive(Eq, PartialEq, Debug, serde::Serialize)] -pub struct HashSum(String); - /// Computes the intrinsic SHA256 checksum of a tar archive. -pub fn tarsum(reader: R) -> Result { +pub fn tarsum(reader: R) -> Result { use sha2::Digest; let mut archive = tar::Archive::new(reader); @@ -35,15 +33,15 @@ pub fn tarsum(reader: R) -> Result { digest.input(file_hash.0); } let hash = digest.result(); - Ok(HashSum(data_encoding::HEXLOWER.encode(&hash))) + Ok(types::HashSum(data_encoding::HEXLOWER.encode(&hash))) } -fn digest_file(reader: &mut R) -> io::Result { +fn digest_file(reader: &mut R) -> io::Result { use sha2::Digest; let mut digest = sha2::Sha256::new(); io::copy(reader, &mut digest)?; let hash = digest.result(); - Ok(HashSum(data_encoding::HEXLOWER.encode(&hash))) + Ok(types::HashSum(data_encoding::HEXLOWER.encode(&hash))) } #[cfg(test)] @@ -64,7 +62,9 @@ mod test { assert_eq!( res, - HashSum("6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string()) + types::HashSum( + "6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string() + ) ); } diff --git a/packer/src/types.rs b/packer/src/types.rs index a80bcaab3..c86f6c054 100644 --- a/packer/src/types.rs +++ b/packer/src/types.rs @@ -44,3 +44,6 @@ impl Display for PackType { } } } + +#[derive(Eq, PartialEq, Debug, serde::Serialize)] +pub struct HashSum(pub String);