From 2bbd76803c178bcc595d5337c4e5abf3bc276112 Mon Sep 17 00:00:00 2001
From: Pascal Hartig
Date: Fri, 2 Oct 2020 03:17:41 -0700
Subject: [PATCH] Use new checksums in manifest

Summary:
This now makes use of the `tarsum` mod to surface both intrinsic and
extrinsic checksums as part of the manifest. I also threw in the file name
and the file size in bytes for good measure. This is output in Sandcastle
logs and can help identify mismatches. The file name also means we no longer
have to rely on piecing together paths on Sandcastle, which is nice.

Reviewed By: nikoant

Differential Revision: D24046464

fbshipit-source-id: dc6dc50eb510a067674166b5fdb11af9cf6762b2
---
 packer/src/main.rs   | 101 +++++++++++++++++++++++++++++--------------
 packer/src/tarsum.rs |  16 +++----
 packer/src/types.rs  |   3 ++
 3 files changed, 80 insertions(+), 40 deletions(-)

diff --git a/packer/src/main.rs b/packer/src/main.rs
index 439a1e18f..0cc9d70e8 100644
--- a/packer/src/main.rs
+++ b/packer/src/main.rs
@@ -5,13 +5,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#![warn(
-    clippy::all,
-    clippy::restriction,
-    clippy::pedantic,
-    clippy::nursery,
-    clippy::cargo
-)]
+#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
 
 mod error;
 mod tarsum;
@@ -22,9 +16,9 @@ use clap::value_t_or_exit;
 use rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use std::collections::BTreeMap;
 use std::fs::File;
-use std::io::{BufReader, BufWriter, Read, Write};
+use std::io::{self, BufReader, BufWriter, Write};
 use std::path;
-use types::{PackType, Platform};
+use types::{HashSum, PackType, Platform};
 
 const DEFAULT_PACKLIST: &str = include_str!("packlist.yaml");
 // This is to ensure that all progress bar prefixes are aligned.
@@ -36,11 +30,16 @@ type PackListPlatform = BTreeMap<PackType, Vec<String>>;
 struct PackList(pub BTreeMap<Platform, PackListPlatform>);
 
 #[derive(Debug, serde::Serialize)]
-struct HashSum(String);
+struct PackFile {
+    file_name: String,
+    intrinsic_checksum: HashSum,
+    extrinsic_checksum: HashSum,
+    file_bytes: u64,
+}
 
 #[derive(Debug, serde::Serialize)]
 struct PackManifest {
-    files: BTreeMap<PackType, HashSum>,
+    files: BTreeMap<PackType, PackFile>,
 }
 
 fn default_progress_bar(len: u64) -> indicatif::ProgressBar {
@@ -126,7 +125,8 @@ fn pack_platform(
     Ok(())
 }
 
-fn sha256_digest<R: Read>(mut reader: R) -> Result<HashSum> {
+/// Calculate the sha256 checksum of a file represented by a Reader.
+fn sha256_digest<R: io::Read>(mut reader: &mut R) -> Result<HashSum> {
     use sha2::{Digest, Sha256};
 
     let mut sha256 = Sha256::new();
@@ -188,11 +188,11 @@ fn main() -> Result<(), anyhow::Error> {
     })?;
     let archive_paths = pack(&platform, &dist_dir, &pack_list, output_directory)?;
    let compressed_archive_paths = if compress {
-        compress_paths(&archive_paths)?
+        Some(compress_paths(&archive_paths)?)
     } else {
-        archive_paths
+        None
     };
-    manifest(&compressed_archive_paths, &output_directory)?;
+    manifest(&archive_paths, &compressed_archive_paths, &output_directory)?;
 
     Ok(())
 }
@@ -238,10 +238,11 @@ fn compress_paths(
 
 fn manifest(
     archive_paths: &[(PackType, path::PathBuf)],
+    compressed_archive_paths: &Option<Vec<(PackType, path::PathBuf)>>,
     output_directory: &path::PathBuf,
 ) -> Result<path::PathBuf> {
-    let archive_manifest = gen_manifest(&archive_paths)?;
-    write_manifest(&output_directory, &archive_manifest)
+    let archive_manifest = gen_manifest(archive_paths, compressed_archive_paths)?;
+    write_manifest(output_directory, &archive_manifest)
 }
 
 fn write_manifest(
@@ -255,35 +256,70 @@ fn write_manifest(
     Ok(path)
 }
 
-fn gen_manifest(archive_paths: &[(PackType, path::PathBuf)]) -> Result<PackManifest> {
+fn gen_manifest(
+    archive_paths: &[(PackType, path::PathBuf)],
+    compressed_archive_paths: &Option<Vec<(PackType, path::PathBuf)>>,
+) -> Result<PackManifest> {
     Ok(PackManifest {
-        files: gen_manifest_files(archive_paths)?,
+        files: gen_manifest_files(archive_paths, compressed_archive_paths)?,
     })
 }
 
 fn gen_manifest_files(
     archive_paths: &[(PackType, path::PathBuf)],
-) -> Result<BTreeMap<PackType, HashSum>> {
-    let pb = default_progress_bar(archive_paths.len() as u64 - 1);
+    compressed_archive_paths: &Option<Vec<(PackType, path::PathBuf)>>,
+) -> Result<BTreeMap<PackType, PackFile>> {
+    use std::iter;
+    let pb = default_progress_bar((archive_paths.len() as u64 - 1) * 2);
     pb.set_prefix(&format!(
         "{:width$}",
         "Computing manifest",
         width = PROGRESS_PREFIX_LEN
     ));
+    // This looks like a lot, but we're just creating an iterator that either returns the
+    // values of `compressed_archive_paths` if it is `Some(_)` or an infinite repetition
+    // of `None`. This allows us to zip it below and avoid having to rely on index
+    // arithmetic. The `as _` is necessary to tell rustc to perform the casts from
+    // something like a `std::iter::Map` to the `Iterator` trait.
+    let compressed_iter: Box<dyn Iterator<Item = Option<&(PackType, path::PathBuf)>>> =
+        compressed_archive_paths.as_ref().map_or_else(
+            || Box::new(iter::repeat(None)) as _,
+            |inner| Box::new(inner.iter().map(Some)) as _,
+        );
+
     let res = archive_paths
+        .iter()
+        .zip(compressed_iter)
+        .collect::<Vec<_>>()
         .into_par_iter()
-        .map(|(pack_type, path)| {
-            let reader = BufReader::new(File::open(path)?);
-            let hash = sha256_digest(reader)?;
+        .map(|((pack_type, uncompressed_path), compressed)| {
+            // If we have a compressed path, use that one, otherwise fall back to uncompressed.
+            let path = compressed.map_or(uncompressed_path, |(_, p)| p);
+            let file_bytes = File::open(path)?.metadata()?.len();
+            let uncompressed_reader = BufReader::new(File::open(uncompressed_path)?);
+            let intrinsic_checksum = tarsum::tarsum(uncompressed_reader)?;
             pb.inc(1);
-            Ok((*pack_type, hash))
+            let extrinsic_checksum = sha256_digest(&mut BufReader::new(File::open(path)?))?;
+            pb.inc(1);
+            Ok((
+                *pack_type,
+                PackFile {
+                    file_name: path
+                        .file_name()
+                        // The file name is only indicative and must serialize well, so the lossy approximation is fine.
+                        .map_or_else(|| "".to_string(), |v| v.to_string_lossy().to_string()),
+                    intrinsic_checksum,
+                    extrinsic_checksum,
+                    file_bytes,
+                },
+            ))
         })
         .collect::<Result<Vec<_>>>()?
         .into_iter()
         .fold(
             BTreeMap::new(),
-            |mut acc: BTreeMap<PackType, HashSum>, (pack_type, hash_sum)| {
-                acc.insert(pack_type, hash_sum);
+            |mut acc: BTreeMap<_, _>, (pack_type, pack_file)| {
+                acc.insert(pack_type, pack_file);
                 acc
             },
         );
@@ -304,19 +340,20 @@ mod test {
 
     #[test]
     fn test_manifest() -> anyhow::Result<()> {
+        let artifact_path = path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("src")
+            .join("__fixtures__")
+            .join("archive_a.tar");
         let tmp_dir = tempdir::TempDir::new("manifest_test")?;
-        let artifact_path = path::PathBuf::from(tmp_dir.path()).join("core.tar");
-        let mut artifact = File::create(&artifact_path)?;
-        artifact.write_all("Hello World.".as_bytes())?;
 
         let archive_paths = &[(PackType::Core, artifact_path)];
-        let path = manifest(archive_paths, &tmp_dir.path().to_path_buf())?;
+        let path = manifest(archive_paths, &None, &tmp_dir.path().to_path_buf())?;
         let manifest_content = std::fs::read_to_string(&path)?;
 
         assert_eq!(
             manifest_content,
-            "{\n  \"files\": {\n    \"core\": \"f4bb1975bf1f81f76ce824f7536c1e101a8060a632a52289d530a6f600d52c92\"\n  }\n}"
+            "{\n  \"files\": {\n    \"core\": {\n      \"file_name\": \"archive_a.tar\",\n      \"intrinsic_checksum\": \"f360fae5e433bd5c0ac0e00dbdad22ec51691139b9ec1e6d0dbbe16e0bb4c568\",\n      \"extrinsic_checksum\": \"8de80c3904d85115d1595d48c215022e5db225c920811d4d2eee80586e6390c8\",\n      \"file_bytes\": 3072\n    }\n  }\n}"
         );
 
         Ok(())
diff --git a/packer/src/tarsum.rs b/packer/src/tarsum.rs
index 5a711eb2e..b9852da7e 100644
--- a/packer/src/tarsum.rs
+++ b/packer/src/tarsum.rs
@@ -7,15 +7,13 @@
 
 //! Intrinsic hash for a tarball.
 
+use crate::types;
 use anyhow::Result;
 use std::collections;
 use std::io;
 
-#[derive(Eq, PartialEq, Debug, serde::Serialize)]
-pub struct HashSum(String);
-
 /// Computes the intrinsic SHA256 checksum of a tar archive.
-pub fn tarsum<R: io::Read>(reader: R) -> Result<HashSum> {
+pub fn tarsum<R: io::Read>(reader: R) -> Result<types::HashSum> {
     use sha2::Digest;
 
     let mut archive = tar::Archive::new(reader);
@@ -35,15 +33,15 @@ pub fn tarsum<R: io::Read>(reader: R) -> Result<HashSum> {
         digest.input(file_hash.0);
     }
     let hash = digest.result();
-    Ok(HashSum(data_encoding::HEXLOWER.encode(&hash)))
+    Ok(types::HashSum(data_encoding::HEXLOWER.encode(&hash)))
 }
 
-fn digest_file<R: io::Read>(reader: &mut R) -> io::Result<HashSum> {
+fn digest_file<R: io::Read>(reader: &mut R) -> io::Result<types::HashSum> {
     use sha2::Digest;
     let mut digest = sha2::Sha256::new();
     io::copy(reader, &mut digest)?;
     let hash = digest.result();
-    Ok(HashSum(data_encoding::HEXLOWER.encode(&hash)))
+    Ok(types::HashSum(data_encoding::HEXLOWER.encode(&hash)))
 }
 
 #[cfg(test)]
@@ -64,7 +62,9 @@ mod test {
 
         assert_eq!(
             res,
-            HashSum("6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string())
+            types::HashSum(
+                "6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string()
+            )
         );
     }
 }
diff --git a/packer/src/types.rs b/packer/src/types.rs
index a80bcaab3..c86f6c054 100644
--- a/packer/src/types.rs
+++ b/packer/src/types.rs
@@ -44,3 +44,6 @@ impl Display for PackType {
         }
     }
 }
+
+#[derive(Eq, PartialEq, Debug, serde::Serialize)]
+pub struct HashSum(pub String);
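Note for reviewers: the boxed `repeat(None)` / `map(Some)` iterator in `gen_manifest_files` above can look dense on first read. The following is a minimal, self-contained sketch of the same pattern with purely illustrative names (`pair_up`, `primary`, and `secondary` are not part of this diff): one arm boxes the real values wrapped in `Some`, the other boxes an endless stream of `None`, and `as _` coerces both arms to the same trait object so the caller can `zip` without any index arithmetic.

use std::iter;

/// Pair every primary value with an optional secondary value, without index math.
/// When `secondary` is `None`, every primary item is paired with `None`.
fn pair_up<'a>(primary: &'a [u32], secondary: Option<&'a Vec<u32>>) -> Vec<(u32, Option<u32>)> {
    // Either an iterator over the real secondary values, or an infinite run of `None`.
    // The `as _` casts coerce both concrete iterator types to the same boxed trait object.
    let secondary_iter: Box<dyn Iterator<Item = Option<u32>> + 'a> = secondary.map_or_else(
        || Box::new(iter::repeat(None)) as _,
        |inner| Box::new(inner.iter().copied().map(Some)) as _,
    );

    primary.iter().copied().zip(secondary_iter).collect()
}

fn main() {
    // With a secondary list present, values line up pairwise.
    assert_eq!(
        pair_up(&[1, 2], Some(&vec![10, 20])),
        vec![(1, Some(10)), (2, Some(20))]
    );
    // Without one, the `repeat(None)` arm fills in.
    assert_eq!(pair_up(&[1, 2], None), vec![(1, None), (2, None)]);
}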