Use new checksums in manifest

Summary: This now makes use of the `tarsum` module to surface both intrinsic and extrinsic checksums as part of the manifest. I also added the file name and the file size in bytes for good measure. The manifest is output in Sandcastle logs and can help identify mismatches. Including the file name also means we no longer have to rely on piecing together paths on Sandcastle, which is nice.

Reviewed By: nikoant

Differential Revision: D24046464

fbshipit-source-id: dc6dc50eb510a067674166b5fdb11af9cf6762b2
This commit is contained in:
Pascal Hartig
2020-10-02 03:17:41 -07:00
committed by Facebook GitHub Bot
parent cf4db8be24
commit 2bbd76803c
3 changed files with 80 additions and 40 deletions

View File

@@ -5,13 +5,7 @@
* LICENSE file in the root directory of this source tree.
*/
#![warn(
clippy::all,
clippy::restriction,
clippy::pedantic,
clippy::nursery,
clippy::cargo
)]
#![warn(clippy::all, clippy::pedantic, clippy::nursery, clippy::cargo)]
mod error;
mod tarsum;
@@ -22,9 +16,9 @@ use clap::value_t_or_exit;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use std::io::{self, BufReader, BufWriter, Write};
use std::path;
use types::{PackType, Platform};
use types::{HashSum, PackType, Platform};
const DEFAULT_PACKLIST: &str = include_str!("packlist.yaml");
// This is to ensure that all progress bar prefixes are aligned.
@@ -36,11 +30,16 @@ type PackListPlatform = BTreeMap<PackType, Vec<path::PathBuf>>;
struct PackList(pub BTreeMap<Platform, PackListPlatform>);
#[derive(Debug, serde::Serialize)]
struct HashSum(String);
/// Manifest entry describing a single packed archive file.
struct PackFile {
/// File name component of the archive path, lossily converted to a string
/// (empty if the path has no file name).
file_name: String,
/// Checksum produced by `tarsum::tarsum` over the uncompressed archive.
intrinsic_checksum: HashSum,
/// SHA-256 digest of the file as stored on disk: the compressed archive when
/// one exists, otherwise the uncompressed one.
extrinsic_checksum: HashSum,
/// Size in bytes, taken from the metadata of the same file that
/// `extrinsic_checksum` is computed over.
file_bytes: u64,
}
/// Serializable manifest mapping each pack type to the entry recorded for it.
#[derive(Debug, serde::Serialize)]
struct PackManifest {
// One entry per `PackType`; the `BTreeMap` keeps the entries ordered by key.
files: BTreeMap<PackType, HashSum>,
files: BTreeMap<PackType, PackFile>,
}
fn default_progress_bar(len: u64) -> indicatif::ProgressBar {
@@ -126,7 +125,8 @@ fn pack_platform(
Ok(())
}
fn sha256_digest<R: Read>(mut reader: R) -> Result<HashSum> {
/// Calculate the sha256 checksum of a file represented by a Reader.
fn sha256_digest<R: io::Read>(mut reader: &mut R) -> Result<HashSum> {
use sha2::{Digest, Sha256};
let mut sha256 = Sha256::new();
@@ -188,11 +188,11 @@ fn main() -> Result<(), anyhow::Error> {
})?;
let archive_paths = pack(&platform, &dist_dir, &pack_list, output_directory)?;
let compressed_archive_paths = if compress {
compress_paths(&archive_paths)?
Some(compress_paths(&archive_paths)?)
} else {
archive_paths
None
};
manifest(&compressed_archive_paths, &output_directory)?;
manifest(&archive_paths, &compressed_archive_paths, &output_directory)?;
Ok(())
}
@@ -238,10 +238,11 @@ fn compress_paths(
/// Builds the manifest for the given archives with `gen_manifest` and hands it
/// to `write_manifest` together with `output_directory`.
///
/// `compressed_archive_paths` is `None` when no compression step was run.
/// Returns whatever path `write_manifest` reports; any error from either step
/// is propagated.
fn manifest(
archive_paths: &[(PackType, path::PathBuf)],
compressed_archive_paths: &Option<Vec<(PackType, path::PathBuf)>>,
output_directory: &path::PathBuf,
) -> Result<path::PathBuf> {
let archive_manifest = gen_manifest(&archive_paths)?;
write_manifest(&output_directory, &archive_manifest)
let archive_manifest = gen_manifest(archive_paths, compressed_archive_paths)?;
write_manifest(output_directory, &archive_manifest)
}
fn write_manifest(
@@ -255,35 +256,70 @@ fn write_manifest(
Ok(path)
}
/// Wraps the per-file entries computed by `gen_manifest_files` in a
/// `PackManifest`, propagating any error from that computation.
fn gen_manifest(archive_paths: &[(PackType, path::PathBuf)]) -> Result<PackManifest> {
fn gen_manifest(
archive_paths: &[(PackType, path::PathBuf)],
compressed_archive_paths: &Option<Vec<(PackType, path::PathBuf)>>,
) -> Result<PackManifest> {
Ok(PackManifest {
files: gen_manifest_files(archive_paths)?,
files: gen_manifest_files(archive_paths, compressed_archive_paths)?,
})
}
fn gen_manifest_files(
archive_paths: &[(PackType, path::PathBuf)],
) -> Result<BTreeMap<PackType, HashSum>> {
let pb = default_progress_bar(archive_paths.len() as u64 - 1);
compressed_archive_paths: &Option<Vec<(PackType, path::PathBuf)>>,
) -> Result<BTreeMap<PackType, PackFile>> {
use std::iter;
let pb = default_progress_bar((archive_paths.len() as u64 - 1) * 2);
pb.set_prefix(&format!(
"{:width$}",
"Computing manifest",
width = PROGRESS_PREFIX_LEN
));
// This looks like a lot but we're just creating an iterator that either returns the
// values of `compressed_archive_paths` if it is `Some(_)` or an infinite repetition
// of `None`. This allows us to zip it below and avoid having to rely on index
// arithmetic. The `as _` is necessary to tell rustc to perform the casts from
// something like a `std::iter::Map` to the `Iterator` trait.
let compressed_iter: Box<dyn Iterator<Item = Option<&(PackType, path::PathBuf)>>> =
compressed_archive_paths.as_ref().map_or_else(
|| Box::new(iter::repeat(None)) as _,
|inner| Box::new(inner.iter().map(Some)) as _,
);
let res = archive_paths
.iter()
.zip(compressed_iter)
.collect::<Vec<_>>()
.into_par_iter()
.map(|(pack_type, path)| {
let reader = BufReader::new(File::open(path)?);
let hash = sha256_digest(reader)?;
.map(|((pack_type, uncompressed_path), compressed)| {
// If we have a compressed path, use that one, otherwise fall back to uncompressed.
let path = compressed.map_or(uncompressed_path, |(_, p)| p);
let file_bytes = File::open(path)?.metadata()?.len();
let uncompressed_reader = BufReader::new(File::open(uncompressed_path)?);
let intrinsic_checksum = tarsum::tarsum(uncompressed_reader)?;
pb.inc(1);
Ok((*pack_type, hash))
let extrinsic_checksum = sha256_digest(&mut BufReader::new(File::open(path)?))?;
pb.inc(1);
Ok((
*pack_type,
PackFile {
file_name: path
.file_name()
// The file name is only indicative and must serialize well, so the lossy approximation is fine.
.map_or_else(|| "".to_string(), |v| v.to_string_lossy().to_string()),
intrinsic_checksum,
extrinsic_checksum,
file_bytes,
},
))
})
.collect::<Result<Vec<_>>>()?
.into_iter()
.fold(
BTreeMap::new(),
|mut acc: BTreeMap<PackType, HashSum>, (pack_type, hash_sum)| {
acc.insert(pack_type, hash_sum);
|mut acc: BTreeMap<_, _>, (pack_type, pack_file)| {
acc.insert(pack_type, pack_file);
acc
},
);
@@ -304,19 +340,20 @@ mod test {
#[test]
fn test_manifest() -> anyhow::Result<()> {
let artifact_path = path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("src")
.join("__fixtures__")
.join("archive_a.tar");
let tmp_dir = tempdir::TempDir::new("manifest_test")?;
let artifact_path = path::PathBuf::from(tmp_dir.path()).join("core.tar");
let mut artifact = File::create(&artifact_path)?;
artifact.write_all("Hello World.".as_bytes())?;
let archive_paths = &[(PackType::Core, artifact_path)];
let path = manifest(archive_paths, &tmp_dir.path().to_path_buf())?;
let path = manifest(archive_paths, &None, &tmp_dir.path().to_path_buf())?;
let manifest_content = std::fs::read_to_string(&path)?;
assert_eq!(
manifest_content,
"{\n \"files\": {\n \"core\": \"f4bb1975bf1f81f76ce824f7536c1e101a8060a632a52289d530a6f600d52c92\"\n }\n}"
"{\n \"files\": {\n \"core\": {\n \"file_name\": \"archive_a.tar\",\n \"intrinsic_checksum\": \"f360fae5e433bd5c0ac0e00dbdad22ec51691139b9ec1e6d0dbbe16e0bb4c568\",\n \"extrinsic_checksum\": \"8de80c3904d85115d1595d48c215022e5db225c920811d4d2eee80586e6390c8\",\n \"file_bytes\": 3072\n }\n }\n}"
);
Ok(())

View File

@@ -7,15 +7,13 @@
//! Intrinsic hash for a tarball.
use crate::types;
use anyhow::Result;
use std::collections;
use std::io;
#[derive(Eq, PartialEq, Debug, serde::Serialize)]
pub struct HashSum(String);
/// Computes the intrinsic SHA256 checksum of a tar archive.
pub fn tarsum<R: io::Read>(reader: R) -> Result<HashSum> {
pub fn tarsum<R: io::Read>(reader: R) -> Result<types::HashSum> {
use sha2::Digest;
let mut archive = tar::Archive::new(reader);
@@ -35,15 +33,15 @@ pub fn tarsum<R: io::Read>(reader: R) -> Result<HashSum> {
digest.input(file_hash.0);
}
let hash = digest.result();
Ok(HashSum(data_encoding::HEXLOWER.encode(&hash)))
Ok(types::HashSum(data_encoding::HEXLOWER.encode(&hash)))
}
/// Streams everything readable from `reader` into a SHA-256 digest and returns
/// the result as a lowercase hex string wrapped in a `HashSum`.
///
/// Read errors reported by `io::copy` are propagated to the caller.
fn digest_file<R: io::Read>(reader: &mut R) -> io::Result<HashSum> {
fn digest_file<R: io::Read>(reader: &mut R) -> io::Result<types::HashSum> {
use sha2::Digest;
let mut digest = sha2::Sha256::new();
// The digest is used as the write target, so the data is hashed as it is copied.
io::copy(reader, &mut digest)?;
let hash = digest.result();
Ok(HashSum(data_encoding::HEXLOWER.encode(&hash)))
Ok(types::HashSum(data_encoding::HEXLOWER.encode(&hash)))
}
#[cfg(test)]
@@ -64,7 +62,9 @@ mod test {
assert_eq!(
res,
HashSum("6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string())
types::HashSum(
"6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string()
)
);
}

View File

@@ -44,3 +44,6 @@ impl Display for PackType {
}
}
}
/// Newtype around the string form of a checksum. The `tarsum` module fills it
/// with a lowercase hex-encoded SHA-256 digest.
#[derive(Eq, PartialEq, Debug, serde::Serialize)]
pub struct HashSum(pub String);