From cf4db8be24451da8cffebf83dc46c9c745ca9a3b Mon Sep 17 00:00:00 2001
From: Pascal Hartig
Date: Fri, 2 Oct 2020 03:17:41 -0700
Subject: [PATCH] Build intrinsic archive sum

Summary:
# Problem

Oddly enough, since the addition of ReactiveCocoa to Electron on macOS, we're seeing non-determinism when creating tar archives. P144049799 shows the difference between two release artifacts built on Sandcastle. Their contents are identical but they're packed differently.

# Approach

To counter this, I'm now calculating the checksum not based on the archive itself but on its *sorted* contents: the hash of every file is first inserted into a `BTreeMap` with the file path as key, and all of those hashes are then hashed in key order, building some sort of bastardised Merkle tree.

# This diff

I'm only implementing the hashing here. The next diff will actually make use of this.

# Further steps

This requires a few more downstream changes which will require some finesse to roll out.

- The manifest will need to include both the intrinsic and extrinsic checksums as Launcher depends on the latter to verify the download integrity. We could also calculate the intrinsic checksum again, but that just adds more complexity which is hard to debug.
- Sandcastle will need to understand the new manifest format and we need to schedule the MSDK rollout accordingly. Not a huge problem as the call volume is low and manual.
- We need to modify the artifact and release ents to contain both checksums.
- The release endpoint needs to be modified to return the checksum the launcher cares about.
Reviewed By: nikoant Differential Revision: D24024011 fbshipit-source-id: 55de748178c033c18a69c79c68f12e7c1aaf4deb --- packer/src/__fixtures__/archive_a.tar | Bin 0 -> 3072 bytes packer/src/__fixtures__/archive_b.tar | Bin 0 -> 3072 bytes packer/src/__fixtures__/nested_archive.tar | Bin 0 -> 6144 bytes packer/src/main.rs | 1 + packer/src/tarsum.rs | 94 +++++++++++++++++++++ 5 files changed, 95 insertions(+) create mode 100644 packer/src/__fixtures__/archive_a.tar create mode 100644 packer/src/__fixtures__/archive_b.tar create mode 100644 packer/src/__fixtures__/nested_archive.tar create mode 100644 packer/src/tarsum.rs diff --git a/packer/src/__fixtures__/archive_a.tar b/packer/src/__fixtures__/archive_a.tar new file mode 100644 index 0000000000000000000000000000000000000000..03aa37df7d5358f44bef5d03ea73c2091eb6c6ca GIT binary patch literal 3072 zcmeH`!EVAZ42FB=Df|Evb7|5Z(WZ&*vh4s8XRmeDYSl8irW-e2W3R`PP$;T|IDv%L zLvnF`$BFDe`bg5EZ1|qW`m4ch79z&RK+wh{wugw5PPeIjHi9rpA{&}XYz<&&VuJzT zZQ40~byX`#+nO*{-&EJ@)8k&XWqIDK^G!ENHyk3I7YsAAEJ(MTzZw<=!z-z>+`$ty zWj2L3ZdhPJpZo<6FfW{Fx>9)L+zC%d!yL$Wb&&&f{Wb4^cfdO^SO<0;VOF{T literal 0 HcmV?d00001 diff --git a/packer/src/__fixtures__/archive_b.tar b/packer/src/__fixtures__/archive_b.tar new file mode 100644 index 0000000000000000000000000000000000000000..535e081df4c3f1578cb5f1016b9e6addc4542a0c GIT binary patch literal 3072 zcmeHH!EVAZ4DESe;SZRYOOy79Hch|V<#`uf9@+?WilGDFw(fe3Uau!9iAI}oI(AVqI>Lth zPX<2k@m`2@^HRU6yFtW82+28zLEVdOPbRF1L#k;&vpnN8W1QtcXeI<50FPMHe(JSH z=|X8vN%JDT-|R=a58<`NqMPbOyM5G~3=KMCXB@~K|8LfrVvO$?q1J$hU|YU_;a?p1 z&sgA}a~Aj~5o$S_^2+&dc)vM9B_*x|fmZ*8cG128LH~dDe=!k3|BPik`F|fru;ss0 z#@Ms)f7p-FXuu3f>M_B-^YP@ zQf{eag)mzqDz}o=2(VOOG@?;Z%Blb_Hy$+JdO*EWfEJvcKSMp_tTE$cAAK+tXN{lw zmz53FyMMQVQ~D=7-|K&S|I0ZcN&j&e@6LLUUmu?yrpCadUY0n5X;o~7u**M$-(#3r YW0#fkYM7MXMK47OfrLOp;Bpc81~M?yA^-pY literal 0 HcmV?d00001 diff --git a/packer/src/main.rs b/packer/src/main.rs index b466b1feb..439a1e18f 100644 --- a/packer/src/main.rs +++ 
b/packer/src/main.rs
@@ -14,6 +14,7 @@
 )]
 
 mod error;
+mod tarsum;
 mod types;
 
 use anyhow::{bail, Context, Result};
diff --git a/packer/src/tarsum.rs b/packer/src/tarsum.rs
new file mode 100644
index 000000000..5a711eb2e
--- /dev/null
+++ b/packer/src/tarsum.rs
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+//! Intrinsic hash for a tarball.
+
+use anyhow::Result;
+use std::collections;
+use std::io;
+
+/// Hex-encoded checksum string, as produced by [`tarsum`].
+#[derive(Eq, PartialEq, Debug, serde::Serialize)]
+pub struct HashSum(String);
+
+/// Computes the intrinsic SHA256 checksum of a tar archive.
+///
+/// The contents of every entry are hashed individually and the hex-encoded
+/// per-entry hashes are then fed, ordered by entry path, into one final digest.
+/// Paths only determine that order; they are not part of the hashed data. If a
+/// path occurs more than once, only the last entry with that path is counted.
+///
+/// # Errors
+///
+/// Returns an error if listing the entries or reading an entry's path or contents fails.
+pub fn tarsum<R: io::Read>(reader: R) -> Result<HashSum> {
+    use sha2::Digest;
+
+    let mut archive = tar::Archive::new(reader);
+    let mut map = collections::BTreeMap::new();
+
+    // Key every entry hash by its path. `BTreeMap` iterates in key order, so the
+    // hash of hashes is independent of the file order inside the archive.
+    for entry in archive.entries()? {
+        let mut e = entry?;
+        let path = e.path()?.into_owned();
+        map.insert(path, digest_file(&mut e)?);
+    }
+
+    let mut digest = sha2::Sha256::new();
+    for (_, file_hash) in map {
+        digest.input(file_hash.0);
+    }
+    let hash = digest.result();
+    Ok(HashSum(data_encoding::HEXLOWER.encode(&hash)))
+}
+
+/// Streams everything `reader` yields into a SHA256 digest and returns it hex-encoded.
+fn digest_file<R: io::Read>(reader: &mut R) -> io::Result<HashSum> {
+    use sha2::Digest;
+    let mut digest = sha2::Sha256::new();
+    io::copy(reader, &mut digest)?;
+    let hash = digest.result();
+    Ok(HashSum(data_encoding::HEXLOWER.encode(&hash)))
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use std::fs;
+    use std::path;
+
+    #[test]
+    fn test_nested_archive_tarsum() {
+        // This is an archive with a nested directory structure.
+        let archive_path = path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("src")
+            .join("__fixtures__")
+            .join("nested_archive.tar");
+        let reader = fs::File::open(archive_path).unwrap();
+        let res = tarsum(reader).unwrap();
+
+        assert_eq!(
+            res,
+            HashSum("6f92565bb50b9469494b3e1ad668f5d809caa3ffb534c3e56dec75f7ea7912df".to_string())
+        );
+    }
+
+    #[test]
+    fn test_differently_ordered_archives() {
+        // These archives have equivalent contents but were created in reverse ways:
+        // $ tar cf archive_a.tar archive/a.txt
+        // $ tar cf archive_b.tar archive/b.txt
+        // $ tar rf archive_a.tar archive/b.txt
+        // $ tar rf archive_b.tar archive/a.txt
+        // $ gsha256sum archive_*.tar
+        // 8de80c3904d85115d1595d48c215022e5db225c920811d4d2eee80586e6390c8  archive_a.tar
+        // 60097b704cb1684f52f7e98e98193595ea2876047e9ecc6931db97757bc8a5fd  archive_b.tar
+        let fixture_path = path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("src")
+            .join("__fixtures__");
+        let archive_a = fixture_path.join("archive_a.tar");
+        let archive_b = fixture_path.join("archive_b.tar");
+
+        let reader_a = fs::File::open(archive_a).unwrap();
+        let reader_b = fs::File::open(archive_b).unwrap();
+        let res_a = tarsum(reader_a).unwrap();
+        let res_b = tarsum(reader_b).unwrap();
+
+        assert_eq!(res_a, res_b);
+    }
+}