From cde3a924aec6bcd47ede650572bffa8c414243cf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 27 Feb 2017 13:50:00 -0400 Subject: make fsck check annex.securehashesonly, and new tip for working around SHA1 collisions with git-annex This commit was sponsored by andrea rota. --- CHANGELOG | 8 ++-- Command/Fsck.hs | 11 ++++- doc/git-annex.mdwn | 12 ++++++ doc/tips/using_signed_git_commits.mdwn | 79 ++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 4 deletions(-) create mode 100644 doc/tips/using_signed_git_commits.mdwn diff --git a/CHANGELOG b/CHANGELOG index 92fc5b41b..cc5bf6d52 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,9 +2,11 @@ git-annex (6.20170215) UNRELEASED; urgency=medium * Cryptographically secure hashes can be forced to be used in a repository, by setting annex.securehashesonly. - This does not prevent the git repository from containing files - with insecure hashes, but it does prevent the content of such files - from being added to .git/annex/objects. + This does not prevent the git repository from containing links + to insecure hashes, but it does prevent the content of such files + from being added to .git/annex/objects by any method. + * fsck: Warn about any files whose content is present, that don't + use secure hashes, when annex.securehashesonly is set. * sync, merge: Fail when the current branch has no commits yet, instead of not merging in anything from remotes and appearing to succeed. * Run ssh with -n whenever input is not being piped into it, diff --git a/Command/Fsck.hs b/Command/Fsck.hs index f1b0b78a6..f20059bd1 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2016 Joey Hess + - Copyright 2010-2017 Joey Hess - - Licensed under the GNU GPL version 3 or higher. 
-} @@ -35,6 +35,7 @@ import Utility.PID import qualified Database.Keys import qualified Database.Fsck as FsckDb import Types.CleanupActions +import Types.Key import Data.Time.Clock.POSIX import System.Posix.Types (EpochTime) @@ -234,6 +235,14 @@ verifyLocationLog key keystatus desc = do whenM (liftIO $ doesDirectoryExist $ parentDir obj) $ freezeContentDir obj + {- Warn when annex.securehashesonly is set and content using an + - insecure hash is present. This should only be able to happen + - if the repository already contained the content before the + - config was set. -} + when (present && not (cryptographicallySecure (keyVariety key))) $ + whenM (annexSecureHashesOnly <$> Annex.getGitConfig) $ + warning $ "** Despite annex.securehashesonly being set, " ++ obj ++ " has content present in the annex using an insecure " ++ formatKeyVariety (keyVariety key) ++ " key" + {- In direct mode, modified files will show up as not present, - but that is expected and not something to do anything about. -} if direct && not present diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 1c905766d..2f7635f41 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -829,6 +829,18 @@ Here are all the supported configuration settings. This is overridden by annex annex.backend configuration in the .gitattributes files. +* `annex.securehashesonly` + + Set to true to indicate that the repository should only use + cryptographically secure hashes + (SHA2, SHA3) and not insecure hashes (MD5, SHA1) for content. + + When this is set, the contents of files using cryptographically + insecure hashes will not be allowed to be added to the repository. + + Also, `git-annex fsck` will complain about any files present in + the repository that use insecure hashes. + * `annex.diskreserve` Amount of disk space to reserve. 
Disk space is checked when transferring diff --git a/doc/tips/using_signed_git_commits.mdwn b/doc/tips/using_signed_git_commits.mdwn new file mode 100644 index 000000000..7b1c07edf --- /dev/null +++ b/doc/tips/using_signed_git_commits.mdwn @@ -0,0 +1,79 @@ +Git uses SHA1, which is becoming increasingly broken. Using git-annex +and signed commits, we can work around the weaknesses of SHA1, and +let anyone who clones a repository verify that the data they receive +is the same data that was originally committed to it. + +This is recommended if you are storing any kind of binary +files in a git repository. + +### How to do it + +You need git-annex 6.20170228. Upgrade if you don't have it. + +git-annex can use many types of [[backends]] and not all of them are +secure. So, you need to configure git-annex to only use +cryptographically secure hashes. Also, let's make sure annex.verify +is set (it is by default, but let's override any global gitconfig setting +for it). + + git config annex.securehashesonly true + git config annex.verify true + +That needs to be run in every clone of the repository. This will prevent +any annexed object using an insecure hash from reaching your repository, +and it will verify the hashes when transferring objects. + +It's important that all commits to the git repository are signed. +Use `git commit --gpg-sign`, or enable the commit.gpgSign configuration. + +Use `git log --show-signature` to check the signatures of commits. +If the signature is valid, it guarantees that all annexed files +have the same content that was originally committed. + +### Why is this more secure than git alone? + +SHA1 collisions exist now, and can be produced using a common-prefix +attack. See https://shattered.io/ for details. Let's assume that a chosen-prefix +attack against SHA1 will also become feasible. However, a full preimage +attack still seems unlikely, so we won't consider such attacks in the +analysis below. 
+ +The reason that git-annex can work around git's problematic use of SHA1 is +that git-annex uses other, [[stronger hashes|backends]] of the contents of +annexed files. For example, an annexed file may be a symlink to +".git/annex/objects/Ab/Cd/SHA256--eb45a55eb8756646e244e6c5f47349294568d58a9321244f4ee09a163da23a27". + +Such a symlink is stored as a git blob object. The SHA1s of the git blobs +are listed in a git tree object, and the git commit object contains the +SHA1 of the tree. Finally, the commit object is gpg signed. + +So, by checking the signature of a commit (`git log --show-signature`), +you can verify that this is the same commit that was originally made +to the repository. As far as the git developers know, there is no way +to produce multiple colliding git tree objects (at least not without +creating files with spectacularly ugly and long names), so you +know that the tree object pointed to by the signed commit is the original one. + +Now, what about the blob objects that the tree lists? If these blobs +were regular git files, a SHA1 collision could mean your git repository +does not contain the same file that was originally committed, and the signed +commit would not help. + +But, if the blob object is a git-annex symlink target, it has to contain the +strong hash of the file content. If a SHA1 collision swaps in some other +blob object, it will need to contain the strong hash of a different file's +content. The current common-prefix attack cannot do that. + +A chosen-prefix attack could make two blobs containing different strong hashes have the same SHA1, +but it would need to include additional data after the hash to do it. Since +git-annex version 6.20170224, there is no place for an attacker to +put such data in a git-annex symlink target. (See +[[todo/sha1_collision_embedding_in_git-annex_keys]] for details +of how this was prevented.) 
+ +So, we have a SHA1 chain from the gpg signature to the git-annex symlink target, +and at no point in the chain is a SHA1 collision attack feasible. +Finally, git-annex verifies the strong hash when transferring +the content of a file into the repository (and `git annex fsck` verifies it +too), and so the content that the symlink is pointing to must be the same +content that was originally committed. -- cgit v1.2.3