aboutsummaryrefslogtreecommitdiff
path: root/doc/tips/Decrypting_files_in_special_remotes_without_git-annex.mdwn
blob: 3d2fd35a1180d363fe41a88e66c3881d3a5c3260 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
One of the selling points of `git-annex` is that it uses standard tools like `git` and `gpg` to deal with files, so that years from now it should be possible to explore and get useful data out of an old annex repository (this helps with [[future_proofing]]). If for whatever reason you need to decrypt files on [[special_remotes]] that use [[encryption]] without using `git-annex`, this can be done fairly easily using `gpg` (and `openssl` to compute the HMAC keys used to create the file names used on the special remote so you can look up the right file to decrypt). Here is an example script demonstrating how to compute the special remote file names and how to decrypt the special remote files.

    #!/usr/bin/env bash
    
    # Print the command-line help for ga_decrypt.sh on stdout.
    usage() {
    	# A single printf emits the whole help text: every argument below
    	# becomes one output line, and '' produces a blank separator line.
    	printf '%s\n' \
    		'Usage: ga_decrypt.sh -r REMOTE [-k SYMLINK] [-d FILE]' \
    		'' \
    		'    Either lookups up key on REMOTE for annex file linked with SYMLINK' \
    		'    or decrypts FILE encrypted for REMOTE.' \
    		'' \
    		'    -r: REMOTE is special remote to use' \
    		'    -k: SYMLINK is symlink in annex to print encrypted special remote key for' \
    		'    -d: FILE is path to special remote file to decrypt to STDOUT' \
    		'' \
    		'NOTES: ' \
    		'    * Run in an indirect git annex repo.' \
    		'    * Must specify -k or -d.' \
    		'    * -k prints the key including the leading directory names used for a ' \
    		'       directory remote (even if REMOTE is not a directory remote)' \
    		'    * -d works on a locally accessible file. It does not fetch a remote file' \
    		'    * Must have gpg and openssl'
    }
    
    # Base64-decode a value and decrypt the result with gpg.
    #
    # Arguments: $1 - base64 text whose decoded form is fed to `gpg --decrypt`
    # Outputs:   the decrypted text on stdout; any trailing newlines in gpg's
    #            output are collapsed to exactly one
    # Returns:   0 on success; the failing status (with no output) when the
    #            decode/decrypt pipeline fails
    decrypt_cipher() {
    	local cipher="$1"
    	local plaintext
    
    	# Assigned separately from the `local` declaration so the pipeline's
    	# exit status (that of gpg, its last stage) is not masked.
    	plaintext="$(printf '%s' "$cipher" | base64 -d | gpg --decrypt --quiet)" || return
    
    	# printf instead of echo: echo would treat a value such as "-n" or "-e"
    	# as an option and print nothing.
    	printf '%s\n' "$plaintext"
    }
    
    # Print the name under which an annexed file is stored on an encrypted
    # special remote.
    #
    # Arguments: $1 - encryption scheme: "hybrid", "shared" or "pubkey"
    #            $2 - cipher value for the remote, as supplied by the caller
    #            $3 - path to the annex symlink whose key should be looked up
    # Outputs:   "<aaa>/<bbb>/GPGHMACSHA1--<hmac>" on stdout, where <aaa> and
    #            <bbb> are the first and second three characters of the md5
    #            of the "GPGHMACSHA1--<hmac>" name
    lookup_key() {
    	local encryption="$1"
    	local cipher="$2"
    	local symlink="$3"
    	local annex_key hash key checksum
    
    	if [ "$encryption" = "hybrid" ] || [ "$encryption" = "pubkey" ]; then
    		cipher="$(decrypt_cipher "$cipher")"
    	fi
    
    	# Pull out MAC cipher from beginning of cipher
    	case "$encryption" in
    		hybrid)
    			cipher="$(printf '%s' "$cipher" | head -c 256)"
    			;;
    		shared)
    			cipher="$(printf '%s' "$cipher" | base64 -d | tr -d '\n' | head -c 256)"
    			;;
    		pubkey)
    			# pubkey cipher includes a trailing newline which was stripped by
    			# the command substitution around decrypt_cipher above. Append it
    			# directly: routing the value through a printf format string
    			# would mangle any '%' or '\' it contains.
    			cipher+=$'\n'
    			;;
    	esac
    
    	# The annex key is the final path component of the symlink target.
    	annex_key="$(basename "$(readlink "$symlink")")"
    
    	# Drop everything up to "= " so the bare digest remains whether openssl
    	# prints "(stdin)= <hex>" or prefixes it with an algorithm name.
    	hash="$(printf '%s' "$annex_key" | openssl dgst -sha1 -hmac "$cipher" | sed 's/^.*= //')"
    	key="GPGHMACSHA1--$hash"
    
    	# Only the first six characters of the md5sum output are used below.
    	checksum="$(printf '%s' "$key" | md5sum)"
    	printf '%s\n' "${checksum:0:3}/${checksum:3:3}/$key"
    }
    
    # Decrypt a file taken from an encrypted special remote, writing the
    # plaintext to stdout.
    #
    # Arguments: $1 - encryption scheme: "hybrid", "shared" or "pubkey"
    #            $2 - cipher value for the remote, as supplied by the caller
    #            $3 - path to the locally accessible encrypted file
    # Returns:   the exit status of the gpg invocation
    decrypt_file() {
    	# Locals keep the caller's variables of the same names untouched.
    	local encryption="$1"
    	local cipher="$2"
    	local file_path="$3"
    
    	# pubkey files are decrypted with the user's own gpg key; the cipher
    	# argument is not used.
    	if [ "$encryption" = "pubkey" ] ; then
    		gpg --quiet --decrypt "${file_path}"
    		return
    	fi
    
    	# The passphrase is whatever follows the first 256 characters of the
    	# cipher (tail -c +257 starts output at character 257).
    	case "$encryption" in
    		hybrid)
    			cipher="$(decrypt_cipher "$cipher" | tail -c +257)"
    			;;
    		shared)
    			cipher="$(printf '%s' "$cipher" | base64 -d | tr -d '\n' | tail -c +257)"
    			;;
    	esac
    
    	# NOTE(review): the passphrase is passed as a command-line argument and
    	# is therefore visible in the process list while gpg runs.
    	gpg --quiet --batch --passphrase "$cipher" --output - "${file_path}"
    }
    
    # Parse the command line, read the chosen remote's settings from
    # remote.log on the git-annex branch, and run the requested action.
    #
    # Arguments: "$@" - -r REMOTE plus exactly one of -k SYMLINK or -d FILE
    # Outputs:   whatever lookup_key / decrypt_file print on stdout;
    #            usage text and error messages on stderr
    # Exits:     2 on invalid usage; 1 when REMOTE cannot be resolved to
    #            exactly one remote.log entry
    main() {
    	local opt field remote_config
    	local mode="" remote="" symlink="" file_path=""
    	local encryption="" cipher=""
    	local -a fields=()
    
    	OPTIND=1
    	while getopts "r:k:d:" opt; do
    		case "$opt" in
    			r)
    				remote="$OPTARG"
    				;;
    			k|d)
    				# -k and -d are mutually exclusive and may be given only once.
    				if [ -n "$mode" ] ; then
    					usage >&2
    					exit 2
    				fi
    				if [ "$opt" = "k" ] ; then
    					mode="lookup key"
    					symlink="$OPTARG"
    				else
    					mode="decrypt file"
    					file_path="$OPTARG"
    				fi
    				;;
    			*)
    				# Unknown option or missing option argument.
    				usage >&2
    				exit 2
    				;;
    		esac
    	done
    	shift $((OPTIND-1))
    
    	if [ -z "$mode" ] || [ -z "$remote" ] ; then
    		usage >&2
    		exit 2
    	fi
    
    	# Pull out config for desired remote name. Fixed-string match on the
    	# whole " name=REMOTE " field, so keys that merely end in "name="
    	# (e.g. hostname=) and regex metacharacters in REMOTE cannot match.
    	remote_config="$(git show git-annex:remote.log | grep -F -- " name=${remote} ")"
    	if [ -z "$remote_config" ] ; then
    		printf 'ga_decrypt.sh: no remote named "%s" found in remote.log\n' "$remote" >&2
    		exit 1
    	fi
    	case "$remote_config" in
    		*$'\n'*)
    			printf 'ga_decrypt.sh: more than one remote.log entry is named "%s"\n' "$remote" >&2
    			exit 1
    			;;
    	esac
    
    	# Get encryption type and cipher from config: the line is a list of
    	# whitespace-separated key=value fields.
    	read -ra fields <<< "$remote_config"
    	for field in "${fields[@]}"; do
    		case "$field" in
    			encryption=*) encryption="${field#encryption=}" ;;
    			cipher=*)     cipher="${field#cipher=}" ;;
    		esac
    	done
    
    	if [ "$mode" = "lookup key" ] ; then
    		lookup_key "$encryption" "$cipher" "$symlink"
    	elif [ "$mode" = "decrypt file" ] ; then
    		decrypt_file "$encryption" "$cipher" "${file_path}"
    	fi
    }

    # Script entry point: hand all command-line arguments to main unchanged.
    main "$@"