summaryrefslogtreecommitdiff
path: root/Locations.hs
blob: 425e4fdcf94392f52a2c8c82cd53fb8929a5f8eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
{- git-annex file locations
 -
 - Copyright 2010 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Locations (
	keyFile,
	fileKey,
	gitAnnexLocation,
	annexLocations,
	gitAnnexDir,
	gitAnnexObjectDir,
	gitAnnexTmpDir,
	gitAnnexTmpLocation,
	gitAnnexBadDir,
	gitAnnexBadLocation,
	gitAnnexUnusedLog,
	gitAnnexJournalDir,
	gitAnnexJournalLock,
	isLinkToAnnex,
	hashDirMixed,
	hashDirLower,

	prop_idempotent_fileKey
) where

import Data.Bits
import Data.Word
import Data.Hash.MD5

import Common
import Types
import Types.Key
import qualified Git

{- Conventions:
 -
 - Functions ending in "Dir" should always return values ending with a
 - trailing path separator. Most code does not rely on that, but a few
 - things do. 
 -
 - Everything else should not end in a trailing path sepatator. 
 -
 - Only functions (with names starting with "git") that build a path
 - based on a git repository should return an absolute path.
 - Everything else should use relative paths.
 -}

{- The directory git annex uses for local state, relative to the .git
 - directory -}
annexDir :: FilePath
annexDir = addTrailingPathSeparator "annex"

{- The directory git annex uses for locally available object content,
 - relative to the .git directory -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects"

{- Annexed file's possible locations relative to the .git directory.
 - There are two different possibilities, using different hashes;
 - the first is the default for new content. -}
annexLocations :: Key -> [FilePath]
annexLocations key = [using hashDirMixed, using hashDirLower]
	where
		using h = objectDir </> h key </> f </> f
		f = keyFile key

{- Annexed file's absolute location in a repository.
 - Out of the possible annexLocations, returns the one where the file
 - is actually present. When the file is not present, returns the
 - one where the file should be put.
 -}
gitAnnexLocation :: Key -> Git.Repo -> IO FilePath
gitAnnexLocation key r
	| Git.repoIsLocalBare r =
		go (Git.workTree r) $ annexLocations key
	| otherwise =
		go (Git.workTree r </> ".git") $ annexLocations key
	where
		go dir locs = fromMaybe (dir </> head locs) <$> check dir locs
		check _ [] = return Nothing
		check dir (l:ls) = do
			let f = dir </> l
			e <- doesFileExist f
			if e then return (Just f) else check dir ls

{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r
	| Git.repoIsLocalBare r = addTrailingPathSeparator $ Git.workTree r </> annexDir
	| otherwise = addTrailingPathSeparator $ Git.workTree r </> ".git" </> annexDir

{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r
	| Git.repoIsLocalBare r = addTrailingPathSeparator $ Git.workTree r </> objectDir
	| otherwise = addTrailingPathSeparator $ Git.workTree r </> ".git" </> objectDir

{- .git/annex/tmp/ is used for temp files -}
gitAnnexTmpDir :: Git.Repo -> FilePath
gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"

{- The temp file to use for a given key. -}
gitAnnexTmpLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpLocation key r = gitAnnexTmpDir r </> keyFile key

{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"

{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> FilePath
gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key

{- .git/annex/*unused is used to number possibly unused keys -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")

{- .git/annex/journal/ is used to journal changes made to the git-annex
 - branch -}
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"

{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"

{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s

{- Converts a key into a filename fragment.
 -
 - Escape "/" in the key name, to keep a flat tree of files and avoid
 - issues with keys containing "/../" or ending with "/" etc. 
 -
 - "/" is escaped to "%" because it's short and rarely used, and resembles
 -     a slash
 - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
 -     is one to one.
 - ":" is escaped to "&c", because despite it being 2011, people still care
 -     about FAT.
 -}
keyFile :: Key -> FilePath
keyFile key = replace "/" "%" $ replace ":" "&c" $
	replace "%" "&s" $ replace "&" "&a"  $ show key

{- Reverses keyFile, converting a filename fragment (ie, the basename of
 - the symlink target) into a key. -}
fileKey :: FilePath -> Maybe Key
fileKey file = readKey $
	replace "&a" "&" $ replace "&s" "%" $
		replace "&c" ":" $ replace "%" "/" file

{- for quickcheck -}
prop_idempotent_fileKey :: String -> Bool
prop_idempotent_fileKey s = Just k == fileKey (keyFile k)
	where k = stubKey { keyName = s, keyBackendName = "test" }

{- Given a key, generates a short directory name to put it in,
 - to do hashing to protect against filesystems that dislike having
 - many items in a single directory. -}
hashDirMixed :: Key -> FilePath
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
	where
		dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
		ABCD (a,b,c,d) = md5 $ Str $ show k

{- Generates a hash directory that is all lower case. -}
hashDirLower :: Key -> FilePath
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
	where
		dir = take 6 $ md5s $ Str $ show k

{- modified version of display_32bits_as_hex from Data.Hash.MD5
 -   Copyright (C) 2001 Ian Lynagh 
 -   License: Either BSD or GPL
 -}
display_32bits_as_dir :: Word32 -> String
display_32bits_as_dir w = trim $ swap_pairs cs
	where 
		-- Need 32 characters to use. To avoid inaverdently making
		-- a real word, use letters that appear less frequently.
		chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
		cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
		getc n = chars !! fromIntegral n
		swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
		swap_pairs _ = []
		-- Last 2 will always be 00, so omit.
		trim = take 6