summaryrefslogtreecommitdiff
path: root/Locations.hs
blob: 47a009590cd4be3a3608deaeafc7e0abfa86f0be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
{- git-annex file locations
 -
 - Copyright 2010-2013 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Locations (
	keyFile,
	fileKey,
	keyPaths,
	keyPath,
	annexDir,
	objectDir,
	gitAnnexLocation,
	gitAnnexLink,
	gitAnnexMapping,
	gitAnnexInodeCache,
	gitAnnexInodeSentinal,
	gitAnnexInodeSentinalCache,
	annexLocations,
	annexLocation,
	gitAnnexDir,
	gitAnnexObjectDir,
	gitAnnexTmpDir,
	gitAnnexTmpLocation,
	gitAnnexBadDir,
	gitAnnexBadLocation,
	gitAnnexUnusedLog,
	gitAnnexFsckState,
	gitAnnexFsckResultsLog,
	gitAnnexScheduleState,
	gitAnnexTransferDir,
	gitAnnexCredsDir,
	gitAnnexFeedStateDir,
	gitAnnexFeedState,
	gitAnnexMergeDir,
	gitAnnexJournalDir,
	gitAnnexJournalLock,
	gitAnnexIndex,
	gitAnnexIndexStatus,
	gitAnnexIgnoredRefs,
	gitAnnexPidFile,
	gitAnnexDaemonStatusFile,
	gitAnnexLogFile,
	gitAnnexFuzzTestLogFile,
	gitAnnexHtmlShim,
	gitAnnexUrlFile,
	gitAnnexTmpCfgFile,
	gitAnnexSshDir,
	gitAnnexRemotesDir,
	gitAnnexAssistantDefaultDir,
	isLinkToAnnex,
	annexHashes,
	hashDirMixed,
	hashDirLower,
	preSanitizeKeyName,

	prop_idempotent_fileKey
) where

import Data.Bits
import Data.Word
import Data.Hash.MD5
import Data.Char

import Common
import Types
import Types.Key
import Types.UUID
import qualified Git

{- Conventions:
 -
 - Functions ending in "Dir" should always return values ending with a
 - trailing path separator. Most code does not rely on that, but a few
 - things do. 
 -
 - Everything else should not end in a trailing path sepatator. 
 -
 - Only functions (with names starting with "git") that build a path
 - based on a git repository should return an absolute path.
 - Everything else should use relative paths.
 -}

{- The directory git annex uses for local state, relative to the .git
 - directory -}
annexDir :: FilePath
annexDir = addTrailingPathSeparator "annex"

{- The directory git annex uses for locally available object content,
 - relative to the .git directory -}
objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects"

{- Annexed file's possible locations relative to the .git directory.
 - There are two different possibilities, using different hashes. -}
annexLocations :: Key -> [FilePath]
annexLocations key = map (annexLocation key) annexHashes
annexLocation :: Key -> Hasher -> FilePath
annexLocation key hasher = objectDir </> keyPath key hasher

{- Annexed object's absolute location in a repository.
 -
 - When there are multiple possible locations, returns the one where the
 - file is actually present.
 -
 - When the file is not present, returns the location where the file should
 - be stored.
 -
 - This does not take direct mode into account, so in direct mode it is not
 - the actual location of the file's content.
 -}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLocation key r config = gitAnnexLocation' key r (annexCrippledFileSystem config)
gitAnnexLocation' :: Key -> Git.Repo -> Bool -> IO FilePath
gitAnnexLocation' key r crippled
	{- Bare repositories default to hashDirLower for new
	 - content, as it's more portable.
	 -
	 - Repositories on filesystems that are crippled also use
	 - hashDirLower, since they do not use symlinks and it's
	 - more portable. -}
	| Git.repoIsLocalBare r || crippled =
		check $ map inrepo $ annexLocations key
	{- Non-bare repositories only use hashDirMixed, so
	 - don't need to do any work to check if the file is
	 - present. -}
	| otherwise = return $ inrepo $ annexLocation key hashDirMixed
  where
	inrepo d = Git.localGitDir r </> d
	check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
	check [] = error "internal"

{- Calculates a symlink to link a file to an annexed object. -}
gitAnnexLink :: FilePath -> Key -> Git.Repo -> IO FilePath
gitAnnexLink file key r = do
	cwd <- getCurrentDirectory
	let absfile = fromMaybe whoops $ absNormPath cwd file
	loc <- gitAnnexLocation' key r False
	return $ relPathDirToFile (parentDir absfile) loc
  where
  	whoops = error $ "unable to normalize " ++ file

{- File that maps from a key to the file(s) in the git repository.
 - Used in direct mode. -}
gitAnnexMapping :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexMapping key r config = do
	loc <- gitAnnexLocation key r config
	return $ loc ++ ".map"

{- File that caches information about a key's content, used to determine
 - if a file has changed.
 - Used in direct mode. -}
gitAnnexInodeCache :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexInodeCache key r config  = do
	loc <- gitAnnexLocation key r config
	return $ loc ++ ".cache"

gitAnnexInodeSentinal :: Git.Repo -> FilePath
gitAnnexInodeSentinal r = gitAnnexDir r </> "sentinal"

gitAnnexInodeSentinalCache :: Git.Repo -> FilePath
gitAnnexInodeSentinalCache r = gitAnnexInodeSentinal r ++ ".cache"

{- The annex directory of a repository. -}
gitAnnexDir :: Git.Repo -> FilePath
gitAnnexDir r = addTrailingPathSeparator $ Git.localGitDir r </> annexDir

{- The part of the annex directory where file contents are stored. -}
gitAnnexObjectDir :: Git.Repo -> FilePath
gitAnnexObjectDir r = addTrailingPathSeparator $ Git.localGitDir r </> objectDir

{- .git/annex/tmp/ is used for temp files -}
gitAnnexTmpDir :: Git.Repo -> FilePath
gitAnnexTmpDir r = addTrailingPathSeparator $ gitAnnexDir r </> "tmp"

{- The temp file to use for a given key's content. -}
gitAnnexTmpLocation :: Key -> Git.Repo -> FilePath
gitAnnexTmpLocation key r = gitAnnexTmpDir r </> keyFile key

{- .git/annex/bad/ is used for bad files found during fsck -}
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"

{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Key -> Git.Repo -> FilePath
gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key

{- .git/annex/foounused is used to number possibly unused keys -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")

{- .git/annex/fsckstate is used to store information about incremental fscks. -}
gitAnnexFsckState :: Git.Repo -> FilePath
gitAnnexFsckState r = gitAnnexDir r </> "fsckstate"

{- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
gitAnnexFsckResultsLog u r = gitAnnexDir r </> "fsckresults" </> fromUUID u

{- .git/annex/schedulestate is used to store information about when
 - scheduled jobs were last run. -}
gitAnnexScheduleState :: Git.Repo -> FilePath
gitAnnexScheduleState r = gitAnnexDir r </> "schedulestate"

{- .git/annex/creds/ is used to store credentials to access some special
 - remotes. -}
gitAnnexCredsDir :: Git.Repo -> FilePath
gitAnnexCredsDir r = addTrailingPathSeparator $ gitAnnexDir r </> "creds"

{- .git/annex/feeds/ is used to record per-key (url) state by importfeeds -}
gitAnnexFeedStateDir :: Git.Repo -> FilePath
gitAnnexFeedStateDir r = addTrailingPathSeparator $ gitAnnexDir r </> "feedstate"

gitAnnexFeedState :: Key -> Git.Repo -> FilePath
gitAnnexFeedState k r = gitAnnexFeedStateDir r </> keyFile k

{- .git/annex/merge/ is used for direct mode merges. -}
gitAnnexMergeDir :: Git.Repo -> FilePath
gitAnnexMergeDir r = addTrailingPathSeparator $ gitAnnexDir r </> "merge"

{- .git/annex/transfer/ is used to record keys currently
 - being transferred, and other transfer bookkeeping info. -}
gitAnnexTransferDir :: Git.Repo -> FilePath
gitAnnexTransferDir r = addTrailingPathSeparator $ gitAnnexDir r </> "transfer"

{- .git/annex/journal/ is used to journal changes made to the git-annex
 - branch -}
gitAnnexJournalDir :: Git.Repo -> FilePath
gitAnnexJournalDir r = addTrailingPathSeparator $ gitAnnexDir r </> "journal"

{- Lock file for the journal. -}
gitAnnexJournalLock :: Git.Repo -> FilePath
gitAnnexJournalLock r = gitAnnexDir r </> "journal.lck"

{- .git/annex/index is used to stage changes to the git-annex branch -}
gitAnnexIndex :: Git.Repo -> FilePath
gitAnnexIndex r = gitAnnexDir r </> "index"

{- Holds the ref of the git-annex branch that the index was last updated to.
 -
 - The .lck in the name is a historical accident; this is not used as a
 - lock. -}
gitAnnexIndexStatus :: Git.Repo -> FilePath
gitAnnexIndexStatus r = gitAnnexDir r </> "index.lck"

{- List of refs that should not be merged into the git-annex branch. -}
gitAnnexIgnoredRefs :: Git.Repo -> FilePath
gitAnnexIgnoredRefs r = gitAnnexDir r </> "ignoredrefs"

{- Pid file for daemon mode. -}
gitAnnexPidFile :: Git.Repo -> FilePath
gitAnnexPidFile r = gitAnnexDir r </> "daemon.pid"

{- Status file for daemon mode. -}
gitAnnexDaemonStatusFile :: Git.Repo -> FilePath
gitAnnexDaemonStatusFile r = gitAnnexDir r </> "daemon.status"

{- Log file for daemon mode. -}
gitAnnexLogFile :: Git.Repo -> FilePath
gitAnnexLogFile r = gitAnnexDir r </> "daemon.log"

{- Log file for fuzz test. -}
gitAnnexFuzzTestLogFile :: Git.Repo -> FilePath
gitAnnexFuzzTestLogFile r = gitAnnexDir r </> "fuzztest.log"

{- Html shim file used to launch the webapp. -}
gitAnnexHtmlShim :: Git.Repo -> FilePath
gitAnnexHtmlShim r = gitAnnexDir r </> "webapp.html"

{- File containing the url to the webapp. -}
gitAnnexUrlFile :: Git.Repo -> FilePath
gitAnnexUrlFile r = gitAnnexDir r </> "url"

{- Temporary file used to edit configuriation from the git-annex branch. -}
gitAnnexTmpCfgFile :: Git.Repo -> FilePath
gitAnnexTmpCfgFile r = gitAnnexDir r </> "config.tmp"

{- .git/annex/ssh/ is used for ssh connection caching -}
gitAnnexSshDir :: Git.Repo -> FilePath
gitAnnexSshDir r = addTrailingPathSeparator $ gitAnnexDir r </> "ssh"

{- .git/annex/remotes/ is used for remote-specific state. -}
gitAnnexRemotesDir :: Git.Repo -> FilePath
gitAnnexRemotesDir r = addTrailingPathSeparator $ gitAnnexDir r </> "remotes"

{- This is the base directory name used by the assistant when making
 - repositories, by default. -}
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"

{- Checks a symlink target to see if it appears to point to annexed content.
 -
 - We only look at paths inside the .git directory, and not at the .git
 - directory itself, because GIT_DIR may cause a directory name other
 - than .git to be used.
 -}
isLinkToAnnex :: FilePath -> Bool
isLinkToAnnex s = (pathSeparator:objectDir) `isInfixOf` s

{- Sanitizes a String that will be used as part of a Key's keyName,
 - dealing with characters that cause problems on substandard filesystems.
 -
 - This is used when a new Key is initially being generated, eg by getKey.
 - Unlike keyFile and fileKey, it does not need to be a reversable
 - escaping. Also, it's ok to change this to add more problimatic
 - characters later. Unlike changing keyFile, which could result in the
 - filenames used for existing keys changing and contents getting lost.
 -
 - It is, however, important that the input and output of this function
 - have a 1:1 mapping, to avoid two different inputs from mapping to the
 - same key.
 -}
preSanitizeKeyName :: String -> String
preSanitizeKeyName = concatMap escape
  where
  	escape c
		| isAsciiUpper c || isAsciiLower c || isDigit c = [c]
		| c `elem` ".-_ " = [c] -- common, assumed safe
		| c `elem` "/%:" = [c] -- handled by keyFile
		-- , is safe and uncommon, so will be used to escape
		-- other characters. By itself, it is escaped to 
		-- doubled form.
		| c == ',' = ",,"
		| otherwise = ',' : show(ord(c))

{- Converts a key into a filename fragment without any directory.
 -
 - Escape "/" in the key name, to keep a flat tree of files and avoid
 - issues with keys containing "/../" or ending with "/" etc. 
 -
 - "/" is escaped to "%" because it's short and rarely used, and resembles
 -     a slash
 - "%" is escaped to "&s", and "&" to "&a"; this ensures that the mapping
 -     is one to one.
 - ":" is escaped to "&c", because it seemed like a good idea at the time.
 -
 - Changing what this function escapes and how is not a good idea, as it
 - can cause existing objects to get lost.
 -}
keyFile :: Key -> FilePath
keyFile key = replace "/" "%" $ replace ":" "&c" $
	replace "%" "&s" $ replace "&" "&a"  $ key2file key

{- Reverses keyFile, converting a filename fragment (ie, the basename of
 - the symlink target) into a key. -}
fileKey :: FilePath -> Maybe Key
fileKey file = file2key $
	replace "&a" "&" $ replace "&s" "%" $
		replace "&c" ":" $ replace "%" "/" file

{- for quickcheck -}
prop_idempotent_fileKey :: String -> Bool
prop_idempotent_fileKey s
	| null s = True -- it's not legal for a key to have no keyName
	| otherwise= Just k == fileKey (keyFile k)
  where
	k = stubKey { keyName = s, keyBackendName = "test" }

{- A location to store a key on the filesystem. A directory hash is used,
 - to protect against filesystems that dislike having many items in a
 - single directory.
 -
 - The file is put in a directory with the same name, this allows
 - write-protecting the directory to avoid accidental deletion of the file.
 -}
keyPath :: Key -> Hasher -> FilePath
keyPath key hasher = hasher key </> f </> f
  where
	f = keyFile key

{- All possibile locations to store a key using different directory hashes. -}
keyPaths :: Key -> [FilePath]
keyPaths key = map (keyPath key) annexHashes

{- Two different directory hashes may be used. The mixed case hash
 - came first, and is fine, except for the problem of case-strict
 - filesystems such as Linux VFAT (mounted with shortname=mixed),
 - which do not allow using a directory "XX" when "xx" already exists.
 - To support that, most repositories use the lower case hash for new data. -}
type Hasher = Key -> FilePath
annexHashes :: [Hasher]
annexHashes = [hashDirLower, hashDirMixed]

hashDirMixed :: Hasher
hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
  where
	dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
	ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file k

hashDirLower :: Hasher
hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
  where
	dir = take 6 $ md5s $ md5FilePath $ key2file k

{- modified version of display_32bits_as_hex from Data.Hash.MD5
 -   Copyright (C) 2001 Ian Lynagh 
 -   License: Either BSD or GPL
 -}
display_32bits_as_dir :: Word32 -> String
display_32bits_as_dir w = trim $ swap_pairs cs
  where 
	-- Need 32 characters to use. To avoid inaverdently making
	-- a real word, use letters that appear less frequently.
	chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
	cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
	getc n = chars !! fromIntegral n
	swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
	swap_pairs _ = []
	-- Last 2 will always be 00, so omit.
	trim = take 6