1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
|
{- git repository handling
-
- This is written to be completely independant of git-annex and should be
- suitable for other uses.
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module GitRepo (
Repo,
repoFromCwd,
repoFromPath,
repoFromUrl,
repoIsUrl,
repoIsSsh,
repoDescribe,
workTree,
gitDir,
relative,
urlPath,
urlHost,
configGet,
configMap,
configRead,
configTrue,
gitCommandLine,
run,
pipeRead,
hPipeRead,
attributes,
remotes,
remotesAdd,
repoRemoteName,
inRepo,
notInRepo,
stagedFiles,
checkAttr,
decodeGitFile,
encodeGitFile,
prop_idempotent_deencode
) where
import Control.Monad (unless)
import System.Directory
import System.Posix.Directory
import System.Path
import System.Cmd.Utils
import IO (bracket_)
import Data.String.Utils
import System.IO
import qualified Data.Map as Map hiding (map, split)
import Network.URI
import Data.Maybe
import Data.Char
import Data.Word (Word8)
import Codec.Binary.UTF8.String (encode)
import Text.Printf
import Utility
{- There are two types of repositories; those on local disk and those
- accessed via an URL. -}
data RepoLocation = Dir FilePath | Url URI
deriving (Show, Eq)
data Repo = Repo {
location :: RepoLocation,
config :: Map.Map String String,
remotes :: [Repo],
-- remoteName holds the name used for this repo in remotes
remoteName :: Maybe String
} deriving (Show, Eq)
newFrom :: RepoLocation -> Repo
newFrom l =
Repo {
location = l,
config = Map.empty,
remotes = [],
remoteName = Nothing
}
{- Local Repo constructor. -}
repoFromPath :: FilePath -> Repo
repoFromPath dir = newFrom $ Dir dir
{- Remote Repo constructor. Throws exception on invalid url. -}
repoFromUrl :: String -> Repo
repoFromUrl url
| startswith "file://" url = repoFromPath $ uriPath u
| otherwise = newFrom $ Url u
where u = fromJust $ parseURI url
{- User-visible description of a git repo. -}
repoDescribe :: Repo -> String
repoDescribe Repo { remoteName = Just name } = name
repoDescribe Repo { location = Url url } = show url
repoDescribe Repo { location = Dir dir } = dir
{- Constructs and returns an updated version of a repo with
- different remotes list. -}
remotesAdd :: Repo -> [Repo] -> Repo
remotesAdd repo rs = repo { remotes = rs }
{- Returns the name of the remote that corresponds to the repo, if
- it is a remote. Otherwise, "" -}
repoRemoteName :: Repo -> String
repoRemoteName Repo { remoteName = Just name } = name
repoRemoteName _ = ""
{- Some code needs to vary between URL and normal repos,
- or bare and non-bare, these functions help with that. -}
repoIsUrl :: Repo -> Bool
repoIsUrl Repo { location = Url _ } = True
repoIsUrl _ = False
repoIsSsh :: Repo -> Bool
repoIsSsh Repo { location = Url url }
| uriScheme url == "ssh:" = True
| otherwise = False
repoIsSsh _ = False
assertLocal :: Repo -> a -> a
assertLocal repo action =
if (not $ repoIsUrl repo)
then action
else error $ "acting on URL git repo " ++ repoDescribe repo ++
" not supported"
assertUrl :: Repo -> a -> a
assertUrl repo action =
if (repoIsUrl repo)
then action
else error $ "acting on local git repo " ++ repoDescribe repo ++
" not supported"
assertSsh :: Repo -> a -> a
assertSsh repo action =
if (repoIsSsh repo)
then action
else error $ "unsupported url in repo " ++ repoDescribe repo
bare :: Repo -> Bool
bare repo = case Map.lookup "core.bare" $ config repo of
Just v -> configTrue v
Nothing -> error $ "it is not known if git repo " ++
repoDescribe repo ++
" is a bare repository; config not read"
{- Path to a repository's gitattributes file. -}
attributes :: Repo -> String
attributes repo
| bare repo = workTree repo ++ "/info/.gitattributes"
| otherwise = workTree repo ++ "/.gitattributes"
{- Path to a repository's .git directory, relative to its workTree. -}
gitDir :: Repo -> String
gitDir repo
| bare repo = ""
| otherwise = ".git"
{- Path to a repository's --work-tree, that is, its top.
-
- Note that for URL repositories, this is the path on the remote host. -}
workTree :: Repo -> FilePath
workTree r@(Repo { location = Url _ }) = urlPath r
workTree (Repo { location = Dir d }) = d
{- Given a relative or absolute filename in a repository, calculates the
- name to use to refer to the file relative to a git repository's top.
- This is the same form displayed and used by git. -}
relative :: Repo -> String -> String
relative repo@(Repo { location = Dir d }) file = drop (length absrepo) absfile
where
-- normalize both repo and file, so that repo
-- will be substring of file
absrepo = case (absNormPath "/" d) of
Just f -> f ++ "/"
Nothing -> error $ "bad repo" ++ repoDescribe repo
absfile = case (secureAbsNormPath absrepo file) of
Just f -> f
Nothing -> error $ file ++ " is not located inside git repository " ++ absrepo
relative repo _ = assertLocal repo $ error "internal"
{- Hostname of an URL repo. (May include a username and/or port too.) -}
urlHost :: Repo -> String
urlHost Repo { location = Url u } = uriUserInfo a ++ uriRegName a ++ uriPort a
where a = fromJust $ uriAuthority u
urlHost repo = assertUrl repo $ error "internal"
{- Path of an URL repo. -}
urlPath :: Repo -> String
urlPath Repo { location = Url u } = uriPath u
urlPath repo = assertUrl repo $ error "internal"
{- Constructs a git command line operating on the specified repo. -}
gitCommandLine :: Repo -> [String] -> [String]
gitCommandLine repo@(Repo { location = Dir d} ) params =
-- force use of specified repo via --git-dir and --work-tree
["--git-dir="++d++"/"++(gitDir repo), "--work-tree="++d] ++ params
gitCommandLine repo _ = assertLocal repo $ error "internal"
{- Runs git in the specified repo, throwing an error if it fails. -}
run :: Repo -> [String] -> IO ()
run repo params = assertLocal repo $ do
ok <- boolSystem "git" (gitCommandLine repo params)
unless (ok) $ error $ "git " ++ show params ++ " failed"
{- Runs a git subcommand and returns its output. -}
pipeRead :: Repo -> [String] -> IO String
pipeRead repo params = assertLocal repo $ do
pOpen ReadFromPipe "git" (gitCommandLine repo params) $ \h -> do
hGetContentsStrict h
{- Like pipeRead, but does not read output strictly; recommended
- for git commands that produce a lot of output that will be processed
- lazily.
-
- ONLY AFTER the string has been read completely, You must call either
- getProcessStatus or forceSuccess on the PipeHandle. Zombies will result
- otherwise.-}
hPipeRead :: Repo -> [String] -> IO (PipeHandle, String)
hPipeRead repo params = assertLocal repo $ do
pipeFrom "git" (gitCommandLine repo params)
{- Passed a location, recursively scans for all files that
- are checked into git at that location. -}
inRepo :: Repo -> FilePath -> IO [FilePath]
inRepo repo l = pipeNullSplit repo
["ls-files", "--cached", "--exclude-standard", "-z", l]
{- Passed a location, recursively scans for all files that are not checked
- into git, and not gitignored. -}
notInRepo :: Repo -> FilePath -> IO [FilePath]
notInRepo repo l = pipeNullSplit repo
["ls-files", "--others", "--exclude-standard", "-z", l]
{- Passed a location, returns a list of the files, staged for
- commit, that are being added, moved, or changed (but not deleted). -}
stagedFiles :: Repo -> FilePath -> IO [FilePath]
stagedFiles repo l = pipeNullSplit repo
["diff", "--cached", "--name-only", "--diff-filter=ACMRT", "-z",
"HEAD", l]
{- Reads null terminated output of a git command (as enabled by the -z
- parameter), and splits it into a list of files. -}
pipeNullSplit :: Repo -> [String] -> IO [FilePath]
pipeNullSplit repo params = do
-- XXX handle is left open, this is ok for git-annex, but may need
-- to be cleaned up for other uses.
(_, fs0) <- hPipeRead repo params
return $ split0 fs0
where
split0 s = filter (not . null) $ split "\0" s
{- Runs git config and populates a repo with its config.
-
- For a ssh repository, a list of ssh options may optionally be specified. -}
configRead :: Repo -> Maybe [String] -> IO Repo
configRead repo@(Repo { location = Dir d }) _ = do
{- Cannot use pipeRead because it relies on the config having
been already read. Instead, chdir to the repo. -}
cwd <- getCurrentDirectory
bracket_ (changeWorkingDirectory d)
(\_ -> changeWorkingDirectory cwd) $
pOpen ReadFromPipe "git" ["config", "--list"] $
hConfigRead repo
configRead repo sshopts = assertSsh repo $ do
pOpen ReadFromPipe "ssh" params $ hConfigRead repo
where
params = case sshopts of
Nothing -> [urlHost repo, command]
Just l -> l ++ [urlHost repo, command]
command = "cd " ++ (shellEscape $ urlPath repo) ++
" && git config --list"
hConfigRead :: Repo -> Handle -> IO Repo
hConfigRead repo h = do
val <- hGetContentsStrict h
let r = repo { config = configParse val }
return r { remotes = configRemotes r }
{- Checks if a string fron git config is a true value. -}
configTrue :: String -> Bool
configTrue s = map toLower s == "true"
{- Calculates a list of a repo's configured remotes, by parsing its config. -}
configRemotes :: Repo -> [Repo]
configRemotes repo = map construct remotepairs
where
remotepairs = Map.toList $ filterremotes $ config repo
filterremotes = Map.filterWithKey (\k _ -> isremote k)
isremote k = (startswith "remote." k) && (endswith ".url" k)
remotename k = (split "." k) !! 1
construct (k,v) = (gen v) { remoteName = Just $ remotename k }
gen v | isURI v = repoFromUrl v
| otherwise = repoFromPath v
{- Parses git config --list output into a config map. -}
configParse :: String -> Map.Map String String
configParse s = Map.fromList $ map pair $ lines s
where
pair l = (key l, val l)
key l = (keyval l) !! 0
val l = join sep $ drop 1 $ keyval l
keyval l = split sep l :: [String]
sep = "="
{- Returns a single git config setting, or a default value if not set. -}
configGet :: Repo -> String -> String -> String
configGet repo key defaultValue =
Map.findWithDefault defaultValue key (config repo)
{- Access to raw config Map -}
configMap :: Repo -> Map.Map String String
configMap repo = config repo
{- Looks up a gitattributes value for each file in a list. -}
checkAttr :: Repo -> String -> [FilePath] -> IO [(FilePath, String)]
checkAttr repo attr files = do
(_, s) <- pipeBoth "git" params files0
return $ map topair $ lines s
-- XXX handle is left open, this is ok for git-annex, but may need
-- to be cleaned up for other uses.
where
params = gitCommandLine repo ["check-attr", attr, "-z", "--stdin"]
files0 = join "\0" files
topair l = (file, value)
where
file = decodeGitFile $ join sep $ take end bits
value = bits !! end
end = length bits - 1
bits = split sep l
sep = ": " ++ attr ++ ": "
{- Some git commands output encoded filenames. Decode that (annoyingly
- complex) encoding. -}
decodeGitFile :: String -> FilePath
decodeGitFile [] = []
decodeGitFile f@(c:s)
-- encoded strings will be inside double quotes
| c == '"' = unescape ("", middle)
| otherwise = f
where
e = '\\'
middle = take (length s - 1) s
unescape (b, []) = b
-- look for escapes starting with '\'
unescape (b, v) = b ++ beginning ++ unescape (decode rest)
where
pair = span (/= e) v
beginning = fst pair
rest = snd pair
isescape x = x == e
-- \NNN is an octal encoded character
decode (x:n1:n2:n3:rest)
| isescape x && alloctal = (fromoctal, rest)
where
alloctal = isOctDigit n1 &&
isOctDigit n2 &&
isOctDigit n3
fromoctal = [chr $ readoctal (n1:n2:n3:[])]
readoctal o = read $ "0o" ++ o :: Int
-- \C is used for a few special characters
decode (x:nc:rest)
| isescape x = ([echar nc], rest)
where
echar 'a' = '\a'
echar 'b' = '\b'
echar 'f' = '\f'
echar 'n' = '\n'
echar 'r' = '\r'
echar 't' = '\t'
echar 'v' = '\v'
echar a = a
decode n = ("", n)
{- Should not need to use this, except for testing decodeGitFile. -}
encodeGitFile :: FilePath -> String
encodeGitFile s = (foldl (++) "\"" (map echar s)) ++ "\""
where
e c = "\\" ++ [c]
echar '\a' = e 'a'
echar '\b' = e 'b'
echar '\f' = e 'f'
echar '\n' = e 'n'
echar '\r' = e 'r'
echar '\t' = e 't'
echar '\v' = e 'v'
echar '\\' = e '\\'
echar '"' = e '"'
echar x
| ord x < 0x20 = e_num x -- low ascii
| ord x >= 256 = e_utf x
| ord x > 0x7E = e_num x -- high ascii
| otherwise = [x] -- printable ascii
where
showoctal i = "\\" ++ (printf "%03o" i)
e_num c = showoctal $ ord c
-- unicode character is decomposed to
-- Word8s and each is shown in octal
e_utf c = foldl (++) "" $ map showoctal $
(encode [c] :: [Word8])
{- for quickcheck -}
prop_idempotent_deencode :: String -> Bool
prop_idempotent_deencode s = s == (decodeGitFile $ encodeGitFile s)
{- Finds the current git repository, which may be in a parent directory. -}
repoFromCwd :: IO Repo
repoFromCwd = do
cwd <- getCurrentDirectory
top <- seekUp cwd isRepoTop
case top of
(Just dir) -> return $ repoFromPath dir
Nothing -> error "Not in a git repository."
seekUp :: String -> (String -> IO Bool) -> IO (Maybe String)
seekUp dir want = do
ok <- want dir
if ok
then return (Just dir)
else case (parentDir dir) of
"" -> return Nothing
d -> seekUp d want
isRepoTop :: FilePath -> IO Bool
isRepoTop dir = do
r <- isRepo
b <- isBareRepo
return (r || b)
where
isRepo = gitSignature ".git" ".git/config"
isBareRepo = gitSignature "objects" "config"
gitSignature subdir file = do
s <- (doesDirectoryExist (dir ++ "/" ++ subdir))
f <- (doesFileExist (dir ++ "/" ++ file))
return (s && f)
|