aboutsummaryrefslogtreecommitdiff
path: root/Command/Log.hs
blob: 357bcf1f3e51398daa5341af6885cfc3986d377b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
{- git-annex command
 -
 - Copyright 2012, 2016 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

{-# LANGUAGE CPP #-}

module Command.Log where

import qualified Data.Set as S
import qualified Data.Map as M
import Data.Char
import Data.Time.Clock.POSIX
import Data.Time
#if ! MIN_VERSION_time(1,5,0)
import System.Locale
#endif

import Command
import Logs
import Logs.Location
import qualified Annex.Branch
import qualified Git
import Git.Command
import qualified Remote
import qualified Annex

data RefChange = RefChange 
	{ changetime :: POSIXTime
	, oldref :: Git.Ref
	, newref :: Git.Ref
	, changekey :: Key
	}
	deriving (Show)

data LogChange = Added | Removed

type Outputter = LogChange -> POSIXTime -> [UUID] -> Annex ()

cmd :: Command
cmd = withGlobalOptions annexedMatchingOptions $
	command "log" SectionQuery "shows location log"
		paramPaths (seek <$$> optParser)

data LogOptions = LogOptions
	{ logFiles :: CmdParams
	, allOption :: Bool
	, rawDateOption :: Bool
	, gourceOption :: Bool
	, passthruOptions :: [CommandParam]
	}

optParser :: CmdParamsDesc -> Parser LogOptions
optParser desc = LogOptions
	<$> cmdParams desc
	<*> switch
		( long "all"
		<> short 'A'
		<> help "display location log changes to all files"
		)
	<*> switch
		( long "raw-date"
		<> help "display seconds from unix epoch"
		)
	<*> switch
		( long "gource"
		<> help "format output for gource"
		)
	<*> (concat <$> many passthru)
  where
	passthru :: Parser [CommandParam]
	passthru = datepassthru "since"
		<|> datepassthru "after"
		<|> datepassthru "until"
		<|> datepassthru "before"
		<|> (mkpassthru "max-count" <$> strOption
			( long "max-count" <> metavar paramNumber
			<> help "limit number of logs displayed"
			))
	datepassthru n = mkpassthru n <$> strOption
		( long n <> metavar paramDate
		<> help ("show log " ++ n ++ " date")
		)
	mkpassthru n v = [Param ("--" ++ n), Param v]

seek :: LogOptions -> CommandSeek
seek o = do
	m <- Remote.uuidDescriptions
	zone <- liftIO getCurrentTimeZone
	let outputter = mkOutputter m zone o
	case (logFiles o, allOption o) of
		(fs, False) -> withFilesInGit (whenAnnexed $ start o outputter) fs
		([], True) -> commandAction (startAll o outputter)
		(_, True) -> giveup "Cannot specify both files and --all"

start :: LogOptions -> (FilePath -> Outputter) -> FilePath -> Key -> CommandStart
start o outputter file key = do
	(changes, cleanup) <- getKeyLog key (passthruOptions o)
	showLogIncremental (outputter file) changes
	void $ liftIO cleanup
	stop

startAll :: LogOptions -> (String -> Outputter) -> CommandStart
startAll o outputter = do
	(changes, cleanup) <- getAllLog (passthruOptions o)
	showLog outputter changes
	void $ liftIO cleanup
	stop

{- Displays changes made. Only works when all the RefChanges are for the
 - same key. The method is to compare each value with the value
 - after it in the list, which is the old version of the value.
 -
 - This ncessarily buffers the whole list, so does not stream.
 - But, the number of location log changes for a single key tends to be
 - fairly small.
 -
 - This minimizes the number of reads from git; each logged value is read
 - only once.
 -
 - This also generates subtly better output when the git-annex branch
 - got diverged.
 -}
showLogIncremental :: Outputter -> [RefChange] -> Annex ()
showLogIncremental outputter ps = do
	sets <- mapM (getset newref) ps
	previous <- maybe (return genesis) (getset oldref) (lastMaybe ps)
	let l = sets ++ [previous]
	let changes = map (\((t, new), (_, old)) -> (t, new, old))
		(zip l (drop 1 l))
	sequence_ $ compareChanges outputter changes
  where
	genesis = (0, S.empty)
	getset select change = do
		s <- S.fromList <$> loggedLocationsRef (select change)
		return (changetime change, s)

{- Displays changes made. Streams, and can display changes affecting
 - different keys, but does twice as much reading of logged values
 - as showLogIncremental. -}
showLog :: (String -> Outputter) -> [RefChange] -> Annex ()
showLog outputter cs = forM_ cs $ \c -> do
	let keyname = key2file (changekey c)
	new <- S.fromList <$> loggedLocationsRef (newref c)
	old <- S.fromList <$> loggedLocationsRef (oldref c)
	sequence_ $ compareChanges (outputter keyname)
		[(changetime c, new, old)]

mkOutputter :: M.Map UUID String -> TimeZone -> LogOptions -> FilePath -> Outputter
mkOutputter m zone o file
	| rawDateOption o = normalOutput lookupdescription file show
	| gourceOption o = gourceOutput lookupdescription file
	| otherwise = normalOutput lookupdescription file (showTimeStamp zone)
  where
	lookupdescription u = fromMaybe (fromUUID u) $ M.lookup u m

normalOutput :: (UUID -> String) -> FilePath -> (POSIXTime -> String) -> Outputter
normalOutput lookupdescription file formattime logchange ts us =
	liftIO $ mapM_ (putStrLn . format) us
  where
	time = formattime ts
	addel = case logchange of
		Added -> "+"
		Removed -> "-"
	format u = unwords [ addel, time, file, "|", 
		fromUUID u ++ " -- " ++ lookupdescription u ]

gourceOutput :: (UUID -> String) -> FilePath -> Outputter
gourceOutput lookupdescription file logchange ts us =
	liftIO $ mapM_ (putStrLn . intercalate "|" . format) us
  where
	time = takeWhile isDigit $ show ts
	addel = case logchange of
		Added -> "A" 
		Removed -> "M"
	format u = [ time, lookupdescription u, addel, file ]

{- Generates a display of the changes.
 - Uses a formatter to generate a display of items that are added and
 - removed. -}
compareChanges :: Ord a => (LogChange -> POSIXTime -> [a] -> b) -> [(POSIXTime, S.Set a, S.Set a)] -> [b]
compareChanges format changes = concatMap diff changes
  where
	diff (ts, new, old) =
		[ format Added ts   $ S.toList $ S.difference new old
		, format Removed ts $ S.toList $ S.difference old new
		]

{- Streams the git log for a given key's location log file.
 -
 - This is complicated by git log using paths relative to the current
 - directory, even when looking at files in a different branch. A wacky
 - relative path to the log file has to be used.
 -
 - The --remove-empty is a significant optimisation. It relies on location
 - log files never being deleted in normal operation. Letting git stop
 - once the location log file is gone avoids it checking all the way back
 - to commit 0 to see if it used to exist, so generally speeds things up a
 - *lot* for newish files. -}
getKeyLog :: Key -> [CommandParam] -> Annex ([RefChange], IO Bool)
getKeyLog key os = do
	top <- fromRepo Git.repoPath
	p <- liftIO $ relPathCwdToFile top
	config <- Annex.getGitConfig
	let logfile = p </> locationLogFile config key
	getGitLog [logfile] (Param "--remove-empty" : os)

{- Streams the git log for all git-annex branch changes. -}
getAllLog :: [CommandParam] -> Annex ([RefChange], IO Bool)
getAllLog = getGitLog []

getGitLog :: [FilePath] -> [CommandParam] -> Annex ([RefChange], IO Bool)
getGitLog fs os = do
	(ls, cleanup) <- inRepo $ pipeNullSplit $
		[ Param "log"
		, Param "-z"
		, Param "--pretty=format:%ct"
		, Param "--raw"
		, Param "--abbrev=40"
		] ++ os ++
		[ Param $ Git.fromRef Annex.Branch.fullname
		, Param "--"
		] ++ map Param fs
	return (parseGitRawLog ls, cleanup)

-- Parses chunked git log --raw output, which looks something like:
--
-- [ "timestamp\n:changeline"
-- , "logfile"
-- , ""
-- , "timestamp\n:changeline"
-- , "logfile"
-- , ":changeline"
-- , "logfile"
-- , ""
-- ]
--
-- The timestamp is not included before all changelines, so
-- keep track of the most recently seen timestamp.
parseGitRawLog :: [String] -> [RefChange]
parseGitRawLog = parse epoch
  where
	epoch = toEnum 0 :: POSIXTime
	parse oldts ([]:rest) = parse oldts rest
	parse oldts (c1:c2:rest) = case mrc of
		Just rc -> rc : parse ts rest
		Nothing -> parse ts (c2:rest)
	  where
		(ts, cl) = case separate (== '\n') c1 of
			(cl', []) -> (oldts, cl')
			(tss, cl') -> (parseTimeStamp tss, cl')
	  	mrc = do
			(old, new) <- parseRawChangeLine cl
			key <- locationLogFileKey c2
			return $ RefChange
				{ changetime = ts
				, oldref = old
				, newref = new
				, changekey = key
				}	
	parse _ _ = []

-- Parses something like "100644 100644 oldsha newsha M"
parseRawChangeLine :: String -> Maybe (Git.Ref, Git.Ref)
parseRawChangeLine = go . words
  where
	go (_:_:oldsha:newsha:_) = Just (Git.Ref oldsha, Git.Ref newsha)
	go _ = Nothing

parseTimeStamp :: String -> POSIXTime
parseTimeStamp = utcTimeToPOSIXSeconds . fromMaybe (error "bad timestamp") .
#if MIN_VERSION_time(1,5,0)
	parseTimeM True defaultTimeLocale "%s"
#else
	parseTime defaultTimeLocale "%s"
#endif

showTimeStamp :: TimeZone -> POSIXTime -> String
showTimeStamp zone = formatTime defaultTimeLocale rfc822DateFormat 
	. utcToZonedTime zone . posixSecondsToUTCTime