summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Backend/SHA.hs22
-rw-r--r--GitQueue.hs6
-rw-r--r--TestConfig.hs34
-rw-r--r--configure.hs11
-rw-r--r--debian/changelog2
-rw-r--r--doc/design/encryption.mdwn4
-rw-r--r--doc/todo/git-annex_unused_eats_memory.mdwn25
7 files changed, 77 insertions, 27 deletions
diff --git a/Backend/SHA.hs b/Backend/SHA.hs
index 0ec555ce3..d9aeb72aa 100644
--- a/Backend/SHA.hs
+++ b/Backend/SHA.hs
@@ -27,7 +27,7 @@ import qualified SysConfig
import Key
type SHASize = Int
-
+
backends :: [Backend Annex]
-- order is slightly significant; want sha1 first ,and more general
-- sizes earlier
@@ -35,20 +35,22 @@ backends = catMaybes $ map genBackend [1, 256, 512, 224, 384]
genBackend :: SHASize -> Maybe (Backend Annex)
genBackend size
- | supported size = Just b
- | otherwise = Nothing
+ | shaCommand size == Nothing = Nothing
+ | otherwise = Just b
where
b = Backend.File.backend
{ name = shaName size
, getKey = keyValue size
, fsckKey = Backend.File.checkKey $ checkKeyChecksum size
}
- supported 1 = SysConfig.sha1sum
- supported 256 = SysConfig.sha256sum
- supported 224 = SysConfig.sha224sum
- supported 384 = SysConfig.sha384sum
- supported 512 = SysConfig.sha512sum
- supported _ = False
+
+shaCommand :: SHASize -> Maybe String
+shaCommand 1 = SysConfig.sha1
+shaCommand 256 = SysConfig.sha256
+shaCommand 224 = SysConfig.sha224
+shaCommand 384 = SysConfig.sha384
+shaCommand 512 = SysConfig.sha512
+shaCommand _ = Nothing
shaName :: SHASize -> String
shaName size = "SHA" ++ show size
@@ -63,7 +65,7 @@ shaN size file = do
then error $ command ++ " parse error"
else return $ head bits
where
- command = "sha" ++ (show size) ++ "sum"
+ command = fromJust $ shaCommand size
{- A key is a checksum of its contents. -}
keyValue :: SHASize -> FilePath -> Annex (Maybe Key)
diff --git a/GitQueue.hs b/GitQueue.hs
index 480027fa0..be0fcfc4a 100644
--- a/GitQueue.hs
+++ b/GitQueue.hs
@@ -57,7 +57,11 @@ add :: Queue -> String -> [CommandParam] -> FilePath -> Queue
add (Queue n m) subcommand params file = Queue (n + 1) m'
where
action = Action subcommand params
- m' = M.insertWith' (++) action [file] m
+ -- There are probably few items in the map, but there
+ -- can be a lot of files per item. So, optimise adding
+ -- files.
+ m' = M.insertWith' const action files m
+ files = file:(M.findWithDefault [] action m)
{- Number of items in a queue. -}
size :: Queue -> Int
diff --git a/TestConfig.hs b/TestConfig.hs
index 5e59681dd..9b2759e19 100644
--- a/TestConfig.hs
+++ b/TestConfig.hs
@@ -7,7 +7,10 @@ import System.Cmd
import System.Exit
type ConfigKey = String
-data ConfigValue = BoolConfig Bool | StringConfig String
+data ConfigValue =
+ BoolConfig Bool |
+ StringConfig String |
+ MaybeStringConfig (Maybe String)
data Config = Config ConfigKey ConfigValue
type Test = IO Config
@@ -17,15 +20,17 @@ data TestCase = TestCase TestName Test
instance Show ConfigValue where
show (BoolConfig b) = show b
show (StringConfig s) = show s
+ show (MaybeStringConfig s) = show s
instance Show Config where
- show (Config key value) = unlines
+ show (Config key value) = unlines
[ key ++ " :: " ++ valuetype value
, key ++ " = " ++ show value
]
where
valuetype (BoolConfig _) = "Bool"
valuetype (StringConfig _) = "String"
+ valuetype (MaybeStringConfig _) = "Maybe String"
writeSysConfig :: [Config] -> IO ()
writeSysConfig config = writeFile "SysConfig.hs" body
@@ -67,17 +72,26 @@ testCmd k cmdline = do
{- Ensures that one of a set of commands is available by running each in
- turn. The Config is set to the first one found. -}
-selectCmd :: ConfigKey -> [String] -> Test
-selectCmd k cmds = search cmds
+selectCmd :: Bool -> ConfigKey -> [String] -> String -> Test
+selectCmd required k cmds param = search cmds
where
- search [] = do
- testEnd $ Config k (BoolConfig False)
- error $ "* need one of these commands, but none are available: " ++ show cmds
+ search [] = failure
search (c:cs) = do
- ret <- system $ quiet c
+ ret <- system $ quiet c ++ " " ++ param
if (ret == ExitSuccess)
- then return $ Config k (StringConfig c)
+ then success c
else search cs
+ success c
+ | required == True = return $ Config k (StringConfig c)
+ | otherwise = return $ Config k (MaybeStringConfig $ Just c)
+ failure
+ | required == True = do
+ testEnd $ Config k (BoolConfig False)
+ error $ "* need one of these commands, but none are available: " ++ show cmds
+ | otherwise = do
+ let r = Config k (MaybeStringConfig Nothing)
+ testEnd r
+ return r
quiet :: String -> String
quiet s = s ++ " >/dev/null 2>&1"
@@ -91,3 +105,5 @@ testEnd :: Config -> IO ()
testEnd (Config _ (BoolConfig True)) = putStrLn $ " yes"
testEnd (Config _ (BoolConfig False)) = putStrLn $ " no"
testEnd (Config _ (StringConfig s)) = putStrLn $ " " ++ s
+testEnd (Config _ (MaybeStringConfig (Just s))) = putStrLn $ " " ++ s
+testEnd (Config _ (MaybeStringConfig Nothing)) = putStrLn $ " not available"
diff --git a/configure.hs b/configure.hs
index f8cd577e9..0661813ae 100644
--- a/configure.hs
+++ b/configure.hs
@@ -11,7 +11,7 @@ tests = [
, testCp "cp_a" "-a"
, testCp "cp_p" "-p"
, testCp "cp_reflink_auto" "--reflink=auto"
- , TestCase "uuid generator" $ selectCmd "uuid" ["uuid", "uuidgen"]
+ , TestCase "uuid generator" $ selectCmd True "uuid" ["uuid", "uuidgen"] ""
, TestCase "xargs -0" $ requireCmd "xargs_0" "xargs -0 </dev/null"
, TestCase "rsync" $ requireCmd "rsync" "rsync --version >/dev/null"
, TestCase "curl" $ testCmd "curl" "curl --version >/dev/null"
@@ -20,10 +20,11 @@ tests = [
shaTestCases :: [Int] -> [TestCase]
shaTestCases l = map make l
- where
- make n =
- let cmd = "sha" ++ show n ++ "sum"
- in TestCase cmd $ requireCmd cmd (cmd ++ " </dev/null")
+ where make n =
+ let
+ cmds = map (\x -> "sha" ++ show n ++ x) ["", "sum"]
+ key = "sha" ++ show n
+ in TestCase key $ selectCmd False key cmds "</dev/null"
tmpDir :: String
tmpDir = "tmp"
diff --git a/debian/changelog b/debian/changelog
index fdc740cb8..7f104be10 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -14,6 +14,8 @@ git-annex (0.20110402) UNRELEASED; urgency=low
* Add doc-base file. Closes: #621408
* Periodically flush git command queue, to avoid bloating memory usage
too much.
+ * Support "sha1" and "sha512" commands on FreeBSD, and allow building
+ if any/all SHA commands are not available. Thanks, Fraser Tweedale
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400
diff --git a/doc/design/encryption.mdwn b/doc/design/encryption.mdwn
index f8f8656a7..8a8f38108 100644
--- a/doc/design/encryption.mdwn
+++ b/doc/design/encryption.mdwn
@@ -61,8 +61,8 @@ more gpg public keys. This scheme allows new gpg private keys to be given
access to content that has already been stored in the remote.
Different encrypted remotes need to be able to each use different ciphers.
-There does not seem to be a benefit to allowing multiple cipers to be
-used within a single remote, and it would add a lot of complexity.
+Allowing multiple ciphers to be used within a single remote would add a lot
+of complexity, so is not planned to be supported.
Instead, if you want a new cipher, create a new S3 bucket, or whatever.
There does not seem to be much benefit to using the same cipher for
two different encrypted remotes.
diff --git a/doc/todo/git-annex_unused_eats_memory.mdwn b/doc/todo/git-annex_unused_eats_memory.mdwn
new file mode 100644
index 000000000..6ce714004
--- /dev/null
+++ b/doc/todo/git-annex_unused_eats_memory.mdwn
@@ -0,0 +1,25 @@
+`git-annex unused` has to compare large sets of data
+(all keys with content present in the repository,
+with all keys used by files in the repository), and so
+uses more memory than git-annex typically needs; around
+60-80 mb when run in a repository with 80 thousand files.
+
+I would like to reduce this. One idea is to use a bloom filter.
+For example, construct a bloom filter of all keys used by files in
+the repository. Then for each key with content present, check if it's
+in the bloom filter. Since there can be false positives, this might
+miss finding some unused keys. The probability/size of filter
+could be tunable.
+
+Another way might be to scan the git log for files that got removed
+or changed what key they pointed to. Correlate with keys with content
+currently present in the repository (possibly using a bloom filter again),
+and that would yield a shortlist of keys that are probably not used.
+Then scan thru all files in the repo to make sure that none point to keys
+on the shortlist.
+
+----
+
+`git annex unused --from remote` is much worse, using hundreds of mb of
+memory. It has not been profiled at all yet, and can probably be improved
+somewhat by fixing whatever memory leak it (probably) has.