summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Nathan Collins <nathan.collins@gmail.com>2012-06-09 20:48:32 -0700
committerGravatar Nathan Collins <nathan.collins@gmail.com>2012-06-09 20:48:32 -0700
commit7eb649612a5b8c4909a90ba93d5496448696c559 (patch)
tree128360c592523b1ccf72c0300fbcb24d2d8d566b
parent2b29a0228527e00d112770cb89b07f28c6bb0a14 (diff)
parent6a71a9729fbe02aa6c179bb6c617278257edf71c (diff)
Merge branch 'master' into cabal-man-pages
-rw-r--r--Annex/Branch.hs9
-rw-r--r--Command/InitRemote.hs2
-rw-r--r--Git/CatFile.hs8
-rw-r--r--Git/HashObject.hs18
-rw-r--r--Git/Types.hs15
-rw-r--r--Git/UnionMerge.hs58
-rw-r--r--Git/UpdateIndex.hs49
-rw-r--r--Remote.hs7
-rw-r--r--Types/Backend.hs13
-rw-r--r--debian/changelog1
-rw-r--r--doc/design/assistant/blog/day_3__more_races.mdwn26
-rw-r--r--doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment8
-rw-r--r--doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment8
-rw-r--r--doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment14
-rw-r--r--doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment8
-rw-r--r--doc/design/assistant/blog/day_4__speed.mdwn47
-rw-r--r--doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment8
-rw-r--r--doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment8
-rw-r--r--doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment9
-rw-r--r--doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment9
-rw-r--r--doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment12
-rw-r--r--doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment10
-rw-r--r--doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment8
-rw-r--r--doc/design/assistant/inotify.mdwn39
-rw-r--r--doc/design/assistant/webapp.mdwn2
-rw-r--r--doc/tips/using_box.com_as_a_special_remote.mdwn2
-rw-r--r--doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn12
27 files changed, 316 insertions, 94 deletions
diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index 706522f3b..c8d0719b0 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -33,6 +33,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
import qualified Git.UnionMerge
+import qualified Git.UpdateIndex
import Git.HashObject
import qualified Git.Index
import Annex.CatFile
@@ -258,8 +259,8 @@ files = withIndexUpdate $ do
- in changes from other branches.
-}
genIndex :: Git.Repo -> IO ()
-genIndex g = Git.UnionMerge.stream_update_index g
- [Git.UnionMerge.ls_tree fullname g]
+genIndex g = Git.UpdateIndex.stream_update_index g
+ [Git.UpdateIndex.ls_tree fullname g]
{- Merges the specified refs into the index.
- Any changes staged in the index will be preserved. -}
@@ -335,13 +336,13 @@ stageJournal = do
g <- gitRepo
withIndex $ liftIO $ do
h <- hashObjectStart g
- Git.UnionMerge.stream_update_index g
+ Git.UpdateIndex.stream_update_index g
[genstream (gitAnnexJournalDir g) h fs]
hashObjectStop h
where
genstream dir h fs streamer = forM_ fs $ \file -> do
let path = dir </> file
sha <- hashFile h path
- _ <- streamer $ Git.UnionMerge.update_index_line
+ _ <- streamer $ Git.UpdateIndex.update_index_line
sha (fileJournal file)
removeFile path
diff --git a/Command/InitRemote.hs b/Command/InitRemote.hs
index 698d60455..a78505a19 100644
--- a/Command/InitRemote.hs
+++ b/Command/InitRemote.hs
@@ -15,6 +15,7 @@ import qualified Remote
import qualified Logs.Remote
import qualified Types.Remote as R
import Annex.UUID
+import Logs.UUID
def :: [Command]
def = [command "initremote"
@@ -60,6 +61,7 @@ findByName name = do
where
generate = do
uuid <- liftIO genUUID
+ describeUUID uuid name
return (uuid, M.insert nameKey name M.empty)
findByName' :: String -> M.Map UUID R.RemoteConfig -> Maybe (UUID, R.RemoteConfig)
diff --git a/Git/CatFile.hs b/Git/CatFile.hs
index c598d7aa4..d5b367945 100644
--- a/Git/CatFile.hs
+++ b/Git/CatFile.hs
@@ -21,6 +21,7 @@ import Common
import Git
import Git.Sha
import Git.Command
+import Git.Types
import qualified Utility.CoProcess as CoProcess
type CatFileHandle = CoProcess.CoProcessHandle
@@ -52,7 +53,7 @@ catObject h object = CoProcess.query h send receive
case words header of
[sha, objtype, size]
| length sha == shaSize &&
- validobjtype objtype ->
+ isJust (readObjectType objtype) ->
case reads size of
[(bytes, "")] -> readcontent bytes from
_ -> dne
@@ -67,8 +68,3 @@ catObject h object = CoProcess.query h send receive
error "missing newline from git cat-file"
return $ L.fromChunks [content]
dne = return L.empty
- validobjtype t
- | t == "blob" = True
- | t == "commit" = True
- | t == "tree" = True
- | otherwise = False
diff --git a/Git/HashObject.hs b/Git/HashObject.hs
index 617e5ac28..b052413fd 100644
--- a/Git/HashObject.hs
+++ b/Git/HashObject.hs
@@ -9,7 +9,9 @@ module Git.HashObject where
import Common
import Git
+import Git.Sha
import Git.Command
+import Git.Types
import qualified Utility.CoProcess as CoProcess
type HashObjectHandle = CoProcess.CoProcessHandle
@@ -24,11 +26,23 @@ hashObjectStart = CoProcess.start "git" . toCommand . gitCommandLine
hashObjectStop :: HashObjectHandle -> IO ()
hashObjectStop = CoProcess.stop
-{- Injects a file into git, returning the shas of the objects. -}
+{- Injects a file into git, returning the Sha of the object. -}
hashFile :: HashObjectHandle -> FilePath -> IO Sha
hashFile h file = CoProcess.query h send receive
where
send to = do
fileEncoding to
hPutStrLn to file
- receive from = Ref <$> hGetLine from
+ receive from = getSha "hash-object" $ hGetLine from
+
+{- Injects some content into git, returning its Sha. -}
+hashObject :: Repo -> ObjectType -> String -> IO Sha
+hashObject repo objtype content = getSha subcmd $ do
+ (h, s) <- pipeWriteRead (map Param params) content repo
+ length s `seq` do
+ forceSuccess h
+ reap -- XXX unsure why this is needed
+ return s
+ where
+ subcmd = "hash-object"
+ params = [subcmd, "-t", show objtype, "-w", "--stdin"]
diff --git a/Git/Types.hs b/Git/Types.hs
index deb14ebd4..64d418a04 100644
--- a/Git/Types.hs
+++ b/Git/Types.hs
@@ -48,3 +48,18 @@ instance Show Ref where
type Branch = Ref
type Sha = Ref
type Tag = Ref
+
+{- Types of objects that can be stored in git. -}
+data ObjectType = BlobObject | CommitObject | TreeObject
+
+instance Show ObjectType where
+ show BlobObject = "blob"
+ show CommitObject = "commit"
+ show TreeObject = "tree"
+
+readObjectType :: String -> Maybe ObjectType
+readObjectType "blob" = Just BlobObject
+readObjectType "commit" = Just CommitObject
+readObjectType "tree" = Just TreeObject
+readObjectType _ = Nothing
+
diff --git a/Git/UnionMerge.hs b/Git/UnionMerge.hs
index d68bb61ab..822e6abbf 100644
--- a/Git/UnionMerge.hs
+++ b/Git/UnionMerge.hs
@@ -7,14 +7,9 @@
module Git.UnionMerge (
merge,
- merge_index,
- update_index,
- stream_update_index,
- update_index_line,
- ls_tree
+ merge_index
) where
-import System.Cmd.Utils
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.Encoding as L
import qualified Data.Set as S
@@ -24,8 +19,9 @@ import Git
import Git.Sha
import Git.CatFile
import Git.Command
-
-type Streamer = (String -> IO ()) -> IO ()
+import Git.UpdateIndex
+import Git.HashObject
+import Git.Types
{- Performs a union merge between two branches, staging it in the index.
- Any previously staged changes in the index will be lost.
@@ -47,38 +43,6 @@ merge_index :: CatFileHandle -> Repo -> [Ref] -> IO ()
merge_index h repo bs =
stream_update_index repo $ map (\b -> merge_tree_index b h repo) bs
-{- Feeds content into update-index. Later items in the list can override
- - earlier ones, so the list can be generated from any combination of
- - ls_tree, merge_trees, and merge_tree_index. -}
-update_index :: Repo -> [String] -> IO ()
-update_index repo ls = stream_update_index repo [(`mapM_` ls)]
-
-{- Streams content into update-index. -}
-stream_update_index :: Repo -> [Streamer] -> IO ()
-stream_update_index repo as = do
- (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
- fileEncoding h
- forM_ as (stream h)
- hClose h
- forceSuccess p
- where
- params = map Param ["update-index", "-z", "--index-info"]
- stream h a = a (streamer h)
- streamer h s = do
- hPutStr h s
- hPutStr h "\0"
-
-{- Generates a line suitable to be fed into update-index, to add
- - a given file with a given sha. -}
-update_index_line :: Sha -> FilePath -> String
-update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
-
-{- Gets the current tree for a ref. -}
-ls_tree :: Ref -> Repo -> Streamer
-ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
- where
- params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
-
{- For merging two trees. -}
merge_trees :: Ref -> Ref -> CatFileHandle -> Repo -> Streamer
merge_trees (Ref x) (Ref y) h = calc_merge h $ "diff-tree":diff_opts ++ [x, y]
@@ -109,7 +73,7 @@ mergeFile :: String -> FilePath -> CatFileHandle -> Repo -> IO (Maybe String)
mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of
[] -> return Nothing
(sha:[]) -> use sha
- shas -> use =<< either return (hashObject repo . unlines) =<<
+ shas -> use =<< either return (hashObject repo BlobObject . unlines) =<<
calcMerge . zip shas <$> mapM getcontents shas
where
[_colonmode, _bmode, asha, bsha, _status] = words info
@@ -117,18 +81,6 @@ mergeFile info file h repo = case filter (/= nullSha) [Ref asha, Ref bsha] of
L.decodeUtf8 <$> catObject h s
use sha = return $ Just $ update_index_line sha file
-{- Injects some content into git, returning its Sha. -}
-hashObject :: Repo -> String -> IO Sha
-hashObject repo content = getSha subcmd $ do
- (h, s) <- pipeWriteRead (map Param params) content repo
- length s `seq` do
- forceSuccess h
- reap -- XXX unsure why this is needed
- return s
- where
- subcmd = "hash-object"
- params = [subcmd, "-w", "--stdin"]
-
{- Calculates a union merge between a list of refs, with contents.
-
- When possible, reuses the content of an existing ref, rather than
diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs
new file mode 100644
index 000000000..04bc4da5b
--- /dev/null
+++ b/Git/UpdateIndex.hs
@@ -0,0 +1,49 @@
+{- git-update-index library
+ -
+ - Copyright 2011, 2012 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.UpdateIndex (
+ Streamer,
+ stream_update_index,
+ update_index_line,
+ ls_tree
+) where
+
+import System.Cmd.Utils
+
+import Common
+import Git
+import Git.Command
+
+{- Streamers are passed a callback and should feed it lines in the form
+ - read by update-index, and generated by ls-tree. -}
+type Streamer = (String -> IO ()) -> IO ()
+
+{- Streams content into update-index from a list of Streamers. -}
+stream_update_index :: Repo -> [Streamer] -> IO ()
+stream_update_index repo as = do
+ (p, h) <- hPipeTo "git" (toCommand $ gitCommandLine params repo)
+ fileEncoding h
+ forM_ as (stream h)
+ hClose h
+ forceSuccess p
+ where
+ params = map Param ["update-index", "-z", "--index-info"]
+ stream h a = a (streamer h)
+ streamer h s = do
+ hPutStr h s
+ hPutStr h "\0"
+
+{- Generates a line suitable to be fed into update-index, to add
+ - a given file with a given sha. -}
+update_index_line :: Sha -> FilePath -> String
+update_index_line sha file = "100644 blob " ++ show sha ++ "\t" ++ file
+
+{- Gets the current tree for a ref. -}
+ls_tree :: Ref -> Repo -> Streamer
+ls_tree (Ref x) repo streamer = mapM_ streamer =<< pipeNullSplit params repo
+ where
+ params = map Param ["ls-tree", "-z", "-r", "--full-tree", x]
diff --git a/Remote.hs b/Remote.hs
index e9e66990c..839c6ddb0 100644
--- a/Remote.hs
+++ b/Remote.hs
@@ -54,9 +54,9 @@ remoteMap :: (Remote -> a) -> Annex (M.Map UUID a)
remoteMap c = M.fromList . map (\r -> (uuid r, c r)) .
filter (\r -> uuid r /= NoUUID) <$> remoteList
-{- Map of UUIDs and their descriptions.
+{- Map of UUIDs of remotes and their descriptions.
- The names of Remotes are added to supplement any description that has
- - been set for a repository. -}
+ - been set for a repository. -}
uuidDescriptions :: Annex (M.Map UUID String)
uuidDescriptions = M.unionWith addName <$> uuidMap <*> remoteMap name
@@ -102,9 +102,6 @@ nameToUUID n = byName' n >>= go
{- Pretty-prints a list of UUIDs of remotes, for human display.
-
- - Shows descriptions from the uuid log, falling back to remote names,
- - as some remotes may not be in the uuid log.
- -
- When JSON is enabled, also generates a machine-readable description
- of the UUIDs. -}
prettyPrintUUIDs :: String -> [UUID] -> Annex String
diff --git a/Types/Backend.hs b/Types/Backend.hs
index 5abb0896d..97f7cef90 100644
--- a/Types/Backend.hs
+++ b/Types/Backend.hs
@@ -18,14 +18,11 @@ data KeySource = KeySource
, contentLocation :: FilePath
}
-data BackendA a = Backend {
- -- name of this backend
- name :: String,
- -- gets the key to use for a given content
- getKey :: KeySource -> a (Maybe Key),
- -- called during fsck to check a key, if the backend has its own checks
- fsckKey :: Maybe (Key -> FilePath -> a Bool)
-}
+data BackendA a = Backend
+ { name :: String
+ , getKey :: KeySource -> a (Maybe Key)
+ , fsckKey :: Maybe (Key -> FilePath -> a Bool)
+ }
instance Show (BackendA a) where
show backend = "Backend { name =\"" ++ name backend ++ "\" }"
diff --git a/debian/changelog b/debian/changelog
index 9a010327d..8a734e0aa 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -2,6 +2,7 @@ git-annex (3.20120606) UNRELEASED; urgency=low
* add: Prevent (most) modifications from being made to a file while it
is being added to the annex.
+ * initremote: Automatically describe a remote when creating it.
-- Joey Hess <joeyh@debian.org> Tue, 05 Jun 2012 20:25:51 -0400
diff --git a/doc/design/assistant/blog/day_3__more_races.mdwn b/doc/design/assistant/blog/day_3__more_races.mdwn
new file mode 100644
index 000000000..9c1182842
--- /dev/null
+++ b/doc/design/assistant/blog/day_3__more_races.mdwn
@@ -0,0 +1,26 @@
+Today I worked on the race conditions, and fixed two of them. Both
+were fixed by avoiding using `git add`, which looks at the files currently
+on disk. Instead, `git annex watch` injects symlinks directly into git's
+index, using `git update-index`.
+
+There is one bad race condition remaining. If multiple processes have a
+file open for write, one can close it, and it will be added to the annex.
+But then the other can still write to it.
+
+----
+
+Getting away from race conditions for a while, I made `git annex watch`
+not annex `.gitignore` and `.gitattributes` files.
+
+And, I made it handle running out of inotify descriptors. By default,
+`/proc/sys/fs/inotify/max_user_watches` is 8192, and that's how many
+directories inotify can watch. Now when it needs more, it will print
+a nice message showing how to increase it with `sysctl`.
+
+FWIW, DropBox also uses inotify and has the same limit. It seems to not
+tell the user how to fix it when it goes over. Here's what `git annex
+watch` will say:
+
+ Too many directories to watch! (Not watching ./dir4299)
+ Increase the limit by running:
+ echo fs.inotify.max_user_watches=81920 | sudo tee -a /etc/sysctl.conf; sudo sysctl -p
diff --git a/doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment b/doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment
new file mode 100644
index 000000000..2d330f332
--- /dev/null
+++ b/doc/design/assistant/blog/day_3__more_races/comment_1_d6015338f602b574a3805de5481fc45e._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawkmtR6oVColYKoU0SjBORLDGrwR10G-mKo"
+ nickname="Jo-Herman"
+ subject="Dropbox Inotify"
+ date="2012-06-06T22:03:29Z"
+ content="""
+Actually, Dropbox gives you a warning via libnotify inotify. It tends to go away too quickly to properly read though, much less actually copy down the command...
+"""]]
diff --git a/doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment b/doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment
new file mode 100644
index 000000000..523e6d85f
--- /dev/null
+++ b/doc/design/assistant/blog/day_3__more_races/comment_2_4d6b23fc6442e0ee0303523cb69d0fba._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://joeyh.name/"
+ ip="4.252.8.36"
+ subject="comment 2"
+ date="2012-06-06T23:25:57Z"
+ content="""
+When I work on the [[webapp]], I'm planning to make it display this warning, and any other similar warning messages that might come up.
+"""]]
diff --git a/doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment b/doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment
new file mode 100644
index 000000000..92f5dcbd6
--- /dev/null
+++ b/doc/design/assistant/blog/day_3__more_races/comment_3_03f5b2344c2a47dea60086f217d60f9b._comment
@@ -0,0 +1,14 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawnBJ6Dv1glxzzi4qIzGFNa6F-mfHIvv9Ck"
+ nickname="Jim"
+ subject="Wording"
+ date="2012-06-07T03:43:19Z"
+ content="""
+For the unfamiliar, it's hard to tell if a command like that would persist. I'd suggest being as clear as possible, e.g.:
+
+ Increase the limit for now by running:
+ sudo sysctl fs.inotify.max_user_watches=81920
+ Increase the limit now and automatically at every boot by running:
+ echo fs.inotify.max_user_watches=81920 | sudo tee -a /etc/sysctl.conf; sudo sysctl -p
+
+"""]]
diff --git a/doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment b/doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment
new file mode 100644
index 000000000..05b601eaf
--- /dev/null
+++ b/doc/design/assistant/blog/day_3__more_races/comment_4_860e90e989ec022100001c65e353a91e._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://joeyh.name/"
+ ip="4.252.8.36"
+ subject="comment 4"
+ date="2012-06-07T04:48:15Z"
+ content="""
+Good thought Jim. I've done something like that.
+"""]]
diff --git a/doc/design/assistant/blog/day_4__speed.mdwn b/doc/design/assistant/blog/day_4__speed.mdwn
new file mode 100644
index 000000000..badc6b7b1
--- /dev/null
+++ b/doc/design/assistant/blog/day_4__speed.mdwn
@@ -0,0 +1,47 @@
+Only had a few hours to work today, but my current focus is speed, and I
+have indeed sped up parts of `git annex watch`.
+
+One thing folks don't realize about git is that despite a rep for being
+fast, it can be rather slow in one area: Writing the index. You don't
+notice it until you have a lot of files, and the index gets big. So I've
+put a lot of effort into git-annex in the past to avoid writing the index
+repeatedly, and queue up big index changes that can happen all at once. The
+new `git annex watch` was not able to use that queue. Today I reworked the
+queue machinery to support the types of direct index writes it needs, and
+now repeated index writes are eliminated.
+
+... Eliminated too far, it turns out, since it doesn't yet *ever* flush
+that queue until shutdown! So the next step here will be to have a worker
+thread that wakes up periodically, flushes the queue, and autocommits.
+(This will, in fact, be the start of the [[syncing]] phase of my roadmap!)
+There's lots of room here for smart behavior. Like, if a lot of changes are
+being made close together, wait for them to die down before committing. Or,
+if it's been idle and a single file appears, commit it immediately, since
+this is probably something the user wants synced out right away. I'll start
+with something stupid and then add the smarts.
+
+(BTW, in all my years of programming, I have avoided threads like the nasty
+bug-prone plague they are. Here I already have three threads, and am going to
+add probably 4 or 5 more before I'm done with the git annex assistant. So
+far, it's working well -- I give credit to Haskell for making it easy to
+manage state in ways that make it possible to reason about how the threads
+will interact.)
+
+What about the races I've been stressing over? Well, I have an ulterior
+motive in speeding up `git annex watch`, and that's to also be able to
+**slow it down**. Running in slow-mo makes it easy to try things that might
+cause a race and watch how it reacts. I'll be using this technique when
+I circle back around to dealing with the races.
+
+Another tricky speed problem came up today that I also need to fix. On
+startup, `git annex watch` scans the whole tree to find files that have
+been added or moved etc while it was not running, and take care of them.
+Currently, this scan involves re-staging every symlink in the tree. That's
+slow! I need to find a way to avoid re-staging symlinks; I may use `git
+cat-file` to check if the currently staged symlink is correct, or I may
+come up with some better and faster solution. Sleeping on this problem.
+
+----
+
+Oh yeah, I also found one more race bug today. It only happens at startup
+and could only make it miss staging file deletions.
diff --git a/doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment b/doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment
new file mode 100644
index 000000000..fb5b95490
--- /dev/null
+++ b/doc/design/assistant/blog/day_4__speed/comment_1_bf3c9c33cc0dea5eaeb6f2af110b924b._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawldKnauegZulM7X6JoHJs7Gd5PnDjcgx-E"
+ nickname="Matt"
+ subject="open source?"
+ date="2012-06-09T22:34:30Z"
+ content="""
+Are you publishing the source code for git-annex assistant somewhere?
+"""]]
diff --git a/doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment b/doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment
new file mode 100644
index 000000000..1fcc197ab
--- /dev/null
+++ b/doc/design/assistant/blog/day_4__speed/comment_2_33aba4c9abaa3e6a05a2c87ab7df9d0e._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://joeyh.name/"
+ ip="4.153.8.126"
+ subject="comment 2"
+ date="2012-06-09T23:01:29Z"
+ content="""
+Yes, it's in [[git|download]] with the rest of git-annex. Currently in the `watch` branch.
+"""]]
diff --git a/doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment b/doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment
new file mode 100644
index 000000000..a78fa3343
--- /dev/null
+++ b/doc/design/assistant/comment_3_05223be50c889b2ed6bc4abf74116450._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
+ nickname="Jimmy"
+ subject="comment 3"
+ date="2012-06-07T20:22:55Z"
+ content="""
+I'd agree getting it into the main distros is the way to go, if you need OSX binaries, I could volunteer to setup an autobuilder to generate binaries for OSX users, however it would rely on users to have macports with the correct ports installed to use it (things like coreutils etc...)
+
+"""]]
diff --git a/doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment b/doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment
new file mode 100644
index 000000000..cd3b5aaef
--- /dev/null
+++ b/doc/design/assistant/comment_4_fbbd93b55803ae21e6ba4b6568c2fafd._comment
@@ -0,0 +1,9 @@
+[[!comment format=mdwn
+ username="http://joeyh.name/"
+ subject="comment 4"
+ date="2012-06-08T01:56:52Z"
+ content="""
+I always appreciate your OSX work Jimmy...
+
+Could it be put into macports?
+"""]]
diff --git a/doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment b/doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment
new file mode 100644
index 000000000..bf8d9709e
--- /dev/null
+++ b/doc/design/assistant/comment_5_f4e9af3fed6c27e8ff39badb9794064d._comment
@@ -0,0 +1,12 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
+ nickname="Jimmy"
+ subject="comment 5"
+ date="2012-06-08T07:22:34Z"
+ content="""
+In relation to macports, I often found that haskell in macports are often behind other distros, and I'm not willing to put much effort into maintaining or updating those ports. I found that to build git-annex, installing macports manually and then installing haskell-platform from the upstream to be the best way to get the most up to date dependencies for git-annex.
+
+fyi in macports ghc is at version 6.10.4 and haskell platform is at version 2009.2, so there are a significant number of ports to update.
+
+I was thinking about this a bit more and I reckon it might be easier to try and build a self contained .pkg package and have all the needed binaries in a .app styled package, that would work well when the webapp comes along. I will take a look at it in a week or two (currently moving house so I don't have much time)
+"""]]
diff --git a/doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment b/doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment
new file mode 100644
index 000000000..9fa66d6d3
--- /dev/null
+++ b/doc/design/assistant/comment_6_c7ad07cade1f44f9a8b61f92225bb9c5._comment
@@ -0,0 +1,10 @@
+[[!comment format=mdwn
+ username="https://www.google.com/accounts/o8/id?id=AItOawkSq2FDpK2n66QRUxtqqdbyDuwgbQmUWus"
+ nickname="Jimmy"
+ subject="comment 6"
+ date="2012-06-08T15:21:18Z"
+ content="""
+It's not much for now... but see <http://www.sgenomics.org/~jtang/gitbuilder-git-annex-x00-x86_64-apple-darwin10.8.0/> I'm ignoring the debian-stable and pristine-tar branches for now, as I am just building and testing on osx 10.7.
+
+Hope the autobuilder will help you develop the OSX side of things without having direct access to an osx machine! I will try and get gitbuilder to spit out appropriately named tarballs of the compiled binaries in a few days when I have more time.
+"""]]
diff --git a/doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment b/doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment
new file mode 100644
index 000000000..6685c6548
--- /dev/null
+++ b/doc/design/assistant/comment_7_609d38e993267195a80fecd84c93d1e2._comment
@@ -0,0 +1,8 @@
+[[!comment format=mdwn
+ username="http://joeyh.name/"
+ ip="4.153.8.126"
+ subject="comment 7"
+ date="2012-06-09T18:07:51Z"
+ content="""
+Thanks, that's already been useful to me. You might as well skip the debian-specific \"bpo\" tags too.
+"""]]
diff --git a/doc/design/assistant/inotify.mdwn b/doc/design/assistant/inotify.mdwn
index 5d903a9b0..7cdde33ac 100644
--- a/doc/design/assistant/inotify.mdwn
+++ b/doc/design/assistant/inotify.mdwn
@@ -19,23 +19,22 @@ really useful, it needs to:
- notice deleted files and stage the deletion
(tricky; there's a race with add since it replaces the file with a symlink..)
**done**
+- Gracefully handle when the default limit of 8192 inotified directories
+ is exceeded. This can be tuned by root, so help the user fix it.
+ **done**
- periodically auto-commit staged changes (avoid autocommitting when
lots of changes are coming in)
- tunable delays before adding new files, etc
-- Coleasce related add/rm events. See commit
- cbdaccd44aa8f0ca30afba23fc06dd244c242075 for some details of the problems
- with doing this.
-- don't annex `.gitignore` and `.gitattributes` files, but do auto-stage
- changes to them
+- coalesce related add/rm events for speed and less disk IO
+- don't annex `.gitignore` and `.gitattributes` files **done**
- configurable option to only annex files meeting certain size or
filename criteria
+- option to check files not meeting annex criteria into git directly
- honor .gitignore, not adding files it excludes (difficult, probably
needs my own .gitignore parser to avoid excessive running of git commands
to check for ignored files)
- Possibly, when a directory is moved out of the annex location,
unannex its contents.
-- Gracefully handle when the default limit of 8192 inotified directories
- is exceeded. This can be tuned by root, so help the user fix it.
- Support OSes other than Linux; it only uses inotify currently.
OSX and FreeBSD use the same mechanism, and there is a Haskell interface
for it,
@@ -67,9 +66,18 @@ Many races need to be dealt with by this code. Here are some of them.
**Currently unfixed**; This changes content in the annex, and fsck will
later catch the inconsistency.
- Possible fixes: Somehow track or detect if a file is open for write
- by any processes. Or, when possible, making a copy on write copy
- before adding the file would avoid this.
+ Possible fixes:
+
+ * Somehow track or detect if a file is open for write by any processes.
+ * Or, when possible, making a copy on write copy before adding the file
+ would avoid this.
+ * Or, as a last resort, make an expensive copy of the file and add that.
+ * Tracking file opens and closes with inotify could tell if any other
+ processes have the file open. But there are problems.. It doesn't
+ seem to differentiate between files opened for read and for write.
+ And there would still be a race after the last close and before it's
+ injected into the annex, where it could be opened for write again.
+ Would need to detect that and undo the annex injection or something.
* File is added and then replaced with another file before the annex add
makes its symlink.
@@ -81,16 +89,14 @@ Many races need to be dealt with by this code. Here are some of them.
* File is added and then replaced with another file before the annex add
stages the symlink in git.
- **Currently unfixed**; `git add` will be run on the new file, which is
- not at all good when it's big. Could be dealt with by using `git
- update-index` to manually put the symlink into the index without git
+ Now fixed; `git annex watch` avoids running `git add` because of this
+ race. Instead, it stages symlinks directly into the index, without
looking at what's currently on disk.
* Link is moved, fixed link is written by fix event, but then that is
removed by the user and replaced with a file before the event finishes.
- **Currently unfixed**: `git add` will be run on the file. Basically same
- effect as previous race above.
+ Now fixed; same fix as previous race above.
* File is removed and then re-added before the removal event starts.
@@ -102,3 +108,6 @@ Many races need to be dealt with by this code. Here are some of them.
Not a problem; The removal event removes the old file from the index, and
the add event adds the new one.
+* At startup, `git add --update` is run, to notice deleted files.
+ Then inotify starts up. Files deleted in between won't have their
+ removals staged.
diff --git a/doc/design/assistant/webapp.mdwn b/doc/design/assistant/webapp.mdwn
index abf7b38c9..598c1ff3a 100644
--- a/doc/design/assistant/webapp.mdwn
+++ b/doc/design/assistant/webapp.mdwn
@@ -23,6 +23,8 @@ The webapp is a web server that displays a shiny interface.
* there could be a UI to export a file, which would make it be served up
over http by the web app
+* Display any relevant warning messages. One is the `inotify max_user_watches`
+ exceeded message.
## implementation
diff --git a/doc/tips/using_box.com_as_a_special_remote.mdwn b/doc/tips/using_box.com_as_a_special_remote.mdwn
index 917c7a93b..cafbc033c 100644
--- a/doc/tips/using_box.com_as_a_special_remote.mdwn
+++ b/doc/tips/using_box.com_as_a_special_remote.mdwn
@@ -24,7 +24,7 @@ With a little setup, git-annex can use Box as a
* Create `~/.davfs2/davfs2.conf` with some important settings:
mkdir ~/.davfs2/
- echo use_locks 0 >> ~/.davfs2/davfs2.conf
+ echo use_locks 0 > ~/.davfs2/davfs2.conf
echo cache_size 1 >> ~/.davfs2/davfs2.conf
echo delay_upload 0 >> ~/.davfs2/davfs2.conf
diff --git a/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn b/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn
new file mode 100644
index 000000000..e11989e52
--- /dev/null
+++ b/doc/todo/wishlist:_special-case_handling_of_Youtube_URLs_in_Web_special_remote.mdwn
@@ -0,0 +1,12 @@
+The [Web special remote](http://git-annex.branchable.com/special_remotes/web/) could possibly be improved by detecting when URLs reference a Youtube video page and using [youtube-dl](http://rg3.github.com/youtube-dl/) instead of wget to download the page. Youtube-dl can also handle several other video sites such as vimeo.com and blip.tv, so if this idea were to be implemented, it might make sense to borrow the regular expressions that youtube-dl uses to identify video URLs. A quick grep through the youtube-dl source for the identifier _VALID_URL should find those regexes (in Python's regex format).
+
+> This is something I've thought about doing for a while..
+> Two things I have not figured out:
+>
+> * Seems that this should really be user-configurable or a plugin system,
+> to handle more than just this one case.
+> * Youtube-dl breaks from time to time, I really trust these urls a lot
+> less than regular urls. Perhaps per-url trust levels are called for by
+> this.
+>
+> --[[Joey]]