From 7227dd8f21f24c2ccadd38e1a3dec7b888a23e92 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 22 Dec 2011 21:23:11 -0400 Subject: add escape_var hack Makes it easy to find files with duplicate contents, anyway.. :) --- doc/git-annex.mdwn | 6 ++++-- doc/tips/finding_duplicate_files.mdwn | 21 +++++++++++++++++++++ ...nex__34___command_that_will_skip_duplicates.mdwn | 2 +- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 doc/tips/finding_duplicate_files.mdwn (limited to 'doc') diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 7ad3fac69..2d0d2597e 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -437,8 +437,10 @@ subdirectories). Specifies a custom output format. The value is a format string, in which '${var}' is expanded to the value of a variable. To right-justify a variable with whitespace, use '${var;width}' ; to left-justify - a variable, use '${var;-width}'. Also, '\\n' is a newline, '\\000' is a NULL, - etc. + a variable, use '${var;-width}'; to escape unusual characters in a variable, + use '${escaped_var}'. + + Also, '\\n' is a newline, '\\000' is a NULL, etc. * -c name=value diff --git a/doc/tips/finding_duplicate_files.mdwn b/doc/tips/finding_duplicate_files.mdwn new file mode 100644 index 000000000..94fc85400 --- /dev/null +++ b/doc/tips/finding_duplicate_files.mdwn @@ -0,0 +1,21 @@ +Maybe you had a lot of files scattered around on different drives, and you +added them all into a single git-annex repository. Some of the files are +surely duplicates of others. + +While git-annex stores the file contents efficiently, it would still +help in cleaning up this mess if you could find, and perhaps remove, +the duplicate files.
+ +Here's a command line that will show duplicate sets of files grouped together: + + git annex find --include '*' --format='${file} ${escaped_key}\n' | \ + sort -k2 | uniq --all-repeated=separate -f1 | \ + sed 's/ [^ ]*$//' + +Here's a command line that will remove one of each duplicate set of files: + + git annex find --include '*' --format='${file} ${escaped_key}\n' | \ + sort -k2 | uniq --repeated -f1 | sed 's/ [^ ]*$//' | \ + xargs -d '\n' git rm + +--[[Joey]] diff --git a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn index ca18afc57..933653578 100644 --- a/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn +++ b/doc/todo/wishlist:_Provide_a___34__git_annex__34___command_that_will_skip_duplicates.mdwn @@ -25,4 +25,4 @@ I want this because I have copies of various of mine (photos, in particular) sca (As I write this, I realize it's possible to parse the destination of the symlink in a way that does this..) -> +> [[done]]; see [[tips/finding_duplicate_files]] --[[Joey]] -- cgit v1.2.3