diff options
author | Klaus Aehlig <aehlig@google.com> | 2018-01-31 07:27:13 -0800 |
---|---|---|
committer | Copybara-Service <copybara-piper@google.com> | 2018-01-31 07:29:15 -0800 |
commit | 7e6837cc1d1aa4259f5c27ba3606b277b5f6c3e9 (patch) | |
tree | 9ba880c2a4e59230d02ff8548bd270973bbf4633 | |
parent | 0fba3064bc32e2e124510627930b470f2788952c (diff) |
http_archive: verify that unicode characters are OK in tar archives
Add a test verifying that http_archive can extract a tar archive
containing unicode characters. While such files cannot be referred
to by labels, it is still important that the archive can be extracted.
Also fix that use case on Darwin, by appropriately reencoding the string,
so that the Files java standard library can encode it back to what we
had in the first place.
Work-around for #1653, showing that http_archive from @bazel_tools can
be used; however, the issue still remains for zip archives.
Change-Id: If944203bf618c21705af676347d8591ab015d559
PiperOrigin-RevId: 183987726
-rw-r--r-- | src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java | 26 | ||||
-rwxr-xr-x | src/test/shell/bazel/external_integration_test.sh | 27 |
2 files changed, 46 insertions, 7 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java index b3141c7831..599fe0689f 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/CompressedTarFunction.java @@ -17,19 +17,19 @@ package com.google.devtools.build.lib.bazel.repository; import com.google.common.base.Optional; import com.google.devtools.build.lib.bazel.repository.DecompressorValue.Decompressor; import com.google.devtools.build.lib.rules.repository.RepositoryFunction.RepositoryFunctionException; +import com.google.devtools.build.lib.util.OS; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.skyframe.SkyFunctionException.Transience; - -import java.util.Date; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; - import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.util.Date; +import org.apache.commons.compress.archivers.tar.TarArchiveEntry; +import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; /** * Common code for unarchiving a compressed TAR file. 
@@ -79,8 +79,20 @@ public abstract class CompressedTarFunction implements Decompressor { filename, descriptor.repositoryPath().getRelative(linkName)); } } else { - Files.copy( - tarStream, filename.getPathFile().toPath(), StandardCopyOption.REPLACE_EXISTING); + if (OS.getCurrent() == OS.DARWIN) { + // On Darwin, Files interprets file names as utf8, regardless of the standard + // encoding, so we have to create a unicode string that, when encoded as utf-8, gives + // the same octets back; in this way, we can have Files.copy behave consistently + // with the file name interpretation of com.google.devtools.build.lib.vfs. + String filenameForFiles = + new String( + filename.getPathFile().toPath().toString().getBytes("ISO-8859-1"), "UTF-8"); + Files.copy( + tarStream, Paths.get(filenameForFiles), StandardCopyOption.REPLACE_EXISTING); + } else { + Files.copy( + tarStream, filename.getPathFile().toPath(), StandardCopyOption.REPLACE_EXISTING); + } filename.chmod(entry.getMode()); // This can only be done on real files, not links, or it will skip the reader to diff --git a/src/test/shell/bazel/external_integration_test.sh b/src/test/shell/bazel/external_integration_test.sh index b5c5f844b6..76df0206d9 100755 --- a/src/test/shell/bazel/external_integration_test.sh +++ b/src/test/shell/bazel/external_integration_test.sh @@ -949,6 +949,33 @@ EOF || fail 'Expected @ext//:foo and //:foo not to conflict' } +function test_unicode_characters_tar() { + # Verify that archives with the utf-8 encoding of unicode-characters in the + # file name can be decompressed. 
+ WRKDIR=$(mktemp -d "${TEST_TMPDIR}/testXXXXXX") + cd "${WRKDIR}" + mkdir ext + # F0 9F 8D 83 is the UTF-8 encoding of the 'LEAF FLUTTERING IN WIND' (U+1F343) unicode + # symbol + echo 'leaves' > ext/$'unicode-\xF0\x9F\x8D\x83.txt' + echo 'Hello World' > ext/hello.txt + echo 'exports_files(["hello.txt"])' > ext/BUILD + tar cvf ext.tar ext + rm -rf ext + + mkdir main + cd main + cat > WORKSPACE <<EOF +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +http_archive( + name="ext", + strip_prefix="ext", + urls=["file://${WRKDIR}/ext.tar"], +) +EOF + bazel build '@ext//:hello.txt' || fail "expected success" +} + function test_missing_build() { mkdir ext echo foo> ext/foo |