// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.actions;

import com.google.common.annotations.VisibleForTesting;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.FileType;
import com.google.devtools.build.lib.util.ShellEscaper;
import com.google.devtools.build.lib.vfs.PathFragment;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;

/**
 * Support for parameter file generation (as used by gcc and other tools, e.g.
 * {@code gcc @param_file}). Note that the parameter file needs to be explicitly
 * deleted after use. Different tools require different parameter file formats,
 * which can be selected via the {@link ParameterFileType} enum.
 *
 * <p>The default charset is ISO-8859-1 (latin1). This also has to match the
 * expectation of the tool.
 *
 * <p>Don't use this class for new code. Use the ParameterFileWriteAction
 * instead!
 */
public class ParameterFile {

  /**
   * Different styles of parameter files.
   */
  public enum ParameterFileType {
    /**
     * A parameter file with every parameter on a separate line. This format
     * cannot handle newlines in parameters. It is currently used for most
     * tools, but may not be interpreted correctly if parameters contain
     * white space or other special characters. It should be avoided for new
     * development.
     */
    UNQUOTED,

    /**
     * A parameter file where each parameter is correctly quoted for shell
     * use, and separated by white space (space, tab, newline). This format is
     * safe for all characters, but must be specially supported by the tool. In
     * particular, it must not be used with gcc and related tools, which do not
     * support this format as it is.
     */
    SHELL_QUOTED;
  }

  @VisibleForTesting
  public static final FileType PARAMETER_FILE = FileType.of(".params");

  /** Not instantiable: this class only exposes static helpers. */
  private ParameterFile() {
  }

  /**
   * Derives a parameter file path from the given path using the flavor {@code "2"}, i.e. the base
   * name of {@code original} is replaced by {@code <basename>-2.params}.
   *
   * @param original the path the parameter file path is derived from
   * @return the derived parameter file path
   */
  public static PathFragment derivePath(PathFragment original) {
    return derivePath(original, "2");
  }

  /**
   * Derives a parameter file path from the given path by replacing its base name with
   * {@code <basename>-<flavor>.params}.
   *
   * @param original the path the parameter file path is derived from
   * @param flavor the suffix inserted between the base name and the {@code .params} extension
   * @return the derived parameter file path
   */
  public static PathFragment derivePath(PathFragment original, String flavor) {
    return original.replaceName(original.getBaseName() + "-" + flavor + ".params");
  }

  /**
   * Writes an argument list to a parameter file, one argument per line.
   *
   * <p>The stream is flushed but not closed; closing it remains the caller's responsibility.
   *
   * @param out the stream the parameter file content is written to
   * @param arguments the arguments to write
   * @param type whether the arguments are shell-escaped before being written
   * @param charset the charset used to encode the arguments
   * @throws IOException if writing to {@code out} fails
   */
  public static void writeParameterFile(
      OutputStream out, Iterable<String> arguments, ParameterFileType type, Charset charset)
      throws IOException {
    switch (type) {
      case SHELL_QUOTED:
        Iterable<String> quotedContent = ShellEscaper.escapeAll(arguments);
        writeContent(out, quotedContent, charset);
        break;
      case UNQUOTED:
        writeContent(out, arguments, charset);
        break;
    }
  }

  /**
   * Writes every argument followed by a newline, picking a specialised fast path for ISO-8859-1
   * and UTF-8 when {@link StringUnsafe} is usable and a generic writer otherwise.
   */
  private static void writeContent(
      OutputStream outputStream, Iterable<String> arguments, Charset charset) throws IOException {
    if (charset.equals(StandardCharsets.ISO_8859_1) && StringUnsafe.canUse()) {
      writeContentLatin1Jdk9(outputStream, arguments);
    } else if (charset.equals(StandardCharsets.UTF_8) && StringUnsafe.canUse()) {
      writeContentUtf8Jdk9(outputStream, arguments);
    } else {
      // Generic charset support. The writer is only flushed, never closed, so that the
      // caller-owned stream stays open.
      OutputStreamWriter out = new OutputStreamWriter(outputStream, charset);
      for (String line : arguments) {
        out.write(line);
        out.write('\n');
      }
      out.flush();
    }
  }

  /**
   * Fast LATIN-1 path that avoids GC overhead. This takes advantage of the fact that strings are
   * encoded as either LATIN-1 or UTF-16 under JDK9. When LATIN-1 we can simply copy the byte
   * buffer, when UTF-16 we fall back to the charset encoder, which substitutes unmappable
   * characters with '?'.
   */
  private static void writeContentLatin1Jdk9(OutputStream outputStream, Iterable<String> arguments)
      throws IOException {
    StringUnsafe stringUnsafe = StringUnsafe.getInstance();
    for (String line : arguments) {
      if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1) {
        byte[] bytes = stringUnsafe.getByteArray(line);
        outputStream.write(bytes);
      } else {
        // Error case, encode with '?' characters
        writeBuffer(outputStream, StandardCharsets.ISO_8859_1.encode(CharBuffer.wrap(line)));
      }
      outputStream.write('\n');
    }
    outputStream.flush();
  }

  /**
   * Fast UTF-8 path that tries to reduce GC overhead. This takes advantage of the fact that
   * strings are encoded as either LATIN-1 or UTF-16 under JDK9. When LATIN-1 we can check if the
   * buffer is ASCII and copy that directly (since this is both valid LATIN-1 and UTF-8), in all
   * other cases we must re-encode.
   *
   * <p>Re-encoding goes through {@link CharsetEncoder#encode(CharBuffer)}, which reports
   * malformed input (e.g. an unpaired surrogate) as a {@code CharacterCodingException}, a
   * subclass of {@link IOException}.
   */
  private static void writeContentUtf8Jdk9(OutputStream outputStream, Iterable<String> arguments)
      throws IOException {
    CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
    StringUnsafe stringUnsafe = StringUnsafe.getInstance();
    for (String line : arguments) {
      byte[] bytes = stringUnsafe.getByteArray(line);
      if (stringUnsafe.getCoder(line) == StringUnsafe.LATIN1 && isAscii(bytes)) {
        outputStream.write(bytes);
      } else {
        writeBuffer(outputStream, encoder.encode(CharBuffer.wrap(line)));
      }
      outputStream.write('\n');
    }
    outputStream.flush();
  }

  /**
   * Writes the remaining content of an array-backed buffer to the stream.
   *
   * <p>The third argument of {@link OutputStream#write(byte[], int, int)} is a length, not an end
   * index, so the number of remaining bytes is passed rather than {@code arrayOffset + limit}.
   */
  private static void writeBuffer(OutputStream outputStream, ByteBuffer encodedBytes)
      throws IOException {
    outputStream.write(
        encodedBytes.array(),
        encodedBytes.arrayOffset() + encodedBytes.position(),
        encodedBytes.remaining());
  }

  /** Returns true if no byte of the given LATIN-1 encoded array has its high bit set. */
  private static boolean isAscii(byte[] latin1Bytes) {
    boolean hiBitSet = false;
    int n = latin1Bytes.length;
    for (int i = 0; i < n; ++i) {
      hiBitSet |= ((latin1Bytes[i] & 0x80) != 0);
    }
    return !hiBitSet;
  }
}