// Copyright 2018 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.syntax; import com.google.common.base.CharMatcher; import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.events.Location; import com.google.devtools.build.lib.skylarkinterface.Param; import com.google.devtools.build.lib.skylarkinterface.ParamType; import com.google.devtools.build.lib.skylarkinterface.SkylarkCallable; import com.google.devtools.build.lib.skylarkinterface.SkylarkModule; import com.google.devtools.build.lib.skylarkinterface.SkylarkModuleCategory; import com.google.devtools.build.lib.syntax.SkylarkList.MutableList; import com.google.devtools.build.lib.syntax.SkylarkList.Tuple; import com.google.devtools.build.lib.syntax.Type.ConversionException; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Skylark String module. * *
This module has special treatment in Skylark, as its methods represent the methods available on * any 'string' object in the language. * *
Methods of this class annotated with {@link SkylarkCallable} must have a positional-only
* 'String self' parameter as the first parameter of the method.
*/
@SkylarkModule(
name = "string",
category = SkylarkModuleCategory.BUILTIN,
doc =
"A language built-in type to support strings. "
+ "Examples of string literals:
"
+ "
a = 'abc\\ndef'\n" + "b = \"ab'cd\"\n" + "c = \"\"\"multiline string\"\"\"\n" + "\n" + "# Strings support slicing (negative index starts from the end):\n" + "x = \"hello\"[2:4] # \"ll\"\n" + "y = \"hello\"[1:-1] # \"ell\"\n" + "z = \"hello\"[:4] # \"hell\"" + "# Slice steps can be used, too:\n" + "s = \"hello\"[::2] # \"hlo\"\n" + "t = \"hello\"[3:0:-1] # \"lle\"\n" + "Strings are iterable and support the
in
operator. Examples:\"bc\" in \"abcd\" # evaluates to True\n" + "x = [s for s in \"abc\"] # x == [\"a\", \"b\", \"c\"]\n" + "Implicit concatenation of strings is not allowed; use the
+
"
+ "operator instead. Comparison operators perform a lexicographical comparison; "
+ "use ==
to test for equality."
)
public final class StringModule {
// Private constructor: instances cannot be created from outside this class.
private StringModule() {}
/**
 * Emulates Python-style substring extraction: out-of-range indices are adjusted via {@link
 * EvalUtils#clampRangeEndpoint} instead of raising an index error.
 *
 * @param str the string to take the substring from
 * @param start the start index, clamped against the length of {@code str}
 * @param end the end index; null or None means "up to the end of the string"
 * @param msg description handed to {@code Type.INTEGER.convert} when converting {@code end}
 * @return the selected substring, or the empty string when the clamped start is not before the
 *     clamped end
 * @throws ConversionException propagated from the conversion of {@code end} (presumably when it
 *     is neither None nor an integer)
 */
private static String pythonSubstring(String str, int start, Object end, String msg)
    throws ConversionException {
  boolean openEnded = EvalUtils.isNullOrNone(end);
  // Fast path: the whole string is requested, so no clamping or copying is needed.
  if (openEnded && start == 0) {
    return str;
  }
  int length = str.length();
  int from = EvalUtils.clampRangeEndpoint(start, length);
  int to =
      openEnded ? length : EvalUtils.clampRangeEndpoint(Type.INTEGER.convert(end, msg), length);
  return from < to ? str.substring(from, to) : "";
}
@SkylarkCallable(
name = "join",
doc =
"Returns a string in which the string elements of the argument have been "
+ "joined by this string as a separator. Example:\"|\".join([\"a\", \"b\", \"c\"]) == \"a|b|c\"" + "", parameters = { @Param(name = "self", type = String.class), @Param(name = "elements", legacyNamed = true, type = SkylarkList.class, doc = "The objects to join.") }) public String join(String self, SkylarkList> elements) throws ConversionException { return Joiner.on(self).join(elements); } @SkylarkCallable( name = "lower", doc = "Returns the lower case version of this string.", parameters = {@Param(name = "self", type = String.class)}) public String lower(String self) { return self.toLowerCase(); } @SkylarkCallable( name = "upper", doc = "Returns the upper case version of this string.", parameters = {@Param(name = "self", type = String.class)}) public String upper(String self) { return self.toUpperCase(); } /** * For consistency with Python we recognize the same whitespace characters as they do over the * range 0x00-0xFF. See https://hg.python.org/cpython/file/3.6/Objects/unicodetype_db.h#l5738 This * list is a consequence of Unicode character information. * *
Note that this differs from Python 2.7, which uses ctype.h#isspace(), and from
* java.lang.Character#isWhitespace(), which does not recognize U+00A0.
*/
private static final String LATIN1_WHITESPACE =
("\u0009" + "\n" + "\u000B" + "\u000C" + "\r" + "\u001C" + "\u001D" + "\u001E" + "\u001F"
+ "\u0020" + "\u0085" + "\u00A0");
/**
 * Removes from the beginning of {@code self} every character that occurs in {@code chars}.
 *
 * @param self the string to strip
 * @param chars the set of characters to remove, given as a string
 * @return the remainder of {@code self} starting at its first character not in {@code chars},
 *     or the empty string if every character was stripped
 */
private static String stringLStrip(String self, String chars) {
  CharMatcher strippable = CharMatcher.anyOf(chars);
  int length = self.length();
  int first = 0;
  // Advance past the leading run of strippable characters.
  while (first < length && strippable.matches(self.charAt(first))) {
    first++;
  }
  return first == length ? "" : self.substring(first);
}
/**
 * Removes from the end of {@code self} every character that occurs in {@code chars}.
 *
 * @param self the string to strip
 * @param chars the set of characters to remove, given as a string
 * @return the prefix of {@code self} ending at its last character not in {@code chars}, or the
 *     empty string if every character was stripped
 */
private static String stringRStrip(String self, String chars) {
  CharMatcher strippable = CharMatcher.anyOf(chars);
  int end = self.length();
  // Walk backwards over the trailing run of strippable characters; 'end' is exclusive.
  while (end > 0 && strippable.matches(self.charAt(end - 1))) {
    end--;
  }
  return end == 0 ? "" : self.substring(0, end);
}
/**
 * Removes the characters in {@code chars} from both ends of {@code self}: the trailing run is
 * stripped first, then the leading run.
 */
private static String stringStrip(String self, String chars) {
  String withoutTrailing = stringRStrip(self, chars);
  return stringLStrip(withoutTrailing, chars);
}
@SkylarkCallable(
name = "lstrip",
doc =
"Returns a copy of the string where leading characters that appear in "
+ "chars
are removed."
+ "
" + "\"abcba\".lstrip(\"ba\") == \"cba\"" + "", parameters = { @Param(name = "self", type = String.class), @Param( name = "chars", type = String.class, legacyNamed = true, noneable = true, doc = "The characters to remove, or all whitespace if None.", defaultValue = "None") }) public String lstrip(String self, Object charsOrNone) { String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; return stringLStrip(self, chars); } @SkylarkCallable( name = "rstrip", doc = "Returns a copy of the string where trailing characters that appear in " + "
chars
are removed."
+ "" + "\"abcbaa\".rstrip(\"ab\") == \"abc\"" + "", parameters = { @Param(name = "self", type = String.class, doc = "This string."), @Param( name = "chars", type = String.class, legacyNamed = true, noneable = true, doc = "The characters to remove, or all whitespace if None.", defaultValue = "None") }) public String rstrip(String self, Object charsOrNone) { String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; return stringRStrip(self, chars); } @SkylarkCallable( name = "strip", doc = "Returns a copy of the string where leading or trailing characters that appear in " + "
chars
are removed."
+ "" + "\"aabcbcbaa\".strip(\"ab\") == \"cbc\"" + "", parameters = { @Param(name = "self", type = String.class, doc = "This string."), @Param( name = "chars", type = String.class, legacyNamed = true, noneable = true, doc = "The characters to remove, or all whitespace if None.", defaultValue = "None") }) public String strip(String self, Object charsOrNone) { String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; return stringStrip(self, chars); } @SkylarkCallable( name = "replace", doc = "Returns a copy of the string in which the occurrences " + "of
old
have been replaced with new
, optionally "
+ "restricting the number of replacements to maxsplit
.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "old", legacyNamed = true, type = String.class,
doc = "The string to be replaced."),
@Param(name = "new", legacyNamed = true,
type = String.class, doc = "The string to replace with."),
@Param(
name = "maxsplit",
type = Integer.class,
noneable = true,
defaultValue = "None",
legacyNamed = true,
doc = "The maximum number of replacements.")
},
useLocation = true)
public String replace(
    String self, String oldString, String newString, Object maxSplitO, Location loc)
    throws EvalException {
  // Integer.MAX_VALUE is handed to convertOptional as the fallback value (presumably used when
  // 'maxsplit' is None, i.e. no explicit limit was given).
  Integer limit =
      Type.INTEGER.convertOptional(
          maxSplitO, "'maxsplit' argument of 'replace'", /*label*/ null, Integer.MAX_VALUE);
  StringBuffer result = new StringBuffer();
  try {
    // Pattern.LITERAL makes the search string match verbatim rather than as a regex.
    Matcher occurrences = Pattern.compile(oldString, Pattern.LITERAL).matcher(self);
    int replaced = 0;
    while (replaced < limit && occurrences.find()) {
      // quoteReplacement keeps '$' and '\' in the replacement from being interpreted.
      occurrences.appendReplacement(result, Matcher.quoteReplacement(newString));
      replaced++;
    }
    // Copy whatever follows the last replaced occurrence.
    occurrences.appendTail(result);
  } catch (IllegalStateException e) {
    throw new EvalException(loc, e.getMessage() + " in call to replace");
  }
  return result.toString();
}
@SkylarkCallable(
name = "split",
doc =
"Returns a list of all the words in the string, using sep
as the "
+ "separator, optionally limiting the number of splits to maxsplit
.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sep", legacyNamed = true, type = String.class,
doc = "The string to split on."),
@Param(
name = "maxsplit",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "The maximum number of splits.")
},
useEnvironment = true,
useLocation = true)
public MutableListsep
as the "
+ "separator, optionally limiting the number of splits to maxsplit
. "
+ "Except for splitting from the right, this method behaves like split().",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sep", legacyNamed = true, type = String.class,
doc = "The string to split on."),
@Param(
name = "maxsplit",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "The maximum number of splits.")
},
useEnvironment = true,
useLocation = true)
public MutableListAt most {@code maxSplits} will be performed, going from right to left.
*
* @param input The input string.
* @param separator The separator string.
* @param maxSplits The maximum number of splits. Negative values mean unlimited splits.
* @return A list of words
* @throws IllegalArgumentException
*/
private static MutableList If the input string does not contain the separator, the tuple will consist of the original
* input string and two empty strings.
*
* This method emulates the behavior of Python's str.partition() and str.rpartition(),
* depending on the value of the {@code forward} flag.
*
* @param input The input string
* @param separator The string to split on
* @param forward A flag that controls whether the input string is split around the first ({@code
* true}) or last ({@code false}) occurrence of the separator.
* @return A three-tuple (List) of the form [part_before_separator, separator,
* part_after_separator].
*/
private static ImmutableListsep
and returns the resulting partition as a three-element "
+ "tuple of the form (substring_before, separator, substring_after).",
parameters = {
@Param(name = "self", type = String.class),
@Param(
name = "sep",
type = String.class,
legacyNamed = true,
defaultValue = "\" \"",
doc = "The string to split on, default is space (\" \").")
},
useEnvironment = true,
useLocation = true)
public Tuplesep
and returns the resulting partition as a three-element "
+ "tuple of the form (substring_before, separator, substring_after).",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(
name = "sep",
type = String.class,
legacyNamed = true,
defaultValue = "\" \"",
doc = "The string to split on, default is space (\" \").")
},
useEnvironment = true,
useLocation = true)
public Tuplesub
is found, or -1 if no such index exists, "
+ "optionally restricting to [start:end]
, "
+ "start
being inclusive and end
being exclusive.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sub", type = String.class, legacyNamed = true,
doc = "The substring to find."),
@Param(
name = "start",
type = Integer.class,
legacyNamed = true,
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public Integer rfind(String self, String sub, Integer start, Object end)
    throws ConversionException {
  // 'false' is the first argument to stringFind; presumably it selects the backward
  // (last-occurrence) search, matching this method's documented behavior.
  final boolean forward = false;
  final String endDescription = "'end' argument to rfind";
  return stringFind(forward, self, sub, start, end, endDescription);
}
@SkylarkCallable(
name = "find",
doc =
"Returns the first index where sub
is found, or -1 if no such index exists, "
+ "optionally restricting to [start:end]
, "
+ "start
being inclusive and end
being exclusive.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sub", type = String.class, legacyNamed = true,
doc = "The substring to find."),
@Param(
name = "start",
type = Integer.class,
legacyNamed = true,
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
public Integer invoke(String self, String sub, Integer start, Object end)
    throws ConversionException {
  // 'true' is the first argument to stringFind; presumably it selects the forward
  // (first-occurrence) search, matching the documented behavior of 'find'.
  final boolean forward = true;
  final String endDescription = "'end' argument to find";
  return stringFind(forward, self, sub, start, end, endDescription);
}
@SkylarkCallable(
name = "rindex",
doc =
"Returns the last index where sub
is found, or raises an error if no such "
+ "index exists, optionally restricting to [start:end]
, "
+ "start
being inclusive and end
being exclusive.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sub", type = String.class, legacyNamed = true,
doc = "The substring to find."),
@Param(
name = "start",
type = Integer.class,
legacyNamed = true,
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "optional position before which to restrict to search.")
},
useLocation = true)
public Integer rindex(String self, String sub, Integer start, Object end, Location loc)
    throws EvalException {
  int position = stringFind(false, self, sub, start, end, "'end' argument to rindex");
  if (position >= 0) {
    return position;
  }
  // A negative result from stringFind means the substring was not found: report it as an error.
  throw new EvalException(loc, Printer.format("substring %r not found in %r", sub, self));
}
@SkylarkCallable(
name = "index",
doc =
"Returns the first index where sub
is found, or raises an error if no such "
+ " index exists, optionally restricting to [start:end]
"
+ "start
being inclusive and end
being exclusive.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sub", type = String.class, legacyNamed = true,
doc = "The substring to find."),
@Param(
name = "start",
type = Integer.class,
legacyNamed = true,
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "optional position before which to restrict to search.")
},
useLocation = true)
public Integer index(String self, String sub, Integer start, Object end, Location loc)
    throws EvalException {
  int position = stringFind(true, self, sub, start, end, "'end' argument to index");
  if (position >= 0) {
    return position;
  }
  // A negative result from stringFind means the substring was not found: report it as an error.
  throw new EvalException(loc, Printer.format("substring %r not found in %r", sub, self));
}
@SkylarkCallable(
name = "splitlines",
doc =
"Splits the string at line boundaries ('\\n', '\\r\\n', '\\r') "
+ "and returns the result as a list.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(
name = "keepends",
type = Boolean.class,
legacyNamed = true,
defaultValue = "False",
doc = "Whether the line breaks should be included in the resulting list.")
})
public SkylarkListsub
in "
+ "string, optionally restricting to [start:end]
, start
"
+ "being inclusive and end
being exclusive.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(name = "sub", type = String.class, legacyNamed = true,
doc = "The substring to count."),
@Param(
name = "start",
type = Integer.class,
legacyNamed = true,
defaultValue = "0",
doc = "Restrict to search from this position."),
@Param(
name = "end",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "optional position before which to restrict to search.")
})
/**
 * Counts the non-overlapping occurrences of {@code sub} in the {@code [start:end]} slice of
 * {@code self}.
 *
 * @param self the string to search in
 * @param sub the substring to count; when empty, the result is the slice length plus one
 * @param start start of the slice (Python-style, handled by {@code pythonSubstring})
 * @param end end of the slice, or None for "up to the end of the string"
 * @return the number of non-overlapping occurrences
 * @throws ConversionException propagated from {@code pythonSubstring} when {@code end} cannot be
 *     converted
 */
public Integer count(String self, String sub, Integer start, Object end)
    throws ConversionException {
  // The description names 'count' so a conversion error points at the right method.
  String str = pythonSubstring(self, start, end, "'end' operand of 'count'");
  if (sub.isEmpty()) {
    return str.length() + 1;
  }
  int count = 0;
  int step = sub.length();
  // Resume each search right after the previous match instead of re-copying the remainder,
  // which keeps the scan linear and counts only non-overlapping occurrences.
  for (int index = str.indexOf(sub); index >= 0; index = str.indexOf(sub, index + step)) {
    count++;
  }
  return count;
}
@SkylarkCallable(
name = "elems",
doc =
"Returns an iterable value containing successive 1-element substrings of the string. "
+ "Equivalent to [s[i] for i in range(len(s))]
, except that the "
+ "returned value might not be a list.",
parameters = {@Param(name = "self", type = String.class, doc = "This string.")})
public SkylarkListsub
, otherwise False, optionally "
+ "restricting to [start:end]
, start
being inclusive "
+ "and end
being exclusive.",
parameters = {
@Param(name = "self", type = String.class, doc = "This string."),
@Param(
name = "sub",
allowedTypes = {
@ParamType(type = String.class),
@ParamType(type = Tuple.class, generic1 = String.class),
},
legacyNamed = true,
doc = "The substring to check."),
@Param(
name = "start",
type = Integer.class,
legacyNamed = true,
defaultValue = "0",
doc = "Test beginning at this position."),
@Param(
name = "end",
type = Integer.class,
legacyNamed = true,
noneable = true,
defaultValue = "None",
doc = "optional position at which to stop comparing.")
})
public Boolean endsWith(String self, Object sub, Integer start, Object end)
throws ConversionException, EvalException {
String str = pythonSubstring(self, start, end, "'end' operand of 'endswith'");
if (sub instanceof String) {
return str.endsWith((String) sub);
}
@SuppressWarnings("unchecked")
Tuple