diff options
Diffstat (limited to 'src')
10 files changed, 1078 insertions, 1032 deletions
diff --git a/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkDoc.java b/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkDoc.java index 0397ee7cc9..60c19874fd 100644 --- a/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkDoc.java +++ b/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkDoc.java @@ -16,6 +16,7 @@ package com.google.devtools.build.docgen.skylark; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.skylarkinterface.Param; +import com.google.devtools.build.lib.skylarkinterface.SkylarkCallable; import com.google.devtools.build.lib.skylarkinterface.SkylarkModule; import com.google.devtools.build.lib.skylarkinterface.SkylarkSignature; import com.google.devtools.build.lib.syntax.EvalUtils; @@ -24,6 +25,8 @@ import com.google.devtools.build.lib.syntax.Runtime.NoneType; import com.google.devtools.build.lib.syntax.SkylarkList; import com.google.devtools.build.lib.syntax.SkylarkList.MutableList; import com.google.devtools.build.lib.syntax.SkylarkList.Tuple; +import com.google.devtools.build.lib.syntax.StringModule; +import java.lang.reflect.Method; import java.util.Arrays; import java.util.Map; @@ -76,8 +79,8 @@ abstract class SkylarkDoc { return EvalUtils.getDataTypeNameFromClass(type); } - // Elide self parameter from parameters in class methods. - protected static Param[] adjustedParameters(SkylarkSignature annotation) { + // Omit self parameter from parameters in class methods. + protected static Param[] withoutSelfParam(SkylarkSignature annotation) { Param[] params = annotation.parameters(); if (params.length > 0 && !params[0].named() @@ -91,4 +94,15 @@ abstract class SkylarkDoc { return params; } } + + // Omit self parameter from parameters in class methods. + protected static Param[] withoutSelfParam(SkylarkCallable annotation, Method method) { + Param[] params = annotation.parameters(); + if (method.getDeclaringClass().equals(StringModule.class)) { + // Skip the self parameter, which is the first mandatory positional parameter. + return Arrays.copyOfRange(params, 1, params.length); + } else { + return params; + } + } } diff --git a/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkMethodDoc.java b/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkMethodDoc.java index b617803612..daa365efbd 100644 --- a/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkMethodDoc.java +++ b/src/main/java/com/google/devtools/build/docgen/skylark/SkylarkMethodDoc.java @@ -63,7 +63,7 @@ public abstract class SkylarkMethodDoc extends SkylarkDoc { argList.add("arg" + i + ":" + getTypeAnchor(method.getParameterTypes()[i])); } boolean named = false; - for (Param param : annotation.parameters()) { + for (Param param : withoutSelfParam(annotation, method)) { if (param.named() && !param.positional() && !named) { named = true; if (!argList.isEmpty()) { @@ -92,7 +92,7 @@ public abstract class SkylarkMethodDoc extends SkylarkDoc { protected String getSignature(String objectName, SkylarkSignature method) { List<String> argList = new ArrayList<>(); boolean named = false; - for (Param param : adjustedParameters(method)) { + for (Param param : withoutSelfParam(method)) { if (param.named() && !param.positional() && !named) { named = true; if (!method.extraPositionals().name().isEmpty()) { diff --git a/src/main/java/com/google/devtools/build/lib/syntax/FuncallExpression.java b/src/main/java/com/google/devtools/build/lib/syntax/FuncallExpression.java index e19113c93f..1928fcec7f 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/FuncallExpression.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/FuncallExpression.java @@ -279,10 +279,20 @@ public final class FuncallExpression extends Expression { return printer.toString(); } + /** + * Returns either the class itself or, if the class is {@link String}, the proxy class + * containing all 'string' methods. + */ + private static Class<?> getClassOrProxyClass(Class<?> clazz) { + return String.class.isAssignableFrom(clazz) + ? StringModule.class + : clazz; + } + /** Returns the Skylark callable Method of objClass with structField=true and the given name. */ public static MethodDescriptor getStructField(Class<?> objClass, String methodName) { try { - return fieldCache.get(objClass).get(methodName); + return fieldCache.get(getClassOrProxyClass(objClass)).get(methodName); } catch (ExecutionException e) { throw new IllegalStateException("Method loading failed: " + e); } @@ -291,7 +301,7 @@ public final class FuncallExpression extends Expression { /** Returns the list of names of Skylark callable Methods of objClass with structField=true. */ public static Set<String> getStructFieldNames(Class<?> objClass) { try { - return fieldCache.get(objClass).keySet(); + return fieldCache.get(getClassOrProxyClass(objClass)).keySet(); } catch (ExecutionException e) { throw new IllegalStateException("Method loading failed: " + e); } @@ -300,7 +310,7 @@ public final class FuncallExpression extends Expression { /** Returns the list of Skylark callable Methods of objClass with the given name. */ public static List<MethodDescriptor> getMethods(Class<?> objClass, String methodName) { try { - return methodCache.get(objClass).get(methodName); + return methodCache.get(getClassOrProxyClass(objClass)).get(methodName); } catch (ExecutionException e) { throw new IllegalStateException("Method loading failed: " + e); } @@ -312,7 +322,7 @@ public final class FuncallExpression extends Expression { */ public static Set<String> getMethodNames(Class<?> objClass) { try { - return methodCache.get(objClass).keySet(); + return methodCache.get(getClassOrProxyClass(objClass)).keySet(); } catch (ExecutionException e) { throw new IllegalStateException("Method loading failed: " + e); } @@ -429,7 +439,7 @@ public final class FuncallExpression extends Expression { String.format( "type '%s' has multiple matches for function %s", EvalUtils.getDataTypeNameFromClass(objClass), - formatMethod(methodName, args, kwargs))); + formatMethod(objClass, methodName, args, kwargs))); } } } @@ -445,14 +455,14 @@ public final class FuncallExpression extends Expression { String.format( "type '%s' has no method %s", EvalUtils.getDataTypeNameFromClass(objClass), - formatMethod(methodName, args, kwargs)); + formatMethod(objClass, methodName, args, kwargs)); } else { errorMessage = String.format( "%s, in method call %s of '%s'", argumentListConversionResult.getError(), - formatMethod(methodName, args, kwargs), + formatMethod(objClass, methodName, args, kwargs), EvalUtils.getDataTypeNameFromClass(objClass)); } throw new EvalException(getLocation(), errorMessage); @@ -640,7 +650,13 @@ public final class FuncallExpression extends Expression { return ArgumentListConversionResult.fromArgumentList(builder.build()); } - private static String formatMethod(String name, List<Object> args, Map<String, Object> kwargs) { + private static String formatMethod( + Class<?> objClass, String name, List<Object> args, Map<String, Object> kwargs) { + if (objClass == StringModule.class) { + // StringModule is a special case, and begins with a String "self" parameter which should + // be omitted from the method format. + args = args.subList(1, args.size()); + } StringBuilder sb = new StringBuilder(); sb.append(name).append("("); boolean first = true; @@ -722,6 +738,24 @@ public final class FuncallExpression extends Expression { } } + private boolean includeSelfAsArg( + Object value, @Nullable BaseFunction globalBuiltinRegistryFunction) { + if (value instanceof String) { + // String is a special case, as it is treated like a skylark value but cannot be subclassed + // in java. Callable functions which represent methods on string objects must thus be given + // the string 'self' object. + return true; + } + if (globalBuiltinRegistryFunction != null && !isNamespace(value.getClass())) { + // Non-namespace objects which have registered static functions in the global builtin registry + // need to have the instance object passed to the method invocation. + // TODO(cparsons): Global builtin registry functions are going away, so this use-case is + // deprecated. + return true; + } + return false; + } + /** * Call a method depending on the type of an object it is called on. * @@ -737,15 +771,15 @@ public final class FuncallExpression extends Expression { throws EvalException, InterruptedException { Location location = call.getLocation(); Object value = positionals.get(0); - ImmutableList<Object> positionalArgs = positionals.subList(1, positionals.size()); BaseFunction function = Runtime.getBuiltinRegistry().getFunction(value.getClass(), method); Object fieldValue = (value instanceof ClassObject) ? ((ClassObject) value).getValue(method) : null; + ImmutableList<Object> positionalArgs = + includeSelfAsArg(value, function) + ? positionals + : positionals.subList(1, positionals.size()); + if (function != null) { - if (!isNamespace(value.getClass())) { - // Use self as an implicit parameter in front. - positionalArgs = positionals; - } return function.call( positionalArgs, ImmutableMap.copyOf(keyWordArgs), call, env); } else if (fieldValue != null) { @@ -765,6 +799,11 @@ public final class FuncallExpression extends Expression { // Static call obj = null; objClass = (Class<?>) value; + } else if (value instanceof String) { + // String is special-cased, since it can't be subclassed. Methods on strings defer + // to StringModule. + obj = StringModule.INSTANCE; + objClass = StringModule.class; } else { obj = value; objClass = value.getClass(); diff --git a/src/main/java/com/google/devtools/build/lib/syntax/MethodLibrary.java b/src/main/java/com/google/devtools/build/lib/syntax/MethodLibrary.java index 6e563424c3..a6b865e703 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/MethodLibrary.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/MethodLibrary.java @@ -16,8 +16,6 @@ package com.google.devtools.build.lib.syntax; import static java.util.stream.Collectors.joining; -import com.google.common.base.CharMatcher; -import com.google.common.base.Joiner; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; @@ -40,8 +38,6 @@ import java.util.List; import java.util.NoSuchElementException; import java.util.Set; import java.util.TreeSet; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import javax.annotation.Nullable; /** A helper class containing built in functions for the Skylark language. */ @@ -49,970 +45,6 @@ public class MethodLibrary { private MethodLibrary() {} - // Emulate Python substring function - // It converts out of range indices, and never fails - private static String pythonSubstring(String str, int start, Object end, String msg) - throws ConversionException { - if (start == 0 && EvalUtils.isNullOrNone(end)) { - return str; - } - start = EvalUtils.clampRangeEndpoint(start, str.length()); - int stop; - if (EvalUtils.isNullOrNone(end)) { - stop = str.length(); - } else { - stop = EvalUtils.clampRangeEndpoint(Type.INTEGER.convert(end, msg), str.length()); - } - if (start >= stop) { - return ""; - } - return str.substring(start, stop); - } - - // supported string methods - - @SkylarkSignature(name = "join", objectType = StringModule.class, returnType = String.class, - doc = "Returns a string in which the string elements of the argument have been " - + "joined by this string as a separator. Example:<br>" - + "<pre class=\"language-python\">\"|\".join([\"a\", \"b\", \"c\"]) == \"a|b|c\"</pre>", - parameters = { - @Param(name = "self", type = String.class, doc = "This string, a separator."), - @Param(name = "elements", type = SkylarkList.class, doc = "The objects to join.")}) - private static final BuiltinFunction join = new BuiltinFunction("join") { - public String invoke(String self, SkylarkList<?> elements) throws ConversionException { - return Joiner.on(self).join(elements); - } - }; - - @SkylarkSignature(name = "lower", objectType = StringModule.class, returnType = String.class, - doc = "Returns the lower case version of this string.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string, to convert to lower case.")}) - private static final BuiltinFunction lower = new BuiltinFunction("lower") { - public String invoke(String self) { - return self.toLowerCase(); - } - }; - - @SkylarkSignature(name = "upper", objectType = StringModule.class, returnType = String.class, - doc = "Returns the upper case version of this string.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string, to convert to upper case.")}) - private static final BuiltinFunction upper = new BuiltinFunction("upper") { - public String invoke(String self) { - return self.toUpperCase(); - } - }; - - /** - * For consistency with Python we recognize the same whitespace characters as they do over the - * range 0x00-0xFF. See https://hg.python.org/cpython/file/3.6/Objects/unicodetype_db.h#l5738 - * This list is a consequence of Unicode character information. - * - * Note that this differs from Python 2.7, which uses ctype.h#isspace(), and from - * java.lang.Character#isWhitespace(), which does not recognize U+00A0. - */ - private static final String LATIN1_WHITESPACE = ( - "\u0009" - + "\n" - + "\u000B" - + "\u000C" - + "\r" - + "\u001C" - + "\u001D" - + "\u001E" - + "\u001F" - + "\u0020" - + "\u0085" - + "\u00A0" - ); - - private static String stringLStrip(String self, String chars) { - CharMatcher matcher = CharMatcher.anyOf(chars); - for (int i = 0; i < self.length(); i++) { - if (!matcher.matches(self.charAt(i))) { - return self.substring(i); - } - } - return ""; // All characters were stripped. - } - - private static String stringRStrip(String self, String chars) { - CharMatcher matcher = CharMatcher.anyOf(chars); - for (int i = self.length() - 1; i >= 0; i--) { - if (!matcher.matches(self.charAt(i))) { - return self.substring(0, i + 1); - } - } - return ""; // All characters were stripped. - } - - private static String stringStrip(String self, String chars) { - return stringLStrip(stringRStrip(self, chars), chars); - } - - @SkylarkSignature( - name = "lstrip", - objectType = StringModule.class, - returnType = String.class, - doc = - "Returns a copy of the string where leading characters that appear in <code>chars</code>" - + "are removed." - + "<pre class=\"language-python\">" - + "\"abcba\".lstrip(\"ba\") == \"cba\"" - + "</pre>", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param( - name = "chars", - type = String.class, - noneable = true, - doc = "The characters to remove, or all whitespace if None.", - defaultValue = "None" - ) - } - ) - private static final BuiltinFunction lstrip = - new BuiltinFunction("lstrip") { - public String invoke(String self, Object charsOrNone) { - String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; - return stringLStrip(self, chars); - } - }; - - @SkylarkSignature( - name = "rstrip", - objectType = StringModule.class, - returnType = String.class, - doc = - "Returns a copy of the string where trailing characters that appear in <code>chars</code>" - + "are removed." - + "<pre class=\"language-python\">" - + "\"abcbaa\".rstrip(\"ab\") == \"abc\"" - + "</pre>", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param( - name = "chars", - type = String.class, - noneable = true, - doc = "The characters to remove, or all whitespace if None.", - defaultValue = "None" - ) - } - ) - private static final BuiltinFunction rstrip = - new BuiltinFunction("rstrip") { - public String invoke(String self, Object charsOrNone) { - String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; - return stringRStrip(self, chars); - } - }; - - @SkylarkSignature( - name = "strip", - objectType = StringModule.class, - returnType = String.class, - doc = - "Returns a copy of the string where leading or trailing characters that appear in " - + "<code>chars</code> are removed." - + "<pre class=\"language-python\">" - + "\"aabcbcbaa\".strip(\"ab\") == \"cbc\"" - + "</pre>", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param( - name = "chars", - type = String.class, - noneable = true, - doc = "The characters to remove, or all whitespace if None.", - defaultValue = "None" - ) - } - ) - private static final BuiltinFunction strip = - new BuiltinFunction("strip") { - public String invoke(String self, Object charsOrNone) { - String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; - return stringStrip(self, chars); - } - }; - - @SkylarkSignature( - name = "replace", - objectType = StringModule.class, - returnType = String.class, - doc = - "Returns a copy of the string in which the occurrences " - + "of <code>old</code> have been replaced with <code>new</code>, optionally " - + "restricting the number of replacements to <code>maxsplit</code>.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "old", type = String.class, doc = "The string to be replaced."), - @Param(name = "new", type = String.class, doc = "The string to replace with."), - @Param( - name = "maxsplit", - type = Integer.class, - noneable = true, - defaultValue = "None", - doc = "The maximum number of replacements." - ) - }, - useLocation = true - ) - private static final BuiltinFunction replace = - new BuiltinFunction("replace") { - public String invoke( - String self, String oldString, String newString, Object maxSplitO, Location loc) - throws EvalException { - StringBuffer sb = new StringBuffer(); - Integer maxSplit = - Type.INTEGER.convertOptional( - maxSplitO, "'maxsplit' argument of 'replace'", /*label*/ null, Integer.MAX_VALUE); - try { - Matcher m = Pattern.compile(oldString, Pattern.LITERAL).matcher(self); - for (int i = 0; i < maxSplit && m.find(); i++) { - m.appendReplacement(sb, Matcher.quoteReplacement(newString)); - } - m.appendTail(sb); - } catch (IllegalStateException e) { - throw new EvalException(loc, e.getMessage() + " in call to replace"); - } - return sb.toString(); - } - }; - - @SkylarkSignature( - name = "split", - objectType = StringModule.class, - returnType = MutableList.class, - doc = - "Returns a list of all the words in the string, using <code>sep</code> as the separator, " - + "optionally limiting the number of splits to <code>maxsplit</code>.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sep", type = String.class, doc = "The string to split on."), - @Param( - name = "maxsplit", - type = Integer.class, - noneable = true, - defaultValue = "None", - doc = "The maximum number of splits." - ) - }, - useEnvironment = true, - useLocation = true - ) - private static final BuiltinFunction split = - new BuiltinFunction("split") { - public MutableList<String> invoke( - String self, String sep, Object maxSplitO, Location loc, Environment env) - throws EvalException { - int maxSplit = - Type.INTEGER.convertOptional( - maxSplitO, "'split' argument of 'split'", /*label*/ null, -2); - // + 1 because the last result is the remainder. The default is -2 so that after +1, - // it becomes -1. - String[] ss = Pattern.compile(sep, Pattern.LITERAL).split(self, maxSplit + 1); - return MutableList.of(env, ss); - } - }; - - @SkylarkSignature( - name = "rsplit", - objectType = StringModule.class, - returnType = MutableList.class, - doc = - "Returns a list of all the words in the string, using <code>sep</code> as the separator, " - + "optionally limiting the number of splits to <code>maxsplit</code>. " - + "Except for splitting from the right, this method behaves like split().", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sep", type = String.class, doc = "The string to split on."), - @Param( - name = "maxsplit", - type = Integer.class, - noneable = true, - defaultValue = "None", - doc = "The maximum number of splits." - ) - }, - useEnvironment = true, - useLocation = true - ) - private static final BuiltinFunction rsplit = - new BuiltinFunction("rsplit") { - @SuppressWarnings("unused") - public MutableList<String> invoke( - String self, String sep, Object maxSplitO, Location loc, Environment env) - throws EvalException { - int maxSplit = - Type.INTEGER.convertOptional(maxSplitO, "'split' argument of 'split'", null, -1); - try { - return stringRSplit(self, sep, maxSplit, env); - } catch (IllegalArgumentException ex) { - throw new EvalException(loc, ex); - } - } - }; - - /** - * Splits the given string into a list of words, using {@code separator} as a delimiter. - * - * <p>At most {@code maxSplits} will be performed, going from right to left. - * - * @param input The input string. - * @param separator The separator string. - * @param maxSplits The maximum number of splits. Negative values mean unlimited splits. - * @return A list of words - * @throws IllegalArgumentException - */ - private static MutableList<String> stringRSplit( - String input, String separator, int maxSplits, Environment env) { - if (separator.isEmpty()) { - throw new IllegalArgumentException("Empty separator"); - } - - if (maxSplits <= 0) { - maxSplits = Integer.MAX_VALUE; - } - - ArrayDeque<String> result = new ArrayDeque<>(); - String[] parts = input.split(Pattern.quote(separator), -1); - int sepLen = separator.length(); - int remainingLength = input.length(); - int splitsSoFar = 0; - - // Copies parts from the array into the final list, starting at the end (because - // it's rsplit), as long as fewer than maxSplits splits are performed. The - // last spot in the list is reserved for the remaining string, whose length - // has to be tracked throughout the loop. - for (int pos = parts.length - 1; (pos >= 0) && (splitsSoFar < maxSplits); --pos) { - String current = parts[pos]; - result.addFirst(current); - - ++splitsSoFar; - remainingLength -= sepLen + current.length(); - } - - if (splitsSoFar == maxSplits && remainingLength >= 0) { - result.addFirst(input.substring(0, remainingLength)); - } - - return MutableList.copyOf(env, result); - } - - @SkylarkSignature(name = "partition", objectType = StringModule.class, - returnType = Tuple.class, - doc = "Splits the input string at the first occurrence of the separator " - + "<code>sep</code> and returns the resulting partition as a three-element " - + "tuple of the form (substring_before, separator, substring_after).", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sep", type = String.class, - defaultValue = "\" \"", doc = "The string to split on, default is space (\" \").")}, - useEnvironment = true, - useLocation = true) - private static final BuiltinFunction partition = new BuiltinFunction("partition") { - @SuppressWarnings("unused") - public Tuple<String> invoke(String self, String sep, Location loc, Environment env) - throws EvalException { - return partitionWrapper(self, sep, true, loc); - } - }; - - @SkylarkSignature(name = "rpartition", objectType = StringModule.class, - returnType = Tuple.class, - doc = "Splits the input string at the last occurrence of the separator " - + "<code>sep</code> and returns the resulting partition as a three-element " - + "tuple of the form (substring_before, separator, substring_after).", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sep", type = String.class, - defaultValue = "\" \"", doc = "The string to split on, default is space (\" \").")}, - useEnvironment = true, - useLocation = true) - private static final BuiltinFunction rpartition = new BuiltinFunction("rpartition") { - @SuppressWarnings("unused") - public Tuple<String> invoke(String self, String sep, Location loc, Environment env) - throws EvalException { - return partitionWrapper(self, sep, false, loc); - } - }; - - /** - * Wraps the stringPartition() method and converts its results and exceptions - * to the expected types. - * - * @param self The input string - * @param separator The string to split on - * @param forward A flag that controls whether the input string is split around - * the first ({@code true}) or last ({@code false}) occurrence of the separator. - * @param loc The location that is used for potential exceptions - * @return A list with three elements - */ - private static Tuple<String> partitionWrapper( - String self, String separator, boolean forward, Location loc) throws EvalException { - try { - return Tuple.copyOf(stringPartition(self, separator, forward)); - } catch (IllegalArgumentException ex) { - throw new EvalException(loc, ex); - } - } - - /** - * Splits the input string at the {first|last} occurrence of the given separator and returns the - * resulting partition as a three-tuple of Strings, contained in a {@code MutableList}. - * - * <p>If the input string does not contain the separator, the tuple will consist of the original - * input string and two empty strings. - * - * <p>This method emulates the behavior of Python's str.partition() and str.rpartition(), - * depending on the value of the {@code forward} flag. - * - * @param input The input string - * @param separator The string to split on - * @param forward A flag that controls whether the input string is split around the first ({@code - * true}) or last ({@code false}) occurrence of the separator. - * @return A three-tuple (List) of the form [part_before_separator, separator, - * part_after_separator]. - */ - private static List<String> stringPartition(String input, String separator, boolean forward) { - if (separator.isEmpty()) { - throw new IllegalArgumentException("Empty separator"); - } - - int partitionSize = 3; - ArrayList<String> result = new ArrayList<>(partitionSize); - int pos = forward ? input.indexOf(separator) : input.lastIndexOf(separator); - - if (pos < 0) { - for (int i = 0; i < partitionSize; ++i) { - result.add(""); - } - - // Following Python's implementation of str.partition() and str.rpartition(), - // the input string is copied to either the first or the last position in the - // list, depending on the value of the forward flag. - result.set(forward ? 0 : partitionSize - 1, input); - } else { - result.add(input.substring(0, pos)); - result.add(separator); - - // pos + sep.length() is at most equal to input.length(). This worst-case - // happens when the separator is at the end of the input string. However, - // substring() will return an empty string in this scenario, thus making - // any additional safety checks obsolete. - result.add(input.substring(pos + separator.length())); - } - - return result; - } - - @SkylarkSignature( - name = "capitalize", - objectType = StringModule.class, - returnType = String.class, - doc = - "Returns a copy of the string with its first character capitalized and the rest " - + "lowercased. This method does not support non-ascii characters.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")} - ) - private static final BuiltinFunction capitalize = - new BuiltinFunction("capitalize") { - @SuppressWarnings("unused") - public String invoke(String self) throws EvalException { - if (self.isEmpty()) { - return self; - } - return Character.toUpperCase(self.charAt(0)) + self.substring(1).toLowerCase(); - } - }; - - @SkylarkSignature(name = "title", objectType = StringModule.class, - returnType = String.class, - doc = - "Converts the input string into title case, i.e. every word starts with an " - + "uppercase letter while the remaining letters are lowercase. In this " - + "context, a word means strictly a sequence of letters. This method does " - + "not support supplementary Unicode characters.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction title = new BuiltinFunction("title") { - @SuppressWarnings("unused") - public String invoke(String self) throws EvalException { - char[] data = self.toCharArray(); - boolean previousWasLetter = false; - - for (int pos = 0; pos < data.length; ++pos) { - char current = data[pos]; - boolean currentIsLetter = Character.isLetter(current); - - if (currentIsLetter) { - if (previousWasLetter && Character.isUpperCase(current)) { - data[pos] = Character.toLowerCase(current); - } else if (!previousWasLetter && Character.isLowerCase(current)) { - data[pos] = Character.toUpperCase(current); - } - } - previousWasLetter = currentIsLetter; - } - - return new String(data); - } - }; - - /** - * Common implementation for find, rfind, index, rindex. - * @param forward true if we want to return the last matching index. - */ - private static int stringFind(boolean forward, - String self, String sub, int start, Object end, String msg) - throws ConversionException { - String substr = pythonSubstring(self, start, end, msg); - int subpos = forward ? substr.indexOf(sub) : substr.lastIndexOf(sub); - start = EvalUtils.clampRangeEndpoint(start, self.length()); - return subpos < 0 ? subpos : subpos + start; - } - - @SkylarkSignature(name = "rfind", objectType = StringModule.class, returnType = Integer.class, - doc = "Returns the last index where <code>sub</code> is found, " - + "or -1 if no such index exists, optionally restricting to " - + "[<code>start</code>:<code>end</code>], " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to find."), - @Param(name = "start", type = Integer.class, defaultValue = "0", - doc = "Restrict to search from this position."), - @Param(name = "end", type = Integer.class, noneable = true, defaultValue = "None", - doc = "optional position before which to restrict to search.")}) - private static final BuiltinFunction rfind = new BuiltinFunction("rfind") { - public Integer invoke(String self, String sub, Integer start, Object end) - throws ConversionException { - return stringFind(false, self, sub, start, end, "'end' argument to rfind"); - } - }; - - @SkylarkSignature(name = "find", objectType = StringModule.class, returnType = Integer.class, - doc = "Returns the first index where <code>sub</code> is found, " - + "or -1 if no such index exists, optionally restricting to " - + "[<code>start</code>:<code>end]</code>, " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to find."), - @Param(name = "start", type = Integer.class, defaultValue = "0", - doc = "Restrict to search from this position."), - @Param(name = "end", type = Integer.class, noneable = true, defaultValue = "None", - doc = "optional position before which to restrict to search.")}) - private static final BuiltinFunction find = new BuiltinFunction("find") { - public Integer invoke(String self, String sub, Integer start, Object end) - throws ConversionException { - return stringFind(true, self, sub, start, end, "'end' argument to find"); - } - }; - - @SkylarkSignature( - name = "rindex", - objectType = StringModule.class, - returnType = Integer.class, - doc = - "Returns the last index where <code>sub</code> is found, " - + "or raises an error if no such index exists, optionally restricting to " - + "[<code>start</code>:<code>end</code>], " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to find."), - @Param( - name = "start", - type = Integer.class, - defaultValue = "0", - doc = "Restrict to search from this position." - ), - @Param( - name = "end", - type = Integer.class, - noneable = true, - defaultValue = "None", - doc = "optional position before which to restrict to search." - ) - }, - useLocation = true - ) - private static final BuiltinFunction rindex = - new BuiltinFunction("rindex") { - public Integer invoke(String self, String sub, Integer start, Object end, Location loc) - throws EvalException { - int res = stringFind(false, self, sub, start, end, "'end' argument to rindex"); - if (res < 0) { - throw new EvalException(loc, Printer.format("substring %r not found in %r", sub, self)); - } - return res; - } - }; - - @SkylarkSignature( - name = "index", - objectType = StringModule.class, - returnType = Integer.class, - doc = - "Returns the first index where <code>sub</code> is found, " - + "or raises an error if no such index exists, optionally restricting to " - + "[<code>start</code>:<code>end]</code>, " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to find."), - @Param( - name = "start", - type = Integer.class, - defaultValue = "0", - doc = "Restrict to search from this position." - ), - @Param( - name = "end", - type = Integer.class, - noneable = true, - defaultValue = "None", - doc = "optional position before which to restrict to search." - ) - }, - useLocation = true - ) - private static final BuiltinFunction index = - new BuiltinFunction("index") { - public Integer invoke(String self, String sub, Integer start, Object end, Location loc) - throws EvalException { - int res = stringFind(true, self, sub, start, end, "'end' argument to index"); - if (res < 0) { - throw new EvalException(loc, Printer.format("substring %r not found in %r", sub, self)); - } - return res; - } - }; - - @SkylarkSignature(name = "splitlines", objectType = StringModule.class, - returnType = SkylarkList.class, - doc = - "Splits the string at line boundaries ('\\n', '\\r\\n', '\\r') " - + "and returns the result as a list.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "keepends", type = Boolean.class, defaultValue = "False", - doc = "Whether the line breaks should be included in the resulting list.")}) - private static final BuiltinFunction splitLines = new BuiltinFunction("splitlines") { - @SuppressWarnings("unused") - public SkylarkList<String> invoke(String self, Boolean keepEnds) throws EvalException { - List<String> result = new ArrayList<>(); - Matcher matcher = SPLIT_LINES_PATTERN.matcher(self); - while (matcher.find()) { - String line = matcher.group("line"); - String lineBreak = matcher.group("break"); - boolean trailingBreak = lineBreak.isEmpty(); - if (line.isEmpty() && trailingBreak) { - break; - } - if (keepEnds && !trailingBreak) { - result.add(line + lineBreak); - } else { - result.add(line); - } - } - return SkylarkList.createImmutable(result); - } - }; - - private static final Pattern SPLIT_LINES_PATTERN = - Pattern.compile("(?<line>.*)(?<break>(\\r\\n|\\r|\\n)?)"); - - @SkylarkSignature(name = "isalpha", objectType = StringModule.class, returnType = Boolean.class, - doc = "Returns True if all characters in the string are alphabetic ([a-zA-Z]) and there is " - + "at least one character.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isalpha = new BuiltinFunction("isalpha") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - return MethodLibrary.matches(self, MethodLibrary.ALPHA, false); - } - }; - - @SkylarkSignature(name = "isalnum", objectType = StringModule.class, returnType = Boolean.class, - doc = - "Returns True if all characters in the string are alphanumeric ([a-zA-Z0-9]) and there is " - + "at least one character.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isAlnum = new BuiltinFunction("isalnum") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - return MethodLibrary.matches(self, MethodLibrary.ALNUM, false); - } - }; - - @SkylarkSignature(name = "isdigit", objectType = StringModule.class, returnType = Boolean.class, - doc = - "Returns True if all characters in the string are digits ([0-9]) and there is " - + "at least one character.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isDigit = new BuiltinFunction("isdigit") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - return MethodLibrary.matches(self, MethodLibrary.DIGIT, false); - } - }; - - @SkylarkSignature(name = "isspace", objectType = StringModule.class, returnType = Boolean.class, - doc = - "Returns True if all characters are white space characters and the string " - + "contains at least one character.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isSpace = new BuiltinFunction("isspace") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - return MethodLibrary.matches(self, MethodLibrary.SPACE, false); - } - }; - - @SkylarkSignature(name = "islower", objectType = StringModule.class, returnType = Boolean.class, - doc = - "Returns True if all cased characters in the string are lowercase and there is " - + "at least one character.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isLower = new BuiltinFunction("islower") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - // Python also accepts non-cased characters, so we cannot use LOWER. - return MethodLibrary.matches(self, MethodLibrary.UPPER.negate(), true); - } - }; - - @SkylarkSignature(name = "isupper", objectType = StringModule.class, returnType = Boolean.class, - doc = - "Returns True if all cased characters in the string are uppercase and there is " - + "at least one character.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isUpper = new BuiltinFunction("isupper") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - // Python also accepts non-cased characters, so we cannot use UPPER. - return MethodLibrary.matches(self, MethodLibrary.LOWER.negate(), true); - } - }; - - @SkylarkSignature(name = "istitle", objectType = StringModule.class, returnType = Boolean.class, - doc = - "Returns True if the string is in title case and it contains at least one character. " - + "This means that every uppercase character must follow an uncased one (e.g. whitespace) " - + "and every lowercase character must follow a cased one (e.g. uppercase or lowercase).", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) - private static final BuiltinFunction isTitle = new BuiltinFunction("istitle") { - @SuppressWarnings("unused") // Called via Reflection - public Boolean invoke(String self) throws EvalException { - if (self.isEmpty()) { - return false; - } - // From the Python documentation: "uppercase characters may only follow uncased characters - // and lowercase characters only cased ones". - char[] data = self.toCharArray(); - CharMatcher matcher = CharMatcher.any(); - char leftMostCased = ' '; - for (int pos = data.length - 1; pos >= 0; --pos) { - char current = data[pos]; - // 1. Check condition that was determined by the right neighbor. - if (!matcher.matches(current)) { - return false; - } - // 2. Determine condition for the left neighbor. - if (LOWER.matches(current)) { - matcher = CASED; - } else if (UPPER.matches(current)) { - matcher = CASED.negate(); - } else { - matcher = CharMatcher.any(); - } - // 3. Store character if it is cased. - if (CASED.matches(current)) { - leftMostCased = current; - } - } - // The leftmost cased letter must be uppercase. If leftMostCased is not a cased letter here, - // then the string doesn't have any cased letter, so UPPER.test will return false. - return UPPER.matches(leftMostCased); - } - }; - - private static boolean matches( - String str, CharMatcher matcher, boolean requiresAtLeastOneCasedLetter) { - if (str.isEmpty()) { - return false; - } else if (!requiresAtLeastOneCasedLetter) { - return matcher.matchesAllOf(str); - } - int casedLetters = 0; - for (char current : str.toCharArray()) { - if (!matcher.matches(current)) { - return false; - } else if (requiresAtLeastOneCasedLetter && CASED.matches(current)) { - ++casedLetters; - } - } - return casedLetters > 0; - } - - private static final CharMatcher DIGIT = CharMatcher.javaDigit(); - private static final CharMatcher LOWER = CharMatcher.inRange('a', 'z'); - private static final CharMatcher UPPER = CharMatcher.inRange('A', 'Z'); - private static final CharMatcher ALPHA = LOWER.or(UPPER); - private static final CharMatcher ALNUM = ALPHA.or(DIGIT); - private static final CharMatcher CASED = ALPHA; - private static final CharMatcher SPACE = CharMatcher.whitespace(); - - @SkylarkSignature(name = "count", objectType = StringModule.class, returnType = Integer.class, - doc = "Returns the number of (non-overlapping) occurrences of substring <code>sub</code> in " - + "string, optionally restricting to [<code>start</code>:<code>end</code>], " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to count."), - @Param(name = "start", type = Integer.class, defaultValue = "0", - doc = "Restrict to search from this position."), - @Param(name = "end", type = Integer.class, noneable = true, defaultValue = "None", - doc = "optional position before which to restrict to search.")}) - private static final BuiltinFunction count = new BuiltinFunction("count") { - public Integer invoke(String self, String sub, Integer start, Object end) - throws ConversionException { - String str = pythonSubstring(self, start, end, "'end' operand of 'find'"); - if (sub.isEmpty()) { - return str.length() + 1; - } - int count = 0; - int index = -1; - while ((index = str.indexOf(sub)) >= 0) { - count++; - str = str.substring(index + sub.length()); - } - return count; - } - }; - - @SkylarkSignature( - name = "elems", - objectType = StringModule.class, - returnType = SkylarkList.class, - doc = - "Returns an iterable value containing successive 1-element substrings of the string. " - + "Equivalent to <code>[s[i] for i in range(len(s))]</code>, except that the " - + "returned value might not be a list.", - parameters = {@Param(name = "self", type = String.class, doc = "This string.")} - ) - private static final BuiltinFunction elems = - new BuiltinFunction("elems") { - public SkylarkList<String> invoke(String self) throws ConversionException { - ImmutableList.Builder<String> builder = new ImmutableList.Builder<>(); - for (char c : self.toCharArray()) { - builder.add(String.valueOf(c)); - } - return SkylarkList.createImmutable(builder.build()); - } - }; - - @SkylarkSignature(name = "endswith", objectType = StringModule.class, returnType = Boolean.class, - doc = "Returns True if the string ends with <code>sub</code>, " - + "otherwise False, optionally restricting to [<code>start</code>:<code>end</code>], " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to check."), - @Param(name = "start", type = Integer.class, defaultValue = "0", - doc = "Test beginning at this position."), - @Param(name = "end", type = Integer.class, noneable = true, defaultValue = "None", - doc = "optional position at which to stop comparing.")}) - private static final BuiltinFunction endswith = new BuiltinFunction("endswith") { - public Boolean invoke(String self, String sub, Integer start, Object end) - throws ConversionException { - return pythonSubstring(self, start, end, "'end' operand of 'endswith'").endsWith(sub); - } - }; - - // In Python, formatting is very complex. - // We handle here the simplest case which provides most of the value of the function. - // https://docs.python.org/3/library/string.html#formatstrings - @SkylarkSignature( - name = "format", - objectType = StringModule.class, - returnType = String.class, - doc = - "Perform string interpolation. Format strings contain replacement fields " - + "surrounded by curly braces <code>{}</code>. Anything that is not contained " - + "in braces is considered literal text, which is copied unchanged to the output." - + "If you need to include a brace character in the literal text, it can be " - + "escaped by doubling: <code>{{</code> and <code>}}</code>" - + "A replacement field can be either a name, a number, or empty. Values are " - + "converted to strings using the <a href=\"globals.html#str\">str</a> function." - + "<pre class=\"language-python\">" - + "# Access in order:\n" - + "\"{} < {}\".format(4, 5) == \"4 < 5\"\n" - + "# Access by position:\n" - + "\"{1}, {0}\".format(2, 1) == \"1, 2\"\n" - + "# Access by name:\n" - + "\"x{key}x\".format(key = 2) == \"x2x\"</pre>\n", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - }, - extraPositionals = - @Param( - name = "args", - type = SkylarkList.class, - defaultValue = "()", - doc = "List of arguments." - ), - extraKeywords = - @Param( - name = "kwargs", - type = SkylarkDict.class, - defaultValue = "{}", - doc = "Dictionary of arguments." - ), - useLocation = true - ) - private static final BuiltinFunction format = - new BuiltinFunction("format") { - @SuppressWarnings("unused") - public String invoke( - String self, - SkylarkList<Object> args, - SkylarkDict<?, ?> kwargs, - Location loc) - throws EvalException { - return new FormatParser(loc) - .format( - self, - args.getImmutableList(), - kwargs.getContents(String.class, Object.class, "kwargs")); - } - }; - - @SkylarkSignature(name = "startswith", objectType = StringModule.class, - returnType = Boolean.class, - doc = "Returns True if the string starts with <code>sub</code>, " - + "otherwise False, optionally restricting to [<code>start</code>:<code>end</code>], " - + "<code>start</code> being inclusive and <code>end</code> being exclusive.", - parameters = { - @Param(name = "self", type = String.class, doc = "This string."), - @Param(name = "sub", type = String.class, doc = "The substring to check."), - @Param(name = "start", type = Integer.class, defaultValue = "0", - doc = "Test beginning at this position."), - @Param(name = "end", type = Integer.class, noneable = true, defaultValue = "None", - doc = "Stop comparing at this position.")}) - private static final BuiltinFunction startswith = new BuiltinFunction("startswith") { - public Boolean invoke(String self, String sub, Integer start, Object end) - throws ConversionException { - return pythonSubstring(self, start, end, "'end' operand of 'startswith'").startsWith(sub); - } - }; - @SkylarkSignature( name = "min", returnType = Object.class, @@ -1895,32 +927,6 @@ public class MethodLibrary { } }; - /** Skylark String module. */ - @SkylarkModule( - name = "string", - category = SkylarkModuleCategory.BUILTIN, - doc = - "A language built-in type to support strings. " - + "Examples of string literals:<br>" - + "<pre class=\"language-python\">a = 'abc\\ndef'\n" - + "b = \"ab'cd\"\n" - + "c = \"\"\"multiline string\"\"\"\n" - + "\n" - + "# Strings support slicing (negative index starts from the end):\n" - + "x = \"hello\"[2:4] # \"ll\"\n" - + "y = \"hello\"[1:-1] # \"ell\"\n" - + "z = \"hello\"[:4] # \"hell\"" - + "# Slice steps can be used, too:\n" - + "s = \"hello\"[::2] # \"hlo\"\n" - + "t = \"hello\"[3:0:-1] # \"lle\"\n</pre>" - + "Strings are iterable and support the <code>in</code> operator. Examples:<br>" - + "<pre class=\"language-python\">\"bc\" in \"abcd\" # evaluates to True\n" - + "x = [s for s in \"abc\"] # x == [\"a\", \"b\", \"c\"]</pre>\n" - + "Implicit concatenation of strings is not allowed; use the <code>+</code> " - + "operator instead." - ) - static final class StringModule {} - /** Skylark int type. */ @SkylarkModule( name = "int", diff --git a/src/main/java/com/google/devtools/build/lib/syntax/Runtime.java b/src/main/java/com/google/devtools/build/lib/syntax/Runtime.java index 2c7b0ee978..db7849ee13 100644 --- a/src/main/java/com/google/devtools/build/lib/syntax/Runtime.java +++ b/src/main/java/com/google/devtools/build/lib/syntax/Runtime.java @@ -152,7 +152,7 @@ public final class Runtime { */ public static Class<?> getSkylarkNamespace(Class<?> clazz) { return String.class.isAssignableFrom(clazz) - ? MethodLibrary.StringModule.class + ? StringModule.class : EvalUtils.getSkylarkType(clazz); } diff --git a/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java b/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java new file mode 100644 index 0000000000..0153dc3987 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/syntax/StringModule.java @@ -0,0 +1,996 @@ +// Copyright 2018 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.syntax; + +import com.google.common.base.CharMatcher; +import com.google.common.base.Joiner; +import com.google.common.collect.ImmutableList; +import com.google.devtools.build.lib.events.Location; +import com.google.devtools.build.lib.skylarkinterface.Param; +import com.google.devtools.build.lib.skylarkinterface.SkylarkCallable; +import com.google.devtools.build.lib.skylarkinterface.SkylarkModule; +import com.google.devtools.build.lib.skylarkinterface.SkylarkModuleCategory; +import com.google.devtools.build.lib.syntax.SkylarkList.MutableList; +import com.google.devtools.build.lib.syntax.SkylarkList.Tuple; +import com.google.devtools.build.lib.syntax.Type.ConversionException; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Skylark String module. + * + * <p>This module has special treatment in Skylark, as its methods represent methods represent for + * any 'string' objects in the language. + * + * <p>Methods of this class annotated with {@link SkylarkCallable} must have a positional-only + * 'String self' parameter as the first parameter of the method. + */ +@SkylarkModule( + name = "string", + category = SkylarkModuleCategory.BUILTIN, + doc = + "A language built-in type to support strings. " + + "Examples of string literals:<br>" + + "<pre class=\"language-python\">a = 'abc\\ndef'\n" + + "b = \"ab'cd\"\n" + + "c = \"\"\"multiline string\"\"\"\n" + + "\n" + + "# Strings support slicing (negative index starts from the end):\n" + + "x = \"hello\"[2:4] # \"ll\"\n" + + "y = \"hello\"[1:-1] # \"ell\"\n" + + "z = \"hello\"[:4] # \"hell\"" + + "# Slice steps can be used, too:\n" + + "s = \"hello\"[::2] # \"hlo\"\n" + + "t = \"hello\"[3:0:-1] # \"lle\"\n</pre>" + + "Strings are iterable and support the <code>in</code> operator. Examples:<br>" + + "<pre class=\"language-python\">\"bc\" in \"abcd\" # evaluates to True\n" + + "x = [s for s in \"abc\"] # x == [\"a\", \"b\", \"c\"]</pre>\n" + + "Implicit concatenation of strings is not allowed; use the <code>+</code> " + + "operator instead." +) +public final class StringModule { + + private StringModule() {} + + // Emulate Python substring function + // It converts out of range indices, and never fails + private static String pythonSubstring(String str, int start, Object end, String msg) + throws ConversionException { + if (start == 0 && EvalUtils.isNullOrNone(end)) { + return str; + } + start = EvalUtils.clampRangeEndpoint(start, str.length()); + int stop; + if (EvalUtils.isNullOrNone(end)) { + stop = str.length(); + } else { + stop = EvalUtils.clampRangeEndpoint(Type.INTEGER.convert(end, msg), str.length()); + } + if (start >= stop) { + return ""; + } + return str.substring(start, stop); + } + + @SkylarkCallable( + name = "join", + doc = + "Returns a string in which the string elements of the argument have been " + + "joined by this string as a separator. Example:<br>" + + "<pre class=\"language-python\">\"|\".join([\"a\", \"b\", \"c\"]) == \"a|b|c\"" + + "</pre>", + parameters = { + @Param(name = "self", type = String.class), + @Param(name = "elements", legacyNamed = true, type = SkylarkList.class, + doc = "The objects to join.") + }) + public String join(String self, SkylarkList<?> elements) throws ConversionException { + return Joiner.on(self).join(elements); + } + + @SkylarkCallable( + name = "lower", + doc = "Returns the lower case version of this string.", + parameters = {@Param(name = "self", type = String.class)}) + public String lower(String self) { + return self.toLowerCase(); + } + + @SkylarkCallable( + name = "upper", + doc = "Returns the upper case version of this string.", + parameters = {@Param(name = "self", type = String.class)}) + public String upper(String self) { + return self.toUpperCase(); + } + + /** + * For consistency with Python we recognize the same whitespace characters as they do over the + * range 0x00-0xFF. See https://hg.python.org/cpython/file/3.6/Objects/unicodetype_db.h#l5738 This + * list is a consequence of Unicode character information. + * + * <p>Note that this differs from Python 2.7, which uses ctype.h#isspace(), and from + * java.lang.Character#isWhitespace(), which does not recognize U+00A0. + */ + private static final String LATIN1_WHITESPACE = + ("\u0009" + "\n" + "\u000B" + "\u000C" + "\r" + "\u001C" + "\u001D" + "\u001E" + "\u001F" + + "\u0020" + "\u0085" + "\u00A0"); + + private static String stringLStrip(String self, String chars) { + CharMatcher matcher = CharMatcher.anyOf(chars); + for (int i = 0; i < self.length(); i++) { + if (!matcher.matches(self.charAt(i))) { + return self.substring(i); + } + } + return ""; // All characters were stripped. + } + + private static String stringRStrip(String self, String chars) { + CharMatcher matcher = CharMatcher.anyOf(chars); + for (int i = self.length() - 1; i >= 0; i--) { + if (!matcher.matches(self.charAt(i))) { + return self.substring(0, i + 1); + } + } + return ""; // All characters were stripped. + } + + private static String stringStrip(String self, String chars) { + return stringLStrip(stringRStrip(self, chars), chars); + } + + @SkylarkCallable( + name = "lstrip", + doc = + "Returns a copy of the string where leading characters that appear in " + + "<code>chars</code> are removed." + + "<pre class=\"language-python\">" + + "\"abcba\".lstrip(\"ba\") == \"cba\"" + + "</pre>", + parameters = { + @Param(name = "self", type = String.class), + @Param( + name = "chars", + type = String.class, + legacyNamed = true, + noneable = true, + doc = "The characters to remove, or all whitespace if None.", + defaultValue = "None") + }) + public String lstrip(String self, Object charsOrNone) { + String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; + return stringLStrip(self, chars); + } + + @SkylarkCallable( + name = "rstrip", + doc = + "Returns a copy of the string where trailing characters that appear in " + + "<code>chars</code> are removed." + + "<pre class=\"language-python\">" + + "\"abcbaa\".rstrip(\"ab\") == \"abc\"" + + "</pre>", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param( + name = "chars", + type = String.class, + legacyNamed = true, + noneable = true, + doc = "The characters to remove, or all whitespace if None.", + defaultValue = "None") + }) + public String rstrip(String self, Object charsOrNone) { + String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; + return stringRStrip(self, chars); + } + + @SkylarkCallable( + name = "strip", + doc = + "Returns a copy of the string where leading or trailing characters that appear in " + + "<code>chars</code> are removed." + + "<pre class=\"language-python\">" + + "\"aabcbcbaa\".strip(\"ab\") == \"cbc\"" + + "</pre>", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param( + name = "chars", + type = String.class, + legacyNamed = true, + noneable = true, + doc = "The characters to remove, or all whitespace if None.", + defaultValue = "None") + }) + public String strip(String self, Object charsOrNone) { + String chars = charsOrNone != Runtime.NONE ? (String) charsOrNone : LATIN1_WHITESPACE; + return stringStrip(self, chars); + } + + @SkylarkCallable( + name = "replace", + doc = + "Returns a copy of the string in which the occurrences " + + "of <code>old</code> have been replaced with <code>new</code>, optionally " + + "restricting the number of replacements to <code>maxsplit</code>.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "old", legacyNamed = true, type = String.class, + doc = "The string to be replaced."), + @Param(name = "new", legacyNamed = true, + type = String.class, doc = "The string to replace with."), + @Param( + name = "maxsplit", + type = Integer.class, + noneable = true, + defaultValue = "None", + legacyNamed = true, + doc = "The maximum number of replacements.") + }, + useLocation = true) + public String replace( + String self, String oldString, String newString, Object maxSplitO, Location loc) + throws EvalException { + StringBuffer sb = new StringBuffer(); + Integer maxSplit = + Type.INTEGER.convertOptional( + maxSplitO, "'maxsplit' argument of 'replace'", /*label*/ null, Integer.MAX_VALUE); + try { + Matcher m = Pattern.compile(oldString, Pattern.LITERAL).matcher(self); + for (int i = 0; i < maxSplit && m.find(); i++) { + m.appendReplacement(sb, Matcher.quoteReplacement(newString)); + } + m.appendTail(sb); + } catch (IllegalStateException e) { + throw new EvalException(loc, e.getMessage() + " in call to replace"); + } + return sb.toString(); + } + + @SkylarkCallable( + name = "split", + doc = + "Returns a list of all the words in the string, using <code>sep</code> as the " + + "separator, optionally limiting the number of splits to <code>maxsplit</code>.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sep", legacyNamed = true, type = String.class, + doc = "The string to split on."), + @Param( + name = "maxsplit", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "The maximum number of splits.") + }, + useEnvironment = true, + useLocation = true) + public MutableList<String> split( + String self, String sep, Object maxSplitO, Location loc, Environment env) + throws EvalException { + int maxSplit = + Type.INTEGER.convertOptional(maxSplitO, "'split' argument of 'split'", /*label*/ null, -2); + // + 1 because the last result is the remainder. The default is -2 so that after +1, + // it becomes -1. + String[] ss = Pattern.compile(sep, Pattern.LITERAL).split(self, maxSplit + 1); + return MutableList.of(env, ss); + } + + @SkylarkCallable( + name = "rsplit", + doc = + "Returns a list of all the words in the string, using <code>sep</code> as the " + + "separator, optionally limiting the number of splits to <code>maxsplit</code>. " + + "Except for splitting from the right, this method behaves like split().", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sep", legacyNamed = true, type = String.class, + doc = "The string to split on."), + @Param( + name = "maxsplit", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "The maximum number of splits.") + }, + useEnvironment = true, + useLocation = true) + public MutableList<String> rsplit( + String self, String sep, Object maxSplitO, Location loc, Environment env) + throws EvalException { + int maxSplit = Type.INTEGER.convertOptional(maxSplitO, "'split' argument of 'split'", null, -1); + try { + return stringRSplit(self, sep, maxSplit, env); + } catch (IllegalArgumentException ex) { + throw new EvalException(loc, ex); + } + } + + /** + * Splits the given string into a list of words, using {@code separator} as a delimiter. + * + * <p>At most {@code maxSplits} will be performed, going from right to left. + * + * @param input The input string. + * @param separator The separator string. + * @param maxSplits The maximum number of splits. Negative values mean unlimited splits. + * @return A list of words + * @throws IllegalArgumentException + */ + private static MutableList<String> stringRSplit( + String input, String separator, int maxSplits, Environment env) { + if (separator.isEmpty()) { + throw new IllegalArgumentException("Empty separator"); + } + + if (maxSplits <= 0) { + maxSplits = Integer.MAX_VALUE; + } + + ArrayDeque<String> result = new ArrayDeque<>(); + String[] parts = input.split(Pattern.quote(separator), -1); + int sepLen = separator.length(); + int remainingLength = input.length(); + int splitsSoFar = 0; + + // Copies parts from the array into the final list, starting at the end (because + // it's rsplit), as long as fewer than maxSplits splits are performed. The + // last spot in the list is reserved for the remaining string, whose length + // has to be tracked throughout the loop. + for (int pos = parts.length - 1; (pos >= 0) && (splitsSoFar < maxSplits); --pos) { + String current = parts[pos]; + result.addFirst(current); + + ++splitsSoFar; + remainingLength -= sepLen + current.length(); + } + + if (splitsSoFar == maxSplits && remainingLength >= 0) { + result.addFirst(input.substring(0, remainingLength)); + } + + return MutableList.copyOf(env, result); + } + + @SkylarkCallable( + name = "partition", + doc = + "Splits the input string at the first occurrence of the separator " + + "<code>sep</code> and returns the resulting partition as a three-element " + + "tuple of the form (substring_before, separator, substring_after).", + parameters = { + @Param(name = "self", type = String.class), + @Param( + name = "sep", + type = String.class, + legacyNamed = true, + defaultValue = "\" \"", + doc = "The string to split on, default is space (\" \").") + }, + useEnvironment = true, + useLocation = true) + public Tuple<String> partition(String self, String sep, Location loc, Environment env) + throws EvalException { + return partitionWrapper(self, sep, true, loc); + } + + @SkylarkCallable( + name = "rpartition", + doc = + "Splits the input string at the last occurrence of the separator " + + "<code>sep</code> and returns the resulting partition as a three-element " + + "tuple of the form (substring_before, separator, substring_after).", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param( + name = "sep", + type = String.class, + legacyNamed = true, + defaultValue = "\" \"", + doc = "The string to split on, default is space (\" \").") + }, + useEnvironment = true, + useLocation = true) + public Tuple<String> rpartition(String self, String sep, Location loc, Environment env) + throws EvalException { + return partitionWrapper(self, sep, false, loc); + } + + /** + * Wraps the stringPartition() method and converts its results and exceptions + * to the expected types. + * + * @param self The input string + * @param separator The string to split on + * @param forward A flag that controls whether the input string is split around + * the first ({@code true}) or last ({@code false}) occurrence of the separator. + * @param loc The location that is used for potential exceptions + * @return A list with three elements + */ + private static Tuple<String> partitionWrapper( + String self, String separator, boolean forward, Location loc) throws EvalException { + try { + return Tuple.copyOf(stringPartition(self, separator, forward)); + } catch (IllegalArgumentException ex) { + throw new EvalException(loc, ex); + } + } + + /** + * Splits the input string at the {first|last} occurrence of the given separator and returns the + * resulting partition as a three-tuple of Strings, contained in a {@code MutableList}. + * + * <p>If the input string does not contain the separator, the tuple will consist of the original + * input string and two empty strings. + * + * <p>This method emulates the behavior of Python's str.partition() and str.rpartition(), + * depending on the value of the {@code forward} flag. + * + * @param input The input string + * @param separator The string to split on + * @param forward A flag that controls whether the input string is split around the first ({@code + * true}) or last ({@code false}) occurrence of the separator. + * @return A three-tuple (List) of the form [part_before_separator, separator, + * part_after_separator]. + */ + private static List<String> stringPartition(String input, String separator, boolean forward) { + if (separator.isEmpty()) { + throw new IllegalArgumentException("Empty separator"); + } + + int partitionSize = 3; + ArrayList<String> result = new ArrayList<>(partitionSize); + int pos = forward ? input.indexOf(separator) : input.lastIndexOf(separator); + + if (pos < 0) { + for (int i = 0; i < partitionSize; ++i) { + result.add(""); + } + + // Following Python's implementation of str.partition() and str.rpartition(), + // the input string is copied to either the first or the last position in the + // list, depending on the value of the forward flag. + result.set(forward ? 0 : partitionSize - 1, input); + } else { + result.add(input.substring(0, pos)); + result.add(separator); + + // pos + sep.length() is at most equal to input.length(). This worst-case + // happens when the separator is at the end of the input string. However, + // substring() will return an empty string in this scenario, thus making + // any additional safety checks obsolete. + result.add(input.substring(pos + separator.length())); + } + + return result; + } + + @SkylarkCallable( + name = "capitalize", + doc = + "Returns a copy of the string with its first character capitalized and the rest " + + "lowercased. This method does not support non-ascii characters.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public String capitalize(String self) throws EvalException { + if (self.isEmpty()) { + return self; + } + return Character.toUpperCase(self.charAt(0)) + self.substring(1).toLowerCase(); + } + + @SkylarkCallable( + name = "title", + doc = + "Converts the input string into title case, i.e. every word starts with an " + + "uppercase letter while the remaining letters are lowercase. In this " + + "context, a word means strictly a sequence of letters. This method does " + + "not support supplementary Unicode characters.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public String title(String self) throws EvalException { + char[] data = self.toCharArray(); + boolean previousWasLetter = false; + + for (int pos = 0; pos < data.length; ++pos) { + char current = data[pos]; + boolean currentIsLetter = Character.isLetter(current); + + if (currentIsLetter) { + if (previousWasLetter && Character.isUpperCase(current)) { + data[pos] = Character.toLowerCase(current); + } else if (!previousWasLetter && Character.isLowerCase(current)) { + data[pos] = Character.toUpperCase(current); + } + } + previousWasLetter = currentIsLetter; + } + + return new String(data); + } + + /** + * Common implementation for find, rfind, index, rindex. + * @param forward true if we want to return the last matching index. + */ + private static int stringFind(boolean forward, + String self, String sub, int start, Object end, String msg) + throws ConversionException { + String substr = pythonSubstring(self, start, end, msg); + int subpos = forward ? substr.indexOf(sub) : substr.lastIndexOf(sub); + start = EvalUtils.clampRangeEndpoint(start, self.length()); + return subpos < 0 ? subpos : subpos + start; + } + + private static final Pattern SPLIT_LINES_PATTERN = + Pattern.compile("(?<line>.*)(?<break>(\\r\\n|\\r|\\n)?)"); + + @SkylarkCallable( + name = "rfind", + doc = + "Returns the last index where <code>sub</code> is found, " + + "or -1 if no such index exists, optionally restricting to " + + "[<code>start</code>:<code>end</code>], " + + "<code>start</code> being inclusive and <code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to find."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Restrict to search from this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "optional position before which to restrict to search.") + }) + public Integer rfind(String self, String sub, Integer start, Object end) + throws ConversionException { + return stringFind(false, self, sub, start, end, "'end' argument to rfind"); + } + + @SkylarkCallable( + name = "find", + doc = + "Returns the first index where <code>sub</code> is found, " + + "or -1 if no such index exists, optionally restricting to " + + "[<code>start</code>:<code>end]</code>, " + + "<code>start</code> being inclusive and <code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to find."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Restrict to search from this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "optional position before which to restrict to search.") + }) + public Integer invoke(String self, String sub, Integer start, Object end) + throws ConversionException { + return stringFind(true, self, sub, start, end, "'end' argument to find"); + } + + @SkylarkCallable( + name = "rindex", + doc = + "Returns the last index where <code>sub</code> is found, " + + "or raises an error if no such index exists, optionally restricting to " + + "[<code>start</code>:<code>end</code>], " + + "<code>start</code> being inclusive and <code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to find."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Restrict to search from this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "optional position before which to restrict to search.") + }, + useLocation = true) + public Integer rindex(String self, String sub, Integer start, Object end, Location loc) + throws EvalException { + int res = stringFind(false, self, sub, start, end, "'end' argument to rindex"); + if (res < 0) { + throw new EvalException(loc, Printer.format("substring %r not found in %r", sub, self)); + } + return res; + } + + @SkylarkCallable( + name = "index", + doc = + "Returns the first index where <code>sub</code> is found, " + + "or raises an error if no such index exists, optionally restricting to " + + "[<code>start</code>:<code>end]</code>, " + + "<code>start</code> being inclusive and <code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to find."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Restrict to search from this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "optional position before which to restrict to search.") + }, + useLocation = true) + public Integer index(String self, String sub, Integer start, Object end, Location loc) + throws EvalException { + int res = stringFind(true, self, sub, start, end, "'end' argument to index"); + if (res < 0) { + throw new EvalException(loc, Printer.format("substring %r not found in %r", sub, self)); + } + return res; + } + + @SkylarkCallable( + name = "splitlines", + doc = + "Splits the string at line boundaries ('\\n', '\\r\\n', '\\r') " + + "and returns the result as a list.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param( + name = "keepends", + type = Boolean.class, + legacyNamed = true, + defaultValue = "False", + doc = "Whether the line breaks should be included in the resulting list.") + }) + public SkylarkList<String> splitLines(String self, Boolean keepEnds) throws EvalException { + List<String> result = new ArrayList<>(); + Matcher matcher = SPLIT_LINES_PATTERN.matcher(self); + while (matcher.find()) { + String line = matcher.group("line"); + String lineBreak = matcher.group("break"); + boolean trailingBreak = lineBreak.isEmpty(); + if (line.isEmpty() && trailingBreak) { + break; + } + if (keepEnds && !trailingBreak) { + result.add(line + lineBreak); + } else { + result.add(line); + } + } + return SkylarkList.createImmutable(result); + } + + @SkylarkCallable( + name = "isalpha", + doc = + "Returns True if all characters in the string are alphabetic ([a-zA-Z]) and there is " + + "at least one character.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isAlpha(String self) throws EvalException { + return matches(self, ALPHA, false); + } + + @SkylarkCallable( + name = "isalnum", + doc = + "Returns True if all characters in the string are alphanumeric ([a-zA-Z0-9]) and there " + + "is at least one character.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isAlnum(String self) throws EvalException { + return matches(self, ALNUM, false); + } + + @SkylarkCallable( + name = "isdigit", + doc = + "Returns True if all characters in the string are digits ([0-9]) and there is " + + "at least one character.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isDigit(String self) throws EvalException { + return matches(self, DIGIT, false); + } + + @SkylarkCallable( + name = "isspace", + doc = + "Returns True if all characters are white space characters and the string " + + "contains at least one character.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isSpace(String self) throws EvalException { + return matches(self, SPACE, false); + } + + @SkylarkCallable( + name = "islower", + doc = + "Returns True if all cased characters in the string are lowercase and there is " + + "at least one character.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isLower(String self) throws EvalException { + // Python also accepts non-cased characters, so we cannot use LOWER. + return matches(self, UPPER.negate(), true); + } + + @SkylarkCallable( + name = "isupper", + doc = + "Returns True if all cased characters in the string are uppercase and there is " + + "at least one character.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isUpper(String self) throws EvalException { + // Python also accepts non-cased characters, so we cannot use UPPER. + return matches(self, LOWER.negate(), true); + } + + @SkylarkCallable( + name = "istitle", + doc = + "Returns True if the string is in title case and it contains at least one character. " + + "This means that every uppercase character must follow an uncased one (e.g. " + + "whitespace) and every lowercase character must follow a cased one (e.g. " + + "uppercase or lowercase).", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public Boolean isTitle(String self) throws EvalException { + if (self.isEmpty()) { + return false; + } + // From the Python documentation: "uppercase characters may only follow uncased characters + // and lowercase characters only cased ones". + char[] data = self.toCharArray(); + CharMatcher matcher = CharMatcher.any(); + char leftMostCased = ' '; + for (int pos = data.length - 1; pos >= 0; --pos) { + char current = data[pos]; + // 1. Check condition that was determined by the right neighbor. + if (!matcher.matches(current)) { + return false; + } + // 2. Determine condition for the left neighbor. + if (LOWER.matches(current)) { + matcher = CASED; + } else if (UPPER.matches(current)) { + matcher = CASED.negate(); + } else { + matcher = CharMatcher.any(); + } + // 3. Store character if it is cased. + if (CASED.matches(current)) { + leftMostCased = current; + } + } + // The leftmost cased letter must be uppercase. If leftMostCased is not a cased letter here, + // then the string doesn't have any cased letter, so UPPER.test will return false. + return UPPER.matches(leftMostCased); + } + + private static boolean matches( + String str, CharMatcher matcher, boolean requiresAtLeastOneCasedLetter) { + if (str.isEmpty()) { + return false; + } else if (!requiresAtLeastOneCasedLetter) { + return matcher.matchesAllOf(str); + } + int casedLetters = 0; + for (char current : str.toCharArray()) { + if (!matcher.matches(current)) { + return false; + } else if (requiresAtLeastOneCasedLetter && CASED.matches(current)) { + ++casedLetters; + } + } + return casedLetters > 0; + } + + private static final CharMatcher DIGIT = CharMatcher.javaDigit(); + private static final CharMatcher LOWER = CharMatcher.inRange('a', 'z'); + private static final CharMatcher UPPER = CharMatcher.inRange('A', 'Z'); + private static final CharMatcher ALPHA = LOWER.or(UPPER); + private static final CharMatcher ALNUM = ALPHA.or(DIGIT); + private static final CharMatcher CASED = ALPHA; + private static final CharMatcher SPACE = CharMatcher.whitespace(); + + @SkylarkCallable( + name = "count", + doc = + "Returns the number of (non-overlapping) occurrences of substring <code>sub</code> in " + + "string, optionally restricting to [<code>start</code>:<code>end</code>], " + + "<code>start</code> being inclusive and <code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to count."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Restrict to search from this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "optional position before which to restrict to search.") + }) + public Integer count(String self, String sub, Integer start, Object end) + throws ConversionException { + String str = pythonSubstring(self, start, end, "'end' operand of 'find'"); + if (sub.isEmpty()) { + return str.length() + 1; + } + int count = 0; + int index = -1; + while ((index = str.indexOf(sub)) >= 0) { + count++; + str = str.substring(index + sub.length()); + } + return count; + } + + @SkylarkCallable( + name = "elems", + doc = + "Returns an iterable value containing successive 1-element substrings of the string. " + + "Equivalent to <code>[s[i] for i in range(len(s))]</code>, except that the " + + "returned value might not be a list.", + parameters = {@Param(name = "self", type = String.class, doc = "This string.")}) + public SkylarkList<String> elems(String self) throws ConversionException { + ImmutableList.Builder<String> builder = new ImmutableList.Builder<>(); + for (char c : self.toCharArray()) { + builder.add(String.valueOf(c)); + } + return SkylarkList.createImmutable(builder.build()); + } + + @SkylarkCallable( + name = "endswith", + doc = + "Returns True if the string ends with <code>sub</code>, " + + "otherwise False, optionally restricting to " + + "[<code>start</code>:<code>end</code>], <code>start</code> being inclusive " + + "and <code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to check."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Test beginning at this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "optional position at which to stop comparing.") + }) + public Boolean endsWith(String self, String sub, Integer start, Object end) + throws ConversionException { + return pythonSubstring(self, start, end, "'end' operand of 'endswith'").endsWith(sub); + } + + // In Python, formatting is very complex. + // We handle here the simplest case which provides most of the value of the function. + // https://docs.python.org/3/library/string.html#formatstrings + @SkylarkCallable( + name = "format", + doc = + "Perform string interpolation. Format strings contain replacement fields " + + "surrounded by curly braces <code>{}</code>. Anything that is not contained " + + "in braces is considered literal text, which is copied unchanged to the output." + + "If you need to include a brace character in the literal text, it can be " + + "escaped by doubling: <code>{{</code> and <code>}}</code>" + + "A replacement field can be either a name, a number, or empty. Values are " + + "converted to strings using the <a href=\"globals.html#str\">str</a> function." + + "<pre class=\"language-python\">" + + "# Access in order:\n" + + "\"{} < {}\".format(4, 5) == \"4 < 5\"\n" + + "# Access by position:\n" + + "\"{1}, {0}\".format(2, 1) == \"1, 2\"\n" + + "# Access by name:\n" + + "\"x{key}x\".format(key = 2) == \"x2x\"</pre>\n", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + }, + extraPositionals = + @Param( + name = "args", + type = SkylarkList.class, + defaultValue = "()", + doc = "List of arguments."), + extraKeywords = + @Param( + name = "kwargs", + type = SkylarkDict.class, + defaultValue = "{}", + doc = "Dictionary of arguments."), + useLocation = true) + public String format(String self, SkylarkList<?> args, SkylarkDict<?, ?> kwargs, Location loc) + throws EvalException { + @SuppressWarnings("unchecked") + List<Object> argObjects = (List<Object>) args.getImmutableList(); + return new FormatParser(loc) + .format(self, argObjects, kwargs.getContents(String.class, Object.class, "kwargs")); + } + + @SkylarkCallable( + name = "startswith", + doc = + "Returns True if the string starts with <code>sub</code>, " + + "otherwise False, optionally restricting to " + + "[<code>start</code>:<code>end</code>], <code>start</code> being inclusive and " + + "<code>end</code> being exclusive.", + parameters = { + @Param(name = "self", type = String.class, doc = "This string."), + @Param(name = "sub", type = String.class, legacyNamed = true, + doc = "The substring to check."), + @Param( + name = "start", + type = Integer.class, + legacyNamed = true, + defaultValue = "0", + doc = "Test beginning at this position."), + @Param( + name = "end", + type = Integer.class, + legacyNamed = true, + noneable = true, + defaultValue = "None", + doc = "Stop comparing at this position.") + }) + public Boolean startsWith(String self, String sub, Integer start, Object end) + throws ConversionException { + return pythonSubstring(self, start, end, "'end' operand of 'startswith'").startsWith(sub); + } + + public static final StringModule INSTANCE = new StringModule(); +} diff --git a/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java b/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java index 7921e2ade7..51fa9d33f0 100644 --- a/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java +++ b/src/test/java/com/google/devtools/build/lib/skylark/SkylarkIntegrationTest.java @@ -359,8 +359,8 @@ public class SkylarkIntegrationTest extends BuildViewTestCase { "str", "\t\tstr.index(1)" + System.lineSeparator() - + "argument 'sub' has type 'int', but should be 'string'\n" - + "in call to builtin method string.index(sub, start, end)"); + + "expected value of type 'string' for parameter 'sub', " + + "in method call index(int) of 'string'"); } @Test diff --git a/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java b/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java index c0bd4a46fe..a69563a73f 100644 --- a/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java +++ b/src/test/java/com/google/devtools/build/lib/syntax/EvaluationTest.java @@ -693,8 +693,8 @@ public class EvaluationTest extends EvaluationTestCase { @Test public void testArgBothPosKey() throws Exception { newTest().testIfErrorContains( - "arguments 'old', 'new' passed both by position and by name " - + "in call to replace(self: string, ", - "'banana'.replace('a', 'o', 3, old='a', new=4)"); + "got multiple values for keyword argument 'old', " + + "in method call replace(string, string, int, string old) of 'string'", + "'banana'.replace('a', 'o', 3, old='a')"); } } diff --git a/src/test/java/com/google/devtools/build/lib/syntax/MethodLibraryTest.java b/src/test/java/com/google/devtools/build/lib/syntax/MethodLibraryTest.java index 2c353d6fc9..d487eff0d2 100644 --- a/src/test/java/com/google/devtools/build/lib/syntax/MethodLibraryTest.java +++ b/src/test/java/com/google/devtools/build/lib/syntax/MethodLibraryTest.java @@ -106,8 +106,8 @@ public class MethodLibraryTest extends EvaluationTestCase { // only one built-in function. new BothModesTest() .testIfExactError( - "argument 'sub' has type 'int', but should be 'string'\n" - + "in call to builtin method string.index(sub, start, end)", + "expected value of type 'string' for parameter 'sub', " + + "in method call index(int) of 'string'", "'test'.index(1)"); } @@ -131,8 +131,8 @@ public class MethodLibraryTest extends EvaluationTestCase { + LINE_SEPARATOR + "\t\t\"test\".index(x)" + LINE_SEPARATOR - + "argument 'sub' has type 'int', but should be 'string'\n" - + "in call to builtin method string.index(sub, start, end)", + + "expected value of type 'string' for parameter 'sub', " + + "in method call index(int) of 'string'", "def foo():", " bar(1)", "def bar(x):", @@ -146,8 +146,8 @@ public class MethodLibraryTest extends EvaluationTestCase { new BothModesTest() .testIfErrorContains("substring \"z\" not found in \"abc\"", "'abc'.index('z')") .testIfErrorContains( - "argument 'sub' has type 'int', but should be 'string'\n" - + "in call to builtin method string.startswith(sub, start, end)", + "expected value of type 'string' for parameter 'sub', " + + "in method call startswith(int) of 'string'", "'test'.startswith(1)") .testIfErrorContains( "expected value of type 'list(object)' for parameter args in dict(), " diff --git a/src/test/java/com/google/devtools/build/lib/syntax/RuntimeTest.java b/src/test/java/com/google/devtools/build/lib/syntax/RuntimeTest.java index 9da686707a..e2fc1e65d4 100644 --- a/src/test/java/com/google/devtools/build/lib/syntax/RuntimeTest.java +++ b/src/test/java/com/google/devtools/build/lib/syntax/RuntimeTest.java @@ -108,15 +108,6 @@ public final class RuntimeTest { } @Test - public void checkStaticallyRegistered_Method() throws Exception { - Field splitField = MethodLibrary.class.getDeclaredField("split"); - splitField.setAccessible(true); - Object splitFieldValue = splitField.get(null); - Object splitFunc = Runtime.getBuiltinRegistry().getFunction(String.class, "split"); - assertThat(splitFunc).isSameAs(splitFieldValue); - } - - @Test public void checkStaticallyRegistered_Global() throws Exception { Field lenField = MethodLibrary.class.getDeclaredField("len"); lenField.setAccessible(true); |