aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Laurent Le Brun <laurentlb@google.com>2016-06-01 16:36:41 +0000
committerGravatar Dmitry Lomov <dslomov@google.com>2016-06-01 18:18:14 +0000
commit9bf2da49b552ef4d57e9342bab5bfd2d57c0ec8a (patch)
treef58838748b0a9d5f64f8c20557d9c890d3cf31cc /src
parent77903754ad27cfaf660e4dfebc2e23ed4d640e98 (diff)
Add a SpellChecker class with edit distance function.
This will be used later to detect typos and provide suggestions. -- MOS_MIGRATED_REVID=123761611
Diffstat (limited to 'src')
-rw-r--r--src/main/java/com/google/devtools/build/lib/util/SpellChecker.java74
-rw-r--r--src/test/java/com/google/devtools/build/lib/util/SpellCheckerTest.java79
2 files changed, 153 insertions, 0 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/util/SpellChecker.java b/src/main/java/com/google/devtools/build/lib/util/SpellChecker.java
new file mode 100644
index 0000000000..e453ebcf19
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/util/SpellChecker.java
@@ -0,0 +1,74 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.util;
+
+/**
+ * Class that provides functions to do spell checking, i.e. detect typos
+ * and make suggestions.
+ */
+public final class SpellChecker {
+ /**
+ * Computes the edit distance between two strings. The edit distance is
+ * the minimum number of insertions, deletions and replacements to
+ * transform a string into the other string.
+ *
+ * maxEditDistance is the maximum distance the function can return. If
+ * it would be greater, the function returns -1. It is useful for
+ * speeding up the computations.
+ */
+ public static int editDistance(String s1, String s2, int maxEditDistance) {
+ // This is the Levenshtein distance, as described here:
+ // http://en.wikipedia.org/wiki/Levenshtein_distance
+ //
+ // We don't need to keep the full matrix. To update a cell, we only
+ // need top-left, top, and left values. Using a single array is
+ // sufficient. Top value is still in row[j] from the last iteration.
+ // Top-left value is stored in 'previous'. Left value is row[j - 1].
+
+ if (s1.equals(s2)) {
+ return 0;
+ }
+ // Short-circuit based on string length.
+ if (Math.abs(s1.length() - s2.length()) > maxEditDistance) {
+ return -1;
+ }
+
+ int[] row = new int[s2.length() + 1];
+ for (int i = 0; i <= s2.length(); i++) {
+ row[i] = i;
+ }
+
+ for (int i = 1; i <= s1.length(); i++) {
+ row[0] = i;
+ int bestInTheRow = row[0];
+ int previous = i - 1;
+
+ for (int j = 1; j <= s2.length(); j++) {
+ int old = row[j];
+
+ row[j] = Math.min(
+ previous + (s1.charAt(i - 1) == s2.charAt(j - 1) ? 0 : 1),
+ 1 + Math.min(row[j - 1], row[j]));
+ previous = old;
+ bestInTheRow = Math.min(bestInTheRow, row[j]);
+ }
+ if (bestInTheRow > maxEditDistance) {
+ return -1;
+ }
+ }
+ int result = row[s2.length()];
+ return result <= maxEditDistance ? result : -1;
+ }
+}
diff --git a/src/test/java/com/google/devtools/build/lib/util/SpellCheckerTest.java b/src/test/java/com/google/devtools/build/lib/util/SpellCheckerTest.java
new file mode 100644
index 0000000000..d0d84e38f6
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/util/SpellCheckerTest.java
@@ -0,0 +1,79 @@
+// Copyright 2016 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.util;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link SpellChecker}.
+ */
+@RunWith(JUnit4.class)
+public class SpellCheckerTest {
+
+ private void assertDistance(String s1, String s2, int distance) {
+ assertThat(SpellChecker.editDistance(s1, s2, 100)).isEqualTo(distance);
+ assertThat(SpellChecker.editDistance(s1, s2, distance)).isEqualTo(distance);
+
+ // Symmetry
+ assertThat(SpellChecker.editDistance(s2, s1, 100)).isEqualTo(distance);
+ assertThat(SpellChecker.editDistance(s2, s1, distance)).isEqualTo(distance);
+ }
+
+ @Test
+ public void editDistance_1() throws Exception {
+ // Deletion
+ assertDistance("abcdef", "abdef", 1);
+ assertDistance("abcdef", "abcde", 1);
+ assertDistance("abcdef", "bcdef", 1);
+
+ // Replacement
+ assertDistance("abcdef", "_bcdef", 1);
+ assertDistance("abcdef", "abc_ef", 1);
+ assertDistance("abcdef", "abcde_", 1);
+
+ // Insertion
+ assertDistance("abcdef", "_abcdef", 1);
+ assertDistance("abcdef", "abcd_ef", 1);
+ assertDistance("abcdef", "abcdef_", 1);
+ }
+
+ @Test
+ public void editDistance_general() throws Exception {
+ assertDistance("", "", 0);
+ assertDistance("abcd", "abcd", 0);
+ assertDistance("abcde", "", 5);
+ assertDistance("abcde", "12345", 5);
+ assertDistance("ab", "ba", 2);
+ assertDistance("abba", "acca", 2);
+ assertDistance("abaa", "aaca", 2);
+ assertDistance("kitten", "sitting", 3);
+ assertDistance("kitten kitten", "sitting sitting", 6);
+ assertDistance("flaw", "lawn", 2);
+ }
+
+ @Test
+ public void editDistance_maxDistance() throws Exception {
+ assertThat(SpellChecker.editDistance("kitten", "sitting", 0)).isEqualTo(-1);
+ assertThat(SpellChecker.editDistance("kitten", "sitting", 1)).isEqualTo(-1);
+ assertThat(SpellChecker.editDistance("kitten", "sitting", 2)).isEqualTo(-1);
+ assertThat(SpellChecker.editDistance("kitten", "sitting", 3)).isEqualTo(3);
+ assertThat(SpellChecker.editDistance("kitten", "sitting", 4)).isEqualTo(3);
+
+ assertThat(SpellChecker.editDistance("abcdefg", "s", 2)).isEqualTo(-1);
+ }
+}