1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
#include "tensorflow/core/lib/gtl/edit_distance.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include <gtest/gtest.h>
namespace tensorflow {
namespace gtl {
namespace {
class LevenshteinDistanceTest : public ::testing::Test {
protected:
std::vector<char> empty_;
std::string s1_;
std::string s1234_;
std::string s567_;
std::string kilo_;
std::string kilogram_;
std::string mother_;
std::string grandmother_;
std::string lower_;
std::string upper_;
void SetUp() override {
s1_ = "1";
s1234_ = "1234";
s567_ = "567";
kilo_ = "kilo";
kilogram_ = "kilogram";
mother_ = "mother";
grandmother_ = "grandmother";
lower_ = "lower case";
upper_ = "UPPER case";
}
};
TEST_F(LevenshteinDistanceTest, BothEmpty) {
ASSERT_EQ(LevenshteinDistance(empty_, empty_, std::equal_to<char>()), 0);
}
TEST_F(LevenshteinDistanceTest, OneEmpty) {
ASSERT_EQ(LevenshteinDistance(s1234_, empty_, std::equal_to<char>()), 4);
ASSERT_EQ(LevenshteinDistance(empty_, s567_, std::equal_to<char>()), 3);
}
TEST_F(LevenshteinDistanceTest, SingleElement) {
ASSERT_EQ(LevenshteinDistance(s1234_, s1_, std::equal_to<char>()), 3);
ASSERT_EQ(LevenshteinDistance(s1_, s1234_, std::equal_to<char>()), 3);
}
TEST_F(LevenshteinDistanceTest, Prefix) {
ASSERT_EQ(LevenshteinDistance(kilo_, kilogram_, std::equal_to<char>()), 4);
ASSERT_EQ(LevenshteinDistance(kilogram_, kilo_, std::equal_to<char>()), 4);
}
TEST_F(LevenshteinDistanceTest, Suffix) {
ASSERT_EQ(LevenshteinDistance(mother_, grandmother_, std::equal_to<char>()),
5);
ASSERT_EQ(LevenshteinDistance(grandmother_, mother_, std::equal_to<char>()),
5);
}
TEST_F(LevenshteinDistanceTest, DifferentComparisons) {
ASSERT_EQ(LevenshteinDistance(lower_, upper_, std::equal_to<char>()), 5);
ASSERT_EQ(LevenshteinDistance(upper_, lower_, std::equal_to<char>()), 5);
ASSERT_EQ(
LevenshteinDistance(gtl::ArraySlice<char>(lower_.data(), lower_.size()),
gtl::ArraySlice<char>(upper_.data(), upper_.size()),
std::equal_to<char>()),
5);
auto no_case_cmp = [](char c1, char c2) {
return std::tolower(c1) == std::tolower(c2);
};
ASSERT_EQ(LevenshteinDistance(lower_, upper_, no_case_cmp), 3);
ASSERT_EQ(LevenshteinDistance(upper_, lower_, no_case_cmp), 3);
}
TEST_F(LevenshteinDistanceTest, Vectors) {
ASSERT_EQ(
LevenshteinDistance(std::string("algorithm"), std::string("altruistic"),
std::equal_to<char>()),
6);
}
static void BM_EditDistanceHelper(int n, int len, bool completely_different) {
string a =
"The quick brown fox jumped over the lazy dog and on and on and on"
" Every good boy deserves fudge. In fact, this is a very long sentence "
" w/many bytes..";
while (a.size() < static_cast<size_t>(len)) {
a = a + a;
}
string b = a;
if (completely_different) {
for (size_t i = 0; i < b.size(); i++) {
b[i]++;
}
}
while (n-- > 0) {
LevenshteinDistance(gtl::ArraySlice<char>(a.data(), len),
gtl::ArraySlice<char>(b.data(), len),
std::equal_to<char>());
}
}
static void BM_EditDistanceSame(int n, int len) {
BM_EditDistanceHelper(n, len, false);
}
static void BM_EditDistanceDiff(int n, int len) {
BM_EditDistanceHelper(n, len, true);
}
BENCHMARK(BM_EditDistanceSame)->Arg(5);
BENCHMARK(BM_EditDistanceSame)->Arg(50);
BENCHMARK(BM_EditDistanceSame)->Arg(200);
BENCHMARK(BM_EditDistanceSame)->Arg(1000);
BENCHMARK(BM_EditDistanceDiff)->Arg(5);
BENCHMARK(BM_EditDistanceDiff)->Arg(50);
BENCHMARK(BM_EditDistanceDiff)->Arg(200);
BENCHMARK(BM_EditDistanceDiff)->Arg(1000);
} // namespace
} // namespace gtl
} // namespace tensorflow
|