diff options
Diffstat (limited to 'tensorflow/core/lib/io/table_test.cc')
-rw-r--r-- | tensorflow/core/lib/io/table_test.cc | 601 |
1 files changed, 601 insertions, 0 deletions
diff --git a/tensorflow/core/lib/io/table_test.cc b/tensorflow/core/lib/io/table_test.cc new file mode 100644 index 0000000000..66e90ac64e --- /dev/null +++ b/tensorflow/core/lib/io/table_test.cc @@ -0,0 +1,601 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. + +#include "tensorflow/core/lib/io/table.h" + +#include <map> +#include <string> +#include <gtest/gtest.h> +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/io/block.h" +#include "tensorflow/core/lib/io/block_builder.h" +#include "tensorflow/core/lib/io/format.h" +#include "tensorflow/core/lib/io/iterator.h" +#include "tensorflow/core/lib/io/table_builder.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/env.h" + +namespace tensorflow { +namespace table { + +namespace test { +static StringPiece RandomString(random::SimplePhilox* rnd, int len, + string* dst) { + dst->resize(len); + for (int i = 0; i < len; i++) { + (*dst)[i] = static_cast<char>(' ' + rnd->Uniform(95)); // ' ' .. '~' + } + return StringPiece(*dst); +} +static string RandomKey(random::SimplePhilox* rnd, int len) { + // Make sure to generate a wide variety of characters so we + // test the boundary conditions for short-key optimizations. + static const char kTestChars[] = {'\0', '\1', 'a', 'b', 'c', + 'd', 'e', '\xfd', '\xfe', '\xff'}; + string result; + for (int i = 0; i < len; i++) { + result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; + } + return result; +} +static StringPiece CompressibleString(random::SimplePhilox* rnd, + double compressed_fraction, size_t len, + string* dst) { + int raw = static_cast<int>(len * compressed_fraction); + if (raw < 1) raw = 1; + string raw_data; + RandomString(rnd, raw, &raw_data); + + // Duplicate the random data until we have filled "len" bytes + dst->clear(); + while (dst->size() < len) { + dst->append(raw_data); + } + dst->resize(len); + return StringPiece(*dst); +} +} + +static void Increment(string* key) { key->push_back('\0'); } + +// An STL comparator that compares two StringPieces +namespace { +struct STLLessThan { + STLLessThan() {} + bool operator()(const string& a, const string& b) const { + return StringPiece(a).compare(StringPiece(b)) < 0; + } +}; +} // namespace + +class StringSink : public WritableFile { + public: + ~StringSink() {} + + const string& contents() const { return contents_; } + + virtual Status Close() { return Status::OK(); } + virtual Status Flush() { return Status::OK(); } + virtual Status Sync() { return Status::OK(); } + + virtual Status Append(const StringPiece& data) { + contents_.append(data.data(), data.size()); + return Status::OK(); + } + + private: + string contents_; +}; + +class StringSource : public RandomAccessFile { + public: + StringSource(const StringPiece& contents) + : contents_(contents.data(), contents.size()), bytes_read_(0) {} + + virtual ~StringSource() {} + + uint64 Size() const { return contents_.size(); } + + virtual Status Read(uint64 offset, size_t n, StringPiece* result, + char* scratch) const { + if (offset > contents_.size()) { + return errors::InvalidArgument("invalid Read offset"); + } + if (offset + n > contents_.size()) { + n = contents_.size() - offset; + } + memcpy(scratch, &contents_[offset], n); + *result = StringPiece(scratch, n); + bytes_read_ += n; + return Status::OK(); + } + + uint64 BytesRead() const { return bytes_read_; } + + private: + string contents_; + mutable uint64 bytes_read_; +}; + +typedef std::map<string, string, STLLessThan> KVMap; + +// Helper class for tests to unify the interface between +// BlockBuilder/TableBuilder and Block/Table. +class Constructor { + public: + explicit Constructor() : data_(STLLessThan()) {} + virtual ~Constructor() {} + + void Add(const string& key, const StringPiece& value) { + data_[key] = value.ToString(); + } + + // Finish constructing the data structure with all the keys that have + // been added so far. Returns the keys in sorted order in "*keys" + // and stores the key/value pairs in "*kvmap" + void Finish(const Options& options, std::vector<string>* keys, KVMap* kvmap) { + *kvmap = data_; + keys->clear(); + for (KVMap::const_iterator it = data_.begin(); it != data_.end(); ++it) { + keys->push_back(it->first); + } + data_.clear(); + Status s = FinishImpl(options, *kvmap); + ASSERT_TRUE(s.ok()) << s.ToString(); + } + + // Construct the data structure from the data in "data" + virtual Status FinishImpl(const Options& options, const KVMap& data) = 0; + + virtual Iterator* NewIterator() const = 0; + + virtual const KVMap& data() { return data_; } + + private: + KVMap data_; +}; + +class BlockConstructor : public Constructor { + public: + BlockConstructor() : block_(NULL) {} + ~BlockConstructor() { delete block_; } + virtual Status FinishImpl(const Options& options, const KVMap& data) { + delete block_; + block_ = NULL; + BlockBuilder builder(&options); + + for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { + builder.Add(it->first, it->second); + } + // Open the block + data_ = builder.Finish().ToString(); + BlockContents contents; + contents.data = data_; + contents.cachable = false; + contents.heap_allocated = false; + block_ = new Block(contents); + return Status::OK(); + } + virtual Iterator* NewIterator() const { return block_->NewIterator(); } + + private: + string data_; + Block* block_; +}; + +class TableConstructor : public Constructor { + public: + TableConstructor() : source_(NULL), table_(NULL) {} + ~TableConstructor() { Reset(); } + virtual Status FinishImpl(const Options& options, const KVMap& data) { + Reset(); + StringSink sink; + TableBuilder builder(options, &sink); + + for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { + builder.Add(it->first, it->second); + TF_CHECK_OK(builder.status()); + } + Status s = builder.Finish(); + TF_CHECK_OK(s) << s.ToString(); + + CHECK_EQ(sink.contents().size(), builder.FileSize()); + + // Open the table + source_ = new StringSource(sink.contents()); + Options table_options; + return Table::Open(table_options, source_, sink.contents().size(), &table_); + } + + virtual Iterator* NewIterator() const { return table_->NewIterator(); } + + uint64 ApproximateOffsetOf(const StringPiece& key) const { + return table_->ApproximateOffsetOf(key); + } + + uint64 BytesRead() const { return source_->BytesRead(); } + + private: + void Reset() { + delete table_; + delete source_; + table_ = NULL; + source_ = NULL; + } + + StringSource* source_; + Table* table_; +}; + +enum TestType { TABLE_TEST, BLOCK_TEST }; + +struct TestArgs { + TestType type; + int restart_interval; +}; + +static const TestArgs kTestArgList[] = { + {TABLE_TEST, 16}, {TABLE_TEST, 1}, {TABLE_TEST, 1024}, + {BLOCK_TEST, 16}, {BLOCK_TEST, 1}, {BLOCK_TEST, 1024}, +}; +static const int kNumTestArgs = sizeof(kTestArgList) / sizeof(kTestArgList[0]); + +class Harness : public ::testing::Test { + public: + Harness() : constructor_(NULL) {} + + void Init(const TestArgs& args) { + delete constructor_; + constructor_ = NULL; + options_ = Options(); + + options_.block_restart_interval = args.restart_interval; + // Use shorter block size for tests to exercise block boundary + // conditions more. + options_.block_size = 256; + switch (args.type) { + case TABLE_TEST: + constructor_ = new TableConstructor(); + break; + case BLOCK_TEST: + constructor_ = new BlockConstructor(); + break; + } + } + + ~Harness() { delete constructor_; } + + void Add(const string& key, const string& value) { + constructor_->Add(key, value); + } + + void Test(random::SimplePhilox* rnd) { + std::vector<string> keys; + KVMap data; + constructor_->Finish(options_, &keys, &data); + + TestForwardScan(keys, data); + TestRandomAccess(rnd, keys, data); + } + + void TestForwardScan(const std::vector<string>& keys, const KVMap& data) { + Iterator* iter = constructor_->NewIterator(); + ASSERT_TRUE(!iter->Valid()); + iter->SeekToFirst(); + for (KVMap::const_iterator model_iter = data.begin(); + model_iter != data.end(); ++model_iter) { + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + iter->Next(); + } + ASSERT_TRUE(!iter->Valid()); + delete iter; + } + + void TestRandomAccess(random::SimplePhilox* rnd, + const std::vector<string>& keys, const KVMap& data) { + static const bool kVerbose = false; + Iterator* iter = constructor_->NewIterator(); + ASSERT_TRUE(!iter->Valid()); + KVMap::const_iterator model_iter = data.begin(); + if (kVerbose) fprintf(stderr, "---\n"); + for (int i = 0; i < 200; i++) { + const int toss = rnd->Uniform(3); + switch (toss) { + case 0: { + if (iter->Valid()) { + if (kVerbose) fprintf(stderr, "Next\n"); + iter->Next(); + ++model_iter; + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + } + break; + } + + case 1: { + if (kVerbose) fprintf(stderr, "SeekToFirst\n"); + iter->SeekToFirst(); + model_iter = data.begin(); + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + break; + } + + case 2: { + string key = PickRandomKey(rnd, keys); + model_iter = data.lower_bound(key); + if (kVerbose) + fprintf(stderr, "Seek '%s'\n", str_util::CEscape(key).c_str()); + iter->Seek(StringPiece(key)); + ASSERT_EQ(ToString(data, model_iter), ToString(iter)); + break; + } + } + } + delete iter; + } + + string ToString(const KVMap& data, const KVMap::const_iterator& it) { + if (it == data.end()) { + return "END"; + } else { + return "'" + it->first + "->" + it->second + "'"; + } + } + + string ToString(const KVMap& data, const KVMap::const_reverse_iterator& it) { + if (it == data.rend()) { + return "END"; + } else { + return "'" + it->first + "->" + it->second + "'"; + } + } + + string ToString(const Iterator* it) { + if (!it->Valid()) { + return "END"; + } else { + return "'" + it->key().ToString() + "->" + it->value().ToString() + "'"; + } + } + + string PickRandomKey(random::SimplePhilox* rnd, + const std::vector<string>& keys) { + if (keys.empty()) { + return "foo"; + } else { + const int index = rnd->Uniform(keys.size()); + string result = keys[index]; + switch (rnd->Uniform(3)) { + case 0: + // Return an existing key + break; + case 1: { + // Attempt to return something smaller than an existing key + if (result.size() > 0 && result[result.size() - 1] > '\0') { + result[result.size() - 1]--; + } + break; + } + case 2: { + // Return something larger than an existing key + Increment(&result); + break; + } + } + return result; + } + } + + private: + Options options_; + Constructor* constructor_; +}; + +// Test empty table/block. +TEST_F(Harness, Empty) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 1, 17); + random::SimplePhilox rnd(&philox); + Test(&rnd); + } +} + +// Special test for a block with no restart entries. The C++ leveldb +// code never generates such blocks, but the Java version of leveldb +// seems to. +TEST_F(Harness, ZeroRestartPointsInBlock) { + char data[sizeof(uint32)]; + memset(data, 0, sizeof(data)); + BlockContents contents; + contents.data = StringPiece(data, sizeof(data)); + contents.cachable = false; + contents.heap_allocated = false; + Block block(contents); + Iterator* iter = block.NewIterator(); + iter->SeekToFirst(); + ASSERT_TRUE(!iter->Valid()); + iter->Seek("foo"); + ASSERT_TRUE(!iter->Valid()); + delete iter; +} + +// Test the empty key +TEST_F(Harness, SimpleEmptyKey) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 1, 17); + random::SimplePhilox rnd(&philox); + Add("", "v"); + Test(&rnd); + } +} + +TEST_F(Harness, SimpleSingle) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 2, 17); + random::SimplePhilox rnd(&philox); + Add("abc", "v"); + Test(&rnd); + } +} + +TEST_F(Harness, SimpleMulti) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 3, 17); + random::SimplePhilox rnd(&philox); + Add("abc", "v"); + Add("abcd", "v"); + Add("ac", "v2"); + Test(&rnd); + } +} + +TEST_F(Harness, SimpleMultiBigValues) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 3, 17); + random::SimplePhilox rnd(&philox); + Add("ainitial", "tiny"); + Add("anext", string(10000000, 'a')); + Add("anext2", string(10000000, 'b')); + Add("azz", "tiny"); + Test(&rnd); + } +} + +TEST_F(Harness, SimpleSpecialKey) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 4, 17); + random::SimplePhilox rnd(&philox); + Add("\xff\xff", "v3"); + Test(&rnd); + } +} + +TEST_F(Harness, Randomized) { + for (int i = 0; i < kNumTestArgs; i++) { + Init(kTestArgList[i]); + random::PhiloxRandom philox(testing::RandomSeed() + 5, 17); + random::SimplePhilox rnd(&philox); + for (int num_entries = 0; num_entries < 2000; + num_entries += (num_entries < 50 ? 1 : 200)) { + if ((num_entries % 10) == 0) { + fprintf(stderr, "case %d of %d: num_entries = %d\n", (i + 1), + int(kNumTestArgs), num_entries); + } + for (int e = 0; e < num_entries; e++) { + string v; + Add(test::RandomKey(&rnd, rnd.Skewed(4)), + test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); + } + Test(&rnd); + } + } +} + +static bool Between(uint64 val, uint64 low, uint64 high) { + bool result = (val >= low) && (val <= high); + if (!result) { + fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", + (unsigned long long)(val), (unsigned long long)(low), + (unsigned long long)(high)); + } + return result; +} + +class TableTest {}; + +TEST(TableTest, ApproximateOffsetOfPlain) { + TableConstructor c; + c.Add("k01", "hello"); + c.Add("k02", "hello2"); + c.Add("k03", string(10000, 'x')); + c.Add("k04", string(200000, 'x')); + c.Add("k05", string(300000, 'x')); + c.Add("k06", "hello3"); + c.Add("k07", string(100000, 'x')); + std::vector<string> keys; + KVMap kvmap; + Options options; + options.block_size = 1024; + options.compression = kNoCompression; + c.Finish(options, &keys, &kvmap); + + ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 10, 500)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 210000, 211000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); +} + +static bool SnappyCompressionSupported() { + string out; + StringPiece in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + return port::Snappy_Compress(in.data(), in.size(), &out); +} + +TEST(TableTest, ApproximateOffsetOfCompressed) { + if (!SnappyCompressionSupported()) { + fprintf(stderr, "skipping compression tests\n"); + return; + } + + random::PhiloxRandom philox(301, 17); + random::SimplePhilox rnd(&philox); + TableConstructor c; + string tmp; + c.Add("k01", "hello"); + c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); + c.Add("k03", "hello3"); + c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); + std::vector<string> keys; + KVMap kvmap; + Options options; + options.block_size = 1024; + options.compression = kSnappyCompression; + c.Finish(options, &keys, &kvmap); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 10, 100)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); + ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000)); +} + +TEST(TableTest, SeekToFirstKeyDoesNotReadTooMuch) { + random::PhiloxRandom philox(301, 17); + random::SimplePhilox rnd(&philox); + string tmp; + TableConstructor c; + c.Add("k01", "firstvalue"); + c.Add("k03", test::CompressibleString(&rnd, 0.25, 1000000, &tmp)); + c.Add("k04", "abc"); + std::vector<string> keys; + KVMap kvmap; + Options options; + options.block_size = 1024; + options.compression = kNoCompression; + c.Finish(options, &keys, &kvmap); + + Iterator* iter = c.NewIterator(); + iter->Seek("k01"); + delete iter; + // Make sure we don't read the big second block when just trying to + // retrieve the data in the first key + EXPECT_LT(c.BytesRead(), 200); +} + +} // namespace table +} // namespace tensorflow |