// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "tensorflow/core/lib/io/table.h" #include #include #include #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/block.h" #include "tensorflow/core/lib/io/block_builder.h" #include "tensorflow/core/lib/io/format.h" #include "tensorflow/core/lib/io/iterator.h" #include "tensorflow/core/lib/io/table_builder.h" #include "tensorflow/core/lib/random/simple_philox.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/env.h" namespace tensorflow { namespace table { namespace test { static StringPiece RandomString(random::SimplePhilox* rnd, int len, string* dst) { dst->resize(len); for (int i = 0; i < len; i++) { (*dst)[i] = static_cast(' ' + rnd->Uniform(95)); // ' ' .. '~' } return StringPiece(*dst); } static string RandomKey(random::SimplePhilox* rnd, int len) { // Make sure to generate a wide variety of characters so we // test the boundary conditions for short-key optimizations. static const char kTestChars[] = {'\0', '\1', 'a', 'b', 'c', 'd', 'e', '\xfd', '\xfe', '\xff'}; string result; for (int i = 0; i < len; i++) { result += kTestChars[rnd->Uniform(sizeof(kTestChars))]; } return result; } static StringPiece CompressibleString(random::SimplePhilox* rnd, double compressed_fraction, size_t len, string* dst) { int raw = static_cast(len * compressed_fraction); if (raw < 1) raw = 1; string raw_data; RandomString(rnd, raw, &raw_data); // Duplicate the random data until we have filled "len" bytes dst->clear(); while (dst->size() < len) { dst->append(raw_data); } dst->resize(len); return StringPiece(*dst); } } static void Increment(string* key) { key->push_back('\0'); } // An STL comparator that compares two StringPieces namespace { struct STLLessThan { STLLessThan() {} bool operator()(const string& a, const string& b) const { return StringPiece(a).compare(StringPiece(b)) < 0; } }; } // namespace class StringSink : public WritableFile { public: ~StringSink() {} const string& contents() const { return contents_; } virtual Status Close() { return Status::OK(); } virtual Status Flush() { return Status::OK(); } virtual Status Sync() { return Status::OK(); } virtual Status Append(const StringPiece& data) { contents_.append(data.data(), data.size()); return Status::OK(); } private: string contents_; }; class StringSource : public RandomAccessFile { public: StringSource(const StringPiece& contents) : contents_(contents.data(), contents.size()), bytes_read_(0) {} virtual ~StringSource() {} uint64 Size() const { return contents_.size(); } virtual Status Read(uint64 offset, size_t n, StringPiece* result, char* scratch) const { if (offset > contents_.size()) { return errors::InvalidArgument("invalid Read offset"); } if (offset + n > contents_.size()) { n = contents_.size() - offset; } memcpy(scratch, &contents_[offset], n); *result = StringPiece(scratch, n); bytes_read_ += n; return Status::OK(); } uint64 BytesRead() const { return bytes_read_; } private: string contents_; mutable uint64 bytes_read_; }; typedef std::map KVMap; // Helper class for tests to unify the interface between // BlockBuilder/TableBuilder and Block/Table. class Constructor { public: explicit Constructor() : data_(STLLessThan()) {} virtual ~Constructor() {} void Add(const string& key, const StringPiece& value) { data_[key] = value.ToString(); } // Finish constructing the data structure with all the keys that have // been added so far. Returns the keys in sorted order in "*keys" // and stores the key/value pairs in "*kvmap" void Finish(const Options& options, std::vector* keys, KVMap* kvmap) { *kvmap = data_; keys->clear(); for (KVMap::const_iterator it = data_.begin(); it != data_.end(); ++it) { keys->push_back(it->first); } data_.clear(); Status s = FinishImpl(options, *kvmap); ASSERT_TRUE(s.ok()) << s.ToString(); } // Construct the data structure from the data in "data" virtual Status FinishImpl(const Options& options, const KVMap& data) = 0; virtual Iterator* NewIterator() const = 0; virtual const KVMap& data() { return data_; } private: KVMap data_; }; class BlockConstructor : public Constructor { public: BlockConstructor() : block_(NULL) {} ~BlockConstructor() { delete block_; } virtual Status FinishImpl(const Options& options, const KVMap& data) { delete block_; block_ = NULL; BlockBuilder builder(&options); for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { builder.Add(it->first, it->second); } // Open the block data_ = builder.Finish().ToString(); BlockContents contents; contents.data = data_; contents.cachable = false; contents.heap_allocated = false; block_ = new Block(contents); return Status::OK(); } virtual Iterator* NewIterator() const { return block_->NewIterator(); } private: string data_; Block* block_; }; class TableConstructor : public Constructor { public: TableConstructor() : source_(NULL), table_(NULL) {} ~TableConstructor() { Reset(); } virtual Status FinishImpl(const Options& options, const KVMap& data) { Reset(); StringSink sink; TableBuilder builder(options, &sink); for (KVMap::const_iterator it = data.begin(); it != data.end(); ++it) { builder.Add(it->first, it->second); TF_CHECK_OK(builder.status()); } Status s = builder.Finish(); TF_CHECK_OK(s) << s.ToString(); CHECK_EQ(sink.contents().size(), builder.FileSize()); // Open the table source_ = new StringSource(sink.contents()); Options table_options; return Table::Open(table_options, source_, sink.contents().size(), &table_); } virtual Iterator* NewIterator() const { return table_->NewIterator(); } uint64 ApproximateOffsetOf(const StringPiece& key) const { return table_->ApproximateOffsetOf(key); } uint64 BytesRead() const { return source_->BytesRead(); } private: void Reset() { delete table_; delete source_; table_ = NULL; source_ = NULL; } StringSource* source_; Table* table_; }; enum TestType { TABLE_TEST, BLOCK_TEST }; struct TestArgs { TestType type; int restart_interval; }; static const TestArgs kTestArgList[] = { {TABLE_TEST, 16}, {TABLE_TEST, 1}, {TABLE_TEST, 1024}, {BLOCK_TEST, 16}, {BLOCK_TEST, 1}, {BLOCK_TEST, 1024}, }; static const int kNumTestArgs = sizeof(kTestArgList) / sizeof(kTestArgList[0]); class Harness : public ::testing::Test { public: Harness() : constructor_(NULL) {} void Init(const TestArgs& args) { delete constructor_; constructor_ = NULL; options_ = Options(); options_.block_restart_interval = args.restart_interval; // Use shorter block size for tests to exercise block boundary // conditions more. options_.block_size = 256; switch (args.type) { case TABLE_TEST: constructor_ = new TableConstructor(); break; case BLOCK_TEST: constructor_ = new BlockConstructor(); break; } } ~Harness() { delete constructor_; } void Add(const string& key, const string& value) { constructor_->Add(key, value); } void Test(random::SimplePhilox* rnd) { std::vector keys; KVMap data; constructor_->Finish(options_, &keys, &data); TestForwardScan(keys, data); TestRandomAccess(rnd, keys, data); } void TestForwardScan(const std::vector& keys, const KVMap& data) { Iterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); iter->SeekToFirst(); for (KVMap::const_iterator model_iter = data.begin(); model_iter != data.end(); ++model_iter) { ASSERT_EQ(ToString(data, model_iter), ToString(iter)); iter->Next(); } ASSERT_TRUE(!iter->Valid()); delete iter; } void TestRandomAccess(random::SimplePhilox* rnd, const std::vector& keys, const KVMap& data) { static const bool kVerbose = false; Iterator* iter = constructor_->NewIterator(); ASSERT_TRUE(!iter->Valid()); KVMap::const_iterator model_iter = data.begin(); if (kVerbose) fprintf(stderr, "---\n"); for (int i = 0; i < 200; i++) { const int toss = rnd->Uniform(3); switch (toss) { case 0: { if (iter->Valid()) { if (kVerbose) fprintf(stderr, "Next\n"); iter->Next(); ++model_iter; ASSERT_EQ(ToString(data, model_iter), ToString(iter)); } break; } case 1: { if (kVerbose) fprintf(stderr, "SeekToFirst\n"); iter->SeekToFirst(); model_iter = data.begin(); ASSERT_EQ(ToString(data, model_iter), ToString(iter)); break; } case 2: { string key = PickRandomKey(rnd, keys); model_iter = data.lower_bound(key); if (kVerbose) fprintf(stderr, "Seek '%s'\n", str_util::CEscape(key).c_str()); iter->Seek(StringPiece(key)); ASSERT_EQ(ToString(data, model_iter), ToString(iter)); break; } } } delete iter; } string ToString(const KVMap& data, const KVMap::const_iterator& it) { if (it == data.end()) { return "END"; } else { return "'" + it->first + "->" + it->second + "'"; } } string ToString(const KVMap& data, const KVMap::const_reverse_iterator& it) { if (it == data.rend()) { return "END"; } else { return "'" + it->first + "->" + it->second + "'"; } } string ToString(const Iterator* it) { if (!it->Valid()) { return "END"; } else { return "'" + it->key().ToString() + "->" + it->value().ToString() + "'"; } } string PickRandomKey(random::SimplePhilox* rnd, const std::vector& keys) { if (keys.empty()) { return "foo"; } else { const int index = rnd->Uniform(keys.size()); string result = keys[index]; switch (rnd->Uniform(3)) { case 0: // Return an existing key break; case 1: { // Attempt to return something smaller than an existing key if (result.size() > 0 && result[result.size() - 1] > '\0') { result[result.size() - 1]--; } break; } case 2: { // Return something larger than an existing key Increment(&result); break; } } return result; } } private: Options options_; Constructor* constructor_; }; // Test empty table/block. TEST_F(Harness, Empty) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 1, 17); random::SimplePhilox rnd(&philox); Test(&rnd); } } // Special test for a block with no restart entries. The C++ leveldb // code never generates such blocks, but the Java version of leveldb // seems to. TEST_F(Harness, ZeroRestartPointsInBlock) { char data[sizeof(uint32)]; memset(data, 0, sizeof(data)); BlockContents contents; contents.data = StringPiece(data, sizeof(data)); contents.cachable = false; contents.heap_allocated = false; Block block(contents); Iterator* iter = block.NewIterator(); iter->SeekToFirst(); ASSERT_TRUE(!iter->Valid()); iter->Seek("foo"); ASSERT_TRUE(!iter->Valid()); delete iter; } // Test the empty key TEST_F(Harness, SimpleEmptyKey) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 1, 17); random::SimplePhilox rnd(&philox); Add("", "v"); Test(&rnd); } } TEST_F(Harness, SimpleSingle) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 2, 17); random::SimplePhilox rnd(&philox); Add("abc", "v"); Test(&rnd); } } TEST_F(Harness, SimpleMulti) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 3, 17); random::SimplePhilox rnd(&philox); Add("abc", "v"); Add("abcd", "v"); Add("ac", "v2"); Test(&rnd); } } TEST_F(Harness, SimpleMultiBigValues) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 3, 17); random::SimplePhilox rnd(&philox); Add("ainitial", "tiny"); Add("anext", string(10000000, 'a')); Add("anext2", string(10000000, 'b')); Add("azz", "tiny"); Test(&rnd); } } TEST_F(Harness, SimpleSpecialKey) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 4, 17); random::SimplePhilox rnd(&philox); Add("\xff\xff", "v3"); Test(&rnd); } } TEST_F(Harness, Randomized) { for (int i = 0; i < kNumTestArgs; i++) { Init(kTestArgList[i]); random::PhiloxRandom philox(testing::RandomSeed() + 5, 17); random::SimplePhilox rnd(&philox); for (int num_entries = 0; num_entries < 2000; num_entries += (num_entries < 50 ? 1 : 200)) { if ((num_entries % 10) == 0) { fprintf(stderr, "case %d of %d: num_entries = %d\n", (i + 1), int(kNumTestArgs), num_entries); } for (int e = 0; e < num_entries; e++) { string v; Add(test::RandomKey(&rnd, rnd.Skewed(4)), test::RandomString(&rnd, rnd.Skewed(5), &v).ToString()); } Test(&rnd); } } } static bool Between(uint64 val, uint64 low, uint64 high) { bool result = (val >= low) && (val <= high); if (!result) { fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n", (unsigned long long)(val), (unsigned long long)(low), (unsigned long long)(high)); } return result; } class TableTest {}; TEST(TableTest, ApproximateOffsetOfPlain) { TableConstructor c; c.Add("k01", "hello"); c.Add("k02", "hello2"); c.Add("k03", string(10000, 'x')); c.Add("k04", string(200000, 'x')); c.Add("k05", string(300000, 'x')); c.Add("k06", "hello3"); c.Add("k07", string(100000, 'x')); std::vector keys; KVMap kvmap; Options options; options.block_size = 1024; options.compression = kNoCompression; c.Finish(options, &keys, &kvmap); ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 10, 500)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 10000, 11000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 210000, 211000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000)); } static bool SnappyCompressionSupported() { string out; StringPiece in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; return port::Snappy_Compress(in.data(), in.size(), &out); } TEST(TableTest, ApproximateOffsetOfCompressed) { if (!SnappyCompressionSupported()) { fprintf(stderr, "skipping compression tests\n"); return; } random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); TableConstructor c; string tmp; c.Add("k01", "hello"); c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); c.Add("k03", "hello3"); c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp)); std::vector keys; KVMap kvmap; Options options; options.block_size = 1024; options.compression = kSnappyCompression; c.Finish(options, &keys, &kvmap); ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 10, 100)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000)); } TEST(TableTest, SeekToFirstKeyDoesNotReadTooMuch) { random::PhiloxRandom philox(301, 17); random::SimplePhilox rnd(&philox); string tmp; TableConstructor c; c.Add("k01", "firstvalue"); c.Add("k03", test::CompressibleString(&rnd, 0.25, 1000000, &tmp)); c.Add("k04", "abc"); std::vector keys; KVMap kvmap; Options options; options.block_size = 1024; options.compression = kNoCompression; c.Finish(options, &keys, &kvmap); Iterator* iter = c.NewIterator(); iter->Seek("k01"); delete iter; // Make sure we don't read the big second block when just trying to // retrieve the data in the first key EXPECT_LT(c.BytesRead(), 200); } } // namespace table } // namespace tensorflow