Diffstat (limited to 'tensorflow/core/lib/core')
-rw-r--r--  tensorflow/core/lib/core/arena.cc                 246
-rw-r--r--  tensorflow/core/lib/core/arena.h                   90
-rw-r--r--  tensorflow/core/lib/core/arena_test.cc             92
-rw-r--r--  tensorflow/core/lib/core/bit_cast_test.cc          95
-rw-r--r--  tensorflow/core/lib/core/bits.h                    84
-rw-r--r--  tensorflow/core/lib/core/blocking_counter.h        41
-rw-r--r--  tensorflow/core/lib/core/blocking_counter_test.cc  36
-rw-r--r--  tensorflow/core/lib/core/casts.h                   85
-rw-r--r--  tensorflow/core/lib/core/coding.cc                 164
-rw-r--r--  tensorflow/core/lib/core/coding.h                  55
-rw-r--r--  tensorflow/core/lib/core/coding_test.cc            168
-rw-r--r--  tensorflow/core/lib/core/command_line_flags.cc     94
-rw-r--r--  tensorflow/core/lib/core/command_line_flags.h      60
-rw-r--r--  tensorflow/core/lib/core/error_codes.proto         145
-rw-r--r--  tensorflow/core/lib/core/errors.h                  131
-rw-r--r--  tensorflow/core/lib/core/notification.h            42
-rw-r--r--  tensorflow/core/lib/core/notification_test.cc      64
-rw-r--r--  tensorflow/core/lib/core/raw_coding.h              43
-rw-r--r--  tensorflow/core/lib/core/refcount.cc               35
-rw-r--r--  tensorflow/core/lib/core/refcount.h                63
-rw-r--r--  tensorflow/core/lib/core/refcount_test.cc          92
-rw-r--r--  tensorflow/core/lib/core/status.cc                 107
-rw-r--r--  tensorflow/core/lib/core/status_test.cc            84
-rw-r--r--  tensorflow/core/lib/core/status_test_util.h        20
-rw-r--r--  tensorflow/core/lib/core/stringpiece.cc            57
-rw-r--r--  tensorflow/core/lib/core/stringpiece.h             159
-rw-r--r--  tensorflow/core/lib/core/threadpool.cc             108
-rw-r--r--  tensorflow/core/lib/core/threadpool.h              59
-rw-r--r--  tensorflow/core/lib/core/threadpool_test.cc        93
29 files changed, 2612 insertions, 0 deletions
diff --git a/tensorflow/core/lib/core/arena.cc b/tensorflow/core/lib/core/arena.cc
new file mode 100644
index 0000000000..ceb1001af0
--- /dev/null
+++ b/tensorflow/core/lib/core/arena.cc
@@ -0,0 +1,246 @@
+// This approach to arenas overcomes many of the limitations described
+// in the "Specialized allocators" section of
+// http://www.pdos.lcs.mit.edu/~dm/c++-new.html
+//
+// A somewhat similar approach to Gladiator, but for heap-detection, was
+// suggested by Ron van der Wal and Scott Meyers at
+// http://www.aristeia.com/BookErrata/M27Comments_frames.html
+
+#include "tensorflow/core/lib/core/arena.h"
+
+#include <assert.h>
+#include <unistd.h>
+
+#include <vector>
+
+#include "tensorflow/core/platform/logging.h"
+namespace tensorflow {
+namespace core {
+
+static const int kPageSize = getpagesize();
+
+// ----------------------------------------------------------------------
+// Arena::Arena()
+// Arena::~Arena()
+// Destroying the arena automatically calls Reset()
+// ----------------------------------------------------------------------
+
+Arena::Arena(const size_t block_size)
+ : remaining_(0),
+ block_size_(block_size),
+ freestart_(NULL), // set for real in Reset()
+ blocks_alloced_(1),
+ overflow_blocks_(NULL) {
+ assert(block_size > kDefaultAlignment);
+
+ first_blocks_[0].mem = reinterpret_cast<char*>(malloc(block_size_));
+
+ first_blocks_[0].size = block_size_;
+
+ Reset();
+}
+
+Arena::~Arena() {
+ FreeBlocks();
+ assert(overflow_blocks_ == NULL); // FreeBlocks() should do that
+ // The first X blocks stay allocated always by default. Delete them now.
+ for (size_t i = 0; i < blocks_alloced_; ++i) free(first_blocks_[i].mem);
+}
+
+// Returns true iff it advances freestart_ to the first position
+// satisfying alignment without exhausting the current block.
+bool Arena::SatisfyAlignment(size_t alignment) {
+ const size_t overage = reinterpret_cast<size_t>(freestart_) & (alignment - 1);
+ if (overage > 0) {
+ const size_t waste = alignment - overage;
+ if (waste >= remaining_) {
+ return false;
+ }
+ freestart_ += waste;
+ remaining_ -= waste;
+ }
+ DCHECK_EQ(0, reinterpret_cast<size_t>(freestart_) & (alignment - 1));
+ return true;
+}
+
+// ----------------------------------------------------------------------
+// Arena::Reset()
+// Clears all the memory an arena is using.
+// ----------------------------------------------------------------------
+
+void Arena::Reset() {
+ FreeBlocks();
+ freestart_ = first_blocks_[0].mem;
+ remaining_ = first_blocks_[0].size;
+
+ // There is no guarantee the first block is properly aligned, so
+ // enforce that now.
+ CHECK(SatisfyAlignment(kDefaultAlignment));
+
+ freestart_when_empty_ = freestart_;
+}
+
+// ----------------------------------------------------------------------
+// Arena::MakeNewBlock()
+// Our sbrk() equivalent. We always make blocks of the same size
+// (though GetMemory() can also make a new block for really big
+// data).
+// ----------------------------------------------------------------------
+
+void Arena::MakeNewBlock(const uint32 alignment) {
+ AllocatedBlock* block = AllocNewBlock(block_size_, alignment);
+ freestart_ = block->mem;
+ remaining_ = block->size;
+ CHECK(SatisfyAlignment(alignment));
+}
+
+// The following simple numeric routines also exist in util/math/mathutil.h
+// but we don't want to depend on that library.
+
+// Euclid's algorithm for the Greatest Common Divisor.
+static uint32 GCD(uint32 x, uint32 y) {
+ while (y != 0) {
+ uint32 r = x % y;
+ x = y;
+ y = r;
+ }
+ return x;
+}
+
+static uint32 LeastCommonMultiple(uint32 a, uint32 b) {
+ if (a > b) {
+ return (a / GCD(a, b)) * b;
+ } else if (a < b) {
+ return (b / GCD(b, a)) * a;
+ } else {
+ return a;
+ }
+}
+
+// -------------------------------------------------------------
+// Arena::AllocNewBlock()
+// Adds and returns an AllocatedBlock.
+// The returned AllocatedBlock* is valid until the next call
+// to AllocNewBlock or Reset (i.e. anything that might
+// affect overflow_blocks_).
+// -------------------------------------------------------------
+
+Arena::AllocatedBlock* Arena::AllocNewBlock(const size_t block_size,
+ const uint32 alignment) {
+ AllocatedBlock* block;
+ // Find the next block.
+ if (blocks_alloced_ < TF_ARRAYSIZE(first_blocks_)) {
+ // Use one of the pre-allocated blocks
+ block = &first_blocks_[blocks_alloced_++];
+ } else { // oops, out of space, move to the vector
+ if (overflow_blocks_ == NULL)
+ overflow_blocks_ = new std::vector<AllocatedBlock>;
+ // Adds another block to the vector.
+ overflow_blocks_->resize(overflow_blocks_->size() + 1);
+ // block points to the last block of the vector.
+ block = &overflow_blocks_->back();
+ }
+
+ // NOTE(tucker): this utility is made slightly more complex by
+ // not disallowing the case where alignment > block_size.
+  // Can we disallow that case without breaking existing code?
+
+ // Must be a multiple of kDefaultAlignment, unless requested
+ // alignment is 1, in which case we don't care at all.
+ const uint32 adjusted_alignment =
+ (alignment > 1 ? LeastCommonMultiple(alignment, kDefaultAlignment) : 1);
+
+ CHECK_LE(adjusted_alignment, 1 << 20)
+ << "Alignment on boundaries greater than 1MB not supported.";
+
+ // If block_size > alignment we force block_size to be a multiple
+ // of alignment; if block_size < alignment we make no adjustment.
+ size_t adjusted_block_size = block_size;
+ if (adjusted_alignment > 1) {
+ if (adjusted_block_size > adjusted_alignment) {
+ const uint32 excess = adjusted_block_size % adjusted_alignment;
+ adjusted_block_size += (excess > 0 ? adjusted_alignment - excess : 0);
+ }
+ block->mem = reinterpret_cast<char*>(
+ port::aligned_malloc(adjusted_block_size, adjusted_alignment));
+ } else {
+ block->mem = reinterpret_cast<char*>(malloc(adjusted_block_size));
+ }
+ block->size = adjusted_block_size;
+ CHECK(NULL != block->mem) << "block_size=" << block_size
+ << " adjusted_block_size=" << adjusted_block_size
+ << " alignment=" << alignment
+ << " adjusted_alignment=" << adjusted_alignment;
+
+ return block;
+}
+
+// ----------------------------------------------------------------------
+// Arena::GetMemoryFallback()
+// We take memory out of our pool, aligned on the byte boundary
+// requested. If we don't have space in our current pool, we
+// allocate a new block (wasting the remaining space in the
+// current block) and give you that. If your memory needs are
+// too big for a single block, we make a special your-memory-only
+// allocation -- this is equivalent to not using the arena at all.
+// ----------------------------------------------------------------------
+
+void* Arena::GetMemoryFallback(const size_t size, const int alignment) {
+ if (0 == size) {
+ return NULL; // stl/stl_alloc.h says this is okay
+ }
+
+ // alignment must be a positive power of 2.
+ CHECK(alignment > 0 && 0 == (alignment & (alignment - 1)));
+
+ // If the object is more than a quarter of the block size, allocate
+ // it separately to avoid wasting too much space in leftover bytes.
+ if (block_size_ == 0 || size > block_size_ / 4) {
+ return AllocNewBlock(size, alignment)->mem;
+ }
+
+ // Enforce alignment on freestart_ then check for adequate space,
+ // which may require starting a new block.
+ if (!SatisfyAlignment(alignment) || size > remaining_) {
+ MakeNewBlock(alignment);
+ }
+ CHECK_LE(size, remaining_);
+
+ remaining_ -= size;
+ void* result = freestart_;
+ freestart_ += size;
+
+ return result;
+}
+
+// ----------------------------------------------------------------------
+// Arena::ReturnMemoryFallback()
+// Arena::FreeBlocks()
+// Unlike GetMemory(), which does actual work, ReturnMemory() is a
+// no-op: we don't "free" memory until Reset() is called. We do
+// update some stats, though. Note we do no checking that the
+// pointer you pass in was actually allocated by us, or that it
+// was allocated for the size you say, so be careful here!
+// FreeBlocks() does the work for Reset(), actually freeing all
+// memory allocated in one fell swoop.
+// ----------------------------------------------------------------------
+
+void Arena::FreeBlocks() {
+ for (size_t i = 1; i < blocks_alloced_; ++i) { // keep first block alloced
+ free(first_blocks_[i].mem);
+ first_blocks_[i].mem = NULL;
+ first_blocks_[i].size = 0;
+ }
+ blocks_alloced_ = 1;
+ if (overflow_blocks_ != NULL) {
+ std::vector<AllocatedBlock>::iterator it;
+ for (it = overflow_blocks_->begin(); it != overflow_blocks_->end(); ++it) {
+ free(it->mem);
+ }
+ delete overflow_blocks_; // These should be used very rarely
+ overflow_blocks_ = NULL;
+ }
+}
+
+} // namespace core
+} // namespace tensorflow
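
Illustration (not part of this change): a minimal usage sketch of the new Arena class, assuming only the interface declared in arena.h below.

#include "tensorflow/core/lib/core/arena.h"

void ArenaExample() {
  // 8 KB blocks; small allocations are carved out of the current block.
  tensorflow::core::Arena arena(8192);
  char* a = arena.Alloc(100);
  char* b = arena.Alloc(200);
  // ... fill a and b ...
  (void)a;
  (void)b;
  // Reset() releases everything allocated so far in one shot; individual
  // allocations are never freed separately.
  arena.Reset();
}
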
diff --git a/tensorflow/core/lib/core/arena.h b/tensorflow/core/lib/core/arena.h
new file mode 100644
index 0000000000..59896803bb
--- /dev/null
+++ b/tensorflow/core/lib/core/arena.h
@@ -0,0 +1,90 @@
+// TODO(vrv): Switch this to an open-sourced version of Arena.
+
+#ifndef TENSORFLOW_LIB_CORE_ARENA_H_
+#define TENSORFLOW_LIB_CORE_ARENA_H_
+
+#include <assert.h>
+
+#include <vector>
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+namespace core {
+
+// This class is "thread-compatible": different threads can access the
+// arena at the same time without locking, as long as they use only
+// const methods.
+class Arena {
+ public:
+ // Allocates a thread-compatible arena with the specified block size.
+ explicit Arena(const size_t block_size);
+ ~Arena();
+
+ char* Alloc(const size_t size) {
+ return reinterpret_cast<char*>(GetMemory(size, 1));
+ }
+
+ void Reset();
+
+// This should be the worst-case alignment for any type. This is
+// good for IA-32, SPARC version 7 (the last one I know), and
+// supposedly Alpha. i386 would be more time-efficient with a
+// default alignment of 8, but ::operator new() uses alignment of 4,
+// and an assertion will fail below after the call to MakeNewBlock()
+// if you try to use a larger alignment.
+#ifdef __i386__
+ static const int kDefaultAlignment = 4;
+#else
+ static const int kDefaultAlignment = 8;
+#endif
+
+ protected:
+ bool SatisfyAlignment(const size_t alignment);
+ void MakeNewBlock(const uint32 alignment);
+ void* GetMemoryFallback(const size_t size, const int align);
+ void* GetMemory(const size_t size, const int align) {
+ assert(remaining_ <= block_size_); // an invariant
+ if (size > 0 && size < remaining_ && align == 1) { // common case
+ void* result = freestart_;
+ freestart_ += size;
+ remaining_ -= size;
+ return result;
+ }
+ return GetMemoryFallback(size, align);
+ }
+
+ size_t remaining_;
+
+ private:
+ struct AllocatedBlock {
+ char* mem;
+ size_t size;
+ };
+
+  // Allocate a new block of at least block_size, with the specified
+ // alignment.
+ // The returned AllocatedBlock* is valid until the next call to AllocNewBlock
+ // or Reset (i.e. anything that might affect overflow_blocks_).
+ AllocatedBlock* AllocNewBlock(const size_t block_size,
+ const uint32 alignment);
+
+ const size_t block_size_;
+ char* freestart_; // beginning of the free space in most recent block
+ char* freestart_when_empty_; // beginning of the free space when we're empty
+ // STL vector isn't as efficient as it could be, so we use an array at first
+ size_t blocks_alloced_; // how many of the first_blocks_ have been alloced
+ AllocatedBlock first_blocks_[16]; // the length of this array is arbitrary
+ // if the first_blocks_ aren't enough, expand into overflow_blocks_.
+ std::vector<AllocatedBlock>* overflow_blocks_;
+
+ void FreeBlocks(); // Frees all except first block
+
+ TF_DISALLOW_COPY_AND_ASSIGN(Arena);
+};
+
+} // namespace core
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_ARENA_H_
diff --git a/tensorflow/core/lib/core/arena_test.cc b/tensorflow/core/lib/core/arena_test.cc
new file mode 100644
index 0000000000..fa147c3014
--- /dev/null
+++ b/tensorflow/core/lib/core/arena_test.cc
@@ -0,0 +1,92 @@
+#include "tensorflow/core/lib/core/arena.h"
+
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace core {
+namespace {
+
+// Write random data to allocated memory
+static void TestMemory(void* mem, int size) {
+ // Check that we can memset the entire memory
+ memset(mem, 0xaa, size);
+
+ // Do some memory allocation to check that the arena doesn't mess up
+ // the internal memory allocator
+ char* tmp[100];
+ for (size_t i = 0; i < TF_ARRAYSIZE(tmp); i++) {
+ tmp[i] = new char[i * i + 1];
+ }
+
+ memset(mem, 0xcc, size);
+
+  // Free up the allocated memory.
+ for (size_t i = 0; i < TF_ARRAYSIZE(tmp); i++) {
+ delete[] tmp[i];
+ }
+
+ // Check that we can memset the entire memory
+ memset(mem, 0xee, size);
+}
+
+TEST(ArenaTest, TestBasicArena) {
+ Arena a(1024);
+ char* memory = a.Alloc(100);
+ ASSERT_NE(memory, nullptr);
+ TestMemory(memory, 100);
+
+ // Allocate again
+ memory = a.Alloc(100);
+ ASSERT_NE(memory, nullptr);
+ TestMemory(memory, 100);
+}
+
+TEST(ArenaTest, TestVariousArenaSizes) {
+ {
+ Arena a(1024);
+
+ // Allocate blocksize
+ char* memory = a.Alloc(1024);
+ ASSERT_NE(memory, nullptr);
+ TestMemory(memory, 1024);
+
+ // Allocate another blocksize
+ char* memory2 = a.Alloc(1024);
+ ASSERT_NE(memory2, nullptr);
+ TestMemory(memory2, 1024);
+ }
+
+ // Allocate an arena and allocate two blocks
+ // that together exceed a block size
+ {
+ Arena a(1024);
+
+    // Allocate 768 bytes, most of one 1024-byte block
+ char* memory = a.Alloc(768);
+ ASSERT_NE(memory, nullptr);
+ TestMemory(memory, 768);
+
+    // Allocate another 768 bytes; together the two allocations exceed one block
+ char* memory2 = a.Alloc(768);
+ ASSERT_NE(memory2, nullptr);
+ TestMemory(memory2, 768);
+ }
+
+ // Allocate larger than a blocksize
+ {
+ Arena a(1024);
+
+ char* memory = a.Alloc(10240);
+ ASSERT_NE(memory, nullptr);
+ TestMemory(memory, 10240);
+
+    // Allocate another chunk that is also larger than the block size
+ char* memory2 = a.Alloc(1234);
+ ASSERT_NE(memory2, nullptr);
+ TestMemory(memory2, 1234);
+ }
+}
+
+} // namespace
+} // namespace core
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/bit_cast_test.cc b/tensorflow/core/lib/core/bit_cast_test.cc
new file mode 100644
index 0000000000..0ea583e96f
--- /dev/null
+++ b/tensorflow/core/lib/core/bit_cast_test.cc
@@ -0,0 +1,95 @@
+// Unit test for bit_cast template.
+
+#include "tensorflow/core/lib/core/casts.h"
+#include "tensorflow/core/platform/logging.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+
+// Marshall and unmarshall.
+// ISO spec C++ section 3.9 promises this will work.
+
+template <int N>
+struct marshall {
+ char buf[N];
+};
+
+template <class T>
+void TestMarshall(const T values[], int num_values) {
+ for (int i = 0; i < num_values; ++i) {
+ T t0 = values[i];
+ marshall<sizeof(T)> m0 = bit_cast<marshall<sizeof(T)> >(t0);
+ T t1 = bit_cast<T>(m0);
+ marshall<sizeof(T)> m1 = bit_cast<marshall<sizeof(T)> >(t1);
+ ASSERT_EQ(0, memcmp(&t0, &t1, sizeof(T)));
+ ASSERT_EQ(0, memcmp(&m0, &m1, sizeof(T)));
+ }
+}
+
+// Convert back and forth to an integral type. The C++ standard does
+// not guarantee this will work.
+//
+// There are implicit assumptions about sizeof(float) and
+// sizeof(double). These assumptions hold on essentially every platform we target.
+
+template <class T, class I>
+void TestIntegral(const T values[], int num_values) {
+ for (int i = 0; i < num_values; ++i) {
+ T t0 = values[i];
+ I i0 = bit_cast<I>(t0);
+ T t1 = bit_cast<T>(i0);
+ I i1 = bit_cast<I>(t1);
+ ASSERT_EQ(0, memcmp(&t0, &t1, sizeof(T)));
+ ASSERT_EQ(i0, i1);
+ }
+}
+
+TEST(BitCast, Bool) {
+ LOG(INFO) << "Test bool";
+ static const bool bool_list[] = {false, true};
+ TestMarshall<bool>(bool_list, TF_ARRAYSIZE(bool_list));
+}
+
+TEST(BitCast, Int32) {
+ static const int32 int_list[] = {0, 1, 100, 2147483647,
+ -1, -100, -2147483647, -2147483647 - 1};
+ TestMarshall<int32>(int_list, TF_ARRAYSIZE(int_list));
+}
+
+TEST(BitCast, Int64) {
+ static const int64 int64_list[] = {0, 1, 1LL << 40, -1, -(1LL << 40)};
+ TestMarshall<int64>(int64_list, TF_ARRAYSIZE(int64_list));
+}
+
+TEST(BitCast, Uint64) {
+ static const uint64 uint64_list[] = {0, 1, 1LLU << 40, 1LLU << 63};
+ TestMarshall<uint64>(uint64_list, TF_ARRAYSIZE(uint64_list));
+}
+
+TEST(BitCast, Float) {
+ static const float float_list[] = {0.0, 1.0, -1.0, 10.0, -10.0, 1e10,
+ 1e20, 1e-10, 1e-20, 2.71828, 3.14159};
+ TestMarshall<float>(float_list, TF_ARRAYSIZE(float_list));
+ TestIntegral<float, int32>(float_list, TF_ARRAYSIZE(float_list));
+ TestIntegral<float, uint32>(float_list, TF_ARRAYSIZE(float_list));
+}
+
+TEST(BitCast, Double) {
+ static const double double_list[] = {
+ 0.0,
+ 1.0,
+ -1.0,
+ 10.0,
+ -10.0,
+ 1e10,
+ 1e100,
+ 1e-10,
+ 1e-100,
+ 2.718281828459045,
+ 3.141592653589793238462643383279502884197169399375105820974944};
+ TestMarshall<double>(double_list, TF_ARRAYSIZE(double_list));
+ TestIntegral<double, int64>(double_list, TF_ARRAYSIZE(double_list));
+ TestIntegral<double, uint64>(double_list, TF_ARRAYSIZE(double_list));
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/bits.h b/tensorflow/core/lib/core/bits.h
new file mode 100644
index 0000000000..5456a63168
--- /dev/null
+++ b/tensorflow/core/lib/core/bits.h
@@ -0,0 +1,84 @@
+#ifndef TENSORFLOW_LIB_CORE_BITS_H_
+#define TENSORFLOW_LIB_CORE_BITS_H_
+
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+
+// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+int Log2Floor(uint32 n);
+int Log2Floor64(uint64 n);
+
+// Return ceiling(log2(n)) for positive integer n. Returns -1 iff n == 0.
+int Log2Ceiling(uint32 n);
+int Log2Ceiling64(uint64 n);
+
+// ------------------------------------------------------------------------
+// Implementation details follow
+// ------------------------------------------------------------------------
+
+#if defined(__GNUC__)
+
+// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+inline int Log2Floor(uint32 n) {
+ return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+}
+
+// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+inline int Log2Floor64(uint64 n) {
+ return n == 0 ? -1 : 63 ^ __builtin_clzll(n);
+}
+
+#else
+
+// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+inline int Log2Floor(uint32 n) {
+ if (n == 0)
+ return -1;
+ int log = 0;
+ uint32 value = n;
+ for (int i = 4; i >= 0; --i) {
+ int shift = (1 << i);
+ uint32 x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ assert(value == 1);
+ return log;
+}
+
+// Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+// Log2Floor64() is defined in terms of Log2Floor()
+inline int Log2Floor64(uint64 n) {
+ const uint32 topbits = static_cast<uint32>(n >> 32);
+ if (topbits == 0) {
+ // Top bits are zero, so scan in bottom bits
+ return Log2Floor(static_cast<uint32>(n));
+ } else {
+ return 32 + Log2Floor(topbits);
+ }
+}
+
+#endif
+
+inline int Log2Ceiling(uint32 n) {
+ int floor = Log2Floor(n);
+ if (n == (n & ~(n - 1))) // zero or a power of two
+ return floor;
+ else
+ return floor + 1;
+}
+
+inline int Log2Ceiling64(uint64 n) {
+ int floor = Log2Floor64(n);
+ if (n == (n & ~(n - 1))) // zero or a power of two
+ return floor;
+ else
+ return floor + 1;
+}
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_BITS_H_
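
Illustration (not part of this change): a few concrete values that follow directly from the floor/ceiling definitions above.

#include "tensorflow/core/lib/core/bits.h"
#include <cassert>

void Log2Examples() {
  using tensorflow::Log2Ceiling;
  using tensorflow::Log2Floor;
  assert(Log2Floor(1) == 0);
  assert(Log2Floor(9) == 3);    // floor(log2(9)) == 3
  assert(Log2Ceiling(9) == 4);  // ceil(log2(9))  == 4
  assert(Log2Ceiling(8) == 3);  // exact power of two
  assert(Log2Floor(0) == -1);   // zero is special-cased
}
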
diff --git a/tensorflow/core/lib/core/blocking_counter.h b/tensorflow/core/lib/core/blocking_counter.h
new file mode 100644
index 0000000000..f141be2c76
--- /dev/null
+++ b/tensorflow/core/lib/core/blocking_counter.h
@@ -0,0 +1,41 @@
+#ifndef TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_
+#define TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+
+class BlockingCounter {
+ public:
+ BlockingCounter(int initial_count) : count_(initial_count) {
+ CHECK_GE(count_, 0);
+ }
+
+ ~BlockingCounter() {}
+
+ inline void DecrementCount() {
+ mutex_lock l(mu_);
+ --count_;
+ CHECK(count_ >= 0);
+ if (count_ == 0) {
+ cond_var_.notify_all();
+ }
+ }
+
+ inline void Wait() {
+ mutex_lock l(mu_);
+ while (count_ > 0) {
+ cond_var_.wait(l);
+ }
+ }
+
+ private:
+ int count_;
+ mutex mu_;
+ condition_variable cond_var_;
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_
diff --git a/tensorflow/core/lib/core/blocking_counter_test.cc b/tensorflow/core/lib/core/blocking_counter_test.cc
new file mode 100644
index 0000000000..feb0342086
--- /dev/null
+++ b/tensorflow/core/lib/core/blocking_counter_test.cc
@@ -0,0 +1,36 @@
+#include <gtest/gtest.h>
+
+#include "tensorflow/core/lib/core/blocking_counter.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(BlockingCounterTest, TestZero) {
+ BlockingCounter bc(0);
+ bc.Wait();
+}
+
+TEST(BlockingCounterTest, TestSingleThread) {
+ BlockingCounter bc(2);
+ bc.DecrementCount();
+ bc.DecrementCount();
+ bc.Wait();
+}
+
+TEST(BlockingCounterTest, TestMultipleThread) {
+ int N = 3;
+ thread::ThreadPool* thread_pool =
+ new thread::ThreadPool(Env::Default(), "test", N);
+
+ BlockingCounter bc(N);
+ for (int i = 0; i < N; ++i) {
+ thread_pool->Schedule([&bc] { bc.DecrementCount(); });
+ }
+
+ bc.Wait();
+ delete thread_pool;
+}
+
+} // namespace
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/casts.h b/tensorflow/core/lib/core/casts.h
new file mode 100644
index 0000000000..5b72048ac5
--- /dev/null
+++ b/tensorflow/core/lib/core/casts.h
@@ -0,0 +1,85 @@
+// Various Google-specific casting templates.
+//
+// This code is compiled directly on many platforms, including client
+// platforms like Windows, Mac, and embedded systems. Before making
+// any changes here, make sure that you're not breaking any platforms.
+//
+
+#ifndef TENSORFLOW_LIB_CORE_CASTS_H_
+#define TENSORFLOW_LIB_CORE_CASTS_H_
+
+#include <string.h> // for memcpy
+
+namespace tensorflow {
+
+// bit_cast<Dest,Source> is a template function that implements the
+// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
+// very low-level functions like the protobuf library and fast math
+// support.
+//
+// float f = 3.14159265358979;
+// int i = bit_cast<int32>(f);
+// // i = 0x40490fdb
+//
+// The classical address-casting method is:
+//
+// // WRONG
+// float f = 3.14159265358979; // WRONG
+// int i = * reinterpret_cast<int*>(&f); // WRONG
+//
+// The address-casting method actually produces undefined behavior
+// according to ISO C++ specification section 3.10 -15-. Roughly, this
+// section says: if an object in memory has one type, and a program
+// accesses it with a different type, then the result is undefined
+// behavior for most values of "different type".
+//
+// This is true for any cast syntax, either *(int*)&f or
+// *reinterpret_cast<int*>(&f). And it is particularly true for
+// conversions between integral lvalues and floating-point lvalues.
+//
+// The purpose of 3.10 -15- is to allow optimizing compilers to assume
+// that expressions with different types refer to different memory. gcc
+// 4.0.1 has an optimizer that takes advantage of this. So a
+// non-conforming program quietly produces wildly incorrect output.
+//
+// The problem is not the use of reinterpret_cast. The problem is type
+// punning: holding an object in memory of one type and reading its bits
+// back using a different type.
+//
+// The C++ standard is more subtle and complex than this, but that
+// is the basic idea.
+//
+// Anyways ...
+//
+// bit_cast<> calls memcpy() which is blessed by the standard,
+// especially by the example in section 3.9 . Also, of course,
+// bit_cast<> wraps up the nasty logic in one place.
+//
+// Fortunately memcpy() is very fast. In optimized mode, with a
+// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
+// code with the minimal amount of data movement. On a 32-bit system,
+// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
+// compiles to two loads and two stores.
+//
+// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
+//
+// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
+// is likely to surprise you.
+//
+// Props to Bill Gibbons for the compile time assertion technique and
+// Art Komninos and Igor Tandetnik for the msvc experiments.
+//
+// -- mec 2005-10-17
+
+template <class Dest, class Source>
+inline Dest bit_cast(const Source& source) {
+ static_assert(sizeof(Dest) == sizeof(Source), "Sizes do not match");
+
+ Dest dest;
+ memcpy(&dest, &source, sizeof(dest));
+ return dest;
+}
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_CASTS_H_
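
Illustration (not part of this change): a self-contained sketch of the usage pattern the comment above describes; the printed bit pattern assumes IEEE-754 single precision.

#include "tensorflow/core/lib/core/casts.h"
#include "tensorflow/core/platform/port.h"
#include <cstdio>

void BitCastExample() {
  float f = 3.14159265358979f;
  // Reinterpret the float's bits as an int32 without type punning through
  // pointers; mismatched sizes fail to compile via the static_assert.
  tensorflow::int32 i = tensorflow::bit_cast<tensorflow::int32>(f);
  std::printf("0x%08x\n", static_cast<tensorflow::uint32>(i));  // 0x40490fdb
  // Round-trip back to float recovers the original value bit-for-bit.
  float g = tensorflow::bit_cast<float>(i);
  (void)g;
}
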
diff --git a/tensorflow/core/lib/core/coding.cc b/tensorflow/core/lib/core/coding.cc
new file mode 100644
index 0000000000..efff554742
--- /dev/null
+++ b/tensorflow/core/lib/core/coding.cc
@@ -0,0 +1,164 @@
+#include "tensorflow/core/lib/core/coding.h"
+
+namespace tensorflow {
+namespace core {
+
+void EncodeFixed32(char* buf, uint32 value) {
+ if (port::kLittleEndian) {
+ memcpy(buf, &value, sizeof(value));
+ } else {
+ buf[0] = value & 0xff;
+ buf[1] = (value >> 8) & 0xff;
+ buf[2] = (value >> 16) & 0xff;
+ buf[3] = (value >> 24) & 0xff;
+ }
+}
+
+void EncodeFixed64(char* buf, uint64 value) {
+ if (port::kLittleEndian) {
+ memcpy(buf, &value, sizeof(value));
+ } else {
+ buf[0] = value & 0xff;
+ buf[1] = (value >> 8) & 0xff;
+ buf[2] = (value >> 16) & 0xff;
+ buf[3] = (value >> 24) & 0xff;
+ buf[4] = (value >> 32) & 0xff;
+ buf[5] = (value >> 40) & 0xff;
+ buf[6] = (value >> 48) & 0xff;
+ buf[7] = (value >> 56) & 0xff;
+ }
+}
+
+void PutFixed32(string* dst, uint32 value) {
+ char buf[sizeof(value)];
+ EncodeFixed32(buf, value);
+ dst->append(buf, sizeof(buf));
+}
+
+void PutFixed64(string* dst, uint64 value) {
+ char buf[sizeof(value)];
+ EncodeFixed64(buf, value);
+ dst->append(buf, sizeof(buf));
+}
+
+char* EncodeVarint32(char* dst, uint32 v) {
+ // Operate on characters as unsigneds
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+ static const int B = 128;
+ if (v < (1 << 7)) {
+ *(ptr++) = v;
+ } else if (v < (1 << 14)) {
+ *(ptr++) = v | B;
+ *(ptr++) = v >> 7;
+ } else if (v < (1 << 21)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v >> 7) | B;
+ *(ptr++) = v >> 14;
+ } else if (v < (1 << 28)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v >> 7) | B;
+ *(ptr++) = (v >> 14) | B;
+ *(ptr++) = v >> 21;
+ } else {
+ *(ptr++) = v | B;
+ *(ptr++) = (v >> 7) | B;
+ *(ptr++) = (v >> 14) | B;
+ *(ptr++) = (v >> 21) | B;
+ *(ptr++) = v >> 28;
+ }
+ return reinterpret_cast<char*>(ptr);
+}
+
+void PutVarint32(string* dst, uint32 v) {
+ char buf[5];
+ char* ptr = EncodeVarint32(buf, v);
+ dst->append(buf, ptr - buf);
+}
+
+char* EncodeVarint64(char* dst, uint64 v) {
+ static const int B = 128;
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
+ while (v >= B) {
+ *(ptr++) = (v & (B - 1)) | B;
+ v >>= 7;
+ }
+ *(ptr++) = static_cast<unsigned char>(v);
+ return reinterpret_cast<char*>(ptr);
+}
+
+void PutVarint64(string* dst, uint64 v) {
+ char buf[10];
+ char* ptr = EncodeVarint64(buf, v);
+ dst->append(buf, ptr - buf);
+}
+
+int VarintLength(uint64_t v) {
+ int len = 1;
+ while (v >= 128) {
+ v >>= 7;
+ len++;
+ }
+ return len;
+}
+
+const char* GetVarint32PtrFallback(const char* p, const char* limit,
+ uint32* value) {
+ uint32 result = 0;
+ for (uint32 shift = 0; shift <= 28 && p < limit; shift += 7) {
+ uint32 byte = *(reinterpret_cast<const unsigned char*>(p));
+ p++;
+ if (byte & 128) {
+ // More bytes are present
+ result |= ((byte & 127) << shift);
+ } else {
+ result |= (byte << shift);
+ *value = result;
+ return reinterpret_cast<const char*>(p);
+ }
+ }
+ return NULL;
+}
+
+bool GetVarint32(StringPiece* input, uint32* value) {
+ const char* p = input->data();
+ const char* limit = p + input->size();
+ const char* q = GetVarint32Ptr(p, limit, value);
+ if (q == NULL) {
+ return false;
+ } else {
+ *input = StringPiece(q, limit - q);
+ return true;
+ }
+}
+
+const char* GetVarint64Ptr(const char* p, const char* limit, uint64* value) {
+ uint64 result = 0;
+ for (uint32 shift = 0; shift <= 63 && p < limit; shift += 7) {
+ uint64 byte = *(reinterpret_cast<const unsigned char*>(p));
+ p++;
+ if (byte & 128) {
+ // More bytes are present
+ result |= ((byte & 127) << shift);
+ } else {
+ result |= (byte << shift);
+ *value = result;
+ return reinterpret_cast<const char*>(p);
+ }
+ }
+ return NULL;
+}
+
+bool GetVarint64(StringPiece* input, uint64* value) {
+ const char* p = input->data();
+ const char* limit = p + input->size();
+ const char* q = GetVarint64Ptr(p, limit, value);
+ if (q == NULL) {
+ return false;
+ } else {
+ *input = StringPiece(q, limit - q);
+ return true;
+ }
+}
+
+} // namespace core
+} // namespace tensorflow
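
Illustration (not part of this change): a small round trip through the varint routines above on one concrete value, assuming StringPiece is constructible from a string as declared in stringpiece.h in this directory.

#include "tensorflow/core/lib/core/coding.h"
#include <cassert>

void VarintExample() {
  using namespace tensorflow;
  string s;
  core::PutVarint32(&s, 300);
  // 300 encodes as two bytes, 0xAC 0x02: low 7 bits first, and the high
  // bit of each byte marks "more bytes follow".
  assert(s.size() == 2);
  assert(core::VarintLength(300) == 2);
  uint32 v = 0;
  StringPiece sp(s);
  assert(core::GetVarint32(&sp, &v));
  assert(v == 300);
}
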
diff --git a/tensorflow/core/lib/core/coding.h b/tensorflow/core/lib/core/coding.h
new file mode 100644
index 0000000000..0c14bf1bbf
--- /dev/null
+++ b/tensorflow/core/lib/core/coding.h
@@ -0,0 +1,55 @@
+// Endian-neutral encoding:
+// * Fixed-length numbers are encoded with least-significant byte first
+// * In addition we support variable length "varint" encoding
+// * Strings are encoded prefixed by their length in varint format
+
+#ifndef TENSORFLOW_LIB_CORE_CODING_H_
+#define TENSORFLOW_LIB_CORE_CODING_H_
+
+#include "tensorflow/core/lib/core/raw_coding.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+namespace core {
+
+// Lower-level versions of Put... that write directly into a character buffer
+// REQUIRES: dst has enough space for the value being written
+extern void EncodeFixed32(char* dst, uint32 value);
+extern void EncodeFixed64(char* dst, uint64 value);
+extern void PutFixed32(string* dst, uint32 value);
+extern void PutFixed64(string* dst, uint64 value);
+
+extern void PutVarint32(string* dst, uint32 value);
+extern void PutVarint64(string* dst, uint64 value);
+
+extern bool GetVarint32(StringPiece* input, uint32* value);
+extern bool GetVarint64(StringPiece* input, uint64* value);
+
+extern const char* GetVarint32Ptr(const char* p, const char* limit, uint32* v);
+extern const char* GetVarint64Ptr(const char* p, const char* limit, uint64* v);
+
+// Internal routine for use by fallback path of GetVarint32Ptr
+extern const char* GetVarint32PtrFallback(const char* p, const char* limit,
+ uint32* value);
+inline const char* GetVarint32Ptr(const char* p, const char* limit,
+ uint32* value) {
+ if (p < limit) {
+ uint32 result = *(reinterpret_cast<const unsigned char*>(p));
+ if ((result & 128) == 0) {
+ *value = result;
+ return p + 1;
+ }
+ }
+ return GetVarint32PtrFallback(p, limit, value);
+}
+
+extern char* EncodeVarint64(char* dst, uint64 v);
+
+// Returns the length of the varint32 or varint64 encoding of "v"
+extern int VarintLength(uint64_t v);
+
+} // namespace core
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_CODING_H_
diff --git a/tensorflow/core/lib/core/coding_test.cc b/tensorflow/core/lib/core/coding_test.cc
new file mode 100644
index 0000000000..5e9e2c5e96
--- /dev/null
+++ b/tensorflow/core/lib/core/coding_test.cc
@@ -0,0 +1,168 @@
+#include "tensorflow/core/lib/core/coding.h"
+
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace core {
+
+TEST(Coding, Fixed32) {
+ static const int N = 100000;
+
+ string s;
+ for (uint32 v = 0; v < N; v++) {
+ char buf[sizeof(uint32)];
+ EncodeFixed32(buf, v);
+ s.append(buf, sizeof(buf));
+ }
+
+ const char* p = s.data();
+ for (uint32 v = 0; v < N; v++) {
+ uint32 actual = DecodeFixed32(p);
+ ASSERT_EQ(v, actual);
+ p += sizeof(uint32);
+ }
+}
+
+TEST(Coding, Fixed64) {
+ string s;
+ for (int power = 0; power <= 63; power++) {
+ uint64 v = static_cast<uint64>(1) << power;
+ char buf[sizeof(uint64)];
+ EncodeFixed64(buf, v - 1);
+ s.append(buf, sizeof(buf));
+ EncodeFixed64(buf, v + 0);
+ s.append(buf, sizeof(buf));
+ EncodeFixed64(buf, v + 1);
+ s.append(buf, sizeof(buf));
+ }
+
+ const char* p = s.data();
+ for (int power = 0; power <= 63; power++) {
+ uint64 v = static_cast<uint64>(1) << power;
+ uint64 actual;
+ actual = DecodeFixed64(p);
+ ASSERT_EQ(v - 1, actual);
+ p += sizeof(uint64);
+
+ actual = DecodeFixed64(p);
+ ASSERT_EQ(v + 0, actual);
+ p += sizeof(uint64);
+
+ actual = DecodeFixed64(p);
+ ASSERT_EQ(v + 1, actual);
+ p += sizeof(uint64);
+ }
+}
+
+// Test that encoding routines generate little-endian encodings
+TEST(Coding, EncodingOutput) {
+ char dst[8];
+ EncodeFixed32(dst, 0x04030201);
+ ASSERT_EQ(0x01, static_cast<int>(dst[0]));
+ ASSERT_EQ(0x02, static_cast<int>(dst[1]));
+ ASSERT_EQ(0x03, static_cast<int>(dst[2]));
+ ASSERT_EQ(0x04, static_cast<int>(dst[3]));
+
+ EncodeFixed64(dst, 0x0807060504030201ull);
+ ASSERT_EQ(0x01, static_cast<int>(dst[0]));
+ ASSERT_EQ(0x02, static_cast<int>(dst[1]));
+ ASSERT_EQ(0x03, static_cast<int>(dst[2]));
+ ASSERT_EQ(0x04, static_cast<int>(dst[3]));
+ ASSERT_EQ(0x05, static_cast<int>(dst[4]));
+ ASSERT_EQ(0x06, static_cast<int>(dst[5]));
+ ASSERT_EQ(0x07, static_cast<int>(dst[6]));
+ ASSERT_EQ(0x08, static_cast<int>(dst[7]));
+}
+
+TEST(Coding, Varint32) {
+ string s;
+ for (uint32 i = 0; i < (32 * 32); i++) {
+ uint32 v = (i / 32) << (i % 32);
+ PutVarint32(&s, v);
+ }
+
+ const char* p = s.data();
+ const char* limit = p + s.size();
+ for (uint32 i = 0; i < (32 * 32); i++) {
+ uint32 expected = (i / 32) << (i % 32);
+ uint32 actual;
+ p = GetVarint32Ptr(p, limit, &actual);
+ ASSERT_TRUE(p != NULL);
+ ASSERT_EQ(expected, actual);
+ }
+ ASSERT_EQ(p, s.data() + s.size());
+}
+
+TEST(Coding, Varint64) {
+ // Construct the list of values to check
+ std::vector<uint64> values;
+ // Some special values
+ values.push_back(0);
+ values.push_back(100);
+ values.push_back(~static_cast<uint64>(0));
+ values.push_back(~static_cast<uint64>(0) - 1);
+ for (uint32 k = 0; k < 64; k++) {
+ // Test values near powers of two
+ const uint64 power = 1ull << k;
+ values.push_back(power);
+ values.push_back(power - 1);
+ values.push_back(power + 1);
+ }
+
+ string s;
+ for (size_t i = 0; i < values.size(); i++) {
+ PutVarint64(&s, values[i]);
+ }
+
+ const char* p = s.data();
+ const char* limit = p + s.size();
+ for (size_t i = 0; i < values.size(); i++) {
+ ASSERT_TRUE(p < limit);
+ uint64 actual;
+ p = GetVarint64Ptr(p, limit, &actual);
+ ASSERT_TRUE(p != NULL);
+ ASSERT_EQ(values[i], actual);
+ }
+ ASSERT_EQ(p, limit);
+}
+
+TEST(Coding, Varint32Overflow) {
+ uint32 result;
+ string input("\x81\x82\x83\x84\x85\x11");
+ ASSERT_TRUE(GetVarint32Ptr(input.data(), input.data() + input.size(),
+ &result) == NULL);
+}
+
+TEST(Coding, Varint32Truncation) {
+ uint32 large_value = (1u << 31) + 100;
+ string s;
+ PutVarint32(&s, large_value);
+ uint32 result;
+ for (size_t len = 0; len < s.size() - 1; len++) {
+ ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + len, &result) == NULL);
+ }
+ ASSERT_TRUE(GetVarint32Ptr(s.data(), s.data() + s.size(), &result) != NULL);
+ ASSERT_EQ(large_value, result);
+}
+
+TEST(Coding, Varint64Overflow) {
+ uint64 result;
+ string input("\x81\x82\x83\x84\x85\x81\x82\x83\x84\x85\x11");
+ ASSERT_TRUE(GetVarint64Ptr(input.data(), input.data() + input.size(),
+ &result) == NULL);
+}
+
+TEST(Coding, Varint64Truncation) {
+ uint64 large_value = (1ull << 63) + 100ull;
+ string s;
+ PutVarint64(&s, large_value);
+ uint64 result;
+ for (size_t len = 0; len < s.size() - 1; len++) {
+ ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + len, &result) == NULL);
+ }
+ ASSERT_TRUE(GetVarint64Ptr(s.data(), s.data() + s.size(), &result) != NULL);
+ ASSERT_EQ(large_value, result);
+}
+
+} // namespace core
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/command_line_flags.cc b/tensorflow/core/lib/core/command_line_flags.cc
new file mode 100644
index 0000000000..0f1072ffaa
--- /dev/null
+++ b/tensorflow/core/lib/core/command_line_flags.cc
@@ -0,0 +1,94 @@
+#include "tensorflow/core/lib/core/command_line_flags.h"
+
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace tensorflow {
+namespace {
+
+// Templated function to convert a string to target values.
+// Return true if the conversion is successful. Otherwise, return false.
+template <typename T>
+bool StringToValue(const string& content, T* value);
+
+template <>
+bool StringToValue<int32>(const string& content, int* value) {
+ return str_util::NumericParse32(content, value);
+}
+
+// Parse a single argument by linearly searching through the command table.
+// The input format is: --argument=value.
+// Return OK if the argument is used; the extracted value is stored in the
+// matching flag.
+// Return NOT_FOUND if the argument is not recognized.
+// Return INVALID_ARGUMENT if the command is recognized, but its value
+// cannot be extracted.
+template <typename T>
+Status ParseArgument(const string& argument) {
+ for (auto& command :
+ internal::CommandLineFlagRegistry<int>::Instance()->commands) {
+ string prefix = strings::StrCat("--", command.name, "=");
+ if (tensorflow::StringPiece(argument).starts_with(prefix)) {
+ string content = argument.substr(prefix.length());
+ if (StringToValue<T>(content, command.value)) {
+ return Status::OK();
+ }
+ return Status(error::INVALID_ARGUMENT,
+ strings::StrCat("Cannot parse integer in: ", argument));
+ }
+ }
+ return Status(error::NOT_FOUND,
+ strings::StrCat("Unknown command: ", argument));
+}
+
+// A specialization for booleans. The input format is:
+// "--argument" or "--noargument".
+// Parse a single argument by linearly searching through the command table.
+// Return OK if the argument is used. The value is stored in the matching flag.
+// Return NOT_FOUND if the argument is not recognized.
+template <>
+Status ParseArgument<bool>(const string& argument) {
+ for (auto& command :
+ internal::CommandLineFlagRegistry<bool>::Instance()->commands) {
+ if (argument == strings::StrCat("--", command.name)) {
+ *command.value = true;
+ return Status::OK();
+ } else if (argument == strings::StrCat("--no", command.name)) {
+ *command.value = false;
+ return Status::OK();
+ }
+ }
+ return Status(error::NOT_FOUND,
+ strings::StrCat("Unknown command: ", argument));
+}
+} // namespace
+
+Status ParseCommandLineFlags(int* argc, char* argv[]) {
+ int unused_argc = 1;
+ for (int index = 1; index < *argc; ++index) {
+ Status s;
+ // Search bool commands.
+ s = ParseArgument<bool>(argv[index]);
+ if (s.ok()) {
+ continue;
+ }
+ if (s.code() != error::NOT_FOUND) {
+ return s;
+ }
+ // Search int32 commands.
+ s = ParseArgument<int32>(argv[index]);
+ if (s.ok()) {
+ continue;
+ }
+ if (s.code() != error::NOT_FOUND) {
+ return s;
+ }
+ // Pointer swap the unused argument to the front.
+ std::swap(argv[unused_argc++], argv[index]);
+ }
+ *argc = unused_argc;
+ return Status::OK();
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/command_line_flags.h b/tensorflow/core/lib/core/command_line_flags.h
new file mode 100644
index 0000000000..f1a94c11f9
--- /dev/null
+++ b/tensorflow/core/lib/core/command_line_flags.h
@@ -0,0 +1,60 @@
+#ifndef TENSORFLOW_LIB_CORE_COMMAND_LINE_FLAGS_H_
+#define TENSORFLOW_LIB_CORE_COMMAND_LINE_FLAGS_H_
+
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/public/status.h"
+
+namespace tensorflow {
+namespace internal {
+
+template <typename T>
+struct CommandLineFlagRegistry {
+ static CommandLineFlagRegistry* Instance() {
+ static CommandLineFlagRegistry instance_;
+ return &instance_;
+ }
+ struct Command {
+ string name;
+ T* value;
+ string text;
+ };
+ std::vector<Command> commands;
+
+ private:
+ CommandLineFlagRegistry() {}
+ TF_DISALLOW_COPY_AND_ASSIGN(CommandLineFlagRegistry);
+};
+
+template <typename T>
+struct CommandLineFlagRegister {
+ CommandLineFlagRegister(const string& name, T* val, const string& text) {
+ CommandLineFlagRegistry<T>::Instance()->commands.push_back(
+ {name, val, text});
+ }
+};
+
+#define TF_DEFINE_variable(type, name, default_value, text) \
+ type FLAGS_##name = default_value; \
+ namespace TF_flags_internal { \
+ tensorflow::internal::CommandLineFlagRegister<type> \
+ TF_flags_internal_var_##name(#name, &FLAGS_##name, text); \
+ } // namespace TF_flags_internal
+
+} // namespace internal
+
+#define TF_DEFINE_int32(name, default_value, text) \
+ TF_DEFINE_variable(int32, name, default_value, text);
+
+#define TF_DEFINE_bool(name, default_value, text) \
+ TF_DEFINE_variable(bool, name, default_value, text);
+
+// Parse argv[1]..argv[*argc-1] to options. Remove used arguments from argv.
+// Return the number of unused arguments in *argc.
+// Return an error Status if the parsing encounters errors.
+// TODO(opensource): switch to a command line argument parser that can be
+// shared with other tests.
+Status ParseCommandLineFlags(int* argc, char* argv[]);
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_COMMAND_LINE_FLAGS_H_
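
Illustration (not part of this change): a sketch of how these macros are meant to be used; the flag names and default values are hypothetical.

#include "tensorflow/core/lib/core/command_line_flags.h"

using tensorflow::int32;  // so the macro expansion sees int32

// Hypothetical flags, for illustration only.
TF_DEFINE_int32(num_iterations, 10, "How many iterations to run.");
TF_DEFINE_bool(verbose, false, "Enable verbose output.");

int main(int argc, char* argv[]) {
  // Consumes e.g. --num_iterations=100 and --verbose / --noverbose;
  // unrecognized arguments are compacted to the front of argv.
  tensorflow::Status s = tensorflow::ParseCommandLineFlags(&argc, argv);
  if (!s.ok()) return 1;
  // FLAGS_num_iterations and FLAGS_verbose now hold the parsed values.
  return 0;
}
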
diff --git a/tensorflow/core/lib/core/error_codes.proto b/tensorflow/core/lib/core/error_codes.proto
new file mode 100644
index 0000000000..6735fd8f88
--- /dev/null
+++ b/tensorflow/core/lib/core/error_codes.proto
@@ -0,0 +1,145 @@
+syntax = "proto3";
+
+package tensorflow.error;
+// option cc_enable_arenas = true;
+
+// The canonical error codes for TensorFlow APIs.
+//
+// Warnings:
+//
+// - Do not change any numeric assignments.
+// - Changes to this list should only be made if there is a compelling
+// need that can't be satisfied in another way. Such changes
+// must be approved by at least two OWNERS.
+//
+// Sometimes multiple error codes may apply. Services should return
+// the most specific error code that applies. For example, prefer
+// OUT_OF_RANGE over FAILED_PRECONDITION if both codes apply.
+// Similarly prefer NOT_FOUND or ALREADY_EXISTS over FAILED_PRECONDITION.
+enum Code {
+ // Not an error; returned on success
+ OK = 0;
+
+ // The operation was cancelled (typically by the caller).
+ CANCELLED = 1;
+
+ // Unknown error. An example of where this error may be returned is
+ // if a Status value received from another address space belongs to
+ // an error-space that is not known in this address space. Also
+ // errors raised by APIs that do not return enough error information
+ // may be converted to this error.
+ UNKNOWN = 2;
+
+ // Client specified an invalid argument. Note that this differs
+ // from FAILED_PRECONDITION. INVALID_ARGUMENT indicates arguments
+ // that are problematic regardless of the state of the system
+ // (e.g., a malformed file name).
+ INVALID_ARGUMENT = 3;
+
+ // Deadline expired before operation could complete. For operations
+ // that change the state of the system, this error may be returned
+ // even if the operation has completed successfully. For example, a
+ // successful response from a server could have been delayed long
+ // enough for the deadline to expire.
+ DEADLINE_EXCEEDED = 4;
+
+ // Some requested entity (e.g., file or directory) was not found.
+ // For privacy reasons, this code *may* be returned when the client
+ // does not have the access right to the entity.
+ NOT_FOUND = 5;
+
+ // Some entity that we attempted to create (e.g., file or directory)
+ // already exists.
+ ALREADY_EXISTS = 6;
+
+ // The caller does not have permission to execute the specified
+ // operation. PERMISSION_DENIED must not be used for rejections
+ // caused by exhausting some resource (use RESOURCE_EXHAUSTED
+ // instead for those errors). PERMISSION_DENIED must not be
+ // used if the caller can not be identified (use UNAUTHENTICATED
+ // instead for those errors).
+ PERMISSION_DENIED = 7;
+
+ // The request does not have valid authentication credentials for the
+ // operation.
+ UNAUTHENTICATED = 16;
+
+ // Some resource has been exhausted, perhaps a per-user quota, or
+ // perhaps the entire file system is out of space.
+ RESOURCE_EXHAUSTED = 8;
+
+ // Operation was rejected because the system is not in a state
+ // required for the operation's execution. For example, directory
+ // to be deleted may be non-empty, an rmdir operation is applied to
+ // a non-directory, etc.
+ //
+ // A litmus test that may help a service implementor in deciding
+ // between FAILED_PRECONDITION, ABORTED, and UNAVAILABLE:
+ // (a) Use UNAVAILABLE if the client can retry just the failing call.
+ // (b) Use ABORTED if the client should retry at a higher-level
+ // (e.g., restarting a read-modify-write sequence).
+ // (c) Use FAILED_PRECONDITION if the client should not retry until
+ // the system state has been explicitly fixed. E.g., if an "rmdir"
+ // fails because the directory is non-empty, FAILED_PRECONDITION
+ // should be returned since the client should not retry unless
+ // they have first fixed up the directory by deleting files from it.
+ // (d) Use FAILED_PRECONDITION if the client performs conditional
+ // REST Get/Update/Delete on a resource and the resource on the
+ // server does not match the condition. E.g., conflicting
+ // read-modify-write on the same resource.
+ FAILED_PRECONDITION = 9;
+
+ // The operation was aborted, typically due to a concurrency issue
+ // like sequencer check failures, transaction aborts, etc.
+ //
+ // See litmus test above for deciding between FAILED_PRECONDITION,
+ // ABORTED, and UNAVAILABLE.
+ ABORTED = 10;
+
+ // Operation was attempted past the valid range. E.g., seeking or
+ // reading past end of file.
+ //
+ // Unlike INVALID_ARGUMENT, this error indicates a problem that may
+ // be fixed if the system state changes. For example, a 32-bit file
+ // system will generate INVALID_ARGUMENT if asked to read at an
+ // offset that is not in the range [0,2^32-1], but it will generate
+ // OUT_OF_RANGE if asked to read from an offset past the current
+ // file size.
+ //
+ // There is a fair bit of overlap between FAILED_PRECONDITION and
+ // OUT_OF_RANGE. We recommend using OUT_OF_RANGE (the more specific
+ // error) when it applies so that callers who are iterating through
+ // a space can easily look for an OUT_OF_RANGE error to detect when
+ // they are done.
+ OUT_OF_RANGE = 11;
+
+ // Operation is not implemented or not supported/enabled in this service.
+ UNIMPLEMENTED = 12;
+
+  // Internal errors. Means some invariant expected by the underlying
+  // system has been broken. If you see one of these errors,
+ // something is very broken.
+ INTERNAL = 13;
+
+  // The service is currently unavailable. This is most likely a
+ // transient condition and may be corrected by retrying with
+ // a backoff.
+ //
+ // See litmus test above for deciding between FAILED_PRECONDITION,
+ // ABORTED, and UNAVAILABLE.
+ UNAVAILABLE = 14;
+
+ // Unrecoverable data loss or corruption.
+ DATA_LOSS = 15;
+
+ // An extra enum entry to prevent people from writing code that
+ // fails to compile when a new code is added.
+ //
+ // Nobody should ever reference this enumeration entry. In particular,
+ // if you write C++ code that switches on this enumeration, add a default:
+ // case instead of a case that mentions this enumeration entry.
+ //
+ // Nobody should rely on the value (currently 20) listed here. It
+ // may change in the future.
+ DO_NOT_USE_RESERVED_FOR_FUTURE_EXPANSION_USE_DEFAULT_IN_SWITCH_INSTEAD_ = 20;
+}
diff --git a/tensorflow/core/lib/core/errors.h b/tensorflow/core/lib/core/errors.h
new file mode 100644
index 0000000000..b0badd8c4d
--- /dev/null
+++ b/tensorflow/core/lib/core/errors.h
@@ -0,0 +1,131 @@
+#ifndef TENSORFLOW_LIB_CORE_ERRORS_H_
+#define TENSORFLOW_LIB_CORE_ERRORS_H_
+
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/public/status.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace errors {
+
+typedef ::tensorflow::error::Code Code;
+
+// Append some context to an error message. Each time we append
+// context, put it on a new line, since it is possible for there
+// to be several layers of additional context.
+template <typename... Args>
+void AppendToMessage(::tensorflow::Status* status, Args... args) {
+ *status = ::tensorflow::Status(
+ status->code(),
+ strings::StrCat(status->error_message(), "\n\t", args...));
+}
+
+// For propagating errors when calling a function.
+#define TF_RETURN_IF_ERROR(expr) \
+ do { \
+ const ::tensorflow::Status _status = (expr); \
+ if (TF_PREDICT_FALSE(!_status.ok())) return _status; \
+ } while (0)
+
+#define TF_RETURN_WITH_CONTEXT_IF_ERROR(expr, ...) \
+ do { \
+ ::tensorflow::Status _status = (expr); \
+ if (TF_PREDICT_FALSE(!_status.ok())) { \
+ ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \
+ return _status; \
+ } \
+ } while (0)
+
+// Convenience functions for generating and using error status.
+// Example usage:
+// status.Update(errors::InvalidArgument("The ", foo, " isn't right."));
+// if (errors::IsInvalidArgument(status)) { ... }
+// switch (status.code()) { case error::INVALID_ARGUMENT: ... }
+
+#define DECLARE_ERROR(FUNC, CONST) \
+ template <typename... Args> \
+ inline ::tensorflow::Status FUNC(Args... args) { \
+ return ::tensorflow::Status(::tensorflow::error::CONST, \
+ strings::StrCat(args...)); \
+ } \
+ inline bool Is##FUNC(const ::tensorflow::Status& status) { \
+ return status.code() == ::tensorflow::error::CONST; \
+ }
+
+DECLARE_ERROR(Cancelled, CANCELLED)
+DECLARE_ERROR(InvalidArgument, INVALID_ARGUMENT)
+DECLARE_ERROR(NotFound, NOT_FOUND)
+DECLARE_ERROR(AlreadyExists, ALREADY_EXISTS)
+DECLARE_ERROR(ResourceExhausted, RESOURCE_EXHAUSTED)
+DECLARE_ERROR(Unavailable, UNAVAILABLE)
+DECLARE_ERROR(FailedPrecondition, FAILED_PRECONDITION)
+DECLARE_ERROR(OutOfRange, OUT_OF_RANGE)
+DECLARE_ERROR(Unimplemented, UNIMPLEMENTED)
+DECLARE_ERROR(Internal, INTERNAL)
+DECLARE_ERROR(Aborted, ABORTED)
+DECLARE_ERROR(DeadlineExceeded, DEADLINE_EXCEEDED)
+DECLARE_ERROR(DataLoss, DATA_LOSS)
+DECLARE_ERROR(Unknown, UNKNOWN)
+DECLARE_ERROR(PermissionDenied, PERMISSION_DENIED)
+DECLARE_ERROR(Unauthenticated, UNAUTHENTICATED)
+
+#undef DECLARE_ERROR
+
+// The CanonicalCode() for non-errors.
+using ::tensorflow::error::OK;
+
+// Convenience macros for asserting and handling exceptional conditions.
+// Analogous to the CHECK* macros provided by logging.h.
+//
+// Example use:
+// void Compute(OperationContext* context) {
+// OP_REQUIRES(context, context->num_inputs() == 2,
+// errors::InvalidArgument("FooOp requires 2 arguments"));
+// ...
+// Status status = SomeUncertainMethod();
+// OP_REQUIRES_OK(context, status);
+// ...
+// }
+
+#define OP_REQUIRES(CTX, EXP, STATUS) \
+ if (!(EXP)) { \
+ ::tensorflow::Status _s(STATUS); \
+ VLOG(1) << _s; \
+ (CTX)->SetStatus(_s); \
+ return; \
+ }
+
+#define OP_REQUIRES_OK(CTX, STATUS) \
+ do { \
+ ::tensorflow::Status _s(STATUS); \
+ if (!_s.ok()) { \
+ LOG(WARNING) << _s; \
+ (CTX)->SetStatus(_s); \
+ return; \
+ } \
+ } while (0)
+
+#define OP_REQUIRES_ASYNC(CTX, EXP, STATUS, CALLBACK) \
+ if (!(EXP)) { \
+ ::tensorflow::Status _s(STATUS); \
+ VLOG(1) << _s; \
+ (CTX)->SetStatus(_s); \
+ (CALLBACK)(); \
+ return; \
+ }
+
+#define OP_REQUIRES_OK_ASYNC(CTX, STATUS, CALLBACK) \
+ do { \
+ ::tensorflow::Status _s(STATUS); \
+ if (!_s.ok()) { \
+ LOG(WARNING) << _s; \
+ (CTX)->SetStatus(_s); \
+ (CALLBACK)(); \
+ return; \
+ } \
+ } while (0)
+
+} // namespace errors
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_ERRORS_H_
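
Illustration (not part of this change): a short sketch of the error helpers and TF_RETURN_IF_ERROR in a function that propagates failures; the function names are hypothetical.

#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/public/status.h"

// Hypothetical validation helper, for illustration only.
tensorflow::Status CheckPositive(int v) {
  if (v <= 0) {
    return tensorflow::errors::InvalidArgument("Expected a positive value, got ", v);
  }
  return tensorflow::Status::OK();
}

tensorflow::Status Run(int v) {
  // Returns early with the underlying error if CheckPositive() fails.
  TF_RETURN_IF_ERROR(CheckPositive(v));
  return tensorflow::Status::OK();
}
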
diff --git a/tensorflow/core/lib/core/notification.h b/tensorflow/core/lib/core/notification.h
new file mode 100644
index 0000000000..071e24285a
--- /dev/null
+++ b/tensorflow/core/lib/core/notification.h
@@ -0,0 +1,42 @@
+#ifndef TENSORFLOW_UTIL_NOTIFICATION_H_
+#define TENSORFLOW_UTIL_NOTIFICATION_H_
+
+#include <assert.h>
+
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+
+class Notification {
+ public:
+ Notification() : notified_(false) {}
+ ~Notification() {}
+
+ void Notify() {
+ mutex_lock l(mu_);
+ assert(!notified_);
+ notified_ = true;
+ cv_.notify_all();
+ }
+
+ bool HasBeenNotified() {
+ mutex_lock l(mu_);
+ return notified_;
+ }
+
+ void WaitForNotification() {
+ mutex_lock l(mu_);
+ while (!notified_) {
+ cv_.wait(l);
+ }
+ }
+
+ private:
+ mutex mu_;
+ condition_variable cv_;
+ bool notified_;
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_UTIL_NOTIFICATION_H_
diff --git a/tensorflow/core/lib/core/notification_test.cc b/tensorflow/core/lib/core/notification_test.cc
new file mode 100644
index 0000000000..a9e8942f05
--- /dev/null
+++ b/tensorflow/core/lib/core/notification_test.cc
@@ -0,0 +1,64 @@
+#include <gtest/gtest.h>
+
+#include "tensorflow/core/lib/core/notification.h"
+#include "tensorflow/core/lib/core/threadpool.h"
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+namespace {
+
+TEST(NotificationTest, TestSingleNotification) {
+ thread::ThreadPool* thread_pool =
+ new thread::ThreadPool(Env::Default(), "test", 1);
+
+ int counter = 0;
+ Notification start;
+ Notification proceed;
+ thread_pool->Schedule([&start, &proceed, &counter] {
+ start.Notify();
+ proceed.WaitForNotification();
+ ++counter;
+ });
+
+ // Wait for the thread to start
+ start.WaitForNotification();
+
+ // The thread should be waiting for the 'proceed' notification.
+ EXPECT_EQ(0, counter);
+
+ // Unblock the thread
+ proceed.Notify();
+
+ delete thread_pool; // Wait for closure to finish.
+
+ // Verify the counter has been incremented
+ EXPECT_EQ(1, counter);
+}
+
+TEST(NotificationTest, TestMultipleThreadsWaitingOnNotification) {
+ const int num_closures = 4;
+ thread::ThreadPool* thread_pool =
+ new thread::ThreadPool(Env::Default(), "test", num_closures);
+
+ mutex lock;
+ int counter = 0;
+ Notification n;
+
+ for (int i = 0; i < num_closures; ++i) {
+ thread_pool->Schedule([&n, &lock, &counter] {
+ n.WaitForNotification();
+ mutex_lock l(lock);
+ ++counter;
+ });
+ }
+ sleep(1);
+
+ EXPECT_EQ(0, counter);
+
+ n.Notify();
+ delete thread_pool; // Wait for all closures to finish.
+ EXPECT_EQ(4, counter);
+}
+
+} // namespace
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/raw_coding.h b/tensorflow/core/lib/core/raw_coding.h
new file mode 100644
index 0000000000..1fe49b75bb
--- /dev/null
+++ b/tensorflow/core/lib/core/raw_coding.h
@@ -0,0 +1,43 @@
+#ifndef TENSORFLOW_LIB_CORE_RAW_CODING_H_
+#define TENSORFLOW_LIB_CORE_RAW_CODING_H_
+
+#include <string.h>
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+namespace core {
+
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking.
+
+inline uint32 DecodeFixed32(const char* ptr) {
+ if (port::kLittleEndian) {
+ // Load the raw bytes
+ uint32 result;
+ memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ return result;
+ } else {
+ return ((static_cast<uint32>(static_cast<unsigned char>(ptr[0]))) |
+ (static_cast<uint32>(static_cast<unsigned char>(ptr[1])) << 8) |
+ (static_cast<uint32>(static_cast<unsigned char>(ptr[2])) << 16) |
+ (static_cast<uint32>(static_cast<unsigned char>(ptr[3])) << 24));
+ }
+}
+
+inline uint64 DecodeFixed64(const char* ptr) {
+ if (port::kLittleEndian) {
+ // Load the raw bytes
+ uint64 result;
+ memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
+ return result;
+ } else {
+ uint64 lo = DecodeFixed32(ptr);
+ uint64 hi = DecodeFixed32(ptr + 4);
+ return (hi << 32) | lo;
+ }
+}
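+
+// Illustrative example: DecodeFixed32 interprets 4 bytes as a little-endian
+// unsigned integer regardless of host byte order.
+//
+//   const char buf[4] = {0x78, 0x56, 0x34, 0x12};
+//   uint32 v = DecodeFixed32(buf);  // v == 0x12345678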
+
+} // namespace core
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_RAW_CODING_H_
diff --git a/tensorflow/core/lib/core/refcount.cc b/tensorflow/core/lib/core/refcount.cc
new file mode 100644
index 0000000000..3ed8c58eb8
--- /dev/null
+++ b/tensorflow/core/lib/core/refcount.cc
@@ -0,0 +1,35 @@
+#include "tensorflow/core/lib/core/refcount.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace core {
+
+RefCounted::RefCounted() : ref_(1) {}
+
+RefCounted::~RefCounted() { DCHECK_EQ(ref_.load(), 0); }
+
+void RefCounted::Ref() const {
+ DCHECK_GE(ref_.load(), 1);
+ ref_.fetch_add(1, std::memory_order_relaxed);
+}
+
+bool RefCounted::Unref() const {
+ DCHECK_GT(ref_.load(), 0);
+ // If ref_==1, this object is owned only by the caller. Bypass a locked op
+ // in that case.
+ if (ref_.load(std::memory_order_acquire) == 1 || ref_.fetch_sub(1) == 1) {
+ // Make DCHECK in ~RefCounted happy
+ DCHECK((ref_.store(0), true));
+ delete this;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool RefCounted::RefCountIsOne() const {
+ return (ref_.load(std::memory_order_acquire) == 1);
+}
+
+} // namespace core
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/refcount.h b/tensorflow/core/lib/core/refcount.h
new file mode 100644
index 0000000000..f727750f9e
--- /dev/null
+++ b/tensorflow/core/lib/core/refcount.h
@@ -0,0 +1,63 @@
+#ifndef TENSORFLOW_LIB_CORE_REFCOUNT_H_
+#define TENSORFLOW_LIB_CORE_REFCOUNT_H_
+
+#include <atomic>
+
+namespace tensorflow {
+namespace core {
+
+class RefCounted {
+ public:
+ // Initial reference count is one.
+ RefCounted();
+
+ // Increments reference count by one.
+ void Ref() const;
+
+ // Decrements reference count by one. If the count remains
+ // positive, returns false. When the count reaches zero, returns
+ // true and deletes this, in which case the caller must not access
+ // the object afterward.
+ bool Unref() const;
+
+ // Return whether the reference count is one.
+ // If the reference count is used in the conventional way, a
+ // reference count of 1 implies that the current thread owns the
+ // reference and no other thread shares it.
+ // This call performs the test for a reference count of one, and
+ // performs the memory barrier needed for the owning thread
+ // to act on the object, knowing that it has exclusive access to the
+ // object.
+ bool RefCountIsOne() const;
+
+ protected:
+ // Make destructor protected so that RefCounted objects cannot
+ // be instantiated directly. Only subclasses can be instantiated.
+ virtual ~RefCounted();
+
+ private:
+ mutable std::atomic_int_fast32_t ref_;
+
+ RefCounted(const RefCounted&) = delete;
+ void operator=(const RefCounted&) = delete;
+};
+
+// Helper class to unref an object when out-of-scope.
+class ScopedUnref {
+ public:
+ explicit ScopedUnref(RefCounted* o) : obj_(o) {}
+ ~ScopedUnref() {
+ if (obj_) obj_->Unref();
+ }
+
+ private:
+ RefCounted* obj_;
+
+ ScopedUnref(const ScopedUnref&) = delete;
+ void operator=(const ScopedUnref&) = delete;
+};
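+
+// Typical usage (illustrative sketch; MyResource and consumer are
+// placeholders):
+//
+//   class MyResource : public RefCounted { /* ... */ };
+//
+//   MyResource* r = new MyResource;  // Refcount is 1, owned by the creator.
+//   r->Ref();                        // Hand a second reference to a consumer,
+//   consumer->Take(r);               // which calls r->Unref() when done.
+//   r->Unref();                      // Drop the creator's reference.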
+
+} // namespace core
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_REFCOUNT_H_
diff --git a/tensorflow/core/lib/core/refcount_test.cc b/tensorflow/core/lib/core/refcount_test.cc
new file mode 100644
index 0000000000..c042be2d61
--- /dev/null
+++ b/tensorflow/core/lib/core/refcount_test.cc
@@ -0,0 +1,92 @@
+#include "tensorflow/core/lib/core/refcount.h"
+
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace core {
+namespace {
+
+static int constructed = 0;
+static int destroyed = 0;
+
+class MyRef : public RefCounted {
+ public:
+ MyRef() { constructed++; }
+ ~MyRef() override { destroyed++; }
+};
+
+class RefTest : public testing::Test {
+ public:
+ RefTest() {
+ constructed = 0;
+ destroyed = 0;
+ }
+};
+
+TEST_F(RefTest, New) {
+ MyRef* ref = new MyRef;
+ ASSERT_EQ(1, constructed);
+ ASSERT_EQ(0, destroyed);
+ ref->Unref();
+ ASSERT_EQ(1, constructed);
+ ASSERT_EQ(1, destroyed);
+}
+
+TEST_F(RefTest, RefUnref) {
+ MyRef* ref = new MyRef;
+ ASSERT_EQ(1, constructed);
+ ASSERT_EQ(0, destroyed);
+ ref->Ref();
+ ASSERT_EQ(0, destroyed);
+ ref->Unref();
+ ASSERT_EQ(0, destroyed);
+ ref->Unref();
+ ASSERT_EQ(1, destroyed);
+}
+
+TEST_F(RefTest, RefCountOne) {
+ MyRef* ref = new MyRef;
+ ASSERT_TRUE(ref->RefCountIsOne());
+ ref->Unref();
+}
+
+TEST_F(RefTest, RefCountNotOne) {
+ MyRef* ref = new MyRef;
+ ref->Ref();
+ ASSERT_FALSE(ref->RefCountIsOne());
+ ref->Unref();
+ ref->Unref();
+}
+
+TEST_F(RefTest, ConstRefUnref) {
+ const MyRef* cref = new MyRef;
+ ASSERT_EQ(1, constructed);
+ ASSERT_EQ(0, destroyed);
+ cref->Ref();
+ ASSERT_EQ(0, destroyed);
+ cref->Unref();
+ ASSERT_EQ(0, destroyed);
+ cref->Unref();
+ ASSERT_EQ(1, destroyed);
+}
+
+TEST_F(RefTest, ReturnOfUnref) {
+ MyRef* ref = new MyRef;
+ ref->Ref();
+ EXPECT_FALSE(ref->Unref());
+ EXPECT_TRUE(ref->Unref());
+}
+
+TEST_F(RefTest, ScopedUnref) {
+ { ScopedUnref unref(new MyRef); }
+ EXPECT_EQ(destroyed, 1);
+}
+
+TEST_F(RefTest, ScopedUnref_Nullptr) {
+ { ScopedUnref unref(nullptr); }
+ EXPECT_EQ(destroyed, 0);
+}
+
+} // namespace
+} // namespace core
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/status.cc b/tensorflow/core/lib/core/status.cc
new file mode 100644
index 0000000000..24ce842560
--- /dev/null
+++ b/tensorflow/core/lib/core/status.cc
@@ -0,0 +1,107 @@
+#include "tensorflow/core/public/status.h"
+#include <stdio.h>
+
+namespace tensorflow {
+
+Status::Status(tensorflow::error::Code code, StringPiece msg) {
+ assert(code != tensorflow::error::OK);
+ state_ = new State;
+ state_->code = code;
+ state_->msg = msg.ToString();
+}
+Status::~Status() { delete state_; }
+
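+// If this Status is OK, adopt new_status; otherwise keep the first non-OK
+// status that was recorded.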
+void Status::Update(const Status& new_status) {
+ if (ok()) {
+ *this = new_status;
+ }
+}
+
+void Status::SlowCopyFrom(const State* src) {
+ delete state_;
+ if (src == nullptr) {
+ state_ = nullptr;
+ } else {
+ state_ = new State(*src);
+ }
+}
+
+const string& Status::empty_string() {
+ static string* empty = new string;
+ return *empty;
+}
+
+string Status::ToString() const {
+ if (state_ == NULL) {
+ return "OK";
+ } else {
+ char tmp[30];
+ const char* type;
+ switch (code()) {
+ case tensorflow::error::CANCELLED:
+ type = "Cancelled";
+ break;
+ case tensorflow::error::UNKNOWN:
+ type = "Unknown";
+ break;
+ case tensorflow::error::INVALID_ARGUMENT:
+ type = "Invalid argument";
+ break;
+ case tensorflow::error::DEADLINE_EXCEEDED:
+ type = "Deadline exceeded";
+ break;
+ case tensorflow::error::NOT_FOUND:
+ type = "Not found";
+ break;
+ case tensorflow::error::ALREADY_EXISTS:
+ type = "Already exists";
+ break;
+ case tensorflow::error::PERMISSION_DENIED:
+ type = "Permission denied";
+ break;
+ case tensorflow::error::UNAUTHENTICATED:
+ type = "Unauthenticated";
+ break;
+ case tensorflow::error::RESOURCE_EXHAUSTED:
+ type = "Resource exhausted";
+ break;
+ case tensorflow::error::FAILED_PRECONDITION:
+ type = "Failed precondition";
+ break;
+ case tensorflow::error::ABORTED:
+ type = "Aborted";
+ break;
+ case tensorflow::error::OUT_OF_RANGE:
+ type = "Out of range";
+ break;
+ case tensorflow::error::UNIMPLEMENTED:
+ type = "Unimplemented";
+ break;
+ case tensorflow::error::INTERNAL:
+ type = "Internal";
+ break;
+ case tensorflow::error::UNAVAILABLE:
+ type = "Unavailable";
+ break;
+ case tensorflow::error::DATA_LOSS:
+ type = "Data loss";
+ break;
+ default:
+ snprintf(tmp, sizeof(tmp), "Unknown code(%d)",
+ static_cast<int>(code()));
+ type = tmp;
+ break;
+ }
+ string result(type);
+ result += ": ";
+ result += state_->msg;
+ return result;
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, const Status& x) {
+ os << x.ToString();
+ return os;
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/status_test.cc b/tensorflow/core/lib/core/status_test.cc
new file mode 100644
index 0000000000..3ef6b3302a
--- /dev/null
+++ b/tensorflow/core/lib/core/status_test.cc
@@ -0,0 +1,84 @@
+#include "tensorflow/core/public/status.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+
+TEST(Status, OK) {
+ EXPECT_EQ(Status::OK().code(), error::OK);
+ EXPECT_EQ(Status::OK().error_message(), "");
+ EXPECT_OK(Status::OK());
+ ASSERT_OK(Status::OK());
+ EXPECT_EQ(Status::OK(), Status());
+ Status s;
+ EXPECT_TRUE(s.ok());
+}
+
+TEST(DeathStatus, CheckOK) {
+ Status status(errors::InvalidArgument("Invalid"));
+ ASSERT_DEATH(TF_CHECK_OK(status), "Invalid");
+}
+
+TEST(Status, Set) {
+ Status status;
+ status = Status(error::CANCELLED, "Error message");
+ EXPECT_EQ(status.code(), error::CANCELLED);
+ EXPECT_EQ(status.error_message(), "Error message");
+}
+
+TEST(Status, Copy) {
+ Status a(errors::InvalidArgument("Invalid"));
+ Status b(a);
+ ASSERT_EQ(a.ToString(), b.ToString());
+}
+
+TEST(Status, Assign) {
+ Status a(errors::InvalidArgument("Invalid"));
+ Status b;
+ b = a;
+ ASSERT_EQ(a.ToString(), b.ToString());
+}
+
+TEST(Status, Update) {
+ Status s;
+ s.Update(Status::OK());
+ ASSERT_TRUE(s.ok());
+ Status a(errors::InvalidArgument("Invalid"));
+ s.Update(a);
+ ASSERT_EQ(s.ToString(), a.ToString());
+ Status b(errors::Internal("Internal"));
+ s.Update(b);
+ ASSERT_EQ(s.ToString(), a.ToString());
+ s.Update(Status::OK());
+ ASSERT_EQ(s.ToString(), a.ToString());
+ ASSERT_FALSE(s.ok());
+}
+
+TEST(Status, EqualsOK) { ASSERT_EQ(Status::OK(), Status()); }
+
+TEST(Status, EqualsSame) {
+ Status a(errors::InvalidArgument("Invalid"));
+ Status b(errors::InvalidArgument("Invalid"));
+ ASSERT_EQ(a, b);
+}
+
+TEST(Status, EqualsCopy) {
+ const Status a(errors::InvalidArgument("Invalid"));
+ const Status b = a;
+ ASSERT_EQ(a, b);
+}
+
+TEST(Status, EqualsDifferentCode) {
+ const Status a(errors::InvalidArgument("message"));
+ const Status b(errors::Internal("message"));
+ ASSERT_NE(a, b);
+}
+
+TEST(Status, EqualsDifferentMessage) {
+ const Status a(errors::InvalidArgument("message"));
+ const Status b(errors::InvalidArgument("another"));
+ ASSERT_NE(a, b);
+}
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/status_test_util.h b/tensorflow/core/lib/core/status_test_util.h
new file mode 100644
index 0000000000..b3b4db429f
--- /dev/null
+++ b/tensorflow/core/lib/core/status_test_util.h
@@ -0,0 +1,20 @@
+#ifndef TENSORFLOW_LIB_CORE_STATUS_TEST_UTIL_H_
+#define TENSORFLOW_LIB_CORE_STATUS_TEST_UTIL_H_
+
+#include <gtest/gtest.h>
+#include "tensorflow/core/public/status.h"
+
+// Macros for testing the results of functions that return tensorflow::Status.
+
+#define EXPECT_OK(statement) EXPECT_EQ(::tensorflow::Status::OK(), (statement))
+#define ASSERT_OK(statement) ASSERT_EQ(::tensorflow::Status::OK(), (statement))
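+
+// Example (SomeCall() stands for any function returning tensorflow::Status):
+//
+//   ASSERT_OK(SomeCall());     // Fatal on failure.
+//   EXPECT_OK(SomeCall());     // Non-fatal on failure.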
+
+// There are no EXPECT_NOT_OK/ASSERT_NOT_OK macros since they would not
+// provide much value (when they fail, they would just print the OK status,
+// which conveys no more information than EXPECT_FALSE(status.ok())).
+// If you want to check for particular errors, better alternatives are:
+// EXPECT_EQ(::util::Status(...expected error...), status.StripMessage());
+// EXPECT_THAT(status.ToString(), HasSubstr("expected error"));
+// Also, see testing/lib/util/status_util.h.
+
+#endif // TENSORFLOW_LIB_CORE_STATUS_TEST_UTIL_H_
diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc
new file mode 100644
index 0000000000..57c5139f47
--- /dev/null
+++ b/tensorflow/core/lib/core/stringpiece.cc
@@ -0,0 +1,57 @@
+#include "tensorflow/core/lib/core/stringpiece.h"
+
+#include <iostream>
+#include "tensorflow/core/lib/hash/hash.h"
+
+namespace tensorflow {
+
+size_t StringPiece::Hasher::operator()(StringPiece s) const {
+ return Hash64(s.data(), s.size());
+}
+
+std::ostream& operator<<(std::ostream& o, StringPiece piece) {
+ o.write(piece.data(), piece.size());
+ return o;
+}
+
+bool StringPiece::contains(StringPiece s) const {
+ return memmem(data_, size_, s.data_, s.size_) != nullptr;
+}
+
+size_t StringPiece::find(char c, size_t pos) const {
+ if (pos >= size_) {
+ return npos;
+ }
+ const char* result =
+ reinterpret_cast<const char*>(memchr(data_ + pos, c, size_ - pos));
+ return result != NULL ? result - data_ : npos;
+}
+
+// Search range is [0..pos] inclusive. If pos == npos, search everything.
+size_t StringPiece::rfind(char c, size_t pos) const {
+ if (size_ == 0) return npos;
+ for (const char* p = data_ + std::min(pos, size_ - 1); p >= data_; p--) {
+ if (*p == c) {
+ return p - data_;
+ }
+ }
+ return npos;
+}
+
+bool StringPiece::Consume(StringPiece x) {
+ if (starts_with(x)) {
+ remove_prefix(x.size_);
+ return true;
+ }
+ return false;
+}
+
+StringPiece StringPiece::substr(size_t pos, size_t n) const {
+ if (pos > size_) pos = size_;
+ if (n > size_ - pos) n = size_ - pos;
+ return StringPiece(data_ + pos, n);
+}
+
+const StringPiece::size_type StringPiece::npos = size_type(-1);
+
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h
new file mode 100644
index 0000000000..17d4b294e9
--- /dev/null
+++ b/tensorflow/core/lib/core/stringpiece.h
@@ -0,0 +1,159 @@
+// StringPiece is a simple structure containing a pointer into some external
+// storage and a size. The user of a StringPiece must ensure that the slice
+// is not used after the corresponding external storage has been
+// deallocated.
+//
+// Multiple threads can invoke const methods on a StringPiece without
+// external synchronization, but if any of the threads may call a
+// non-const method, all threads accessing the same StringPiece must use
+// external synchronization.
+
+#ifndef TENSORFLOW_LIB_CORE_STRINGPIECE_H_
+#define TENSORFLOW_LIB_CORE_STRINGPIECE_H_
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include <iosfwd>
+#include <string>
+#include "tensorflow/core/platform/port.h"
+
+namespace tensorflow {
+
+class StringPiece {
+ public:
+ typedef size_t size_type;
+
+ // Create an empty slice.
+ StringPiece() : data_(""), size_(0) {}
+
+ // Create a slice that refers to d[0,n-1].
+ StringPiece(const char* d, size_t n) : data_(d), size_(n) {}
+
+ // Create a slice that refers to the contents of "s"
+ StringPiece(const string& s) : data_(s.data()), size_(s.size()) {}
+
+ // Create a slice that refers to s[0,strlen(s)-1]
+ StringPiece(const char* s) : data_(s), size_(strlen(s)) {}
+
+ void set(const void* data, size_t len) {
+ data_ = reinterpret_cast<const char*>(data);
+ size_ = len;
+ }
+
+ // Return a pointer to the beginning of the referenced data
+ const char* data() const { return data_; }
+
+ // Return the length (in bytes) of the referenced data
+ size_t size() const { return size_; }
+
+ // Return true iff the length of the referenced data is zero
+ bool empty() const { return size_ == 0; }
+
+ typedef const char* const_iterator;
+ typedef const char* iterator;
+ iterator begin() const { return data_; }
+ iterator end() const { return data_ + size_; }
+
+ static const size_t npos;
+
+ // Return the ith byte in the referenced data.
+ // REQUIRES: n < size()
+ char operator[](size_t n) const {
+ assert(n < size());
+ return data_[n];
+ }
+
+ // Change this slice to refer to an empty array
+ void clear() {
+ data_ = "";
+ size_ = 0;
+ }
+
+ // Drop the first "n" bytes from this slice.
+ void remove_prefix(size_t n) {
+ assert(n <= size());
+ data_ += n;
+ size_ -= n;
+ }
+
+ void remove_suffix(size_t n) {
+ assert(size_ >= n);
+ size_ -= n;
+ }
+
+ size_t find(char c, size_t pos = 0) const;
+ size_t rfind(char c, size_t pos = npos) const;
+ bool contains(StringPiece s) const;
+
+ // Checks whether StringPiece starts with x and if so advances the beginning
+ // of it to past the match. It's basically a shortcut for starts_with
+ // followed by remove_prefix.
+ bool Consume(StringPiece x);
+
+ StringPiece substr(size_t pos, size_t n = npos) const;
+
+ struct Hasher {
+ size_t operator()(StringPiece arg) const;
+ };
+
+  // Return a string that contains a copy of the referenced data.
+ std::string ToString() const { return std::string(data_, size_); }
+
+ // Three-way comparison. Returns value:
+ // < 0 iff "*this" < "b",
+ // == 0 iff "*this" == "b",
+ // > 0 iff "*this" > "b"
+ int compare(StringPiece b) const;
+
+ // Return true iff "x" is a prefix of "*this"
+ bool starts_with(StringPiece x) const {
+ return ((size_ >= x.size_) && (memcmp(data_, x.data_, x.size_) == 0));
+ }
+ // Return true iff "x" is a suffix of "*this"
+ bool ends_with(StringPiece x) const {
+ return ((size_ >= x.size_) &&
+ (memcmp(data_ + (size_ - x.size_), x.data_, x.size_) == 0));
+ }
+
+ private:
+ const char* data_;
+ size_t size_;
+
+ // Intentionally copyable
+};
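+
+// Illustrative usage. A StringPiece does not own its bytes, so the backing
+// storage must outlive every StringPiece that refers to it:
+//
+//   string s = "hello world";
+//   StringPiece p(s);
+//   if (p.starts_with("hello")) {
+//     p.remove_prefix(6);          // p now refers to "world".
+//   }
+//   string copy = p.ToString();    // Copies the referenced bytes.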
+
+inline bool operator==(StringPiece x, StringPiece y) {
+ return ((x.size() == y.size()) &&
+ (memcmp(x.data(), y.data(), x.size()) == 0));
+}
+
+inline bool operator!=(StringPiece x, StringPiece y) { return !(x == y); }
+
+inline bool operator<(StringPiece x, StringPiece y) { return x.compare(y) < 0; }
+inline bool operator>(StringPiece x, StringPiece y) { return x.compare(y) > 0; }
+inline bool operator<=(StringPiece x, StringPiece y) {
+ return x.compare(y) <= 0;
+}
+inline bool operator>=(StringPiece x, StringPiece y) {
+ return x.compare(y) >= 0;
+}
+
+inline int StringPiece::compare(StringPiece b) const {
+ const size_t min_len = (size_ < b.size_) ? size_ : b.size_;
+ int r = memcmp(data_, b.data_, min_len);
+ if (r == 0) {
+ if (size_ < b.size_)
+ r = -1;
+ else if (size_ > b.size_)
+ r = +1;
+ }
+ return r;
+}
+
+// allow StringPiece to be logged
+extern std::ostream& operator<<(std::ostream& o, tensorflow::StringPiece piece);
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_STRINGPIECE_H_
diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc
new file mode 100644
index 0000000000..e9b84d3102
--- /dev/null
+++ b/tensorflow/core/lib/core/threadpool.cc
@@ -0,0 +1,108 @@
+#include "tensorflow/core/lib/core/threadpool.h"
+
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/tracing.h"
+
+namespace tensorflow {
+namespace thread {
+
+struct ThreadPool::Waiter {
+ condition_variable cv;
+ bool ready;
+};
+
+ThreadPool::ThreadPool(Env* env, const string& name, int num_threads)
+ : ThreadPool(env, ThreadOptions(), name, num_threads) {}
+
+ThreadPool::ThreadPool(Env* env, const ThreadOptions& thread_options,
+ const string& name, int num_threads)
+ : name_(name) {
+ CHECK_GE(num_threads, 1);
+ string name_prefix = "tf_" + name_;
+ for (int i = 0; i < num_threads; i++) {
+ threads_.push_back(env->StartThread(thread_options, name_prefix,
+ [this]() { WorkerLoop(); }));
+ }
+}
+
+ThreadPool::~ThreadPool() {
+ {
+ // Wait for all work to get done.
+ mutex_lock l(mu_);
+
+ // Inform every thread to exit.
+ for (size_t i = 0; i < threads_.size(); ++i) {
+ pending_.push_back({nullptr, 0});
+ }
+
+ // Wakeup all waiters.
+ for (auto w : waiters_) {
+ w->ready = true;
+ w->cv.notify_one();
+ }
+ }
+
+ // Wait for threads to finish.
+ for (auto t : threads_) {
+ delete t;
+ }
+}
+
+bool ThreadPool::HasPendingClosures() const {
+ mutex_lock l(mu_);
+ return pending_.size() != 0;
+}
+
+void ThreadPool::Schedule(std::function<void()> fn) {
+ CHECK(fn != nullptr);
+ uint64 id = 0;
+ if (port::Tracing::IsActive()) {
+ id = port::Tracing::UniqueId();
+ port::Tracing::RecordEvent(port::Tracing::EventCategory::kScheduleClosure,
+ id);
+ }
+
+ mutex_lock l(mu_);
+ pending_.push_back({fn, id});
+ if (!waiters_.empty()) {
+ Waiter* w = waiters_.back();
+ waiters_.pop_back();
+ w->ready = true;
+ w->cv.notify_one();
+ }
+}
+
+void ThreadPool::WorkerLoop() {
+ port::Tracing::RegisterCurrentThread(name_.c_str());
+ mutex_lock l(mu_);
+ Waiter w;
+ while (true) {
+ while (pending_.empty()) {
+ // Wait for work to be assigned to me
+ w.ready = false;
+ waiters_.push_back(&w);
+ while (!w.ready) {
+ w.cv.wait(l);
+ }
+ }
+ // Pick up pending work
+ Item item = pending_.front();
+ pending_.pop_front();
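+    // A null fn is the sentinel enqueued by ~ThreadPool() to tell this
+    // worker to exit.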
+ if (item.fn == nullptr) {
+ break;
+ }
+ mu_.unlock();
+ if (item.id != 0) {
+ port::Tracing::ScopedActivity region(
+ port::Tracing::EventCategory::kRunClosure, item.id);
+ item.fn();
+ } else {
+ item.fn();
+ }
+ mu_.lock();
+ }
+}
+
+} // namespace thread
+} // namespace tensorflow
diff --git a/tensorflow/core/lib/core/threadpool.h b/tensorflow/core/lib/core/threadpool.h
new file mode 100644
index 0000000000..5cf780fa86
--- /dev/null
+++ b/tensorflow/core/lib/core/threadpool.h
@@ -0,0 +1,59 @@
+#ifndef TENSORFLOW_LIB_CORE_THREADPOOL_H_
+#define TENSORFLOW_LIB_CORE_THREADPOOL_H_
+
+#include <deque>
+#include <functional>
+#include <thread>
+#include <vector>
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/public/env.h"
+
+namespace tensorflow {
+namespace thread {
+
+class ThreadPool {
+ public:
+ // Construct a pool that contains "num_threads" threads with specified "name".
+ // env->StartThread() is used to create individual threads.
+ //
+ // REQUIRES: num_threads > 0
+ ThreadPool(Env* env, const string& name, int num_threads);
+
+ // Construct a pool that contains "num_threads" threads with specified "name".
+ // env->StartThread() is used to create individual threads.
+ //
+ // REQUIRES: num_threads > 0
+ ThreadPool(Env* env, const ThreadOptions& thread_options, const string& name,
+ int num_threads);
+
+ // Wait until all scheduled work has finished and then destroy the
+ // set of threads.
+ virtual ~ThreadPool();
+
+ // Schedule fn() for execution in the pool of threads.
+ virtual void Schedule(std::function<void()> fn);
+
+ virtual bool HasPendingClosures() const;
+
+ private:
+ struct Waiter;
+ struct Item {
+ std::function<void()> fn;
+ uint64 id;
+ };
+
+ void WorkerLoop();
+
+ const string name_;
+ mutable mutex mu_;
+ std::vector<Thread*> threads_; // All threads
+ std::vector<Waiter*> waiters_; // Stack of waiting threads.
+ std::deque<Item> pending_; // Queue of pending work
+
+ TF_DISALLOW_COPY_AND_ASSIGN(ThreadPool);
+};
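+
+// Typical usage (illustrative sketch; ProcessShard is a placeholder):
+//
+//   thread::ThreadPool pool(Env::Default(), "worker", 4);
+//   for (int i = 0; i < 10; i++) {
+//     pool.Schedule([i]() { ProcessShard(i); });
+//   }
+//   // ~ThreadPool() blocks until every scheduled closure has run.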
+
+} // namespace thread
+} // namespace tensorflow
+
+#endif // TENSORFLOW_LIB_CORE_THREADPOOL_H_
diff --git a/tensorflow/core/lib/core/threadpool_test.cc b/tensorflow/core/lib/core/threadpool_test.cc
new file mode 100644
index 0000000000..f4909c445c
--- /dev/null
+++ b/tensorflow/core/lib/core/threadpool_test.cc
@@ -0,0 +1,93 @@
+#include "tensorflow/core/lib/core/threadpool.h"
+
+#include <atomic>
+
+#include "tensorflow/core/platform/test_benchmark.h"
+#include "tensorflow/core/public/env.h"
+#include <gtest/gtest.h>
+
+namespace tensorflow {
+namespace thread {
+
+static const int kNumThreads = 30;
+
+TEST(ThreadPool, Empty) {
+ for (int num_threads = 1; num_threads < kNumThreads; num_threads++) {
+ fprintf(stderr, "Testing with %d threads\n", num_threads);
+ ThreadPool pool(Env::Default(), "test", num_threads);
+ }
+}
+
+TEST(ThreadPool, DoWork) {
+ for (int num_threads = 1; num_threads < kNumThreads; num_threads++) {
+ fprintf(stderr, "Testing with %d threads\n", num_threads);
+ const int kWorkItems = 15;
+ bool work[kWorkItems];
+ for (int i = 0; i < kWorkItems; i++) {
+ work[i] = false;
+ }
+ {
+ ThreadPool pool(Env::Default(), "test", num_threads);
+ for (int i = 0; i < kWorkItems; i++) {
+ pool.Schedule([&work, i]() {
+ ASSERT_FALSE(work[i]);
+ work[i] = true;
+ });
+ }
+ }
+ for (int i = 0; i < kWorkItems; i++) {
+ ASSERT_TRUE(work[i]);
+ }
+ }
+}
+
+static void BM_Sequential(int iters) {
+ ThreadPool pool(Env::Default(), "test", kNumThreads);
+ // Decrement count sequentially until 0.
+ int count = iters;
+ mutex done_lock;
+ condition_variable done;
+ bool done_flag = false;
+ std::function<void()> work = [&pool, &count, &done_lock, &done, &done_flag,
+ &work]() {
+ if (count--) {
+ pool.Schedule(work);
+ } else {
+ mutex_lock l(done_lock);
+ done_flag = true;
+ done.notify_all();
+ }
+ };
+ work();
+ mutex_lock l(done_lock);
+ if (!done_flag) {
+ done.wait(l);
+ }
+}
+BENCHMARK(BM_Sequential);
+
+static void BM_Parallel(int iters) {
+ ThreadPool pool(Env::Default(), "test", kNumThreads);
+ // Decrement count concurrently until 0.
+ std::atomic_int_fast32_t count(iters);
+ mutex done_lock;
+ condition_variable done;
+ bool done_flag = false;
+ for (int i = 0; i < iters; ++i) {
+ pool.Schedule([&count, &done_lock, &done, &done_flag]() {
+ if (count.fetch_sub(1) == 1) {
+ mutex_lock l(done_lock);
+ done_flag = true;
+ done.notify_all();
+ }
+ });
+ }
+ mutex_lock l(done_lock);
+ if (!done_flag) {
+ done.wait(l);
+ }
+}
+BENCHMARK(BM_Parallel);
+
+} // namespace thread
+} // namespace tensorflow