From a0ffaf3caa0234653035a692858606c7bdacd63b Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Thu, 13 Jul 2017 14:51:47 -0700 Subject: Merge changes from github. END_PUBLIC --- Commit fe5338177 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 161727345 --- Commit c65f69119 authored by Eugene Brevdo Committed by TensorFlower Gardener: Factor out DenseUpdate ops into dense_update_functor build dep. Also add support for complex types. PiperOrigin-RevId: 161726749 --- Commit 9a172989e authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Update ops-related pbtxt files. PiperOrigin-RevId: 161726324 --- Commit fd5530d6e authored by A. Unique TensorFlower Committed by TensorFlower Gardener: adding bazel-toolchains repo to workspace. This repo will be necessary for remote execution (specifically for cross OS compilation) PiperOrigin-RevId: 161719899 --- Commit 71c4ec8ed authored by Derek Murray Committed by TensorFlower Gardener: Add a mechanism for switching between multiple iterators by feeding a handle. With this change, you can do the following: 1. Fetch a string handle for any iterator, by evaluating the result of `Iterator.string_handle()`. 2. Define an `Iterator` object based on a `tf.string` placeholder handle. 3. Feed the placeholder using an evaluated string handle to use a particular iterator in a particular step. 
Concretely, this allows you to define two iterators for a training dataset and a test dataset, and choose which one to use on a per-run basis: ```python train_iterator = tf.contrib.data.Dataset(...).make_one_shot_iterator() train_iterator_handle = sess.run(train_iterator.string_handle()) test_iterator = tf.contrib.data.Dataset(...).make_one_shot_iterator() test_iterator_handle = sess.run(test_iterator.string_handle()) handle = tf.placeholder(tf.string, shape=[]) iterator = tf.contrib.data.Iterator.from_string_handle( handle, train_iterator.output_types) next_element = iterator.get_next() loss = f(next_element) train_loss = sess.run(loss, feed_dict={handle: train_iterator_handle}) test_loss = sess.run(loss, feed_dict={handle: test_iterator_handle}) ``` PiperOrigin-RevId: 161719836 --- Commit 6d6dda807 authored by Kay Zhu Committed by TensorFlower Gardener: [TF:XLA] Fix an issue where plugin/Executor backend is used by default when TF is built from source with XLA support. See Github issue #11122. The priority of the executor backend is set to be higher than the default (50) and CPUs (<100), and is therefore selected as the default when tf.device is not explicitly specified. PiperOrigin-RevId: 161717173 --- Commit 6b28eb084 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Rename HloLocation to HloPosition, to avoid ambiguity with MemoryLocation. PiperOrigin-RevId: 161716528 --- Commit 8e7f57371 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Expose tf.contrib.nn.rank_sampled_softmax_loss. PiperOrigin-RevId: 161716450 --- Commit e424d209a authored by Peter Hawkins Committed by TensorFlower Gardener: [TF:XLA] Use a more numerically accurate formulation of ResourceApplyRMSProp. 
PiperOrigin-RevId: 161706120 --- Commit 45a58d378 authored by Skye Wanderman-Milne Committed by TensorFlower Gardener: Introduce Python-only extensions to the C API Implements an incomplete version of Operation._add_control_input() using a new extension to make sure the plumbing works. This also adds header guards to c_api_internal.h, which were missing. For some reason the missing guards caused problems in the cmake build even though there doesn't appear to be any #include cycles. PiperOrigin-RevId: 161705859 --- Commit 4f5433634 authored by Jonathan Hseu Committed by TensorFlower Gardener: Rename TpuEstimator to TPUEstimator and TpuConfig to TPUConfig to follow PEP8 naming conventions. PiperOrigin-RevId: 161704561 --- Commit 38180d7bb authored by Yun Peng Committed by gunan: Disable nn_test on Windows (#11445) --- Commit e1de7a1b0 authored by Yun Peng Committed by gunan: Windows Bazel Build: Build TensorFlow with wrapper-less CROSSTOOL (#11454) --- Commit c9d03a568 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Add tf.contrib.nn.rank_sampled_softmax_loss, a variant of tf.nn.sampled_softmax_loss that has been shown to improve rank loss. Paper: https://arxiv.org/abs/1707.03073 PiperOrigin-RevId: 161702455 --- Commit 9aa0dcbf2 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Add shape check for MakeQuantileSummariesOp. PiperOrigin-RevId: 161698801 --- Commit 9c4da4a24 authored by vhasanov Committed by Frank Chen: Deleted unnecessary repetition of the same text. (#11459) The same text was repeated two times. I deleted the repetition. --- Commit d1e3cadda authored by DimanNe Committed by drpngx: Fix linking options issued by bazel in order to make gradients register (#11449) --- Commit 8605f7ab8 authored by Taehoon Lee Committed by Frank Chen: Fix typos (#11444) --- Commit 7c1fe9068 authored by Karl Lessard Committed by Frank Chen: [Java] Add base classes and utilities for operation wrappers. 
(#11188) * Add base classes and utilities for operation wrappers. * Rename Input interface to Operand * Introduce changes after code review --- Commit 2195db6d8 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: Remove unused flag: xla_hlo_graph_for_compute_constant PiperOrigin-RevId: 161686867 --- Commit a72fc31bc authored by Martin Wicke Committed by Martin Wicke: Remove tabs. Unassign contrib/framework. --- Commit 6e74bd65a authored by Martin Wicke Committed by Martin Wicke: Add CODEOWNERS Added what we know about contrib mainly, and some well-separated components. --- Commit de546d066 authored by A. Unique TensorFlower Committed by TensorFlower Gardener: BUILD cleanup in tensorflow/compiler/... PiperOrigin-RevId: 161679855 --- Commit 576c7b1ec authored by A. Unique TensorFlower Committed by TensorFlower Gardener: BEGIN_PUBLIC Automated g4 rollback of changelist 161218103 PiperOrigin-RevId: 161868747 --- tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc | 13 +++++------ tensorflow/contrib/mpi/mpi_rendezvous_mgr.h | 33 ++++++++++------------------ 2 files changed, 16 insertions(+), 30 deletions(-) (limited to 'tensorflow/contrib/mpi') diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc index e97e8d0163..1a2563d20f 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.cc @@ -44,7 +44,8 @@ MPIRendezvousMgr::MPIRendezvousMgr(const WorkerEnv* env) // extract worker-name auto parsed = env->local_devices[0]->parsed_name(); - const std::string task_id = strings::StrCat(parsed.job, ":", parsed.replica); + const std::string task_id = + strings::StrCat(parsed.job, ":", parsed.replica, ":", parsed.task); mpiutils_ = new MPIUtils(task_id); background_thread_ = @@ -66,8 +67,8 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync( VLOG(2) << "MPI User requested " << parsed.FullKey() << " @ step: " << step_id_; - std::string src_task = - 
strings::StrCat(parsed.src.job, ":", parsed.src.replica); + std::string src_task = strings::StrCat( + parsed.src.job, ":", parsed.src.replica, ":", parsed.src.task); const int dst = mpiutils_->GetSourceID(src_task); Device* dst_device; @@ -138,11 +139,7 @@ void MPIRemoteRendezvous::RecvFromRemoteAsync( std::move(request_call), rendezvous_call); } -MPIRemoteRendezvous::~MPIRemoteRendezvous() { - MPIRendezvousMgr* mgr = - reinterpret_cast<MPIRendezvousMgr*>(this->rendezvous_mgr_); - mgr->RemoveStepID(step_id_); -} +MPIRemoteRendezvous::~MPIRemoteRendezvous() {} /* * Add the request for one of our Tensors by a remote process diff --git a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h index 50fc380496..24e784df3e 100644 --- a/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h +++ b/tensorflow/contrib/mpi/mpi_rendezvous_mgr.h @@ -147,15 +147,8 @@ class MPIRendezvousMgr : public BaseRendezvousMgr { MPIRequestTensorCall* rCall) { mutex_lock l(mrq_); request_queue_.push(RequestQueueEntry(key, std::move(request_call))); - recv_tensor_map_[step_id][key] = - std::shared_ptr<MPIRequestTensorCall>(rCall); - } - - void RemoveStepID(const int64 step_id) { - mutex_lock l(mrq_); - CHECK(recv_tensor_map_[step_id].size() == 0) << "Removing unfinished step"; - recv_tensor_map_.erase(step_id); - // TODO(jbedorf) Should we verify that the step_id is clear before remove? 
+ const std::string key_id = strings::StrCat(key, "_", step_id); + recv_tensor_map_[key_id] = std::shared_ptr<MPIRequestTensorCall>(rCall); } protected: @@ -181,9 +174,8 @@ class MPIRendezvousMgr : public BaseRendezvousMgr { std::queue<SendQueueEntry> send_queue_ GUARDED_BY(msq_); std::queue<RequestQueueEntry> request_queue_ GUARDED_BY(mrq_); - std::map<int64, std::unordered_map<std::string, std::shared_ptr<MPIRequestTensorCall>>> - recv_tensor_map_ GUARDED_BY(mrq_); + std::map<std::string, std::shared_ptr<MPIRequestTensorCall>> recv_tensor_map_ + GUARDED_BY(mrq_); void AddRequest(RecvTensorRequest, const int); void MPIBackgroundThread(); @@ -196,22 +188,19 @@ class MPIRendezvousMgr : public BaseRendezvousMgr { void GetRecvCall(const int64 step_id, const std::string& key, std::shared_ptr<MPIRequestTensorCall>* call) { mutex_lock l(mrq_); - if (recv_tensor_map_.find(step_id) == recv_tensor_map_.end()) { - LOG(FATAL) << "Step not found in recv_tensor_map_, step: " << step_id - << " key: " << key << std::endl; - } - if (recv_tensor_map_[step_id].find(key) != - recv_tensor_map_[step_id].end()) { - *call = recv_tensor_map_[step_id][key]; - } else { - LOG(FATAL) << "Key not found in recv_tensor_map_, step: " << step_id + + const std::string key_id = strings::StrCat(key, "_", step_id); + if (recv_tensor_map_.find(key_id) == recv_tensor_map_.end()) { + LOG(FATAL) << "Key/step not found in recv_tensor_map_, step: " << step_id << " key: " << key << std::endl; } + *call = recv_tensor_map_[key_id]; } void RemoveRecvCall(const int64 step_id, const std::string& key) { mutex_lock l(mrq_); - recv_tensor_map_[step_id].erase(key); + const std::string key_id = strings::StrCat(key, "_", step_id); + recv_tensor_map_.erase(key_id); } bool GetRequest(RequestQueueEntry* req) { -- cgit v1.2.3