From 519099e020783125d1fbc323f783b415fe86acde Mon Sep 17 00:00:00 2001 From: Steven Winston Date: Mon, 23 Apr 2018 11:11:30 -0700 Subject: fix for AR not being defined. --- tensorflow/contrib/makefile/compile_nsync.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/makefile/compile_nsync.sh b/tensorflow/contrib/makefile/compile_nsync.sh index e8c6edd7ba..55f0aef19b 100755 --- a/tensorflow/contrib/makefile/compile_nsync.sh +++ b/tensorflow/contrib/makefile/compile_nsync.sh @@ -256,6 +256,7 @@ for arch in $archs; do esac makefile=' + AR := ${NDK_ROOT}/toolchains/'"$toolchain"'/prebuilt/'"$android_os_arch"'/bin/'"$bin_prefix"'-ar CC=${CC_PREFIX} \ ${NDK_ROOT}/toolchains/'"$toolchain"'/prebuilt/'"$android_os_arch"'/bin/'"$bin_prefix"'-g++ PLATFORM_CPPFLAGS=--sysroot \ -- cgit v1.2.3 From ac15dfe64ea7c122f7609ae5f3f927447b41a02e Mon Sep 17 00:00:00 2001 From: Rholais Lii Date: Sun, 29 Apr 2018 15:44:46 +0800 Subject: Use `get_cosine_decay_fn` to match the description --- tensorflow/contrib/opt/python/training/powersign.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/opt/python/training/powersign.py b/tensorflow/contrib/opt/python/training/powersign.py index 828f3c51c9..b4aa19264d 100644 --- a/tensorflow/contrib/opt/python/training/powersign.py +++ b/tensorflow/contrib/opt/python/training/powersign.py @@ -65,7 +65,7 @@ class PowerSignOptimizer(optimizer.Optimizer): Example usage for PowerSign-cd (PowerSign with cosine sign decay) ``` decay_steps = 1000 - linear_decay_fn = sign_decays.get_linear_decay_fn(decay_steps) + linear_decay_fn = sign_decays.get_cosine_decay_fn(decay_steps) opt = PowerSignOptimizer(learning_rate=0.1, sign_decay_fn=linear_decay_fn) ``` -- cgit v1.2.3 From daa76e16e05c2b7a3521bf739670903d996d9a33 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 18 May 2018 11:37:53 -0700 Subject: enhancement with relu primitive reuse --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 ++++++++------ tensorflow/core/kernels/mkl_relu_op.cc | 661 +++++++++++++++++++++++++------- tensorflow/core/util/mkl_util.h | 32 +- 3 files changed, 702 insertions(+), 271 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index f2b14f1278..c032add82e 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,7 +59,8 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -struct ConvFwdDimensions { +// This structure aggregates multiple inputs to Conv2DFwd* methods. 
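// The struct and classes that follow implement primitive reuse. A minimal
// usage sketch (illustrative only; float data assumed, NONE_DIMS standing
// in for "no bias" as in the kernel code further down in this patch):
//
//   MklConvFwdParams params(src_dims, filter_dims, NONE_DIMS,
//                           dst_dims_mkl_order, strides, dilations,
//                           padding_left, padding_right);
//   MklConv2DFwdPrimitive<float>* conv2d_fwd =
//       MklConv2DFwdPrimitiveFactory<float>::Get(params);
//   conv2d_fwd->Execute(src_data, filter_data, dst_data);
//
// The factory keys its cache on the parameter set, so identical shapes
// reuse one MKL-DNN primitive instead of rebuilding it on every step.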
+struct MklConvFwdParams { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -69,7 +70,7 @@ struct ConvFwdDimensions { memory::dims padding_left; memory::dims padding_right; - ConvFwdDimensions(memory::dims src_dims, + MklConvFwdParams(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -82,35 +83,40 @@ struct ConvFwdDimensions { }; template -class Conv2DFwd : public DnnOp { +class MklConv2DFwdPrimitive: public MklPrimitive { public: - explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { - fwd_stream_.reset(new stream(stream::kind::eager)); + explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { + context_.fwd_stream.reset(new stream(stream::kind::eager)); // create conv primitive - if (conv_fwd_ == nullptr) { + if (context_.conv_fwd == nullptr) { Setup(convFwdDims); } } - ~Conv2DFwd() {} + ~MklConv2DFwdPrimitive() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - bias_mem_->set_data_handle(static_cast(bias_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); + void Execute(const T* src_data, const T* filter_data, + const T* bias_data, const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.bias_mem->set_data_handle( + static_cast(const_cast(bias_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - bias_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + context_.bias_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); return; } @@ -119,139 +125,174 @@ class Conv2DFwd : public DnnOp { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(T* src_data, T* filter_data, T* dst_data) { - src_mem_->set_data_handle(static_cast(src_data)); - filter_mem_->set_data_handle(static_cast(filter_data)); - dst_mem_->set_data_handle(static_cast(dst_data)); - fwd_stream_->submit(fwd_primitives_); - - // after exec, set data handle back - src_mem_->set_data_handle(DummyData); - filter_mem_->set_data_handle(DummyData); - dst_mem_->set_data_handle(DummyData); + void Execute(const T* src_data, const T* filter_data, + const T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.filter_mem->set_data_handle( + static_cast(const_cast(filter_data))); + context_.dst_mem->set_data_handle( + static_cast(const_cast(dst_data))); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.filter_mem->set_data_handle(DummyData); + 
context_.dst_mem->set_data_handle(DummyData); return; } - // expected memory format for this primitive instance - memory::format src_fmt_; - memory::format filter_fmt_; + memory::format GetSrcMemoryFormat() const { + return context_.src_fmt; + } + + memory::format GetFilterMemoryFormat() const { + return context_.filter_fmt; + } - // convolution primitive - std::shared_ptr fwd_pd_; - std::shared_ptr conv_fwd_; + std::shared_ptr + GetPrimitiveDesc() const { + return context_.fwd_pd; + } private: - void Setup(const ConvFwdDimensions& convFwdDims) { + // Primitive reuse context for Conv2D Fwd op + struct ConvFwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format filter_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr filter_mem; + std::shared_ptr bias_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr filter_md; + std::shared_ptr bias_md; + std::shared_ptr dst_md; + + // convolution primitive + std::shared_ptr fwd_pd; + std::shared_ptr conv_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + ConvFwdContext() : + src_fmt(memory::format::any), filter_fmt(memory::format::any), + src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), + dst_mem(nullptr), fwd_desc(nullptr), + src_md(nullptr), filter_md(nullptr), bias_md(nullptr), + fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklConvFwdParams& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - src_md_.reset(new memory::desc({convFwdDims.src_dims}, + context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, + context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, + context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, + context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if (!convFwdDims.bias_dims.empty()) { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.bias_md, *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *src_md_, *filter_md_, *dst_md_, - convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, - convFwdDims.padding_right, padding_kind::zero)); + context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *context_.src_md, *context_.filter_md, + *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, + convFwdDims.padding_left, convFwdDims.padding_right, + padding_kind::zero)); } - fwd_pd_.reset(new convolution_forward::primitive_desc( - *fwd_desc_, cpu_engine_)); + context_.fwd_pd.reset(new 
convolution_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); // store the expected memory format - src_fmt_ = static_cast( - fwd_pd_.get()->src_primitive_desc().desc().data.format); + context_.src_fmt = static_cast( + context_.fwd_pd.get()->src_primitive_desc().desc().data.format); - filter_fmt_ = static_cast( - fwd_pd_.get()->weights_primitive_desc().desc().data.format); + context_.filter_fmt = static_cast( + context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); - filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), - DummyData)); - dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); + context_.src_mem.reset(new memory( + context_.fwd_pd.get()->src_primitive_desc(), DummyData)); + context_.filter_mem.reset(new memory( + context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), - memory::format::x}, cpu_engine_}, DummyData)); - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *bias_mem_, *dst_mem_)); + context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, + MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, + *context_.bias_mem, *context_.dst_mem)); } else { - conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, - *filter_mem_, *dst_mem_)); + context_.conv_fwd.reset(new convolution_forward( + *context_.fwd_pd, *context_.src_mem, + *context_.filter_mem, *context_.dst_mem)); } - fwd_primitives_.push_back(*conv_fwd_); + context_.fwd_primitives.push_back(*context_.conv_fwd); return; } - - // MKLDNN memory - std::shared_ptr src_mem_; - std::shared_ptr filter_mem_; - std::shared_ptr bias_mem_; - std::shared_ptr dst_mem_; - - std::shared_ptr fwd_stream_; - std::vector fwd_primitives_; - - // desc & prmitive desc - std::shared_ptr fwd_desc_; - - // memory desc - std::shared_ptr src_md_; - std::shared_ptr filter_md_; - std::shared_ptr bias_md_; - std::shared_ptr dst_md_; - - engine cpu_engine_ = engine(engine::cpu, 0); }; template -class Conv2DFwdFactory : public DnnOpFactory { +class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { public: - static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { - Conv2DFwd* conv2d_fwd = nullptr; + static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { + MklConv2DFwdPrimitive* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> ( - Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( + convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new Conv2DFwd(convFwdDims); - Conv2DFwdFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); + MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - Conv2DFwdFactory() {} - ~Conv2DFwdFactory() {} + MklConv2DFwdPrimitiveFactory() {} + ~MklConv2DFwdPrimitiveFactory() {} static const int kDilationH = 0, kDilationW = 1; - static Conv2DFwdFactory& 
GetInstance() { - static Conv2DFwdFactory instance_; + static MklConv2DFwdPrimitiveFactory& GetInstance() { + static MklConv2DFwdPrimitiveFactory instance_; return instance_; } - static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { + static std::string CreateKey(const MklConvFwdParams& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -266,12 +307,12 @@ class Conv2DFwdFactory : public DnnOpFactory { return key_creator.GetKey(); } - DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { + MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { + void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -762,7 +803,6 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); - MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -812,7 +852,6 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); - src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -820,29 +859,28 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); - filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - Conv2DFwd *conv2d_fwd = nullptr; + MklConv2DFwdPrimitive *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, + MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } else { - ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, + MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); + conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->fwd_pd_; + conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -854,20 +892,30 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - if (src_md.data.format != conv2d_fwd->src_fmt_) - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - - if (filter_md.data.format != conv2d_fwd->filter_fmt_) - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); + T *src_data = nullptr; + if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + 
conv_fwd_pd.get()->src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast(const_cast( + src_tensor.flat().data())); + } + T *filter_data = nullptr; + if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { + filter.SetUsrMem(filter_md, &filter_tensor); + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); + filter_data = static_cast(filter.GetOpMem().get_data_handle()); + } else { + filter_data = static_cast(const_cast( + filter_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); - T* src_data = static_cast( - src.GetOpMem().get_data_handle()); - T* filter_data = static_cast( - filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 1ed43834dd..048d4883b2 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -25,6 +25,7 @@ limitations under the License. #include "mkl_dnn.h" #include "mkl_dnn_types.h" +#include "tensorflow/core/platform/default/logging.h" #include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML @@ -38,10 +39,406 @@ using mkldnn::prop_kind; using mkldnn::relu_backward; using mkldnn::relu_forward; using mkldnn::stream; +using mkldnn::memory; #endif namespace tensorflow { +#ifndef INTEL_MKL_ML + +template +class MklEltwiseFwdParams { + public: + memory::dims src_dims; // check if this is needed + memory::desc src_md; + algorithm alg_kind; + T alpha; + T beta; + + MklEltwiseFwdParams(memory::dims src_dims, memory::desc src_md, + algorithm alg_kind, T alpha, T beta) : + src_dims(src_dims), src_md(src_md), + alg_kind(alg_kind), alpha(alpha), beta(beta) { + } +}; + +template +class MklEltwiseFwdPrimitive : public MklPrimitive { + public: + explicit MklEltwiseFwdPrimitive(const MklEltwiseFwdParams& fwdParams) { + // store expected format + context_.src_fmt = static_cast( + fwdParams.src_md.data.format); + context_.fwd_stream.reset(new stream(stream::kind::eager)); + + // create eltwise primitive + if (context_.eltwise_fwd == nullptr) { + Setup(fwdParams); + } + } + + ~MklEltwiseFwdPrimitive() {} + + // Eltwise forward execute + // src_data: input data buffer of src + // dst_data: output data buffer of dst + void Execute(T* src_data, T* dst_data) { + context_.src_mem->set_data_handle(static_cast(src_data)); + context_.dst_mem->set_data_handle(static_cast(dst_data)); + context_.fwd_stream->submit(context_.fwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); + return; + } + + std::shared_ptr GetEltwiseFwdPd() { + return context_.fwd_pd; + } + + memory::format GetSrcMemoryFormat() { + return context_.src_fmt; + } + + private: + // Primitive reuse context for eltwise Fwd ops: Relu, Elu, Tanh + struct EltwiseFwdContext { + // expected memory format for this primitive instance + mkldnn::memory::format src_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr dst_mem; + + // desc & prmitive desc + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr dst_md; + + // memory primitive desc + std::shared_ptr src_mpd; + + // Eltwise primitive + std::shared_ptr eltwise_fwd; + + std::shared_ptr fwd_stream; + std::vector fwd_primitives; + + EltwiseFwdContext() : + 
src_fmt(memory::format::any), src_mem(nullptr), dst_mem(nullptr), + fwd_desc(nullptr), fwd_pd(nullptr), src_md(nullptr), dst_md(nullptr), + src_mpd(nullptr), eltwise_fwd(nullptr), fwd_stream(nullptr) { + } + } context_; + + // Eltwise forward primitive setup + void Setup(const MklEltwiseFwdParams& fwdParams) { + // create memory descriptors for eltwise data with specified format + context_.src_md.reset(new memory::desc(fwdParams.src_md.data)); + context_.src_mpd.reset(new memory::primitive_desc( + *context_.src_md, cpu_engine_)); + + // create a eltwise + context_.fwd_desc.reset(new mkldnn::eltwise_forward::desc( + prop_kind::forward, fwdParams.alg_kind, *context_.src_md, + fwdParams.alpha, fwdParams.beta)); + context_.fwd_pd.reset(new mkldnn::eltwise_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); + + // create memory primitive based on dummy data + context_.src_mem.reset(new memory(*context_.src_mpd, DummyData)); + context_.dst_mem.reset(new memory( + context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + + // create eltwise primitive and add it to net + context_.eltwise_fwd.reset(new mkldnn::eltwise_forward(*context_.fwd_pd, + *context_.src_mem, *context_.dst_mem)); + + context_.fwd_primitives.push_back(*context_.eltwise_fwd); + return; + } + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + +template +class MklEltwiseFwdPrimitiveFactory : public MklPrimitiveFactory { + public: + static MklEltwiseFwdPrimitive* Get( + const MklEltwiseFwdParams& fwdParams) { + MklEltwiseFwdPrimitive* eltwise_forward = nullptr; + + auto src_fmt = static_cast( + fwdParams.src_md.data.format); + + // Get a eltwise fwd primitive from the cached pool + eltwise_forward = static_cast*>( + MklEltwiseFwdPrimitiveFactory::GetInstance().GetEltwiseFwd( + fwdParams, src_fmt)); + if (eltwise_forward == nullptr) { + eltwise_forward = new MklEltwiseFwdPrimitive(fwdParams); + MklEltwiseFwdPrimitiveFactory::GetInstance().SetEltwiseFwd( + fwdParams, src_fmt, eltwise_forward); + } + return eltwise_forward; + } + + static MklEltwiseFwdPrimitiveFactory& GetInstance() { + static MklEltwiseFwdPrimitiveFactory instance_; + return instance_; + } + + private: + MklEltwiseFwdPrimitiveFactory() {} + ~MklEltwiseFwdPrimitiveFactory() {} + + static std::string CreateKey( + const MklEltwiseFwdParams& fwdParams, memory::format src_fmt) { + std::string prefix = "eltwise_fwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(fwdParams.src_dims); + key_creator.AddAsKey(static_cast(fwdParams.alg_kind)); + key_creator.AddAsKey(static_cast(fwdParams.alpha)); + key_creator.AddAsKey(static_cast(fwdParams.beta)); + key_creator.AddAsKey(static_cast(src_fmt)); + return key_creator.GetKey(); + } + + MklPrimitive* GetEltwiseFwd(const MklEltwiseFwdParams& fwdParams, + memory::format src_fmt) { + std::string key = CreateKey(fwdParams, src_fmt); + return this->GetOp(key); + } + + void SetEltwiseFwd(const MklEltwiseFwdParams& fwdParams, + memory::format src_fmt, MklPrimitive* op) { + std::string key = CreateKey(fwdParams, src_fmt); + this->SetOp(key, op); + } +}; + +template +class MklEltwiseBwdParams { + public: + memory::dims src_dims; + memory::desc common_md; + algorithm alg_kind; + T alpha; + T beta; + + MklEltwiseBwdParams(const memory::dims &src_dims, + const memory::desc &common_md, + algorithm alg_kind, T alpha, T beta) : + src_dims(src_dims), common_md(common_md), + alg_kind(alg_kind), alpha(alpha), beta(beta) { + } +}; + +template +class MklEltwiseBwdPrimitive : public MklPrimitive { 
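// Usage sketch for this class (illustrative; float data and eltwise_relu
// assumed -- any algorithm accepted by MklEltwiseBwdParams works the same):
//
//   MklEltwiseBwdParams<float> bwdParams(src_dims, common_md,
//                                        algorithm::eltwise_relu,
//                                        0.0f /* alpha */, 0.0f /* beta */);
//   MklEltwiseBwdPrimitive<float>* eltwise_bwd =
//       MklEltwiseBwdPrimitiveFactory<float>::Get(bwdParams);
//   eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data);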
+ public: + explicit MklEltwiseBwdPrimitive(const MklEltwiseBwdParams& bwdParams) { + context_.src_fmt = static_cast( + bwdParams.common_md.data.format); + context_.diff_dst_fmt = static_cast( + bwdParams.common_md.data.format); + context_.bwd_stream.reset(new stream(stream::kind::eager)); + // create eltwise primitive + if (context_.eltwise_bwd == nullptr) { + Setup(bwdParams); + } + } + + ~MklEltwiseBwdPrimitive() {} + + // Eltwise backward execute + // src_data: input data buffer of src + // diff_dst_data: input data buffer of diff_dst + // diff_src_data: output data buffer of diff_src + + void Execute(T* src_data, T* diff_dst_data, T* diff_src_data) { + context_.src_mem->set_data_handle(static_cast(src_data)); + context_.diff_dst_mem->set_data_handle(static_cast(diff_dst_data)); + context_.diff_src_mem->set_data_handle(static_cast(diff_src_data)); + context_.bwd_stream->submit(context_.bwd_primitives); + + // after execution, set data handle back + context_.src_mem->set_data_handle(DummyData); + context_.diff_dst_mem->set_data_handle(DummyData); + context_.diff_src_mem->set_data_handle(DummyData); + return; + } + + std::shared_ptr GetEltwiseBwdPd() { + return context_.bwd_pd; + } + + memory::format GetSrcMemoryFormat() { + return context_.src_fmt; + } + + memory::format GetDiffDstMemoryFormat() { + return context_.diff_dst_fmt; + } + + private: + // Primitive reuse context for eltwise Bwd ops: Relu, Elu, Tanh + struct EltwiseBwdContext { + // expected memory format for this primitive instance + memory::format src_fmt; + memory::format diff_dst_fmt; + + // MKLDNN memory + std::shared_ptr src_mem; + std::shared_ptr diff_dst_mem; + std::shared_ptr diff_src_mem; + + // desc & prmitive desc + std::shared_ptr bwd_desc; + + // memory desc + std::shared_ptr src_md; + std::shared_ptr diff_dst_md; + std::shared_ptr common_md; + + // memory primitive desc + std::shared_ptr src_mpd; + std::shared_ptr diff_dst_mpd; + + // fwd primitive desc + std::shared_ptr fwd_desc; + std::shared_ptr fwd_pd; + std::shared_ptr bwd_pd; + + // Eltwise primitive + std::shared_ptr eltwise_bwd; + + std::shared_ptr bwd_stream; + std::vector bwd_primitives; + + EltwiseBwdContext() : + src_fmt(memory::format::any), diff_dst_fmt(memory::format::any), + src_mem(nullptr), diff_dst_mem(nullptr), diff_src_mem(nullptr), + src_md(nullptr), diff_dst_md(nullptr), common_md(nullptr), + src_mpd(nullptr), diff_dst_mpd(nullptr), + fwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), + eltwise_bwd(nullptr), bwd_stream(nullptr) { + } + } context_; + + // Eltwise backward primitive setup + void Setup(const MklEltwiseBwdParams& bwdParams) { + // create memory descriptors for eltwise data w/ no specified format + context_.src_md.reset(new memory::desc(bwdParams.common_md.data)); + context_.diff_dst_md.reset(new memory::desc(bwdParams.common_md.data)); + + context_.src_mpd.reset(new memory::primitive_desc( + *context_.src_md, cpu_engine_)); + context_.diff_dst_mpd.reset(new memory::primitive_desc( + *context_.diff_dst_md, cpu_engine_)); + + // create forward eltwise primitive + context_.fwd_desc.reset(new mkldnn::eltwise_forward::desc( + prop_kind::forward_training, bwdParams.alg_kind, + *context_.src_md, bwdParams.alpha, bwdParams.beta)); + context_.fwd_pd.reset(new mkldnn::eltwise_forward::primitive_desc( + *context_.fwd_desc, cpu_engine_)); + context_.bwd_desc.reset(new mkldnn::eltwise_backward::desc( + bwdParams.alg_kind, *context_.diff_dst_md, + *context_.src_md, bwdParams.alpha, bwdParams.beta)); + context_.bwd_pd.reset(new 
mkldnn::eltwise_backward::primitive_desc( + *context_.bwd_desc, cpu_engine_, *context_.fwd_pd)); + + // create memory primitive based on dummy data + context_.src_mem.reset(new memory(*context_.src_mpd, DummyData)); + context_.diff_dst_mem.reset(new memory(*context_.diff_dst_mpd, DummyData)); + context_.diff_src_mem.reset(new memory( + context_.bwd_pd.get()->diff_src_primitive_desc(), DummyData)); + + // create eltwise primitive and add it to net + context_.eltwise_bwd.reset(new mkldnn::eltwise_backward(*context_.bwd_pd, + *context_.src_mem, *context_.diff_dst_mem, *context_.diff_src_mem)); + + context_.bwd_primitives.push_back(*context_.eltwise_bwd); + return; + } + + engine cpu_engine_ = engine(engine::cpu, 0); +}; + + +template +class MklEltwiseBwdPrimitiveFactory : public MklPrimitiveFactory { + private: + MklEltwiseBwdPrimitiveFactory() {} + ~MklEltwiseBwdPrimitiveFactory() {} + + public: + static MklEltwiseBwdPrimitive* Get( + const MklEltwiseBwdParams& bwdParams) { + MklEltwiseBwdPrimitive* eltwise_backward = nullptr; + + auto src_fmt = static_cast( + bwdParams.common_md.data.format); + auto diff_dst_fmt = static_cast( + bwdParams.common_md.data.format); + + // try to find a suitable one in pool + eltwise_backward = static_cast*> ( + MklEltwiseBwdPrimitiveFactory::GetInstance().GetEltwiseBwd( + bwdParams, src_fmt, diff_dst_fmt)); + + if (eltwise_backward == nullptr) { + eltwise_backward = new MklEltwiseBwdPrimitive(bwdParams); + MklEltwiseBwdPrimitiveFactory::GetInstance().SetEltwiseBwd( + bwdParams, src_fmt, diff_dst_fmt, eltwise_backward); + } + return eltwise_backward; + } + + static MklEltwiseBwdPrimitiveFactory& GetInstance() { + static MklEltwiseBwdPrimitiveFactory instance_; + return instance_; + } + + private: + static std::string CreateKey( + const MklEltwiseBwdParams& bwdParams, + const memory::format &src_fmt, + const memory::format &diff_dst_fmt) { + std::string prefix = "eltwise_bwd"; + FactoryKeyCreator key_creator; + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(bwdParams.src_dims); + key_creator.AddAsKey(static_cast(bwdParams.alg_kind)); + key_creator.AddAsKey(static_cast(bwdParams.alpha)); + key_creator.AddAsKey(static_cast(bwdParams.beta)); + key_creator.AddAsKey(static_cast(src_fmt)); + key_creator.AddAsKey(static_cast(diff_dst_fmt)); + return key_creator.GetKey(); + } + + MklPrimitive* GetEltwiseBwd(const MklEltwiseBwdParams& bwdParams, + const memory::format &src_fmt, const memory::format &diff_dst_fmt) { + std::string key = CreateKey(bwdParams, src_fmt, diff_dst_fmt); + return this->GetOp(key); + } + + void SetEltwiseBwd(const MklEltwiseBwdParams& bwdParams, + const memory::format &src_fmt, + const memory::format &diff_dst_fmt, MklPrimitive *op) { + std::string key = CreateKey(bwdParams, src_fmt, diff_dst_fmt); + this->SetOp(key, op); + } +}; + +#endif + typedef Eigen::ThreadPoolDevice CPUDevice; struct MklReluHelpers { @@ -367,104 +764,111 @@ void MklReluGradOp::Compute(OpKernelContext* context) { mkl_context.MklCleanup(); } - - #else // INTEL_MKL_ML - template class MklReluOpBase : public OpKernel { public: ~MklReluOpBase() {} explicit MklReluOpBase(OpKernelConstruction* context) : OpKernel(context) {} - virtual void Compute_Scalar(OpKernelContext* context) = 0; void Compute(OpKernelContext* context) override { try { - auto cpu_engine = engine(engine::cpu, 0); const size_t src_index = 0; // index of src input tensor const size_t dst_index = 0; // index of dst output tensor const Tensor& src_tensor = MklGetInput(context, src_index); MklDnnShape 
dnn_shape_src; GetMklShape(context, src_index, &dnn_shape_src); - Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } - // Create relu primitive. - MklDnnData src(&cpu_engine); - MklDnnData dst(&cpu_engine); - // Set DNN primitive - src + MklDnnData src(&cpu_engine); + memory::dims src_dims; memory::desc src_md({}, memory::data_undef, memory::format_undef); if (dnn_shape_src.IsMklTensor()) { src_md = dnn_shape_src.GetMklLayout(); + src_dims = dnn_shape_src.GetSizesAsMklDnnDims(); } else { - auto src_dims = TFShapeToMklDnnDims(src_tensor.shape()); + src_dims = TFShapeToMklDnnDims(src_tensor.shape()); auto src_strides = CalculateTFStrides(src_dims); // Create blocked memory descriptor src_md = MklDnnData::CreateBlockedMemDesc(src_dims, src_strides); } - src.SetUsrMem(src_md, &src_tensor); T alpha = 0, beta = 0; - std::shared_ptr relu_fwd_pd; - auto relu_fwd_desc = relu_forward::desc( - prop_kind::forward_training, - // Operator memory descriptor is same as user memory descriptor. - alg_kind, src.GetUsrMemDesc(), alpha, beta); - relu_fwd_pd.reset( - new relu_forward::primitive_desc(relu_fwd_desc, cpu_engine)); - - // allocate dst tensor + + // get a eltwise fwd from primitive pool + MklEltwiseFwdParams fwdParams(src_dims, src_md, + alg_kind, alpha, beta); + MklEltwiseFwdPrimitive *eltwise_fwd = + MklEltwiseFwdPrimitiveFactory::Get(fwdParams); + + // prepare for execuation + T* src_data = nullptr; + // check wehther src need to reorder + if (src_md.data.format != eltwise_fwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + std::vector net; + auto src_target_pd = memory::primitive_desc({{src_dims}, + MklDnnType(), eltwise_fwd->GetSrcMemoryFormat()}, cpu_engine); + src.CheckReorderToOpMem(src_target_pd, &net); + stream(stream::kind::eager).submit(net).wait(); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast( + const_cast(src_tensor.flat().data())); + } + + // allocate dst tensor, always set it as MKL-DNN layout + std::shared_ptr + eltwise_fwd_pd = eltwise_fwd->GetEltwiseFwdPd(); MklDnnShape dnn_shape_dst; TensorShape tf_shape_dst; if (dnn_shape_src.IsMklTensor()) { dnn_shape_dst.SetMklTensor(true); - auto dst_pd = relu_fwd_pd->dst_primitive_desc(); + auto dst_pd = eltwise_fwd_pd->dst_primitive_desc(); dnn_shape_dst.SetMklLayout(&dst_pd); dnn_shape_dst.SetElemType(MklDnnType()); dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(), dnn_shape_src.GetSizesAsMklDnnDims(), dnn_shape_src.GetTfDataFormat()); - tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); + tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T)); } else { + // TODO(yli135): why relu's input is TF tensor in VGG16?? dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - - // Allocate output and MklDnnShape tensors separately for possible - // in-place operation + + Tensor* dst_tensor = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {static_cast(src_index)}, - static_cast(dst_index), - tf_shape_dst, &dst_tensor)); + {src_index}, dst_index, tf_shape_dst, &dst_tensor)); AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); - // Destination memory descriptor is same as source memory descriptor. 
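// Net effect of this hunk (sketch): the per-call primitive construction
// removed here collapses into a single cached call,
//
//   eltwise_fwd->Execute(src_data, dst_data);
//
// where Execute() points the cached primitive's memory objects at the
// current buffers, submits the stream, and resets the handles to DummyData.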
- auto &dst_md = src_md; - dst.SetUsrMem(dst_md, dst_tensor); + T* dst_data = static_cast(const_cast( + dst_tensor->flat().data())); - // execute net - std::vector net; - auto relu_fwd = - relu_forward(*relu_fwd_pd, src.GetOpMem(), dst.GetOpMem()); - net.push_back(relu_fwd); - stream(stream::kind::eager).submit(net).wait(); - } catch (mkldnn::error& e) { + // execute eltwise + eltwise_fwd->Execute(src_data, dst_data); + } catch (mkldnn::error &e) { string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", + error_msg)); } } + + private: + engine cpu_engine = engine(engine::cpu, 0); + std::shared_ptr relu_fwd_pd; }; template @@ -472,25 +876,25 @@ class MklReluGradOpBase : public OpKernel { public: ~MklReluGradOpBase() {} - explicit MklReluGradOpBase(OpKernelConstruction* context) - : OpKernel(context) {} + explicit MklReluGradOpBase(OpKernelConstruction* context) : + OpKernel(context) { + } virtual void Compute_Scalar(OpKernelContext* context) = 0; - void Compute(OpKernelContext* context) { + void Compute(OpKernelContext* context) { try { - auto cpu_engine = engine(engine::cpu, 0); + // auto cpu_engine = engine(engine::cpu, 0); MklDnnData src(&cpu_engine); MklDnnData diff_dst(&cpu_engine); - MklDnnData diff_src(&cpu_engine); const size_t diff_dst_index = 0; // index of diff_dst input tensor const size_t src_index = 1; // index of src input tensor const size_t diff_src_index = 0; // index of diff_src output tensor - const Tensor& src_tensor = MklGetInput(context, src_index); + const Tensor& src_tensor = MklGetInput(context, src_index); const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index); - Tensor* diff_src_tensor = nullptr; + Tensor* diff_src_tensor = nullptr; MklDnnShape dnn_shape_src, dnn_shape_diff_dst; GetMklShape(context, src_index, &dnn_shape_src); @@ -498,37 +902,23 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } - // Set DNN primitives for src & diff_dst + // get a eltwise bwd from primitive pool + memory::dims src_dims = {}; memory::desc src_md({}, memory::data_undef, memory::format_undef); memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - - // For creating Sum primitive, we need to ensure that all inputs are in - // same format. What that means is if we have a mixed input case - where - // one input is in Tensorflow format and one input is in MKL format -, - // then we need to ensure that all inputs are in same format for - // primitive construction. For performance reason, we say that all inputs - // are in MKL format in such case, and insert reorder for input that is - // in Tensorflow format into MKL format. On the other hand, if both the - // inputs are in MKL format or both are in Tensorflow format, then we - // dont need reorder. if (!dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) { - // If both the inputs are in Tensorflow format, we create blocked memory - // descriptor. 
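// "Blocked" here means a plain strided (row-major) layout. Sketch of the
// construction, float data assumed:
//
//   memory::dims dims    = TFShapeToMklDnnDims(tensor.shape());
//   memory::dims strides = CalculateTFStrides(dims);
//   memory::desc md = MklDnnData<float>::CreateBlockedMemDesc(dims, strides);
//
// i.e. the TF tensor is described to MKL-DNN exactly as it sits in memory.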
- auto src_dims = TFShapeToMklDnnDims(src_tensor.shape()); + src_dims = TFShapeToMklDnnDims(src_tensor.shape()); auto src_strides = CalculateTFStrides(src_dims); src_md = MklDnnData::CreateBlockedMemDesc(src_dims, src_strides); diff_dst_md = src_md; } else if (dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) { - // If one input is in MKL format and other is in Tensorflow, then - // create respective descriptors describing the actual case. For input - // in Mkl format, we just get Mkl layout from MklDnnShape. For input in - // Tensorflow format, we create memory descriptor using data format. src_md = dnn_shape_src.GetMklLayout(); + src_dims = dnn_shape_src.GetSizesAsMklDnnDims(); memory::format src_mkl_data_format = dnn_shape_src.GetTfDataFormat(); auto src_tf_data_format = @@ -539,26 +929,23 @@ class MklReluGradOpBase : public OpKernel { memory::desc(diff_dst_dims, MklDnnType(), src_mkl_data_format); } else if (!dnn_shape_src.IsMklTensor() && dnn_shape_diff_dst.IsMklTensor()) { - // Same comment as above. diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); memory::format diff_dst_mkl_data_format = dnn_shape_diff_dst.GetTfDataFormat(); auto diff_dst_tf_data_format = MklDnnDataFormatToTFDataFormat(diff_dst_mkl_data_format); - auto src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), + src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), diff_dst_tf_data_format); src_md = memory::desc(src_dims, MklDnnType(), diff_dst_mkl_data_format); } else { - // If both the inputs are in MKL format, we use Mkl layout of the input - // tensors. src_md = dnn_shape_src.GetMklLayout(); diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + src_dims = dnn_shape_src.GetSizesAsMklDnnDims(); } - src.SetUsrMem(src_md, &src_tensor); - diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + T alpha = 0, beta = 0; // As per comment above, we tell MKLDNN that both the inputs are in same // format. 
So we set common memory descriptor in MKL format, if any of the @@ -573,83 +960,79 @@ class MklReluGradOpBase : public OpKernel { common_md = src_md; } - T alpha = 0, beta = 0; - std::shared_ptr relu_fwd_pd; - auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training, - alg_kind, src_md, alpha, beta); - relu_fwd_pd.reset( - new relu_forward::primitive_desc(relu_fwd_desc, cpu_engine)); - auto relu_bwd_desc = - relu_backward::desc(alg_kind, common_md, common_md, alpha, beta); - auto relu_bwd_pd = relu_backward::primitive_desc( - relu_bwd_desc, cpu_engine, *relu_fwd_pd); + MklEltwiseBwdParams bwdParams(src_dims, common_md, + alg_kind, alpha, beta); + MklEltwiseBwdPrimitive *eltwise_bwd = + MklEltwiseBwdPrimitiveFactory::Get(bwdParams); + auto eltwise_bwd_pd = eltwise_bwd->GetEltwiseBwdPd(); + + // check whether need reorder for src / diff_dst + T* src_data; + T* diff_dst_data; + std::vector net; + if (src_md.data.format != eltwise_bwd->GetSrcMemoryFormat()) { + src.SetUsrMem(src_md, &src_tensor); + src.CheckReorderToOpMem( + eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + src_data = static_cast(src.GetOpMem().get_data_handle()); + } else { + src_data = static_cast( + const_cast(src_tensor.flat().data())); + } + + if (diff_dst_md.data.format != eltwise_bwd->GetDiffDstMemoryFormat()) { + diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); + diff_dst.CheckReorderToOpMem( + eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + diff_dst_data = static_cast( + diff_dst.GetOpMem().get_data_handle()); + } else { + diff_dst_data = static_cast(const_cast( + diff_dst_tensor.flat().data())); + } + stream(stream::kind::eager).submit(net).wait(); // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor() || - dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor()) { + auto diff_src_pd = eltwise_bwd_pd->diff_src_primitive_desc(); dnn_shape_diff_src.SetMklTensor(true); - auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc(); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - if (dnn_shape_src.IsMklTensor()) { - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); - } else { - dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), - dnn_shape_diff_dst.GetSizesAsMklDnnDims(), - dnn_shape_diff_dst.GetTfDataFormat()); - } - tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are TensorFlow layout, - // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } - // Allocate diff_src and MklDnnShape tensors separately for possible - // in-place operation - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {static_cast(diff_dst_index)}, - static_cast(diff_src_index), - tf_shape_diff_src, - &diff_src_tensor)); - AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); - - // diff_src memory descriptor is same as memory descriptor for both - // inputs. 
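// The common-format rule applied in this function, in brief (sketch):
//
//   memory::desc common_md =
//       dnn_shape_src.IsMklTensor()      ? src_md :
//       dnn_shape_diff_dst.IsMklTensor() ? diff_dst_md :
//                                          src_md;  // both TF: mds match
//
// Both inputs are reordered to common_md when needed, so the backward
// primitive always sees one layout, and diff_src is produced in it too.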
- diff_src.SetUsrMem(common_md, diff_src_tensor); - - PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst); - } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); - OP_REQUIRES_OK( - context, - errors::Aborted("Operation received an exception:", error_msg)); + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {diff_dst_index}, diff_src_index, tf_shape_diff_src, + &diff_src_tensor)); + AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); + + T* diff_src_data = static_cast(const_cast( + diff_src_tensor->flat().data())); + + // execute eltwise bwd + eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data); + } catch (mkldnn::error &e) { + string error_msg = "Status: " + std::to_string(e.status) + + ", message: " + string(e.message) + + ", in file " + string(__FILE__) + ":" + + std::to_string(__LINE__); + OP_REQUIRES_OK(context, + errors::Aborted("Operation received an exception:", + error_msg)); } } - void PrepareAndExecuteNet(const relu_backward::primitive_desc& relu_prim_desc, - MklDnnData* src, MklDnnData* diff_src, - MklDnnData* diff_dst) { - std::vector net; - - // Check if we need to reorder original input tensors into common_md layout - // that we set for primitive creation. diff_src_primitive_desc is same as - // common_md. - src->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(), &net); - diff_dst->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(), - &net); - - net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(), - diff_dst->GetOpMem(), diff_src->GetOpMem())); - stream(stream::kind::eager).submit(net).wait(); - } + private: + engine cpu_engine = engine(engine::cpu, 0); + std::shared_ptr relu_fwd_pd; }; template diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 230b4278ca..c4b5e124fb 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1794,11 +1794,11 @@ class MklDnnData { } }; -/// Base class for operations with reuse of DNN primitives +/// Base class for operations with reuse of primitives /// -class DnnOp { +class MklPrimitive { public: - virtual ~DnnOp() {} + virtual ~MklPrimitive() {} // Dummy data. Its size, hard-coded as 256 here, does // not matter since MKL should never operate on this buffer. 
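// Cache discipline shared by every factory built on MklPrimitiveFactory
// (sketch; SomeConcretePrimitive is a hypothetical subclass):
//
//   std::string key = CreateKey(params);        // shapes/formats as string
//   MklPrimitive* prim = factory.GetOp(key);
//   if (prim == nullptr) {
//     prim = new SomeConcretePrimitive(params);
//     factory.SetOp(key, prim);                 // cached for this thread
//   }
//
// The underlying map is thread_local (next hunk), so cached primitives are
// never shared -- or contended -- across threads.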
@@ -1806,33 +1806,33 @@ class DnnOp { }; const mkldnn::memory::dims NONE_DIMS = {}; -// This constant is used to declare dummy buffer (size), for MKL primitives + template -class DnnOpFactory { +class MklPrimitiveFactory { public: - DnnOpFactory() {} - ~DnnOpFactory() {} + MklPrimitiveFactory() {} + ~MklPrimitiveFactory() {} - DnnOp* GetOp(const std::string& key) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); - if (stream_iter == DnnOpFactory::GetHashMap().end()) { + MklPrimitive* GetOp(const std::string& key) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { return nullptr; } else { return stream_iter->second; } } - void SetOp(const std::string& key, DnnOp* op) { - auto stream_iter = DnnOpFactory::GetHashMap().find(key); + void SetOp(const std::string& key, MklPrimitive* op) { + auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - CHECK(stream_iter == DnnOpFactory::GetHashMap().end()); + CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); - DnnOpFactory::GetHashMap()[key] = op; + MklPrimitiveFactory::GetHashMap()[key] = op; } private: - static inline std::unordered_map &GetHashMap() { - static thread_local std::unordered_map map_; + static inline std::unordered_map &GetHashMap() { + static thread_local std::unordered_map map_; return map_; } }; -- cgit v1.2.3 From 2bcd873e839c66b2405226508286da371dd8afbe Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 21 May 2018 13:27:46 -0700 Subject: revert mkl_conv_ops.cc to avoid PR review confusion --- tensorflow/core/kernels/mkl_conv_ops.cc | 280 +++++++++++++------------------- 1 file changed, 116 insertions(+), 164 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index c032add82e..f2b14f1278 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -59,8 +59,7 @@ namespace tensorflow { #ifndef INTEL_MKL_ML -// This structure aggregates multiple inputs to Conv2DFwd* methods. 
-struct MklConvFwdParams { +struct ConvFwdDimensions { memory::dims src_dims; memory::dims filter_dims; memory::dims bias_dims; @@ -70,7 +69,7 @@ struct MklConvFwdParams { memory::dims padding_left; memory::dims padding_right; - MklConvFwdParams(memory::dims src_dims, + ConvFwdDimensions(memory::dims src_dims, memory::dims filter_dims, memory::dims bias_dims, memory::dims dst_dims, memory::dims strides, memory::dims dilations, memory::dims padding_left, @@ -83,40 +82,35 @@ struct MklConvFwdParams { }; template -class MklConv2DFwdPrimitive: public MklPrimitive { +class Conv2DFwd : public DnnOp { public: - explicit MklConv2DFwdPrimitive(const MklConvFwdParams& convFwdDims) { - context_.fwd_stream.reset(new stream(stream::kind::eager)); + explicit Conv2DFwd(const ConvFwdDimensions& convFwdDims) { + fwd_stream_.reset(new stream(stream::kind::eager)); // create conv primitive - if (context_.conv_fwd == nullptr) { + if (conv_fwd_ == nullptr) { Setup(convFwdDims); } } - ~MklConv2DFwdPrimitive() {} + ~Conv2DFwd() {} // Convolution forward execute with bias // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // bias_data: input data buffer of bias // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* bias_data, const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.bias_mem->set_data_handle( - static_cast(const_cast(bias_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); + void Execute(T* src_data, T* filter_data, T* bias_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + bias_mem_->set_data_handle(static_cast(bias_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); // after exec, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.bias_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + src_mem_->set_data_handle(DummyData); + filter_mem_->set_data_handle(DummyData); + bias_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); return; } @@ -125,174 +119,139 @@ class MklConv2DFwdPrimitive: public MklPrimitive { // src_data: input data buffer of src // filter_data: input data buffer of filter (weights) // dst_data: output data buffer of dst - void Execute(const T* src_data, const T* filter_data, - const T* dst_data) { - context_.src_mem->set_data_handle( - static_cast(const_cast(src_data))); - context_.filter_mem->set_data_handle( - static_cast(const_cast(filter_data))); - context_.dst_mem->set_data_handle( - static_cast(const_cast(dst_data))); - context_.fwd_stream->submit(context_.fwd_primitives); - - // after execution, set data handle back - context_.src_mem->set_data_handle(DummyData); - context_.filter_mem->set_data_handle(DummyData); - context_.dst_mem->set_data_handle(DummyData); + void Execute(T* src_data, T* filter_data, T* dst_data) { + src_mem_->set_data_handle(static_cast(src_data)); + filter_mem_->set_data_handle(static_cast(filter_data)); + dst_mem_->set_data_handle(static_cast(dst_data)); + fwd_stream_->submit(fwd_primitives_); - return; - } + // after exec, set data handle back + src_mem_->set_data_handle(DummyData); + 
filter_mem_->set_data_handle(DummyData); + dst_mem_->set_data_handle(DummyData); - memory::format GetSrcMemoryFormat() const { - return context_.src_fmt; + return; } - memory::format GetFilterMemoryFormat() const { - return context_.filter_fmt; - } + // expected memory format for this primitive instance + memory::format src_fmt_; + memory::format filter_fmt_; - std::shared_ptr - GetPrimitiveDesc() const { - return context_.fwd_pd; - } + // convolution primitive + std::shared_ptr fwd_pd_; + std::shared_ptr conv_fwd_; private: - // Primitive reuse context for Conv2D Fwd op - struct ConvFwdContext { - // expected memory format for this primitive instance - memory::format src_fmt; - memory::format filter_fmt; - - // MKLDNN memory - std::shared_ptr src_mem; - std::shared_ptr filter_mem; - std::shared_ptr bias_mem; - std::shared_ptr dst_mem; - - // desc & prmitive desc - std::shared_ptr fwd_desc; - - // memory desc - std::shared_ptr src_md; - std::shared_ptr filter_md; - std::shared_ptr bias_md; - std::shared_ptr dst_md; - - // convolution primitive - std::shared_ptr fwd_pd; - std::shared_ptr conv_fwd; - - std::shared_ptr fwd_stream; - std::vector fwd_primitives; - - ConvFwdContext() : - src_fmt(memory::format::any), filter_fmt(memory::format::any), - src_mem(nullptr), filter_mem(nullptr), bias_mem(nullptr), - dst_mem(nullptr), fwd_desc(nullptr), - src_md(nullptr), filter_md(nullptr), bias_md(nullptr), - fwd_pd(nullptr), conv_fwd(nullptr), fwd_stream(nullptr) { - } - } context_; - - engine cpu_engine_ = engine(engine::cpu, 0); - - void Setup(const MklConvFwdParams& convFwdDims) { + void Setup(const ConvFwdDimensions& convFwdDims) { // create memory descriptors for convolution data w/ no specified format - context_.src_md.reset(new memory::desc({convFwdDims.src_dims}, + src_md_.reset(new memory::desc({convFwdDims.src_dims}, MklDnnType(), memory::format::any)); - context_.filter_md.reset(new memory::desc({convFwdDims.filter_dims}, + filter_md_.reset(new memory::desc({convFwdDims.filter_dims}, MklDnnType(), memory::format::any)); - context_.dst_md.reset(new memory::desc({convFwdDims.dst_dims}, + dst_md_.reset(new memory::desc({convFwdDims.dst_dims}, MklDnnType(), memory::format::any)); if (!convFwdDims.bias_dims.empty()) - context_.bias_md.reset(new memory::desc({convFwdDims.bias_dims}, + bias_md_.reset(new memory::desc({convFwdDims.bias_dims}, MklDnnType(), memory::format::any)); // create a convolution if (!convFwdDims.bias_dims.empty()) { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.bias_md, *context_.dst_md, + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *bias_md_, *dst_md_, convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, convFwdDims.padding_right, padding_kind::zero)); } else { - context_.fwd_desc.reset(new convolution_forward::desc(prop_kind::forward, - convolution_direct, *context_.src_md, *context_.filter_md, - *context_.dst_md, convFwdDims.strides, convFwdDims.dilations, - convFwdDims.padding_left, convFwdDims.padding_right, - padding_kind::zero)); + fwd_desc_.reset(new convolution_forward::desc(prop_kind::forward, + convolution_direct, *src_md_, *filter_md_, *dst_md_, + convFwdDims.strides, convFwdDims.dilations, convFwdDims.padding_left, + convFwdDims.padding_right, padding_kind::zero)); } - context_.fwd_pd.reset(new convolution_forward::primitive_desc( - *context_.fwd_desc, cpu_engine_)); + 
fwd_pd_.reset(new convolution_forward::primitive_desc( + *fwd_desc_, cpu_engine_)); // store the expected memory format - context_.src_fmt = static_cast( - context_.fwd_pd.get()->src_primitive_desc().desc().data.format); + src_fmt_ = static_cast( + fwd_pd_.get()->src_primitive_desc().desc().data.format); - context_.filter_fmt = static_cast( - context_.fwd_pd.get()->weights_primitive_desc().desc().data.format); + filter_fmt_ = static_cast( + fwd_pd_.get()->weights_primitive_desc().desc().data.format); // create memory primitive based on dummy data - context_.src_mem.reset(new memory( - context_.fwd_pd.get()->src_primitive_desc(), DummyData)); - context_.filter_mem.reset(new memory( - context_.fwd_pd.get()->weights_primitive_desc(), DummyData)); - context_.dst_mem.reset(new memory( - context_.fwd_pd.get()->dst_primitive_desc(), DummyData)); + src_mem_.reset(new memory(fwd_pd_.get()->src_primitive_desc(), DummyData)); + filter_mem_.reset(new memory(fwd_pd_.get()->weights_primitive_desc(), + DummyData)); + dst_mem_.reset(new memory(fwd_pd_.get()->dst_primitive_desc(), DummyData)); // create convolution primitive and add it to net if (!convFwdDims.bias_dims.empty()) { - context_.bias_mem.reset(new memory({{{convFwdDims.bias_dims}, - MklDnnType(), memory::format::x}, cpu_engine_}, DummyData)); - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, *context_.filter_mem, - *context_.bias_mem, *context_.dst_mem)); + bias_mem_.reset(new memory({{{convFwdDims.bias_dims}, MklDnnType(), + memory::format::x}, cpu_engine_}, DummyData)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *bias_mem_, *dst_mem_)); } else { - context_.conv_fwd.reset(new convolution_forward( - *context_.fwd_pd, *context_.src_mem, - *context_.filter_mem, *context_.dst_mem)); + conv_fwd_.reset(new convolution_forward(*fwd_pd_, *src_mem_, + *filter_mem_, *dst_mem_)); } - context_.fwd_primitives.push_back(*context_.conv_fwd); + fwd_primitives_.push_back(*conv_fwd_); return; } + + // MKLDNN memory + std::shared_ptr src_mem_; + std::shared_ptr filter_mem_; + std::shared_ptr bias_mem_; + std::shared_ptr dst_mem_; + + std::shared_ptr fwd_stream_; + std::vector fwd_primitives_; + + // desc & prmitive desc + std::shared_ptr fwd_desc_; + + // memory desc + std::shared_ptr src_md_; + std::shared_ptr filter_md_; + std::shared_ptr bias_md_; + std::shared_ptr dst_md_; + + engine cpu_engine_ = engine(engine::cpu, 0); }; template -class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { +class Conv2DFwdFactory : public DnnOpFactory { public: - static MklConv2DFwdPrimitive* Get(const MklConvFwdParams& convFwdDims) { - MklConv2DFwdPrimitive* conv2d_fwd = nullptr; + static Conv2DFwd* Get(const ConvFwdDimensions& convFwdDims) { + Conv2DFwd* conv2d_fwd = nullptr; // try to find a suitable one in pool - conv2d_fwd = dynamic_cast*> ( - MklConv2DFwdPrimitiveFactory::GetInstance().GetConv2DFwd( - convFwdDims)); + conv2d_fwd = dynamic_cast*> ( + Conv2DFwdFactory::GetInstance().GetConv2DFwd(convFwdDims)); if (conv2d_fwd == nullptr) { - conv2d_fwd = new MklConv2DFwdPrimitive(convFwdDims); - MklConv2DFwdPrimitiveFactory::GetInstance().SetConv2DFwd( + conv2d_fwd = new Conv2DFwd(convFwdDims); + Conv2DFwdFactory::GetInstance().SetConv2DFwd( convFwdDims, conv2d_fwd); } return conv2d_fwd; } private: - MklConv2DFwdPrimitiveFactory() {} - ~MklConv2DFwdPrimitiveFactory() {} + Conv2DFwdFactory() {} + ~Conv2DFwdFactory() {} static const int kDilationH = 0, kDilationW = 1; - static 
MklConv2DFwdPrimitiveFactory& GetInstance() { - static MklConv2DFwdPrimitiveFactory instance_; + static Conv2DFwdFactory& GetInstance() { + static Conv2DFwdFactory instance_; return instance_; } - static std::string CreateKey(const MklConvFwdParams& convFwdDims) { + static std::string CreateKey(const ConvFwdDimensions& convFwdDims) { std::string prefix = "conv2d_fwd_"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -307,12 +266,12 @@ class MklConv2DFwdPrimitiveFactory : public MklPrimitiveFactory { return key_creator.GetKey(); } - MklPrimitive* GetConv2DFwd(const MklConvFwdParams& convFwdDims) { + DnnOp* GetConv2DFwd(const ConvFwdDimensions& convFwdDims) { std::string key = CreateKey(convFwdDims); return this->GetOp(key); } - void SetConv2DFwd(const MklConvFwdParams& convFwdDims, MklPrimitive *op) { + void SetConv2DFwd(const ConvFwdDimensions& convFwdDims, DnnOp *op) { std::string key = CreateKey(convFwdDims); this->SetOp(key, op); } @@ -803,6 +762,7 @@ class MklConv2DOp : public OpKernel { MklDnnData src(&cpu_engine); MklDnnData filter(&cpu_engine); + MklDnnData dst(&cpu_engine); // output memory::dims src_dims, filter_dims, padding_left, padding_right, dilations, strides; @@ -852,6 +812,7 @@ class MklConv2DOp : public OpKernel { auto src_md = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), tf_fmt); + src.SetUsrMem(src_md, &src_tensor); // Although filter shape (filter_dims) required is in MKL-DNN order, // the layout is Tensorflow's layout (HWIO). @@ -859,28 +820,29 @@ class MklConv2DOp : public OpKernel { ? filter_mkl_shape.GetMklLayout() : memory::desc(filter_dims, MklDnnType(), memory::format::hwio); + filter.SetUsrMem(filter_md, &filter_tensor); // MKLDNN dilation starts from 0. 
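The factory machinery above exists so that the expensive primitive construction in Setup() runs once per distinct shape: CreateKey() serializes every dimension, stride and padding into a string, and Get() returns a pooled primitive on a hit, which is what the hunk just below relies on when it fetches a conv2d fwd from the pool. A minimal Python sketch of that lookup, with hypothetical names (the real code goes through FactoryKeyCreator and a shared hash map of primitive pointers):

def make_key(prefix, *dim_lists):
    # Mirrors CreateKey(): flatten all shape/stride/padding lists into one string.
    return prefix + "_".join("x".join(str(d) for d in dims) for dims in dim_lists)

_pool = {}

def get_conv2d_fwd(src_dims, filter_dims, strides, build_fn):
    key = make_key("conv2d_fwd_", src_dims, filter_dims, strides)
    prim = _pool.get(key)
    if prim is None:           # first op instance with these parameters
        prim = build_fn()      # expensive Setup() work happens exactly once
        _pool[key] = prim
    return prim                # later ops with the same shapes reuse it

conv = get_conv2d_fwd([1, 3, 224, 224], [64, 3, 7, 7], [1, 1], lambda: object())

Keying on every parameter that affects the primitive descriptor is what makes the pool safe: two ops that would need different descriptors can never collide on a key.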
dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; // get a conv2d fwd from primitive pool - MklConv2DFwdPrimitive *conv2d_fwd = nullptr; + Conv2DFwd *conv2d_fwd = nullptr; if (biasEnabled) { memory::dims bias_dims = {}; conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims); - MklConvFwdParams convFwdDims(src_dims, filter_dims, bias_dims, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, bias_dims, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } else { - MklConvFwdParams convFwdDims(src_dims, filter_dims, NONE_DIMS, + ConvFwdDimensions convFwdDims(src_dims, filter_dims, NONE_DIMS, dst_dims_mkl_order, strides, dilations, padding_left, padding_right); - conv2d_fwd = MklConv2DFwdPrimitiveFactory::Get(convFwdDims); + conv2d_fwd = Conv2DFwdFactory::Get(convFwdDims); } // allocate output tensors output_tensor and filter_out_tensor std::shared_ptr - conv_fwd_pd = conv2d_fwd->GetPrimitiveDesc(); + conv_fwd_pd = conv2d_fwd->fwd_pd_; AllocateOutputTensor(context, *conv_fwd_pd, dst_dims_mkl_order, tf_fmt, &dst_tensor); Tensor* filter_out_tensor = nullptr; @@ -892,30 +854,20 @@ class MklConv2DOp : public OpKernel { // check whether src/filter need reorder std::vector net; - T *src_data = nullptr; - if (src_md.data.format != conv2d_fwd->GetSrcMemoryFormat()) { - src.SetUsrMem(src_md, &src_tensor); - src.CheckReorderToOpMem( - conv_fwd_pd.get()->src_primitive_desc(), &net); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast(const_cast( - src_tensor.flat().data())); - } - T *filter_data = nullptr; - if (filter_md.data.format != conv2d_fwd->GetFilterMemoryFormat()) { - filter.SetUsrMem(filter_md, &filter_tensor); - filter.CheckReorderToOpMem( - conv_fwd_pd.get()->weights_primitive_desc(), - filter.GetTensorBuffer(filter_out_tensor), &net); - filter_data = static_cast(filter.GetOpMem().get_data_handle()); - } else { - filter_data = static_cast(const_cast( - filter_tensor.flat().data())); - } - + if (src_md.data.format != conv2d_fwd->src_fmt_) + src.CheckReorderToOpMem( + conv_fwd_pd.get()->src_primitive_desc(), &net); + + if (filter_md.data.format != conv2d_fwd->filter_fmt_) + filter.CheckReorderToOpMem( + conv_fwd_pd.get()->weights_primitive_desc(), + filter.GetTensorBuffer(filter_out_tensor), &net); stream(stream::kind::eager).submit(net).wait(); + T* src_data = static_cast( + src.GetOpMem().get_data_handle()); + T* filter_data = static_cast( + filter.GetOpMem().get_data_handle()); // execute convolution if (biasEnabled) { -- cgit v1.2.3 From e92ab37a625d486931cdcfa6cbd8bc32f7cd5d3c Mon Sep 17 00:00:00 2001 From: "Benjamin H. 
Myara" Date: Wed, 6 Jun 2018 02:52:21 +0300 Subject: Correction of MatMulStatsTest unit test --- tensorflow/python/kernel_tests/matmul_op_test.py | 4 ++-- tensorflow/python/ops/math_ops.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/matmul_op_test.py b/tensorflow/python/kernel_tests/matmul_op_test.py index b167278984..9eaafb4435 100644 --- a/tensorflow/python/kernel_tests/matmul_op_test.py +++ b/tensorflow/python/kernel_tests/matmul_op_test.py @@ -142,7 +142,7 @@ class MatMulStatsTest(test_lib.TestCase): for op in g.get_operations(): flops = ops.get_stats_for_node_def(g, op.node_def, "flops").value if op.name == "MatMul": - self.assertEqual(7200, flops) + self.assertEqual(6975, flops) def testTransposedStatistics(self): g = ops.Graph() @@ -153,7 +153,7 @@ class MatMulStatsTest(test_lib.TestCase): for op in g.get_operations(): flops = ops.get_stats_for_node_def(g, op.node_def, "flops").value if op.name == "MatMul": - self.assertEqual(7200, flops) + self.assertEqual(6975, flops) try: diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index b7e3de7e85..aff5af530c 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2033,7 +2033,7 @@ def _calc_mat_mul_flops(graph, node): output_shape = graph_util.tensor_shape_from_node_def_name(graph, node.name) output_shape.assert_is_fully_defined() output_count = np.prod(output_shape.as_list()) - return ops.OpStats("flops", (k * output_count * 2)) + return ops.OpStats("flops", ((2 * k - 1) * output_count)) def _as_indexed_slices(x, optimize=True): -- cgit v1.2.3 From ffc5c4e845d8bfb36e6c56d904cba3bd8e1de94e Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 12 Jun 2018 15:52:49 +0800 Subject: TST: add test case --- tensorflow/python/keras/backend_test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 53e30e0e4a..0cff53b55c 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import numpy as np import scipy.sparse +from tensorflow.core.protobuf import config_pb2 from tensorflow.python import keras from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import variables @@ -234,10 +235,13 @@ class BackendUtilsTest(test.TestCase): x_placeholder = keras.backend.placeholder(shape=()) y_placeholder = keras.backend.placeholder(shape=()) + run_options = config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE) f = keras.backend.function(inputs=[x_placeholder, y_placeholder], outputs=[x_placeholder + y_placeholder], updates=[(x, x_placeholder + 1.)], - fetches=[keras.backend.update(y, 5.)]) + fetches=[keras.backend.update(y, 5.)], + options=run_options) output = f([10., 20.]) self.assertEqual(output, [30.]) self.assertEqual( -- cgit v1.2.3 From a5840964e0eb422fbe73dd3738c8d14c1147276f Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 12 Jun 2018 15:57:06 +0800 Subject: ENH: support run options for function --- tensorflow/python/keras/backend.py | 7 ++++++- tensorflow/python/keras/backend_test.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 2a4a1c861c..8abdd5238a 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -2759,7 +2759,8 @@ class 
Function(object): outputs: Output tensors to fetch. updates: Additional update ops to be run at function call. name: A name to help users identify what this function does. - session_kwargs: Arguments to `tf.Session.run()`: `fetches`, `feed_dict`. + session_kwargs: Arguments to `tf.Session.run()`: + `fetches`, `feed_dict`, `options`. """ def __init__(self, inputs, outputs, updates=None, name=None, @@ -2793,6 +2794,7 @@ class Function(object): self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): self.fetches = [self.fetches] + self.run_options = session_kwargs.pop('options', None) # The main use case of `fetches` being passed to a model is the ability # to run custom updates (since the outputs of fetches are never returned). # This requires us to wrap fetches in `identity` ops. @@ -2844,6 +2846,9 @@ class Function(object): callable_opts.fetch.append(x.name) # Handle updates. callable_opts.target.append(self.updates_op.name) + # Handle run_options. + if self.run_options: + callable_opts.run_options.CopyFrom(self.run_options) # Create callable. callable_fn = session._make_callable_from_options(callable_opts) # Cache parameters corresponding to the generated callable, so that diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 0cff53b55c..8a794a7f23 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -236,7 +236,7 @@ class BackendUtilsTest(test.TestCase): y_placeholder = keras.backend.placeholder(shape=()) run_options = config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE) + trace_level=config_pb2.RunOptions.NO_TRACE) f = keras.backend.function(inputs=[x_placeholder, y_placeholder], outputs=[x_placeholder + y_placeholder], updates=[(x, x_placeholder + 1.)], -- cgit v1.2.3 From f369de2bb9f28c36b8b654db3dbd4dd187482c22 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 12 Jun 2018 15:54:37 -0700 Subject: code refactoring per Rasmus's suggestions on PR 19754 --- tensorflow/core/kernels/mkl_relu_op.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 048d4883b2..a52c879721 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -65,7 +65,8 @@ class MklEltwiseFwdParams { template class MklEltwiseFwdPrimitive : public MklPrimitive { public: - explicit MklEltwiseFwdPrimitive(const MklEltwiseFwdParams& fwdParams) { + explicit MklEltwiseFwdPrimitive(const MklEltwiseFwdParams& fwdParams) : + cpu_engine_(engine::cpu, 0) { // store expected format context_.src_fmt = static_cast( fwdParams.src_md.data.format); @@ -90,7 +91,6 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { // after execution, set data handle back context_.src_mem->set_data_handle(DummyData); context_.dst_mem->set_data_handle(DummyData); - return; } std::shared_ptr GetEltwiseFwdPd() { @@ -133,7 +133,7 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { fwd_desc(nullptr), fwd_pd(nullptr), src_md(nullptr), dst_md(nullptr), src_mpd(nullptr), eltwise_fwd(nullptr), fwd_stream(nullptr) { } - } context_; + }; // Eltwise forward primitive setup void Setup(const MklEltwiseFwdParams& fwdParams) { @@ -159,10 +159,10 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { *context_.src_mem, *context_.dst_mem)); context_.fwd_primitives.push_back(*context_.eltwise_fwd); - return; } - engine cpu_engine_ = engine(engine::cpu, 
0); + struct EltwiseFwdContext context_; + engine cpu_engine_; }; template @@ -242,7 +242,8 @@ class MklEltwiseBwdParams { template class MklEltwiseBwdPrimitive : public MklPrimitive { public: - explicit MklEltwiseBwdPrimitive(const MklEltwiseBwdParams& bwdParams) { + explicit MklEltwiseBwdPrimitive(const MklEltwiseBwdParams& bwdParams) : + cpu_engine_(engine::cpu, 0) { context_.src_fmt = static_cast( bwdParams.common_md.data.format); context_.diff_dst_fmt = static_cast( @@ -271,7 +272,6 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { context_.src_mem->set_data_handle(DummyData); context_.diff_dst_mem->set_data_handle(DummyData); context_.diff_src_mem->set_data_handle(DummyData); - return; } std::shared_ptr GetEltwiseBwdPd() { @@ -329,7 +329,7 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { fwd_desc(nullptr), fwd_pd(nullptr), bwd_pd(nullptr), eltwise_bwd(nullptr), bwd_stream(nullptr) { } - } context_; + }; // Eltwise backward primitive setup void Setup(const MklEltwiseBwdParams& bwdParams) { @@ -365,10 +365,10 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { *context_.src_mem, *context_.diff_dst_mem, *context_.diff_src_mem)); context_.bwd_primitives.push_back(*context_.eltwise_bwd); - return; } - engine cpu_engine_ = engine(engine::cpu, 0); + struct EltwiseBwdContext context_; + engine cpu_engine_; }; -- cgit v1.2.3 From 0059fe57ce7f6b8397b72acfb0ef30013d748116 Mon Sep 17 00:00:00 2001 From: PENGWA Date: Tue, 19 Jun 2018 20:37:58 +0800 Subject: consider gpu memory fraction option for memory optimizer (cherry picked from commit d7b2a4030d4b6d57f7453f986fdea346e8a76b7c) --- tensorflow/core/common_runtime/graph_execution_state.cc | 4 +++- tensorflow/core/grappler/optimizers/memory_optimizer.cc | 14 ++++++++------ tensorflow/core/grappler/optimizers/memory_optimizer.h | 3 +++ tensorflow/core/grappler/optimizers/meta_optimizer.cc | 12 +++++++----- tensorflow/core/grappler/optimizers/meta_optimizer.h | 9 ++++++++- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index eb710bdbc5..d76f7b49b1 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -407,6 +407,8 @@ Status GraphExecutionState::OptimizeGraph( const RewriterConfig& rewrite_options = session_options_->config.graph_options().rewrite_options(); + const GPUOptions& gpu_options = + session_options_->config.gpu_options(); if (grappler::MetaOptimizerEnabled(rewrite_options)) { // Adding this functionality in steps. The first step is to make sure @@ -493,7 +495,7 @@ Status GraphExecutionState::OptimizeGraph( grappler::VirtualCluster cluster(device_map, device_set_); GraphDef new_graph; TF_RETURN_IF_ERROR(grappler::RunMetaOptimizer( - item, rewrite_options, cpu_device, &cluster, &new_graph)); + item, rewrite_options, cpu_device, &cluster, &new_graph, gpu_options)); // Merge optimized graph function library with an original library. 
// Optimized graph might have new functions specialized for it's diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 1be5f8dcc2..5a2cec4358 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -943,7 +943,7 @@ struct MemInfo { static bool IdentifySwappingCandidates( Cluster* cluster, GrapplerItem* item, std::unordered_set* skip_list, - std::unordered_map* nodes_to_swap) { + std::unordered_map* nodes_to_swap, double memory_fraction) { GraphMemory memory(*item); const std::unordered_map& devices = cluster->GetDevices(); @@ -966,10 +966,10 @@ static bool IdentifySwappingCandidates( } const GraphMemory::MemoryUsage& mem_usage = memory.GetPeakMemoryUsage(name); - if (mem_usage.used_memory <= prop.memory_size()) { + if (mem_usage.used_memory <= memory_fraction * prop.memory_size()) { continue; } - int64 required_savings = mem_usage.used_memory - prop.memory_size(); + int64 required_savings = mem_usage.used_memory - memory_fraction * prop.memory_size(); std::unordered_map op_completion_times; { @@ -1105,13 +1105,14 @@ static bool IdentifySwappingCandidates( bool SwappingPass(RewriterConfig::MemOptType optimization_level, Cluster* cluster, GrapplerItem* item, - std::unordered_set* skip_list) { + std::unordered_set* skip_list, + double memory_fraction) { std::unordered_map nodes_to_swap; if (optimization_level == RewriterConfig::DEFAULT_MEM_OPT || optimization_level == RewriterConfig::SWAPPING_HEURISTICS || optimization_level == RewriterConfig::HEURISTICS) { // Use heuristics to figure out what needs to be swapped; - IdentifySwappingCandidates(cluster, item, skip_list, &nodes_to_swap); + IdentifySwappingCandidates(cluster, item, skip_list, &nodes_to_swap, memory_fraction); } // Look for manual annotatations in the graph. for (auto& node : *item->graph.mutable_node()) { @@ -1324,7 +1325,8 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimization_level_ == RewriterConfig::MANUAL) && cluster != nullptr) { updated_graph |= SwappingPass(optimization_level_, cluster, - &optimized_item, &skip_list); + &optimized_item, &skip_list, + per_process_gpu_memory_fraction_); } } diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index 653ffaec4c..6e03f442d6 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -32,8 +32,10 @@ class MemoryOptimizer : public GraphOptimizer { // RewriterConfig::memory_optimizer_target_node_name_scope. 
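With memory_fraction threaded through as above, the swapping heuristic stops assuming the whole device is usable: both the trigger and the savings target scale by per_process_gpu_memory_fraction, so a session capped at, say, 40% of GPU memory starts swapping as soon as peak usage crosses that cap rather than physical capacity. A sketch of the adjusted arithmetic (function name hypothetical, numbers illustrative):

def required_swap_savings(peak_used, device_memory, memory_fraction=1.0):
    budget = memory_fraction * device_memory   # previously the full device_memory
    if peak_used <= budget:
        return 0                               # peak fits in the allowed slice
    return peak_used - budget                  # bytes the swapping pass must free

GiB = 1 << 30
# A 16 GiB GPU capped at 40%: a 10 GiB peak now has to free 3.6 GiB.
print(required_swap_savings(10 * GiB, 16 * GiB, 0.4) / GiB)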
explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, + double per_process_gpu_memory_fraction = 1.0, const string& recomputation_targets_name_scope = "gradients/") : optimization_level_(optimization_level), + per_process_gpu_memory_fraction_(per_process_gpu_memory_fraction), recomputation_targets_name_scope_(recomputation_targets_name_scope) {} ~MemoryOptimizer() override {} @@ -47,6 +49,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; + double per_process_gpu_memory_fraction_; string recomputation_targets_name_scope_; }; diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 143d9dc1c6..e0ab7e00e9 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -83,7 +83,7 @@ std::unique_ptr MetaOptimizer::MakeNewOptimizer( MK_OPT("shape", new ShapeOptimizer()); MK_OPT("remap", new Remapper(cfg_.remapping())); MK_OPT("layout", new LayoutOptimizer()); - MK_OPT("memory", new MemoryOptimizer(RewriterConfig::MANUAL)); + MK_OPT("memory", new MemoryOptimizer(RewriterConfig::MANUAL, gpu_options_.per_process_gpu_memory_fraction())); MK_OPT("arithmetic", new ArithmeticOptimizer(cfg_.arithmetic_optimization())); MK_OPT("autoparallel", new AutoParallel(cfg_.auto_parallel().num_replicas())); MK_OPT("loop", new LoopOptimizer(cfg_.loop_optimization())); @@ -134,13 +134,14 @@ Status MetaOptimizer::InitializeOptimizers( optimizers->emplace_back(new LayoutOptimizer()); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { + double mem_fraction = gpu_options_.per_process_gpu_memory_fraction(); if (cfg_.memory_optimizer_target_node_name_scope().empty()) { optimizers->emplace_back( // Use the default target node name prefix "gradients/" - new MemoryOptimizer(cfg_.memory_optimization())); + new MemoryOptimizer(cfg_.memory_optimization(), mem_fraction)); } else { optimizers->emplace_back( - new MemoryOptimizer(cfg_.memory_optimization(), + new MemoryOptimizer(cfg_.memory_optimization(), mem_fraction, cfg_.memory_optimizer_target_node_name_scope())); } } @@ -412,8 +413,9 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, - GraphDef* optimized_graph) { - MetaOptimizer optimizer(cpu_device, cfg); + GraphDef* optimized_graph, + const GPUOptions& gpu_options) { + MetaOptimizer optimizer(cpu_device, cfg, gpu_options); return optimizer.Optimize(cluster, item, optimized_graph); } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 151a54cbdf..74b6bb7f74 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { namespace grappler { @@ -30,6 +31,10 @@ class MetaOptimizer : public GraphOptimizer { public: MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg) : cpu_device_(cpu_device), cfg_(cfg) {} + + MetaOptimizer(DeviceBase* cpu_device, const RewriterConfig& cfg, const GPUOptions& gpu_options) + : cpu_device_(cpu_device), cfg_(cfg), gpu_options_(gpu_options) {} + ~MetaOptimizer() override = default; string name() const override { return "meta_optimizer"; }; @@ -77,6 +82,7 @@ class MetaOptimizer : public GraphOptimizer { GraphOptimizationResult* optimization_result); std::vector optimization_results_; + GPUOptions gpu_options_; }; bool MetaOptimizerEnabled(const RewriterConfig& cfg); @@ -89,7 +95,8 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg); // when possible. Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, - GraphDef* optimized_graph); + GraphDef* optimized_graph, + const GPUOptions& gpu_options); } // namespace grappler } // namespace tensorflow -- cgit v1.2.3 From 4bb5c2611c6df6c15d1e2720da184bb301538e50 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Fri, 22 Jun 2018 14:21:49 +0800 Subject: Revert "TST: add test case" This reverts commit ffc5c4e845d8bfb36e6c56d904cba3bd8e1de94e. --- tensorflow/python/keras/backend_test.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 9cf1c79783..2ba6c8ef15 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -21,7 +21,6 @@ from absl.testing import parameterized import numpy as np import scipy.sparse -from tensorflow.core.protobuf import config_pb2 from tensorflow.python import keras from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor @@ -237,13 +236,10 @@ class BackendUtilsTest(test.TestCase): x_placeholder = keras.backend.placeholder(shape=()) y_placeholder = keras.backend.placeholder(shape=()) - run_options = config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.NO_TRACE) f = keras.backend.function(inputs=[x_placeholder, y_placeholder], outputs=[x_placeholder + y_placeholder], updates=[(x, x_placeholder + 1.)], - fetches=[keras.backend.update(y, 5.)], - options=run_options) + fetches=[keras.backend.update(y, 5.)]) output = f([10., 20.]) self.assertEqual(output, [30.]) self.assertEqual( -- cgit v1.2.3 From c55145148189b95dc881b7e88fb7762cb884df94 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Fri, 22 Jun 2018 14:38:43 +0800 Subject: ENH: add run_metadata argument --- tensorflow/python/keras/backend.py | 9 +++++++-- tensorflow/python/keras/backend_test.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index 5f302d8e90..c82983fedf 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -2761,7 +2761,7 @@ class Function(object): updates: Additional update ops to be run at function call. name: A name to help users identify what this function does. session_kwargs: Arguments to `tf.Session.run()`: - `fetches`, `feed_dict`, `options`. 
+ `fetches`, `feed_dict`, `options`, `run_metadata`. """ def __init__(self, inputs, outputs, updates=None, name=None, @@ -2796,6 +2796,7 @@ class Function(object): if not isinstance(self.fetches, list): self.fetches = [self.fetches] self.run_options = session_kwargs.pop('options', None) + self.run_metadata = session_kwargs.pop('run_metadata', None) # The main use case of `fetches` being passed to a model is the ability # to run custom updates (since the outputs of fetches are never returned). # This requires us to wrap fetches in `identity` ops. @@ -2903,7 +2904,11 @@ class Function(object): session != self._session): self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) - fetched = self._callable_fn(*array_vals) + if self.run_metadata: + fetched = self._callable_fn(*array_vals, + run_metadata=self.run_metadata) + else: + fetched = self._callable_fn(*array_vals) return fetched[:len(self.outputs)] diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 2ba6c8ef15..c94f151510 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -21,6 +21,7 @@ from absl.testing import parameterized import numpy as np import scipy.sparse +from tensorflow.core.protobuf import config_pb2 from tensorflow.python import keras from tensorflow.python.framework import dtypes from tensorflow.python.framework import sparse_tensor @@ -276,6 +277,29 @@ class BackendUtilsTest(test.TestCase): self.assertEqual( keras.backend.get_session().run(fetches=[x, y]), [30., 40.]) + def test_function_tf_run_options_with_run_metadata(self): + with self.test_session(): + x_placeholder = keras.backend.placeholder(shape=()) + y_placeholder = keras.backend.placeholder(shape=()) + + run_options = config_pb2.RunOptions(output_partition_graphs=True) + run_metadata = config_pb2.RunMetadata() + # enable run_options. + f = keras.backend.function(inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + options=run_options, + run_metadata=run_metadata) + output = f([10., 20.]) + self.assertEqual(output, [30.]) + self.assertGreater(len(run_metadata.partition_graphs), 0) + # disable run_options. 
+ f1 = keras.backend.function(inputs=[x_placeholder, y_placeholder], + outputs=[x_placeholder + y_placeholder], + run_metadata=run_metadata) + output1 = f1([10., 20.]) + self.assertEqual(output1, [30.]) + self.assertEqual(len(run_metadata.partition_graphs), 0) + class BackendVariableTest(test.TestCase): -- cgit v1.2.3 From e12a33cd193dcd32d236e7f67e27b31d40f0fb10 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Fri, 22 Jun 2018 22:34:41 +0800 Subject: CLN: remove redundant if...else --- tensorflow/python/keras/backend.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/backend.py b/tensorflow/python/keras/backend.py index c82983fedf..e294f76c6a 100644 --- a/tensorflow/python/keras/backend.py +++ b/tensorflow/python/keras/backend.py @@ -2904,11 +2904,8 @@ class Function(object): session != self._session): self._make_callable(feed_arrays, feed_symbols, symbol_vals, session) - if self.run_metadata: - fetched = self._callable_fn(*array_vals, - run_metadata=self.run_metadata) - else: - fetched = self._callable_fn(*array_vals) + fetched = self._callable_fn(*array_vals, + run_metadata=self.run_metadata) return fetched[:len(self.outputs)] -- cgit v1.2.3 From afbe36c5126cf118c60cbf22454d99d429425334 Mon Sep 17 00:00:00 2001 From: "Peng Wang(SIMPENG)" Date: Sat, 23 Jun 2018 06:03:41 +0000 Subject: Merge master change --- tensorflow/core/grappler/optimizers/meta_optimizer.cc | 7 +++++++ tensorflow/core/grappler/optimizers/meta_optimizer.h | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index e0ab7e00e9..0d2b9a5763 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -411,6 +411,13 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { !cfg.optimizers().empty() || !cfg.custom_optimizers().empty(); } +Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, + DeviceBase* cpu_device, Cluster* cluster, + GraphDef* optimized_graph) { + MetaOptimizer optimizer(cpu_device, cfg); + return optimizer.Optimize(cluster, item, optimized_graph); +} + Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, GraphDef* optimized_graph, diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 74b6bb7f74..c267b5fd8e 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -93,6 +93,10 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg); // during constant folding; if NULL, a new device is created for doing constant // folding. For performance, it is recommended to pass in an existing cpu_device // when possible. 
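The declaration added just below restores the original five-argument RunMetaOptimizer alongside the six-argument form introduced earlier in this series, so existing callers keep compiling; the short form simply constructs a MetaOptimizer without GPUOptions. In Python the same compatibility move is usually a default argument rather than a second overload, roughly (all names hypothetical):

def run_meta_optimizer(item, cfg, cpu_device, cluster, graph, gpu_options=None):
    # Old call sites omit gpu_options; new ones pass the session's GPU config.
    fraction = 1.0 if gpu_options is None else gpu_options["memory_fraction"]
    return "optimized with fraction %.2f" % fraction

print(run_meta_optimizer("item", "cfg", None, None, "g"))
print(run_meta_optimizer("item", "cfg", None, None, "g",
                         {"memory_fraction": 0.4}))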
+Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, + DeviceBase* cpu_device, Cluster* cluster, + GraphDef* optimized_graph); + Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg, DeviceBase* cpu_device, Cluster* cluster, GraphDef* optimized_graph, -- cgit v1.2.3 From 53dd73a538916cbfd96877c95dfd42eb6b92ed1c Mon Sep 17 00:00:00 2001 From: fo40225 Date: Wed, 11 Jul 2018 19:09:57 +0800 Subject: fix cmake python 2.7 test import fail --- tensorflow/__init__.py | 3 ++- tensorflow/contrib/cmake/python_modules.txt | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/__init__.py b/tensorflow/__init__.py index 440e9f8dbd..21677512b6 100644 --- a/tensorflow/__init__.py +++ b/tensorflow/__init__.py @@ -28,7 +28,8 @@ contrib = LazyLoader('contrib', globals(), 'tensorflow.contrib') del LazyLoader from tensorflow.python.platform import flags # pylint: disable=g-import-not-at-top -app.flags = flags # pylint: disable=undefined-variable +from tensorflow.python.platform import app # pylint: disable=g-import-not-at-top +app.flags = flags del absolute_import del division diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index 40041d9c88..a465cbf0f1 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -4,6 +4,8 @@ tensorflow tensorflow/core tensorflow/core/example tensorflow/core/framework +tensorflow/core/kernels +tensorflow/core/kernels/boosted_trees tensorflow/core/lib tensorflow/core/lib/core tensorflow/core/profiler -- cgit v1.2.3 From f814e242d16997dba8b9bbded3ef6e2540e2d044 Mon Sep 17 00:00:00 2001 From: "Li, Yiqiang" Date: Sun, 15 Jul 2018 20:13:09 +0800 Subject: Replace to use fast reorder path in MklRelu op. 
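Two independent cleanups land below. The relu hunks switch to a CheckReorderToOpMem overload that handles stream submission itself, so call sites stop accumulating primitives in a shared `net` vector and waiting on an extra eager stream. The mkl_util.h hunk binds the factory's hash map to a local reference so GetOp/SetOp do a single lookup each; a rough Python analogue of those accessors (container and names hypothetical):

class PrimitiveFactory:
    _ops = {}                        # shared pool, as in GetHashMap()

    def get_op(self, key):
        return self._ops.get(key)    # one lookup; None mirrors the nullptr miss

    def set_op(self, key, op):
        if key in self._ops:         # mirrors CHECK(stream_iter == map.end())
            raise KeyError("primitive already registered: " + key)
        self._ops[key] = op

factory = PrimitiveFactory()
factory.set_op("eltwise_fwd_relu_4x8", object())
assert factory.get_op("eltwise_fwd_relu_4x8") is not None
assert factory.get_op("missing") is None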
--- tensorflow/core/kernels/mkl_relu_op.cc | 17 ++++++----------- tensorflow/core/util/mkl_util.h | 12 +++++++----- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index f73d3d81f9..3d5a05be73 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -814,11 +814,9 @@ class MklReluOpBase : public OpKernel { // check wehther src need to reorder if (src_md.data.format != eltwise_fwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); - std::vector net; auto src_target_pd = memory::primitive_desc({{src_dims}, MklDnnType(), eltwise_fwd->GetSrcMemoryFormat()}, cpu_engine); - src.CheckReorderToOpMem(src_target_pd, &net); - stream(stream::kind::eager).submit(net).wait(); + src.CheckReorderToOpMem(src_target_pd); src_data = static_cast(src.GetOpMem().get_data_handle()); } else { src_data = static_cast( @@ -882,9 +880,8 @@ class MklReluGradOpBase : public OpKernel { virtual void Compute_Scalar(OpKernelContext* context) = 0; - void Compute(OpKernelContext* context) { + void Compute(OpKernelContext* context) { try { - // auto cpu_engine = engine(engine::cpu, 0); MklDnnData src(&cpu_engine); MklDnnData diff_dst(&cpu_engine); @@ -892,9 +889,9 @@ class MklReluGradOpBase : public OpKernel { const size_t src_index = 1; // index of src input tensor const size_t diff_src_index = 0; // index of diff_src output tensor - const Tensor& src_tensor = MklGetInput(context, src_index); + const Tensor& src_tensor = MklGetInput(context, src_index); const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index); - Tensor* diff_src_tensor = nullptr; + Tensor* diff_src_tensor = nullptr; MklDnnShape dnn_shape_src, dnn_shape_diff_dst; GetMklShape(context, src_index, &dnn_shape_src); @@ -969,11 +966,10 @@ class MklReluGradOpBase : public OpKernel { // check whether need reorder for src / diff_dst T* src_data; T* diff_dst_data; - std::vector net; if (src_md.data.format != eltwise_bwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); src.CheckReorderToOpMem( - eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + eltwise_bwd_pd.get()->diff_src_primitive_desc()); src_data = static_cast(src.GetOpMem().get_data_handle()); } else { src_data = static_cast( @@ -983,14 +979,13 @@ class MklReluGradOpBase : public OpKernel { if (diff_dst_md.data.format != eltwise_bwd->GetDiffDstMemoryFormat()) { diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); diff_dst.CheckReorderToOpMem( - eltwise_bwd_pd.get()->diff_src_primitive_desc(), &net); + eltwise_bwd_pd.get()->diff_src_primitive_desc()); diff_dst_data = static_cast( diff_dst.GetOpMem().get_data_handle()); } else { diff_dst_data = static_cast(const_cast( diff_dst_tensor.flat().data())); } - stream(stream::kind::eager).submit(net).wait(); // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index bb447e0393..b2c93a508d 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -1897,8 +1897,9 @@ class MklPrimitiveFactory { ~MklPrimitiveFactory() {} MklPrimitive* GetOp(const std::string& key) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { + auto &map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); + if (stream_iter == map.end()) { return nullptr; } else { return stream_iter->second; @@ -1906,11 +1907,12 @@ 
class MklPrimitiveFactory { } void SetOp(const std::string& key, MklPrimitive* op) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + auto &map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); - CHECK(stream_iter == MklPrimitiveFactory::GetHashMap().end()); + CHECK(stream_iter == map.end()); - MklPrimitiveFactory::GetHashMap()[key] = op; + map[key] = op; } private: -- cgit v1.2.3 From 2fcfb4abde9d847cff5a344cf06b2704cb6f9545 Mon Sep 17 00:00:00 2001 From: "Peng Wang (SIMPENG)" Date: Fri, 20 Jul 2018 16:25:56 +0800 Subject: fix build error --- tensorflow/core/grappler/optimizers/memory_optimizer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index a3f0e07861..49543645f6 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -114,7 +114,7 @@ TEST_F(RecomputeSubgraphTest, TwoInputSubgraphs) { (*pre_transform_node_map.GetNode("b")->mutable_attr())["_recompute_hint"] .set_i(0); - MemoryOptimizer optimizer(RewriterConfig::MANUAL, + MemoryOptimizer optimizer(RewriterConfig::MANUAL,1.0, "some_name_scope/gradients"); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); -- cgit v1.2.3 From d76aaad2ea9ee4df8c32b382db758854315d230e Mon Sep 17 00:00:00 2001 From: "Peng Wang (SIMPENG)" Date: Fri, 20 Jul 2018 17:50:51 +0800 Subject: change format a bit --- tensorflow/core/grappler/optimizers/memory_optimizer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 49543645f6..1473e26cbd 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -114,7 +114,7 @@ TEST_F(RecomputeSubgraphTest, TwoInputSubgraphs) { (*pre_transform_node_map.GetNode("b")->mutable_attr())["_recompute_hint"] .set_i(0); - MemoryOptimizer optimizer(RewriterConfig::MANUAL,1.0, + MemoryOptimizer optimizer(RewriterConfig::MANUAL, 1.0, "some_name_scope/gradients"); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); -- cgit v1.2.3 From 7c930897dc2c0ad1481feb5f14e07ba5d979b85c Mon Sep 17 00:00:00 2001 From: Jie Date: Mon, 23 Jul 2018 17:02:43 -0700 Subject: [tftrt] trt 4 update input check on tensor dimensions. 
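In a TensorRT network definition the batch dimension stays implicit, so the converter maps a rank-r TensorFlow shape to an (r - 1)-dimensional TRT input; after this change only the TRT 3 path still insists on rank-4 tensors. A toy version of that mapping, using the shapes from the new rank_two_test.py below:

def to_trt_input_dims(tf_shape, trt_major):
    if trt_major == 3 and len(tf_shape) != 4:
        # TRT 3.x only supports 4-dimensional input tensors.
        raise NotImplementedError(
            "Require 4 dimensional input. Got %d" % len(tf_shape))
    return tf_shape[1:]                  # drop the implicit batch dimension

print(to_trt_input_dims([12, 5], 4))         # rank-2 input now allowed: [5]
print(to_trt_input_dims([12, 5, 2, 2], 3))   # [5, 2, 2] works on TRT 3 too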
relaxing tensor dimension for trt 4 to support non-4-dimensional inputs added unit test (currently disabled due to failed INT8 conversion) --- tensorflow/contrib/tensorrt/BUILD | 1 + .../contrib/tensorrt/convert/convert_nodes.cc | 29 +++++++-- tensorflow/contrib/tensorrt/test/rank_two_test.py | 75 ++++++++++++++++++++++ 3 files changed, 100 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/tensorrt/test/rank_two_test.py diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 2fe1f2c242..b0337c3fe9 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -390,6 +390,7 @@ cuda_py_tests( "test/multi_connection_neighbor_engine_test.py", "test/neighboring_engine_test.py", "test/unary_test.py", + # "test/rank_two_test.py", # "test/vgg_block_nchw_test.py", # "test/vgg_block_test.py", ], diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 451d6fe698..7782919566 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2693,8 +2693,13 @@ tensorflow::Status ConvertGraphDefToEngine( VLOG(1) << "Converting op name=" << node_name << ", op=" << node_def.op(); if (tensorflow::str_util::StartsWith(node_name, kInputPHName) && (node_def.op() == "Placeholder")) { - nvinfer1::DimsCHW input_dim_pseudo_chw; - for (int i = 0; i < 8; i++) input_dim_pseudo_chw.d[i] = 0; + nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + auto type_status = + ConvertDType(node_def.attr().at("dtype").type(), &dtype); + if (type_status != tensorflow::Status::OK()) { + LOG(WARNING) << "Type conversion failed for " << node_name; + return type_status; + } int32 slot_number = -1; if (!tensorflow::strings::safe_strto32( node_name.c_str() + strlen(kInputPHName), &slot_number)) { @@ -2721,13 +2726,27 @@ tensorflow::Status ConvertGraphDefToEngine( StrAppend(&dim_str, " ]"); VLOG(1) << dim_str; } + +#if NV_TENSORRT_MAJOR == 3 + nvinfer1::DimsCHW input_dim; + // TRT 3.x only support 4 dimensional input tensor. + if (shape.dims() != 4) { + string err_str = "Require 4 dimensional input."; + StrAppend(&err_str, " Got ", shape.dims(), " ", + node_name); + return tensorflow::errors::Unimplemented(err_str); + } +#elif NV_TENSORRT_MAJOR > 3 + nvinfer1::Dims input_dim; +#endif + for (int i = 1; i < shape.dims(); i++) { - input_dim_pseudo_chw.d[i - 1] = shape.dim_size(i); + input_dim.d[i - 1] = shape.dim_size(i); } - input_dim_pseudo_chw.nbDims = shape.dims() - 1; + input_dim.nbDims = shape.dims() - 1; nvinfer1::ITensor* input_tensor = converter.network()->addInput( - node_name.c_str(), dtype, input_dim_pseudo_chw); + node_name.c_str(), dtype, input_dim); if (!input_tensor) { return tensorflow::errors::InvalidArgument( "Failed to create Input layer tensor ", node_name, diff --git a/tensorflow/contrib/tensorrt/test/rank_two_test.py b/tensorflow/contrib/tensorrt/test/rank_two_test.py new file mode 100644 index 0000000000..a0c18da265 --- /dev/null +++ b/tensorflow/contrib/tensorrt/test/rank_two_test.py @@ -0,0 +1,75 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Model script to test TF-TensorRT integration.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class RankTwoTest(trt_test.TfTrtIntegrationTestBase): + + def GetParams(self): + """Test for rank 2 input in TF-TRT.""" + dtype = dtypes.float32 + input_name = "input" + input_dims = [12, 5] + input2_name = "input2" + input2_dims = [12, 5, 2, 2] + g = ops.Graph() + with g.as_default(): + # path 1 with rank 2 input + x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name) + q = x + 1.0 + q = math_ops.abs(q) + q = q + 2.2 + q = math_ops.abs(q) + q = q + 3.0 + q = array_ops.expand_dims(q, -1) + q = array_ops.expand_dims(q, -1) + a = gen_math_ops.reciprocal(q) + # path 2 with rank 4 input + x = array_ops.placeholder(dtype=dtype, shape=input2_dims, name=input2_name) + q = x + 1.0 + q = math_ops.abs(q) + q = q + 2.2 + q = math_ops.abs(q) + q = q + 3.0 + b = gen_math_ops.reciprocal(q) + # combine path 1 & 2 + q = a + b + array_ops.squeeze(q, name=self.output_name) + return trt_test.TfTrtIntegrationTestParams( + gdef=g.as_graph_def(), + input_names=[input_name, input2_name], + input_dims=[input_dims, input2_dims], + num_expected_engines=2, + expected_output_dims=(12, 5, 2, 2), + allclose_atol=1.e-03, + allclose_rtol=1.e-03) + + +if __name__ == "__main__": + test.main() -- cgit v1.2.3 From 121e0161c5a7273c5a59f1e10a8577428c685796 Mon Sep 17 00:00:00 2001 From: Avijit <30507445+avijit-nervana@users.noreply.github.com> Date: Tue, 24 Jul 2018 23:35:27 -0700 Subject: nGraph integration with TensorFlow * Added nGraph bridge as a third_party to be built with TensorFlow based on user selection. 
* Added a limited set of C++ unit tests to verify the correctness of the computation --- WORKSPACE | 1 + configure.py | 2309 ++++++++++---------- tensorflow/BUILD | 14 +- tensorflow/core/BUILD | 5 +- .../common_runtime/threadpool_device_factory.cc | 1 + tensorflow/core/platform/default/build_config.bzl | 2 +- tensorflow/python/BUILD | 4 +- tensorflow/tensorflow.bzl | 5 + tensorflow/workspace.bzl | 33 + third_party/ngraph/BUILD | 1 + third_party/ngraph/LICENSE | 201 ++ third_party/ngraph/NGRAPH_LICENSE | 201 ++ third_party/ngraph/build_defs.bzl | 16 + third_party/ngraph/ngraph.BUILD | 45 + third_party/ngraph/ngraph_tf.BUILD | 96 + third_party/ngraph/nlohmann_json.BUILD | 23 + 16 files changed, 1820 insertions(+), 1137 deletions(-) create mode 100644 third_party/ngraph/BUILD create mode 100644 third_party/ngraph/LICENSE create mode 100644 third_party/ngraph/NGRAPH_LICENSE create mode 100644 third_party/ngraph/build_defs.bzl create mode 100644 third_party/ngraph/ngraph.BUILD create mode 100644 third_party/ngraph/ngraph_tf.BUILD create mode 100644 third_party/ngraph/nlohmann_json.BUILD diff --git a/WORKSPACE b/WORKSPACE index fd7570a80a..1c00b3fc7e 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -79,3 +79,4 @@ new_http_archive( "http://download.tensorflow.org/models/speech_commands_v0.01.zip", ], ) + diff --git a/configure.py b/configure.py index 8930c3a1f1..3bbbee7abf 100644 --- a/configure.py +++ b/configure.py @@ -28,9 +28,9 @@ import sys # pylint: disable=g-import-not-at-top try: - from shutil import which + from shutil import which except ImportError: - from distutils.spawn import find_executable as which + from distutils.spawn import find_executable as which # pylint: enable=g-import-not-at-top _DEFAULT_CUDA_VERSION = '9.0' @@ -56,218 +56,226 @@ _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') class UserInputError(Exception): - pass + pass def is_windows(): - return platform.system() == 'Windows' + return platform.system() == 'Windows' def is_linux(): - return platform.system() == 'Linux' + return platform.system() == 'Linux' def is_macos(): - return platform.system() == 'Darwin' + return platform.system() == 'Darwin' def is_ppc64le(): - return platform.machine() == 'ppc64le' + return platform.machine() == 'ppc64le' def is_cygwin(): - return platform.system().startswith('CYGWIN_NT') + return platform.system().startswith('CYGWIN_NT') def get_input(question): - try: try: - answer = raw_input(question) - except NameError: - answer = input(question) # pylint: disable=bad-builtin - except EOFError: - answer = '' - return answer + try: + answer = raw_input(question) + except NameError: + answer = input(question) # pylint: disable=bad-builtin + except EOFError: + answer = '' + return answer def symlink_force(target, link_name): - """Force symlink, equivalent of 'ln -sf'. + """Force symlink, equivalent of 'ln -sf'. Args: target: items to link to. link_name: name of the link. """ - try: - os.symlink(target, link_name) - except OSError as e: - if e.errno == errno.EEXIST: - os.remove(link_name) - os.symlink(target, link_name) - else: - raise e + try: + os.symlink(target, link_name) + except OSError as e: + if e.errno == errno.EEXIST: + os.remove(link_name) + os.symlink(target, link_name) + else: + raise e def sed_in_place(filename, old, new): - """Replace old string with new string in file. + """Replace old string with new string in file. Args: filename: string for filename. old: string to replace. new: new string to replace to. 
""" - with open(filename, 'r') as f: - filedata = f.read() - newdata = filedata.replace(old, new) - with open(filename, 'w') as f: - f.write(newdata) + with open(filename, 'r') as f: + filedata = f.read() + newdata = filedata.replace(old, new) + with open(filename, 'w') as f: + f.write(newdata) def write_to_bazelrc(line): - with open(_TF_BAZELRC, 'a') as f: - f.write(line + '\n') + with open(_TF_BAZELRC, 'a') as f: + f.write(line + '\n') def write_action_env_to_bazelrc(var_name, var): - write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) + write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) def run_shell(cmd, allow_non_zero=False): - if allow_non_zero: - try: - output = subprocess.check_output(cmd) - except subprocess.CalledProcessError as e: - output = e.output - else: - output = subprocess.check_output(cmd) - return output.decode('UTF-8').strip() + if allow_non_zero: + try: + output = subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + output = e.output + else: + output = subprocess.check_output(cmd) + return output.decode('UTF-8').strip() def cygpath(path): - """Convert path from posix to windows.""" - return os.path.abspath(path).replace('\\', '/') + """Convert path from posix to windows.""" + return os.path.abspath(path).replace('\\', '/') def get_python_path(environ_cp, python_bin_path): - """Get the python site package paths.""" - python_paths = [] - if environ_cp.get('PYTHONPATH'): - python_paths = environ_cp.get('PYTHONPATH').split(':') - try: - library_paths = run_shell( - [python_bin_path, '-c', - 'import site; print("\\n".join(site.getsitepackages()))']).split('\n') - except subprocess.CalledProcessError: - library_paths = [run_shell( - [python_bin_path, '-c', - 'from distutils.sysconfig import get_python_lib;' - 'print(get_python_lib())'])] - - all_paths = set(python_paths + library_paths) - - paths = [] - for path in all_paths: - if os.path.isdir(path): - paths.append(path) - return paths + """Get the python site package paths.""" + python_paths = [] + if environ_cp.get('PYTHONPATH'): + python_paths = environ_cp.get('PYTHONPATH').split(':') + try: + library_paths = run_shell([ + python_bin_path, '-c', + 'import site; print("\\n".join(site.getsitepackages()))' + ]).split('\n') + except subprocess.CalledProcessError: + library_paths = [ + run_shell([ + python_bin_path, '-c', + 'from distutils.sysconfig import get_python_lib;' + 'print(get_python_lib())' + ]) + ] + + all_paths = set(python_paths + library_paths) + + paths = [] + for path in all_paths: + if os.path.isdir(path): + paths.append(path) + return paths def get_python_major_version(python_bin_path): - """Get the python major version.""" - return run_shell([python_bin_path, '-c', 'import sys; print(sys.version[0])']) + """Get the python major version.""" + return run_shell( + [python_bin_path, '-c', 'import sys; print(sys.version[0])']) def setup_python(environ_cp): - """Setup python related env variables.""" - # Get PYTHON_BIN_PATH, default is the current running python. - default_python_bin_path = sys.executable - ask_python_bin_path = ('Please specify the location of python. 
[Default is ' - '%s]: ') % default_python_bin_path - while True: - python_bin_path = get_from_env_or_user_or_default( - environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path, - default_python_bin_path) - # Check if the path is valid - if os.path.isfile(python_bin_path) and os.access( - python_bin_path, os.X_OK): - break - elif not os.path.exists(python_bin_path): - print('Invalid python path: %s cannot be found.' % python_bin_path) - else: - print('%s is not executable. Is it the python binary?' % python_bin_path) - environ_cp['PYTHON_BIN_PATH'] = '' - - # Convert python path to Windows style before checking lib and version - if is_windows() or is_cygwin(): - python_bin_path = cygpath(python_bin_path) - - # Get PYTHON_LIB_PATH - python_lib_path = environ_cp.get('PYTHON_LIB_PATH') - if not python_lib_path: - python_lib_paths = get_python_path(environ_cp, python_bin_path) - if environ_cp.get('USE_DEFAULT_PYTHON_LIB_PATH') == '1': - python_lib_path = python_lib_paths[0] - else: - print('Found possible Python library paths:\n %s' % - '\n '.join(python_lib_paths)) - default_python_lib_path = python_lib_paths[0] - python_lib_path = get_input( - 'Please input the desired Python library path to use. ' - 'Default is [%s]\n' % python_lib_paths[0]) - if not python_lib_path: - python_lib_path = default_python_lib_path - environ_cp['PYTHON_LIB_PATH'] = python_lib_path - - python_major_version = get_python_major_version(python_bin_path) - - # Convert python path to Windows style before writing into bazel.rc - if is_windows() or is_cygwin(): - python_lib_path = cygpath(python_lib_path) - - # Set-up env variables used by python_configure.bzl - write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) - write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) - write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) - environ_cp['PYTHON_BIN_PATH'] = python_bin_path - - # Write tools/python_bin_path.sh - with open(os.path.join( - _TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f: - f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) + """Setup python related env variables.""" + # Get PYTHON_BIN_PATH, default is the current running python. + default_python_bin_path = sys.executable + ask_python_bin_path = ( + 'Please specify the location of python. [Default is ' + '%s]: ') % default_python_bin_path + while True: + python_bin_path = get_from_env_or_user_or_default( + environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path, + default_python_bin_path) + # Check if the path is valid + if os.path.isfile(python_bin_path) and os.access( + python_bin_path, os.X_OK): + break + elif not os.path.exists(python_bin_path): + print('Invalid python path: %s cannot be found.' % python_bin_path) + else: + print('%s is not executable. Is it the python binary?' % + python_bin_path) + environ_cp['PYTHON_BIN_PATH'] = '' + + # Convert python path to Windows style before checking lib and version + if is_windows() or is_cygwin(): + python_bin_path = cygpath(python_bin_path) + + # Get PYTHON_LIB_PATH + python_lib_path = environ_cp.get('PYTHON_LIB_PATH') + if not python_lib_path: + python_lib_paths = get_python_path(environ_cp, python_bin_path) + if environ_cp.get('USE_DEFAULT_PYTHON_LIB_PATH') == '1': + python_lib_path = python_lib_paths[0] + else: + print('Found possible Python library paths:\n %s' % + '\n '.join(python_lib_paths)) + default_python_lib_path = python_lib_paths[0] + python_lib_path = get_input( + 'Please input the desired Python library path to use. 
' + 'Default is [%s]\n' % python_lib_paths[0]) + if not python_lib_path: + python_lib_path = default_python_lib_path + environ_cp['PYTHON_LIB_PATH'] = python_lib_path + + python_major_version = get_python_major_version(python_bin_path) + + # Convert python path to Windows style before writing into bazel.rc + if is_windows() or is_cygwin(): + python_lib_path = cygpath(python_lib_path) + + # Set-up env variables used by python_configure.bzl + write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) + write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) + write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) + environ_cp['PYTHON_BIN_PATH'] = python_bin_path + + # Write tools/python_bin_path.sh + with open( + os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), + 'w') as f: + f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) def reset_tf_configure_bazelrc(workspace_path): - """Reset file that contains customized config settings.""" - open(_TF_BAZELRC, 'w').close() - bazelrc_path = os.path.join(workspace_path, '.bazelrc') - - data = [] - if os.path.exists(bazelrc_path): - with open(bazelrc_path, 'r') as f: - data = f.read().splitlines() - with open(bazelrc_path, 'w') as f: - for l in data: - if _TF_BAZELRC_FILENAME in l: - continue - f.write('%s\n' % l) - if is_windows(): - tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") - else: - tf_bazelrc_path = _TF_BAZELRC - f.write('import %s\n' % tf_bazelrc_path) + """Reset file that contains customized config settings.""" + open(_TF_BAZELRC, 'w').close() + bazelrc_path = os.path.join(workspace_path, '.bazelrc') + + data = [] + if os.path.exists(bazelrc_path): + with open(bazelrc_path, 'r') as f: + data = f.read().splitlines() + with open(bazelrc_path, 'w') as f: + for l in data: + if _TF_BAZELRC_FILENAME in l: + continue + f.write('%s\n' % l) + if is_windows(): + tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") + else: + tf_bazelrc_path = _TF_BAZELRC + f.write('import %s\n' % tf_bazelrc_path) def cleanup_makefile(): - """Delete any leftover BUILD files from the Makefile build. + """Delete any leftover BUILD files from the Makefile build. These files could interfere with Bazel parsing. """ - makefile_download_dir = os.path.join( - _TF_WORKSPACE_ROOT, 'tensorflow', 'contrib', 'makefile', 'downloads') - if os.path.isdir(makefile_download_dir): - for root, _, filenames in os.walk(makefile_download_dir): - for f in filenames: - if f.endswith('BUILD'): - os.remove(os.path.join(root, f)) + makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow', + 'contrib', 'makefile', 'downloads') + if os.path.isdir(makefile_download_dir): + for root, _, filenames in os.walk(makefile_download_dir): + for f in filenames: + if f.endswith('BUILD'): + os.remove(os.path.join(root, f)) def get_var(environ_cp, @@ -277,7 +285,7 @@ def get_var(environ_cp, question=None, yes_reply=None, no_reply=None): - """Get boolean input from user. + """Get boolean input from user. If var_name is not set in env, ask user to enable query_item or not. If the response is empty, use the default. @@ -301,63 +309,66 @@ def get_var(environ_cp, scripting error, and will continue to provide invalid input. Raise the error to avoid infinitely looping. """ - if not question: - question = 'Do you wish to build TensorFlow with %s support?' % query_item - if not yes_reply: - yes_reply = '%s support will be enabled for TensorFlow.' 
% query_item - if not no_reply: - no_reply = 'No %s' % yes_reply - - yes_reply += '\n' - no_reply += '\n' - - if enabled_by_default: - question += ' [Y/n]: ' - else: - question += ' [y/N]: ' - - var = environ_cp.get(var_name) - if var is not None: - var_content = var.strip().lower() - true_strings = ('1', 't', 'true', 'y', 'yes') - false_strings = ('0', 'f', 'false', 'n', 'no') - if var_content in true_strings: - var = True - elif var_content in false_strings: - var = False - else: - raise UserInputError( - 'Environment variable %s must be set as a boolean indicator.\n' - 'The following are accepted as TRUE : %s.\n' - 'The following are accepted as FALSE: %s.\n' - 'Current value is %s.' % ( - var_name, ', '.join(true_strings), ', '.join(false_strings), - var)) - - while var is None: - user_input_origin = get_input(question) - user_input = user_input_origin.strip().lower() - if user_input == 'y': - print(yes_reply) - var = True - elif user_input == 'n': - print(no_reply) - var = False - elif not user_input: - if enabled_by_default: - print(yes_reply) - var = True - else: - print(no_reply) - var = False + if not question: + question = 'Do you wish to build TensorFlow with %s support?' % query_item + if not yes_reply: + yes_reply = '%s support will be enabled for TensorFlow.' % query_item + if not no_reply: + no_reply = 'No %s' % yes_reply + + yes_reply += '\n' + no_reply += '\n' + + if enabled_by_default: + question += ' [Y/n]: ' else: - print('Invalid selection: %s' % user_input_origin) - return var - - -def set_build_var(environ_cp, var_name, query_item, option_name, - enabled_by_default, bazel_config_name=None): - """Set if query_item will be enabled for the build. + question += ' [y/N]: ' + + var = environ_cp.get(var_name) + if var is not None: + var_content = var.strip().lower() + true_strings = ('1', 't', 'true', 'y', 'yes') + false_strings = ('0', 'f', 'false', 'n', 'no') + if var_content in true_strings: + var = True + elif var_content in false_strings: + var = False + else: + raise UserInputError( + 'Environment variable %s must be set as a boolean indicator.\n' + 'The following are accepted as TRUE : %s.\n' + 'The following are accepted as FALSE: %s.\n' + 'Current value is %s.' % (var_name, ', '.join(true_strings), + ', '.join(false_strings), var)) + + while var is None: + user_input_origin = get_input(question) + user_input = user_input_origin.strip().lower() + if user_input == 'y': + print(yes_reply) + var = True + elif user_input == 'n': + print(no_reply) + var = False + elif not user_input: + if enabled_by_default: + print(yes_reply) + var = True + else: + print(no_reply) + var = False + else: + print('Invalid selection: %s' % user_input_origin) + return var + + +def set_build_var(environ_cp, + var_name, + query_item, + option_name, + enabled_by_default, + bazel_config_name=None): + """Set if query_item will be enabled for the build. Ask user if query_item will be enabled. Default is used if no input is given. Set subprocess environment variable and write to .bazelrc if enabled. @@ -372,15 +383,16 @@ def set_build_var(environ_cp, var_name, query_item, option_name, bazel_config_name: Name for Bazel --config argument to enable build feature. 
""" - var = str(int(get_var(environ_cp, var_name, query_item, enabled_by_default))) - environ_cp[var_name] = var - if var == '1': - write_to_bazelrc('build --define %s=true' % option_name) - elif bazel_config_name is not None: - # TODO(mikecase): Migrate all users of configure.py to use --config Bazel - # options and not to set build configs through environment variables. - write_to_bazelrc('build:%s --define %s=true' - % (bazel_config_name, option_name)) + var = str( + int(get_var(environ_cp, var_name, query_item, enabled_by_default))) + environ_cp[var_name] = var + if var == '1': + write_to_bazelrc('build --define %s=true' % option_name) + elif bazel_config_name is not None: + # TODO(mikecase): Migrate all users of configure.py to use --config Bazel + # options and not to set build configs through environment variables. + write_to_bazelrc( + 'build:%s --define %s=true' % (bazel_config_name, option_name)) def set_action_env_var(environ_cp, @@ -390,7 +402,7 @@ def set_action_env_var(environ_cp, question=None, yes_reply=None, no_reply=None): - """Set boolean action_env variable. + """Set boolean action_env variable. Ask user if query_item will be enabled. Default is used if no input is given. Set environment variable and write to .bazelrc. @@ -405,16 +417,16 @@ def set_action_env_var(environ_cp, yes_reply: optional string for reply when feature is enabled. no_reply: optional string for reply when feature is disabled. """ - var = int( - get_var(environ_cp, var_name, query_item, enabled_by_default, question, - yes_reply, no_reply)) + var = int( + get_var(environ_cp, var_name, query_item, enabled_by_default, question, + yes_reply, no_reply)) - write_action_env_to_bazelrc(var_name, var) - environ_cp[var_name] = str(var) + write_action_env_to_bazelrc(var_name, var) + environ_cp[var_name] = str(var) def convert_version_to_int(version): - """Convert a version number to a integer that can be used to compare. + """Convert a version number to a integer that can be used to compare. Version strings of the form X.YZ and X.Y.Z-xxxxx are supported. The 'xxxxx' part, for instance 'homebrew' on OS/X, is ignored. @@ -425,18 +437,18 @@ def convert_version_to_int(version): Returns: An integer if converted successfully, otherwise return None. """ - version = version.split('-')[0] - version_segments = version.split('.') - for seg in version_segments: - if not seg.isdigit(): - return None + version = version.split('-')[0] + version_segments = version.split('.') + for seg in version_segments: + if not seg.isdigit(): + return None - version_str = ''.join(['%03d' % int(seg) for seg in version_segments]) - return int(version_str) + version_str = ''.join(['%03d' % int(seg) for seg in version_segments]) + return int(version_str) def check_bazel_version(min_version): - """Check installed bazel version is at least min_version. + """Check installed bazel version is at least min_version. Args: min_version: string for minimum bazel version. @@ -444,98 +456,102 @@ def check_bazel_version(min_version): Returns: The bazel version detected. """ - if which('bazel') is None: - print('Cannot find bazel. Please install bazel.') - sys.exit(0) - curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version']) - - for line in curr_version.split('\n'): - if 'Build label: ' in line: - curr_version = line.split('Build label: ')[1] - break - - min_version_int = convert_version_to_int(min_version) - curr_version_int = convert_version_to_int(curr_version) - - # Check if current bazel version can be detected properly. 
- if not curr_version_int: - print('WARNING: current bazel installation is not a release version.') - print('Make sure you are running at least bazel %s' % min_version) + if which('bazel') is None: + print('Cannot find bazel. Please install bazel.') + sys.exit(0) + curr_version = run_shell( + ['bazel', '--batch', '--bazelrc=/dev/null', 'version']) + + for line in curr_version.split('\n'): + if 'Build label: ' in line: + curr_version = line.split('Build label: ')[1] + break + + min_version_int = convert_version_to_int(min_version) + curr_version_int = convert_version_to_int(curr_version) + + # Check if current bazel version can be detected properly. + if not curr_version_int: + print('WARNING: current bazel installation is not a release version.') + print('Make sure you are running at least bazel %s' % min_version) + return curr_version + + print('You have bazel %s installed.' % curr_version) + + if curr_version_int < min_version_int: + print( + 'Please upgrade your bazel installation to version %s or higher to ' + 'build TensorFlow!' % min_version) + sys.exit(0) return curr_version - print('You have bazel %s installed.' % curr_version) - - if curr_version_int < min_version_int: - print('Please upgrade your bazel installation to version %s or higher to ' - 'build TensorFlow!' % min_version) - sys.exit(0) - return curr_version - def set_cc_opt_flags(environ_cp): - """Set up architecture-dependent optimization flags. + """Set up architecture-dependent optimization flags. Also append CC optimization flags to bazel.rc.. Args: environ_cp: copy of the os.environ. """ - if is_ppc64le(): - # gcc on ppc64le does not support -march, use mcpu instead - default_cc_opt_flags = '-mcpu=native' - elif is_windows(): - default_cc_opt_flags = '/arch:AVX' - else: - default_cc_opt_flags = '-march=native' - question = ('Please specify optimization flags to use during compilation when' - ' bazel option "--config=opt" is specified [Default is %s]: ' - ) % default_cc_opt_flags - cc_opt_flags = get_from_env_or_user_or_default(environ_cp, 'CC_OPT_FLAGS', - question, default_cc_opt_flags) - for opt in cc_opt_flags.split(): - write_to_bazelrc('build:opt --copt=%s' % opt) - # It should be safe on the same build host. - if not is_ppc64le() and not is_windows(): - write_to_bazelrc('build:opt --host_copt=-march=native') - write_to_bazelrc('build:opt --define with_default_optimizations=true') + if is_ppc64le(): + # gcc on ppc64le does not support -march, use mcpu instead + default_cc_opt_flags = '-mcpu=native' + elif is_windows(): + default_cc_opt_flags = '/arch:AVX' + else: + default_cc_opt_flags = '-march=native' + question = ( + 'Please specify optimization flags to use during compilation when' + ' bazel option "--config=opt" is specified [Default is %s]: ' + ) % default_cc_opt_flags + cc_opt_flags = get_from_env_or_user_or_default( + environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) + for opt in cc_opt_flags.split(): + write_to_bazelrc('build:opt --copt=%s' % opt) + # It should be safe on the same build host. + if not is_ppc64le() and not is_windows(): + write_to_bazelrc('build:opt --host_copt=-march=native') + write_to_bazelrc('build:opt --define with_default_optimizations=true') + def set_tf_cuda_clang(environ_cp): - """set TF_CUDA_CLANG action_env. + """set TF_CUDA_CLANG action_env. Args: environ_cp: copy of the os.environ. """ - question = 'Do you want to use clang as CUDA compiler?' - yes_reply = 'Clang will be used as CUDA compiler.' - no_reply = 'nvcc will be used as CUDA compiler.' 
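
# Illustration (hypothetical .bazelrc output, assuming the defaults are
# accepted): on an x86-64 Linux host, set_cc_opt_flags() above appends roughly
#
#   build:opt --copt=-march=native
#   build:opt --host_copt=-march=native
#   build:opt --define with_default_optimizations=true
#
# On ppc64le the first flag becomes --copt=-mcpu=native and the --host_copt
# line is skipped; on Windows it becomes --copt=/arch:AVX.
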
- set_action_env_var( - environ_cp, - 'TF_CUDA_CLANG', - None, - False, - question=question, - yes_reply=yes_reply, - no_reply=no_reply) + question = 'Do you want to use clang as CUDA compiler?' + yes_reply = 'Clang will be used as CUDA compiler.' + no_reply = 'nvcc will be used as CUDA compiler.' + set_action_env_var( + environ_cp, + 'TF_CUDA_CLANG', + None, + False, + question=question, + yes_reply=yes_reply, + no_reply=no_reply) def set_tf_download_clang(environ_cp): - """Set TF_DOWNLOAD_CLANG action_env.""" - question = 'Do you wish to download a fresh release of clang? (Experimental)' - yes_reply = 'Clang will be downloaded and used to compile tensorflow.' - no_reply = 'Clang will not be downloaded.' - set_action_env_var( - environ_cp, - 'TF_DOWNLOAD_CLANG', - None, - False, - question=question, - yes_reply=yes_reply, - no_reply=no_reply) + """Set TF_DOWNLOAD_CLANG action_env.""" + question = 'Do you wish to download a fresh release of clang? (Experimental)' + yes_reply = 'Clang will be downloaded and used to compile tensorflow.' + no_reply = 'Clang will not be downloaded.' + set_action_env_var( + environ_cp, + 'TF_DOWNLOAD_CLANG', + None, + False, + question=question, + yes_reply=yes_reply, + no_reply=no_reply) def get_from_env_or_user_or_default(environ_cp, var_name, ask_for_var, var_default): - """Get var_name either from env, or user or default. + """Get var_name either from env, or user or default. If var_name has been set as environment variable, use the preset value, else ask for user input. If no input is provided, the default is used. @@ -549,49 +565,49 @@ def get_from_env_or_user_or_default(environ_cp, var_name, ask_for_var, Returns: string value for var_name """ - var = environ_cp.get(var_name) - if not var: - var = get_input(ask_for_var) - print('\n') - if not var: - var = var_default - return var + var = environ_cp.get(var_name) + if not var: + var = get_input(ask_for_var) + print('\n') + if not var: + var = var_default + return var def set_clang_cuda_compiler_path(environ_cp): - """Set CLANG_CUDA_COMPILER_PATH.""" - default_clang_path = which('clang') or '' - ask_clang_path = ('Please specify which clang should be used as device and ' - 'host compiler. [Default is %s]: ') % default_clang_path - - while True: - clang_cuda_compiler_path = get_from_env_or_user_or_default( - environ_cp, 'CLANG_CUDA_COMPILER_PATH', ask_clang_path, - default_clang_path) - if os.path.exists(clang_cuda_compiler_path): - break - - # Reset and retry - print('Invalid clang path: %s cannot be found.' % clang_cuda_compiler_path) - environ_cp['CLANG_CUDA_COMPILER_PATH'] = '' - - # Set CLANG_CUDA_COMPILER_PATH - environ_cp['CLANG_CUDA_COMPILER_PATH'] = clang_cuda_compiler_path - write_action_env_to_bazelrc('CLANG_CUDA_COMPILER_PATH', - clang_cuda_compiler_path) - - -def prompt_loop_or_load_from_env( - environ_cp, - var_name, - var_default, - ask_for_var, - check_success, - error_msg, - suppress_default_error=False, - n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS -): - """Loop over user prompts for an ENV param until receiving a valid response. + """Set CLANG_CUDA_COMPILER_PATH.""" + default_clang_path = which('clang') or '' + ask_clang_path = ( + 'Please specify which clang should be used as device and ' + 'host compiler. 
[Default is %s]: ') % default_clang_path + + while True: + clang_cuda_compiler_path = get_from_env_or_user_or_default( + environ_cp, 'CLANG_CUDA_COMPILER_PATH', ask_clang_path, + default_clang_path) + if os.path.exists(clang_cuda_compiler_path): + break + + # Reset and retry + print('Invalid clang path: %s cannot be found.' % + clang_cuda_compiler_path) + environ_cp['CLANG_CUDA_COMPILER_PATH'] = '' + + # Set CLANG_CUDA_COMPILER_PATH + environ_cp['CLANG_CUDA_COMPILER_PATH'] = clang_cuda_compiler_path + write_action_env_to_bazelrc('CLANG_CUDA_COMPILER_PATH', + clang_cuda_compiler_path) + + +def prompt_loop_or_load_from_env(environ_cp, + var_name, + var_default, + ask_for_var, + check_success, + error_msg, + suppress_default_error=False, + n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS): + """Loop over user prompts for an ENV param until receiving a valid response. For the env param var_name, read from the environment or verify user input until receiving valid input. When done, set var_name in the environ_cp to its @@ -622,172 +638,168 @@ def prompt_loop_or_load_from_env( continue to provide invalid input. Raise the error to avoid infinitely looping. """ - default = environ_cp.get(var_name) or var_default - full_query = '%s [Default is %s]: ' % ( - ask_for_var, - default, - ) - - for _ in range(n_ask_attempts): - val = get_from_env_or_user_or_default(environ_cp, - var_name, - full_query, - default) - if check_success(val): - break - if not suppress_default_error: - print(error_msg % val) - environ_cp[var_name] = '' - else: - raise UserInputError('Invalid %s setting was provided %d times in a row. ' - 'Assuming to be a scripting mistake.' % - (var_name, n_ask_attempts)) - - environ_cp[var_name] = val - return val + default = environ_cp.get(var_name) or var_default + full_query = '%s [Default is %s]: ' % ( + ask_for_var, + default, + ) + + for _ in range(n_ask_attempts): + val = get_from_env_or_user_or_default(environ_cp, var_name, full_query, + default) + if check_success(val): + break + if not suppress_default_error: + print(error_msg % val) + environ_cp[var_name] = '' + else: + raise UserInputError( + 'Invalid %s setting was provided %d times in a row. ' + 'Assuming to be a scripting mistake.' 
% (var_name, n_ask_attempts)) + + environ_cp[var_name] = val + return val def create_android_ndk_rule(environ_cp): - """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule.""" - if is_windows() or is_cygwin(): - default_ndk_path = cygpath('%s/Android/Sdk/ndk-bundle' % - environ_cp['APPDATA']) - elif is_macos(): - default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] - else: - default_ndk_path = '%s/Android/Sdk/ndk-bundle' % environ_cp['HOME'] - - def valid_ndk_path(path): - return (os.path.exists(path) and - os.path.exists(os.path.join(path, 'source.properties'))) - - android_ndk_home_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_NDK_HOME', - var_default=default_ndk_path, - ask_for_var='Please specify the home path of the Android NDK to use.', - check_success=valid_ndk_path, - error_msg=('The path %s or its child file "source.properties" ' - 'does not exist.') - ) - write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path) - write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL', - check_ndk_level(android_ndk_home_path)) + """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule.""" + if is_windows() or is_cygwin(): + default_ndk_path = cygpath( + '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA']) + elif is_macos(): + default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] + else: + default_ndk_path = '%s/Android/Sdk/ndk-bundle' % environ_cp['HOME'] + + def valid_ndk_path(path): + return (os.path.exists(path) + and os.path.exists(os.path.join(path, 'source.properties'))) + + android_ndk_home_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='ANDROID_NDK_HOME', + var_default=default_ndk_path, + ask_for_var='Please specify the home path of the Android NDK to use.', + check_success=valid_ndk_path, + error_msg=('The path %s or its child file "source.properties" ' + 'does not exist.')) + write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path) + write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL', + check_ndk_level(android_ndk_home_path)) def create_android_sdk_rule(environ_cp): - """Set Android variables and write Android SDK WORKSPACE rule.""" - if is_windows() or is_cygwin(): - default_sdk_path = cygpath('%s/Android/Sdk' % environ_cp['APPDATA']) - elif is_macos(): - default_sdk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] - else: - default_sdk_path = '%s/Android/Sdk' % environ_cp['HOME'] - - def valid_sdk_path(path): - return (os.path.exists(path) and - os.path.exists(os.path.join(path, 'platforms')) and - os.path.exists(os.path.join(path, 'build-tools'))) - - android_sdk_home_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_SDK_HOME', - var_default=default_sdk_path, - ask_for_var='Please specify the home path of the Android SDK to use.', - check_success=valid_sdk_path, - error_msg=('Either %s does not exist, or it does not contain the ' - 'subdirectories "platforms" and "build-tools".')) - - platforms = os.path.join(android_sdk_home_path, 'platforms') - api_levels = sorted(os.listdir(platforms)) - api_levels = [x.replace('android-', '') for x in api_levels] - - def valid_api_level(api_level): - return os.path.exists(os.path.join(android_sdk_home_path, - 'platforms', - 'android-' + api_level)) - - android_api_level = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_API_LEVEL', - var_default=api_levels[-1], - ask_for_var=('Please specify the Android SDK API level to use. 
'
-                   '[Available levels: %s]') % api_levels,
-      check_success=valid_api_level,
-      error_msg='Android-%s is not present in the SDK path.')
-
-  build_tools = os.path.join(android_sdk_home_path, 'build-tools')
-  versions = sorted(os.listdir(build_tools))
-
-  def valid_build_tools(version):
-    return os.path.exists(os.path.join(android_sdk_home_path,
-                                       'build-tools',
-                                       version))
-
-  android_build_tools_version = prompt_loop_or_load_from_env(
-      environ_cp,
-      var_name='ANDROID_BUILD_TOOLS_VERSION',
-      var_default=versions[-1],
-      ask_for_var=('Please specify an Android build tools version to use. '
-                   '[Available versions: %s]') % versions,
-      check_success=valid_build_tools,
-      error_msg=('The selected SDK does not have build-tools version %s '
                 'available.'))
-
-  write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION',
-                              android_build_tools_version)
-  write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL',
-                              android_api_level)
-  write_action_env_to_bazelrc('ANDROID_SDK_HOME',
-                              android_sdk_home_path)
+    """Set Android variables and write Android SDK WORKSPACE rule."""
+    if is_windows() or is_cygwin():
+        default_sdk_path = cygpath('%s/Android/Sdk' % environ_cp['APPDATA'])
+    elif is_macos():
+        default_sdk_path = '%s/library/Android/Sdk' % environ_cp['HOME']
+    else:
+        default_sdk_path = '%s/Android/Sdk' % environ_cp['HOME']
+
+    def valid_sdk_path(path):
+        return (os.path.exists(path)
+                and os.path.exists(os.path.join(path, 'platforms'))
+                and os.path.exists(os.path.join(path, 'build-tools')))
+
+    android_sdk_home_path = prompt_loop_or_load_from_env(
+        environ_cp,
+        var_name='ANDROID_SDK_HOME',
+        var_default=default_sdk_path,
+        ask_for_var='Please specify the home path of the Android SDK to use.',
+        check_success=valid_sdk_path,
+        error_msg=('Either %s does not exist, or it does not contain the '
+                   'subdirectories "platforms" and "build-tools".'))
+
+    platforms = os.path.join(android_sdk_home_path, 'platforms')
+    api_levels = sorted(os.listdir(platforms))
+    api_levels = [x.replace('android-', '') for x in api_levels]
+
+    def valid_api_level(api_level):
+        return os.path.exists(
+            os.path.join(android_sdk_home_path, 'platforms',
+                         'android-' + api_level))
+
+    android_api_level = prompt_loop_or_load_from_env(
+        environ_cp,
+        var_name='ANDROID_API_LEVEL',
+        var_default=api_levels[-1],
+        ask_for_var=('Please specify the Android SDK API level to use. 
' + '[Available versions: %s]') % versions, + check_success=valid_build_tools, + error_msg=('The selected SDK does not have build-tools version %s ' + 'available.')) + + write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION', + android_build_tools_version) + write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level) + write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path) def check_ndk_level(android_ndk_home_path): - """Check the revision number of an Android NDK path.""" - properties_path = '%s/source.properties' % android_ndk_home_path - if is_windows() or is_cygwin(): - properties_path = cygpath(properties_path) - with open(properties_path, 'r') as f: - filedata = f.read() - - revision = re.search(r'Pkg.Revision = (\d+)', filedata) - if revision: - ndk_api_level = revision.group(1) - else: - raise Exception('Unable to parse NDK revision.') - if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS: - print('WARNING: The API level of the NDK in %s is %s, which is not ' - 'supported by Bazel (officially supported versions: %s). Please use ' - 'another version. Compiling Android targets may result in confusing ' - 'errors.\n' % (android_ndk_home_path, ndk_api_level, - _SUPPORTED_ANDROID_NDK_VERSIONS)) - return ndk_api_level + """Check the revision number of an Android NDK path.""" + properties_path = '%s/source.properties' % android_ndk_home_path + if is_windows() or is_cygwin(): + properties_path = cygpath(properties_path) + with open(properties_path, 'r') as f: + filedata = f.read() + + revision = re.search(r'Pkg.Revision = (\d+)', filedata) + if revision: + ndk_api_level = revision.group(1) + else: + raise Exception('Unable to parse NDK revision.') + if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS: + print( + 'WARNING: The API level of the NDK in %s is %s, which is not ' + 'supported by Bazel (officially supported versions: %s). Please use ' + 'another version. Compiling Android targets may result in confusing ' + 'errors.\n' % (android_ndk_home_path, ndk_api_level, + _SUPPORTED_ANDROID_NDK_VERSIONS)) + return ndk_api_level def set_gcc_host_compiler_path(environ_cp): - """Set GCC_HOST_COMPILER_PATH.""" - default_gcc_host_compiler_path = which('gcc') or '' - cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH') + """Set GCC_HOST_COMPILER_PATH.""" + default_gcc_host_compiler_path = which('gcc') or '' + cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH') - if os.path.islink(cuda_bin_symlink): - # os.readlink is only available in linux - default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink) + if os.path.islink(cuda_bin_symlink): + # os.readlink is only available in linux + default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink) - gcc_host_compiler_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='GCC_HOST_COMPILER_PATH', - var_default=default_gcc_host_compiler_path, - ask_for_var= - 'Please specify which gcc should be used by nvcc as the host compiler.', - check_success=os.path.exists, - error_msg='Invalid gcc path. %s cannot be found.', - ) + gcc_host_compiler_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='GCC_HOST_COMPILER_PATH', + var_default=default_gcc_host_compiler_path, + ask_for_var= + 'Please specify which gcc should be used by nvcc as the host compiler.', + check_success=os.path.exists, + error_msg='Invalid gcc path. 
%s cannot be found.', + ) - write_action_env_to_bazelrc('GCC_HOST_COMPILER_PATH', gcc_host_compiler_path) + write_action_env_to_bazelrc('GCC_HOST_COMPILER_PATH', + gcc_host_compiler_path) def reformat_version_sequence(version_str, sequence_count): - """Reformat the version string to have the given number of sequences. + """Reformat the version string to have the given number of sequences. For example: Given (7, 2) -> 7.0 @@ -801,181 +813,190 @@ def reformat_version_sequence(version_str, sequence_count): Returns: string, reformatted version string. """ - v = version_str.split('.') - if len(v) < sequence_count: - v = v + (['0'] * (sequence_count - len(v))) + v = version_str.split('.') + if len(v) < sequence_count: + v = v + (['0'] * (sequence_count - len(v))) - return '.'.join(v[:sequence_count]) + return '.'.join(v[:sequence_count]) def set_tf_cuda_version(environ_cp): - """Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION.""" - ask_cuda_version = ( - 'Please specify the CUDA SDK version you want to use. ' - '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - # Configure the Cuda SDK version to use. - tf_cuda_version = get_from_env_or_user_or_default( - environ_cp, 'TF_CUDA_VERSION', ask_cuda_version, _DEFAULT_CUDA_VERSION) - tf_cuda_version = reformat_version_sequence(str(tf_cuda_version), 2) - - # Find out where the CUDA toolkit is installed - default_cuda_path = _DEFAULT_CUDA_PATH - if is_windows() or is_cygwin(): - default_cuda_path = cygpath( - environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN)) - elif is_linux(): - # If the default doesn't exist, try an alternative default. - if (not os.path.exists(default_cuda_path) - ) and os.path.exists(_DEFAULT_CUDA_PATH_LINUX): - default_cuda_path = _DEFAULT_CUDA_PATH_LINUX - ask_cuda_path = ('Please specify the location where CUDA %s toolkit is' - ' installed. Refer to README.md for more details. ' - '[Default is %s]: ') % (tf_cuda_version, default_cuda_path) - cuda_toolkit_path = get_from_env_or_user_or_default( - environ_cp, 'CUDA_TOOLKIT_PATH', ask_cuda_path, default_cuda_path) - if is_windows() or is_cygwin(): - cuda_toolkit_path = cygpath(cuda_toolkit_path) - - if is_windows(): - cuda_rt_lib_path = 'lib/x64/cudart.lib' - elif is_linux(): - cuda_rt_lib_path = 'lib64/libcudart.so.%s' % tf_cuda_version - elif is_macos(): - cuda_rt_lib_path = 'lib/libcudart.%s.dylib' % tf_cuda_version - - cuda_toolkit_path_full = os.path.join(cuda_toolkit_path, cuda_rt_lib_path) - if os.path.exists(cuda_toolkit_path_full): - break + """Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION.""" + ask_cuda_version = ( + 'Please specify the CUDA SDK version you want to use. ' + '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + # Configure the Cuda SDK version to use. + tf_cuda_version = get_from_env_or_user_or_default( + environ_cp, 'TF_CUDA_VERSION', ask_cuda_version, + _DEFAULT_CUDA_VERSION) + tf_cuda_version = reformat_version_sequence(str(tf_cuda_version), 2) + + # Find out where the CUDA toolkit is installed + default_cuda_path = _DEFAULT_CUDA_PATH + if is_windows() or is_cygwin(): + default_cuda_path = cygpath( + environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN)) + elif is_linux(): + # If the default doesn't exist, try an alternative default. 
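
# Illustration (hypothetical paths): the loop below accepts CUDA_TOOLKIT_PATH
# only after finding the versioned CUDA runtime library, e.g. for
# TF_CUDA_VERSION='9.0':
#
#   Linux:   <cuda_toolkit_path>/lib64/libcudart.so.9.0
#   macOS:   <cuda_toolkit_path>/lib/libcudart.9.0.dylib
#   Windows: <cuda_toolkit_path>/lib/x64/cudart.lib
#
# and re-prompts (clearing TF_CUDA_VERSION and CUDA_TOOLKIT_PATH) when the
# file is missing.
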
+ if (not os.path.exists(default_cuda_path) + ) and os.path.exists(_DEFAULT_CUDA_PATH_LINUX): + default_cuda_path = _DEFAULT_CUDA_PATH_LINUX + ask_cuda_path = ('Please specify the location where CUDA %s toolkit is' + ' installed. Refer to README.md for more details. ' + '[Default is %s]: ') % (tf_cuda_version, + default_cuda_path) + cuda_toolkit_path = get_from_env_or_user_or_default( + environ_cp, 'CUDA_TOOLKIT_PATH', ask_cuda_path, default_cuda_path) + if is_windows() or is_cygwin(): + cuda_toolkit_path = cygpath(cuda_toolkit_path) + + if is_windows(): + cuda_rt_lib_path = 'lib/x64/cudart.lib' + elif is_linux(): + cuda_rt_lib_path = 'lib64/libcudart.so.%s' % tf_cuda_version + elif is_macos(): + cuda_rt_lib_path = 'lib/libcudart.%s.dylib' % tf_cuda_version + + cuda_toolkit_path_full = os.path.join(cuda_toolkit_path, + cuda_rt_lib_path) + if os.path.exists(cuda_toolkit_path_full): + break + + # Reset and retry + print('Invalid path to CUDA %s toolkit. %s cannot be found' % + (tf_cuda_version, cuda_toolkit_path_full)) + environ_cp['TF_CUDA_VERSION'] = '' + environ_cp['CUDA_TOOLKIT_PATH'] = '' - # Reset and retry - print('Invalid path to CUDA %s toolkit. %s cannot be found' % - (tf_cuda_version, cuda_toolkit_path_full)) - environ_cp['TF_CUDA_VERSION'] = '' - environ_cp['CUDA_TOOLKIT_PATH'] = '' - - else: - raise UserInputError('Invalid TF_CUDA_SETTING setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) + else: + raise UserInputError( + 'Invalid TF_CUDA_SETTING setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) - # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION - environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path - write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path) - environ_cp['TF_CUDA_VERSION'] = tf_cuda_version - write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) + # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION + environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path + write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path) + environ_cp['TF_CUDA_VERSION'] = tf_cuda_version + write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) def set_tf_cudnn_version(environ_cp): - """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" - ask_cudnn_version = ( - 'Please specify the cuDNN version you want to use. ' - '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - tf_cudnn_version = get_from_env_or_user_or_default( - environ_cp, 'TF_CUDNN_VERSION', ask_cudnn_version, - _DEFAULT_CUDNN_VERSION) - tf_cudnn_version = reformat_version_sequence(str(tf_cudnn_version), 1) - - default_cudnn_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_cudnn_path = (r'Please specify the location where cuDNN %s library is ' - 'installed. Refer to README.md for more details. [Default' - ' is %s]:') % (tf_cudnn_version, default_cudnn_path) - cudnn_install_path = get_from_env_or_user_or_default( - environ_cp, 'CUDNN_INSTALL_PATH', ask_cudnn_path, default_cudnn_path) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. 
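
# Illustration (hypothetical home directory): the expansion mentioned above
# turns a literal prompt answer such as '~/cudnn' into an absolute path,
#
#   os.path.realpath(os.path.expanduser('~/cudnn'))  ->  '/home/builder/cudnn'
#
# resolving both '~' and any symlinks; without it, every existence check on
# the cuDNN library paths below would fail on the unexpanded string.
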
- cudnn_install_path = os.path.realpath( - os.path.expanduser(cudnn_install_path)) - if is_windows() or is_cygwin(): - cudnn_install_path = cygpath(cudnn_install_path) + """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" + ask_cudnn_version = ( + 'Please specify the cuDNN version you want to use. ' + '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + tf_cudnn_version = get_from_env_or_user_or_default( + environ_cp, 'TF_CUDNN_VERSION', ask_cudnn_version, + _DEFAULT_CUDNN_VERSION) + tf_cudnn_version = reformat_version_sequence(str(tf_cudnn_version), 1) + + default_cudnn_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_cudnn_path = ( + r'Please specify the location where cuDNN %s library is ' + 'installed. Refer to README.md for more details. [Default' + ' is %s]:') % (tf_cudnn_version, default_cudnn_path) + cudnn_install_path = get_from_env_or_user_or_default( + environ_cp, 'CUDNN_INSTALL_PATH', ask_cudnn_path, + default_cudnn_path) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + cudnn_install_path = os.path.realpath( + os.path.expanduser(cudnn_install_path)) + if is_windows() or is_cygwin(): + cudnn_install_path = cygpath(cudnn_install_path) + + if is_windows(): + cuda_dnn_lib_path = 'lib/x64/cudnn.lib' + cuda_dnn_lib_alt_path = 'lib/x64/cudnn.lib' + elif is_linux(): + cuda_dnn_lib_path = 'lib64/libcudnn.so.%s' % tf_cudnn_version + cuda_dnn_lib_alt_path = 'libcudnn.so.%s' % tf_cudnn_version + elif is_macos(): + cuda_dnn_lib_path = 'lib/libcudnn.%s.dylib' % tf_cudnn_version + cuda_dnn_lib_alt_path = 'libcudnn.%s.dylib' % tf_cudnn_version + + cuda_dnn_lib_path_full = os.path.join(cudnn_install_path, + cuda_dnn_lib_path) + cuda_dnn_lib_alt_path_full = os.path.join(cudnn_install_path, + cuda_dnn_lib_alt_path) + if os.path.exists(cuda_dnn_lib_path_full) or os.path.exists( + cuda_dnn_lib_alt_path_full): + break + + # Try another alternative for Linux + if is_linux(): + ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' + cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) + cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', + cudnn_path_from_ldconfig) + if cudnn_path_from_ldconfig: + cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) + if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, + tf_cudnn_version)): + cudnn_install_path = os.path.dirname( + cudnn_path_from_ldconfig) + break + + # Reset and Retry + print( + 'Invalid path to cuDNN %s toolkit. None of the following files can be ' + 'found:' % tf_cudnn_version) + print(cuda_dnn_lib_path_full) + print(cuda_dnn_lib_alt_path_full) + if is_linux(): + print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)) + + environ_cp['TF_CUDNN_VERSION'] = '' + else: + raise UserInputError( + 'Invalid TF_CUDNN setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' 
% + _DEFAULT_PROMPT_ASK_ATTEMPTS) - if is_windows(): - cuda_dnn_lib_path = 'lib/x64/cudnn.lib' - cuda_dnn_lib_alt_path = 'lib/x64/cudnn.lib' - elif is_linux(): - cuda_dnn_lib_path = 'lib64/libcudnn.so.%s' % tf_cudnn_version - cuda_dnn_lib_alt_path = 'libcudnn.so.%s' % tf_cudnn_version - elif is_macos(): - cuda_dnn_lib_path = 'lib/libcudnn.%s.dylib' % tf_cudnn_version - cuda_dnn_lib_alt_path = 'libcudnn.%s.dylib' % tf_cudnn_version - - cuda_dnn_lib_path_full = os.path.join(cudnn_install_path, cuda_dnn_lib_path) - cuda_dnn_lib_alt_path_full = os.path.join(cudnn_install_path, - cuda_dnn_lib_alt_path) - if os.path.exists(cuda_dnn_lib_path_full) or os.path.exists( - cuda_dnn_lib_alt_path_full): - break - - # Try another alternative for Linux - if is_linux(): - ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) - cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', - cudnn_path_from_ldconfig) - if cudnn_path_from_ldconfig: - cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) - if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, - tf_cudnn_version)): - cudnn_install_path = os.path.dirname(cudnn_path_from_ldconfig) - break - - # Reset and Retry - print( - 'Invalid path to cuDNN %s toolkit. None of the following files can be ' - 'found:' % tf_cudnn_version) - print(cuda_dnn_lib_path_full) - print(cuda_dnn_lib_alt_path_full) - if is_linux(): - print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)) - - environ_cp['TF_CUDNN_VERSION'] = '' - else: - raise UserInputError('Invalid TF_CUDNN setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION - environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path - write_action_env_to_bazelrc('CUDNN_INSTALL_PATH', cudnn_install_path) - environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version - write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) + # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION + environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path + write_action_env_to_bazelrc('CUDNN_INSTALL_PATH', cudnn_install_path) + environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version + write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) def is_cuda_compatible(lib, cuda_ver, cudnn_ver): - """Check compatibility between given library and cudnn/cudart libraries.""" - ldd_bin = which('ldd') or '/usr/bin/ldd' - ldd_out = run_shell([ldd_bin, lib], True) - ldd_out = ldd_out.split(os.linesep) - cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') - cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') - cudnn = None - cudart = None - cudnn_ok = True # assume no cudnn dependency by default - cuda_ok = True # assume no cuda dependency by default - for line in ldd_out: - if 'libcudnn.so' in line: - cudnn = cudnn_pattern.search(line) - cudnn_ok = False - elif 'libcudart.so' in line: - cudart = cuda_pattern.search(line) - cuda_ok = False - if cudnn and len(cudnn.group(1)): - cudnn = convert_version_to_int(cudnn.group(1)) - if cudart and len(cudart.group(1)): - cudart = convert_version_to_int(cudart.group(1)) - if cudnn is not None: - cudnn_ok = (cudnn == cudnn_ver) - if cudart is not None: - cuda_ok = (cudart == cuda_ver) - return cudnn_ok and cuda_ok + """Check compatibility between given library and cudnn/cudart libraries.""" + ldd_bin = which('ldd') or '/usr/bin/ldd' + ldd_out = run_shell([ldd_bin, lib], True) + ldd_out = ldd_out.split(os.linesep) + cudnn_pattern 
= re.compile('.*libcudnn.so\\.?(.*) =>.*$') + cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') + cudnn = None + cudart = None + cudnn_ok = True # assume no cudnn dependency by default + cuda_ok = True # assume no cuda dependency by default + for line in ldd_out: + if 'libcudnn.so' in line: + cudnn = cudnn_pattern.search(line) + cudnn_ok = False + elif 'libcudart.so' in line: + cudart = cuda_pattern.search(line) + cuda_ok = False + if cudnn and len(cudnn.group(1)): + cudnn = convert_version_to_int(cudnn.group(1)) + if cudart and len(cudart.group(1)): + cudart = convert_version_to_int(cudart.group(1)) + if cudnn is not None: + cudnn_ok = (cudnn == cudnn_ver) + if cudart is not None: + cuda_ok = (cudart == cuda_ver) + return cudnn_ok and cuda_ok def set_tf_tensorrt_install_path(environ_cp): - """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. + """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. Adapted from code contributed by Sami Kama (https://github.com/samikama). @@ -986,105 +1007,112 @@ def set_tf_tensorrt_install_path(environ_cp): ValueError: if this method was called under non-Linux platform. UserInputError: if user has provided invalid input multiple times. """ - if not is_linux(): - raise ValueError('Currently TensorRT is only supported on Linux platform.') - - # Ask user whether to add TensorRT support. - if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', - False))) != '1': - return - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - ask_tensorrt_path = (r'Please specify the location where TensorRT is ' - 'installed. [Default is %s]:') % ( - _DEFAULT_TENSORRT_PATH_LINUX) - trt_install_path = get_from_env_or_user_or_default( - environ_cp, 'TENSORRT_INSTALL_PATH', ask_tensorrt_path, - _DEFAULT_TENSORRT_PATH_LINUX) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - trt_install_path = os.path.realpath(os.path.expanduser(trt_install_path)) - - def find_libs(search_path): - """Search for libnvinfer.so in "search_path".""" - fl = set() - if os.path.exists(search_path) and os.path.isdir(search_path): - fl.update([ - os.path.realpath(os.path.join(search_path, x)) - for x in os.listdir(search_path) - if 'libnvinfer.so' in x - ]) - return fl - - possible_files = find_libs(trt_install_path) - possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) - possible_files.update(find_libs(os.path.join(trt_install_path, 'lib64'))) - cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) - cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) - nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') - highest_ver = [0, None, None] - - for lib_file in possible_files: - if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): - matches = nvinfer_pattern.search(lib_file) - if len(matches.groups()) == 0: - continue - ver_str = matches.group(1) - ver = convert_version_to_int(ver_str) if len(ver_str) else 0 - if ver > highest_ver[0]: - highest_ver = [ver, ver_str, lib_file] - if highest_ver[1] is not None: - trt_install_path = os.path.dirname(highest_ver[2]) - tf_tensorrt_version = highest_ver[1] - break - - # Try another alternative from ldconfig. 
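
# Illustration (hypothetical ldd output): is_cuda_compatible() above scans
# lines such as
#
#   libcudnn.so.7 => /usr/lib/x86_64-linux-gnu/libcudnn.so.7 (0x00007f...)
#
# The pattern '.*libcudnn.so\.?(.*) =>.*$' captures '7', which is converted
# with convert_version_to_int() and must equal the configured cuDNN version
# (and likewise libcudart.so.<ver> against the CUDA version) for the library
# to be accepted.
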
- ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - ldconfig_output = run_shell([ldconfig_bin, '-p']) - search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)', - ldconfig_output) - if search_result: - libnvinfer_path_from_ldconfig = search_result.group(2) - if os.path.exists(libnvinfer_path_from_ldconfig): - if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver, - cudnn_ver): - trt_install_path = os.path.dirname(libnvinfer_path_from_ldconfig) - tf_tensorrt_version = search_result.group(1) - break - - # Reset and Retry - if possible_files: - print('TensorRT libraries found in one the following directories', - 'are not compatible with selected cuda and cudnn installations') - print(trt_install_path) - print(os.path.join(trt_install_path, 'lib')) - print(os.path.join(trt_install_path, 'lib64')) - if search_result: - print(libnvinfer_path_from_ldconfig) + if not is_linux(): + raise ValueError( + 'Currently TensorRT is only supported on Linux platform.') + + # Ask user whether to add TensorRT support. + if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', + False))) != '1': + return + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + ask_tensorrt_path = (r'Please specify the location where TensorRT is ' + 'installed. [Default is %s]:') % ( + _DEFAULT_TENSORRT_PATH_LINUX) + trt_install_path = get_from_env_or_user_or_default( + environ_cp, 'TENSORRT_INSTALL_PATH', ask_tensorrt_path, + _DEFAULT_TENSORRT_PATH_LINUX) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + trt_install_path = os.path.realpath( + os.path.expanduser(trt_install_path)) + + def find_libs(search_path): + """Search for libnvinfer.so in "search_path".""" + fl = set() + if os.path.exists(search_path) and os.path.isdir(search_path): + fl.update([ + os.path.realpath(os.path.join(search_path, x)) + for x in os.listdir(search_path) if 'libnvinfer.so' in x + ]) + return fl + + possible_files = find_libs(trt_install_path) + possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) + possible_files.update( + find_libs(os.path.join(trt_install_path, 'lib64'))) + cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) + cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) + nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') + highest_ver = [0, None, None] + + for lib_file in possible_files: + if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): + matches = nvinfer_pattern.search(lib_file) + if len(matches.groups()) == 0: + continue + ver_str = matches.group(1) + ver = convert_version_to_int(ver_str) if len(ver_str) else 0 + if ver > highest_ver[0]: + highest_ver = [ver, ver_str, lib_file] + if highest_ver[1] is not None: + trt_install_path = os.path.dirname(highest_ver[2]) + tf_tensorrt_version = highest_ver[1] + break + + # Try another alternative from ldconfig. 
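
# Illustration (hypothetical 'ldconfig -p' entry): the fallback below parses
# lines of the form
#
#   libnvinfer.so.4 (libc6,x86-64) => /usr/lib/x86_64-linux-gnu/libnvinfer.so.4
#
# where group 1 of the regex yields the TensorRT version ('4') and group 2 the
# resolved path, which is then re-checked with is_cuda_compatible() before
# that copy of TensorRT is accepted.
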
+        ldconfig_bin = which('ldconfig') or '/sbin/ldconfig'
+        ldconfig_output = run_shell([ldconfig_bin, '-p'])
+        search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)',
+                                  ldconfig_output)
+        if search_result:
+            libnvinfer_path_from_ldconfig = search_result.group(2)
+            if os.path.exists(libnvinfer_path_from_ldconfig):
+                if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver,
+                                      cudnn_ver):
+                    trt_install_path = os.path.dirname(
+                        libnvinfer_path_from_ldconfig)
+                    tf_tensorrt_version = search_result.group(1)
+                    break
+
+        # Reset and Retry
+        if possible_files:
+            print(
+                'TensorRT libraries found in one of the following directories',
+                'are not compatible with selected cuda and cudnn installations'
+            )
+            print(trt_install_path)
+            print(os.path.join(trt_install_path, 'lib'))
+            print(os.path.join(trt_install_path, 'lib64'))
+            if search_result:
+                print(libnvinfer_path_from_ldconfig)
+        else:
+            print(
+                'Invalid path to TensorRT. None of the following files can be found:'
+            )
+            print(trt_install_path)
+            print(os.path.join(trt_install_path, 'lib'))
+            print(os.path.join(trt_install_path, 'lib64'))
+            if search_result:
+                print(libnvinfer_path_from_ldconfig)
+    else:
-    print(
-        'Invalid path to TensorRT. None of the following files can be found:')
-    print(trt_install_path)
-    print(os.path.join(trt_install_path, 'lib'))
-    print(os.path.join(trt_install_path, 'lib64'))
-    if search_result:
-      print(libnvinfer_path_from_ldconfig)
-
-  else:
-    raise UserInputError('Invalid TF_TENSORRT setting was provided %d '
-                         'times in a row. Assuming to be a scripting mistake.' %
-                         _DEFAULT_PROMPT_ASK_ATTEMPTS)
-
-  # Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION
-  environ_cp['TENSORRT_INSTALL_PATH'] = trt_install_path
-  write_action_env_to_bazelrc('TENSORRT_INSTALL_PATH', trt_install_path)
-  environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version
-  write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version)
+        raise UserInputError(
+            'Invalid TF_TENSORRT setting was provided %d '
+            'times in a row. Assuming to be a scripting mistake.' %
+            _DEFAULT_PROMPT_ASK_ATTEMPTS)
+
+    # Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION
+    environ_cp['TENSORRT_INSTALL_PATH'] = trt_install_path
+    write_action_env_to_bazelrc('TENSORRT_INSTALL_PATH', trt_install_path)
+    environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version
+    write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version)


 def set_tf_nccl_install_path(environ_cp):
-  """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION.

   Args:
     environ_cp: copy of the os.environ.

@@ -1093,455 +1121,472 @@ def set_tf_nccl_install_path(environ_cp):
     ValueError: if this method was called under non-Linux platform.
     UserInputError: if user has provided invalid input multiple times.
   """
-  if not is_linux():
-    raise ValueError('Currently NCCL is only supported on Linux platforms.')
-
-  ask_nccl_version = (
-      'Please specify the NCCL version you want to use. '
-      '[Leave empty to default to NCCL %s]: ') % _DEFAULT_NCCL_VERSION
-
-  for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
-    tf_nccl_version = get_from_env_or_user_or_default(
-        environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, _DEFAULT_NCCL_VERSION)
-    tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1)
-
-    if tf_nccl_version == '1':
-      break  # No need to get install path, NCCL 1 is a GitHub repo. 
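
# Illustration (hypothetical install prefix): for NCCL 2 on Linux, the checks
# below accept an NCCL_INSTALL_PATH only when all three of these files exist:
#
#   <nccl_install_path>/lib/libnccl.so.2
#   <nccl_install_path>/include/nccl.h
#   <nccl_install_path>/NCCL-SLA.txt
#
# otherwise the prompt repeats, up to _DEFAULT_PROMPT_ASK_ATTEMPTS times.
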
- - # TODO(csigg): Look with ldconfig first if we can find the library in paths - # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding - # include directory. This is where the NCCL .deb packages install them. - # Then ask the user if we should use that. Instead of a single - # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to - # nccl_configure.bzl - default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_nccl_path = (r'Please specify the location where NCCL %s library is ' - 'installed. Refer to README.md for more details. [Default ' - 'is %s]:') % (tf_nccl_version, default_nccl_path) - nccl_install_path = get_from_env_or_user_or_default( - environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) - if is_windows() or is_cygwin(): - nccl_install_path = cygpath(nccl_install_path) + if not is_linux(): + raise ValueError( + 'Currently NCCL is only supported on Linux platforms.') + + ask_nccl_version = ( + 'Please specify the NCCL version you want to use. ' + '[Leave empty to default to NCCL %s]: ') % _DEFAULT_NCCL_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + tf_nccl_version = get_from_env_or_user_or_default( + environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, + _DEFAULT_NCCL_VERSION) + tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) + + if tf_nccl_version == '1': + break # No need to get install path, NCCL 1 is a GitHub repo. + + # TODO(csigg): Look with ldconfig first if we can find the library in paths + # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding + # include directory. This is where the NCCL .deb packages install them. + # Then ask the user if we should use that. Instead of a single + # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to + # nccl_configure.bzl + default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_nccl_path = ( + r'Please specify the location where NCCL %s library is ' + 'installed. Refer to README.md for more details. [Default ' + 'is %s]:') % (tf_nccl_version, default_nccl_path) + nccl_install_path = get_from_env_or_user_or_default( + environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + nccl_install_path = os.path.realpath( + os.path.expanduser(nccl_install_path)) + if is_windows() or is_cygwin(): + nccl_install_path = cygpath(nccl_install_path) + + if is_windows(): + nccl_lib_path = 'lib/x64/nccl.lib' + elif is_linux(): + nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version + elif is_macos(): + nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version + + nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) + nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') + nccl_license_path = os.path.join(nccl_install_path, 'NCCL-SLA.txt') + if os.path.exists(nccl_lib_path) and os.path.exists( + nccl_hdr_path) and os.path.exists(nccl_license_path): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) + break + + # Reset and Retry + print( + 'Invalid path to NCCL %s toolkit, %s or %s not found. 
Please use the ' + 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, + nccl_hdr_path)) + + environ_cp['TF_NCCL_VERSION'] = '' + else: + raise UserInputError( + 'Invalid TF_NCCL setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) - if is_windows(): - nccl_lib_path = 'lib/x64/nccl.lib' - elif is_linux(): - nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version - elif is_macos(): - nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version - - nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) - nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') - nccl_license_path = os.path.join(nccl_install_path, 'NCCL-SLA.txt') - if os.path.exists(nccl_lib_path) and os.path.exists( - nccl_hdr_path) and os.path.exists(nccl_license_path): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) - break - - # Reset and Retry - print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' - 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, - nccl_hdr_path)) - - environ_cp['TF_NCCL_VERSION'] = '' - else: - raise UserInputError('Invalid TF_NCCL setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set TF_NCCL_VERSION - environ_cp['TF_NCCL_VERSION'] = tf_nccl_version - write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) + # Set TF_NCCL_VERSION + environ_cp['TF_NCCL_VERSION'] = tf_nccl_version + write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) def get_native_cuda_compute_capabilities(environ_cp): - """Get native cuda compute capabilities. + """Get native cuda compute capabilities. Args: environ_cp: copy of the os.environ. Returns: string of native cuda compute capabilities, separated by comma. 
""" - device_query_bin = os.path.join( - environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery') - if os.path.isfile(device_query_bin) and os.access(device_query_bin, os.X_OK): - try: - output = run_shell(device_query_bin).split('\n') - pattern = re.compile('[0-9]*\\.[0-9]*') - output = [pattern.search(x) for x in output if 'Capability' in x] - output = ','.join(x.group() for x in output if x is not None) - except subprocess.CalledProcessError: - output = '' - else: - output = '' - return output + device_query_bin = os.path.join( + environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery') + if os.path.isfile(device_query_bin) and os.access(device_query_bin, + os.X_OK): + try: + output = run_shell(device_query_bin).split('\n') + pattern = re.compile('[0-9]*\\.[0-9]*') + output = [pattern.search(x) for x in output if 'Capability' in x] + output = ','.join(x.group() for x in output if x is not None) + except subprocess.CalledProcessError: + output = '' + else: + output = '' + return output def set_tf_cuda_compute_capabilities(environ_cp): - """Set TF_CUDA_COMPUTE_CAPABILITIES.""" - while True: - native_cuda_compute_capabilities = get_native_cuda_compute_capabilities( - environ_cp) - if not native_cuda_compute_capabilities: - default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES - else: - default_cuda_compute_capabilities = native_cuda_compute_capabilities - - ask_cuda_compute_capabilities = ( - 'Please specify a list of comma-separated ' - 'Cuda compute capabilities you want to ' - 'build with.\nYou can find the compute ' - 'capability of your device at: ' - 'https://developer.nvidia.com/cuda-gpus.\nPlease' - ' note that each additional compute ' - 'capability significantly increases your ' - 'build time and binary size. 
[Default is: %s]' %
-        default_cuda_compute_capabilities)
-    tf_cuda_compute_capabilities = get_from_env_or_user_or_default(
-        environ_cp, 'TF_CUDA_COMPUTE_CAPABILITIES',
-        ask_cuda_compute_capabilities, default_cuda_compute_capabilities)
-    # Check whether all capabilities from the input is valid
-    all_valid = True
-    # Remove all whitespace characters before splitting the string
-    # that users may insert by accident, as this will result in error
-    tf_cuda_compute_capabilities = ''.join(tf_cuda_compute_capabilities.split())
-    for compute_capability in tf_cuda_compute_capabilities.split(','):
-      m = re.match('[0-9]+.[0-9]+', compute_capability)
-      if not m:
-        print('Invalid compute capability: ' % compute_capability)
-        all_valid = False
-      else:
-        ver = int(m.group(0).split('.')[0])
-        if ver < 3:
-          print('Only compute capabilities 3.0 or higher are supported.')
-          all_valid = False
-
-    if all_valid:
-      break
-
-    # Reset and Retry
-    environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = ''
-
-  # Set TF_CUDA_COMPUTE_CAPABILITIES
-  environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = tf_cuda_compute_capabilities
-  write_action_env_to_bazelrc('TF_CUDA_COMPUTE_CAPABILITIES',
-                              tf_cuda_compute_capabilities)
+    """Set TF_CUDA_COMPUTE_CAPABILITIES."""
+    while True:
+        native_cuda_compute_capabilities = get_native_cuda_compute_capabilities(
+            environ_cp)
+        if not native_cuda_compute_capabilities:
+            default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES
+        else:
+            default_cuda_compute_capabilities = native_cuda_compute_capabilities
+
+        ask_cuda_compute_capabilities = (
+            'Please specify a list of comma-separated '
+            'Cuda compute capabilities you want to '
+            'build with.\nYou can find the compute '
+            'capability of your device at: '
+            'https://developer.nvidia.com/cuda-gpus.\nPlease'
+            ' note that each additional compute '
+            'capability significantly increases your '
+            'build time and binary size. [Default is: %s]' %
+            default_cuda_compute_capabilities)
+        tf_cuda_compute_capabilities = get_from_env_or_user_or_default(
+            environ_cp, 'TF_CUDA_COMPUTE_CAPABILITIES',
+            ask_cuda_compute_capabilities, default_cuda_compute_capabilities)
+        # Check whether all capabilities from the input are valid
+        all_valid = True
+        # Remove all whitespace characters before splitting the string
+        # that users may insert by accident, as this will result in error
+        tf_cuda_compute_capabilities = ''.join(
+            tf_cuda_compute_capabilities.split())
+        for compute_capability in tf_cuda_compute_capabilities.split(','):
+            m = re.match('[0-9]+.[0-9]+', compute_capability)
+            if not m:
+                print('Invalid compute capability: %s' % compute_capability)
+                all_valid = False
+            else:
+                ver = int(m.group(0).split('.')[0])
+                if ver < 3:
+                    print(
+                        'Only compute capabilities 3.0 or higher are supported.'
+                    )
+                    all_valid = False
+
+        if all_valid:
+            break
+
+        # Reset and Retry
+        environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = ''
+
+    # Set TF_CUDA_COMPUTE_CAPABILITIES
+    environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = tf_cuda_compute_capabilities
+    write_action_env_to_bazelrc('TF_CUDA_COMPUTE_CAPABILITIES',
+                                tf_cuda_compute_capabilities)


 def set_other_cuda_vars(environ_cp):
-  """Set other CUDA related variables."""
-  # If CUDA is enabled, always use GPU during build and test. 
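
# Illustration (resulting .bazelrc lines): with TF_CUDA_CLANG='1' the function
# below writes
#
#   build --config=cuda_clang
#   test --config=cuda_clang
#
# and otherwise
#
#   build --config=cuda
#   test --config=cuda
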
- if environ_cp.get('TF_CUDA_CLANG') == '1': - write_to_bazelrc('build --config=cuda_clang') - write_to_bazelrc('test --config=cuda_clang') - else: - write_to_bazelrc('build --config=cuda') - write_to_bazelrc('test --config=cuda') + """Set other CUDA related variables.""" + # If CUDA is enabled, always use GPU during build and test. + if environ_cp.get('TF_CUDA_CLANG') == '1': + write_to_bazelrc('build --config=cuda_clang') + write_to_bazelrc('test --config=cuda_clang') + else: + write_to_bazelrc('build --config=cuda') + write_to_bazelrc('test --config=cuda') def set_host_cxx_compiler(environ_cp): - """Set HOST_CXX_COMPILER.""" - default_cxx_host_compiler = which('g++') or '' + """Set HOST_CXX_COMPILER.""" + default_cxx_host_compiler = which('g++') or '' - host_cxx_compiler = prompt_loop_or_load_from_env( - environ_cp, - var_name='HOST_CXX_COMPILER', - var_default=default_cxx_host_compiler, - ask_for_var=('Please specify which C++ compiler should be used as the ' - 'host C++ compiler.'), - check_success=os.path.exists, - error_msg='Invalid C++ compiler path. %s cannot be found.', - ) + host_cxx_compiler = prompt_loop_or_load_from_env( + environ_cp, + var_name='HOST_CXX_COMPILER', + var_default=default_cxx_host_compiler, + ask_for_var=('Please specify which C++ compiler should be used as the ' + 'host C++ compiler.'), + check_success=os.path.exists, + error_msg='Invalid C++ compiler path. %s cannot be found.', + ) - write_action_env_to_bazelrc('HOST_CXX_COMPILER', host_cxx_compiler) + write_action_env_to_bazelrc('HOST_CXX_COMPILER', host_cxx_compiler) def set_host_c_compiler(environ_cp): - """Set HOST_C_COMPILER.""" - default_c_host_compiler = which('gcc') or '' + """Set HOST_C_COMPILER.""" + default_c_host_compiler = which('gcc') or '' - host_c_compiler = prompt_loop_or_load_from_env( - environ_cp, - var_name='HOST_C_COMPILER', - var_default=default_c_host_compiler, - ask_for_var=('Please specify which C compiler should be used as the host ' - 'C compiler.'), - check_success=os.path.exists, - error_msg='Invalid C compiler path. %s cannot be found.', - ) + host_c_compiler = prompt_loop_or_load_from_env( + environ_cp, + var_name='HOST_C_COMPILER', + var_default=default_c_host_compiler, + ask_for_var=( + 'Please specify which C compiler should be used as the host ' + 'C compiler.'), + check_success=os.path.exists, + error_msg='Invalid C compiler path. %s cannot be found.', + ) - write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler) + write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler) def set_computecpp_toolkit_path(environ_cp): - """Set COMPUTECPP_TOOLKIT_PATH.""" - - def toolkit_exists(toolkit_path): - """Check if a computecpp toolkit path is valid.""" - if is_linux(): - sycl_rt_lib_path = 'lib/libComputeCpp.so' - else: - sycl_rt_lib_path = '' - - sycl_rt_lib_path_full = os.path.join(toolkit_path, - sycl_rt_lib_path) - exists = os.path.exists(sycl_rt_lib_path_full) - if not exists: - print('Invalid SYCL %s library path. %s cannot be found' % - (_TF_OPENCL_VERSION, sycl_rt_lib_path_full)) - return exists - - computecpp_toolkit_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='COMPUTECPP_TOOLKIT_PATH', - var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH, - ask_for_var=( - 'Please specify the location where ComputeCpp for SYCL %s is ' - 'installed.' % _TF_OPENCL_VERSION), - check_success=toolkit_exists, - error_msg='Invalid SYCL compiler path. 
%s cannot be found.', - suppress_default_error=True) - - write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', - computecpp_toolkit_path) + """Set COMPUTECPP_TOOLKIT_PATH.""" + + def toolkit_exists(toolkit_path): + """Check if a computecpp toolkit path is valid.""" + if is_linux(): + sycl_rt_lib_path = 'lib/libComputeCpp.so' + else: + sycl_rt_lib_path = '' + + sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path) + exists = os.path.exists(sycl_rt_lib_path_full) + if not exists: + print('Invalid SYCL %s library path. %s cannot be found' % + (_TF_OPENCL_VERSION, sycl_rt_lib_path_full)) + return exists + + computecpp_toolkit_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='COMPUTECPP_TOOLKIT_PATH', + var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH, + ask_for_var=( + 'Please specify the location where ComputeCpp for SYCL %s is ' + 'installed.' % _TF_OPENCL_VERSION), + check_success=toolkit_exists, + error_msg='Invalid SYCL compiler path. %s cannot be found.', + suppress_default_error=True) + + write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', + computecpp_toolkit_path) def set_trisycl_include_dir(environ_cp): - """Set TRISYCL_INCLUDE_DIR.""" + """Set TRISYCL_INCLUDE_DIR.""" - ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' - 'include directory. (Use --config=sycl_trisycl ' - 'when building with Bazel) ' - '[Default is %s]: ' - ) % (_DEFAULT_TRISYCL_INCLUDE_DIR) + ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' + 'include directory. (Use --config=sycl_trisycl ' + 'when building with Bazel) ' + '[Default is %s]: ') % ( + _DEFAULT_TRISYCL_INCLUDE_DIR) - while True: - trisycl_include_dir = get_from_env_or_user_or_default( - environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, - _DEFAULT_TRISYCL_INCLUDE_DIR) - if os.path.exists(trisycl_include_dir): - break + while True: + trisycl_include_dir = get_from_env_or_user_or_default( + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) + if os.path.exists(trisycl_include_dir): + break - print('Invalid triSYCL include directory, %s cannot be found' - % (trisycl_include_dir)) + print('Invalid triSYCL include directory, %s cannot be found' % + (trisycl_include_dir)) - # Set TRISYCL_INCLUDE_DIR - environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', - trisycl_include_dir) + # Set TRISYCL_INCLUDE_DIR + environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) def set_mpi_home(environ_cp): - """Set MPI_HOME.""" - - default_mpi_home = which('mpirun') or which('mpiexec') or '' - default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home)) - - def valid_mpi_path(mpi_home): - exists = (os.path.exists(os.path.join(mpi_home, 'include')) and - os.path.exists(os.path.join(mpi_home, 'lib'))) - if not exists: - print('Invalid path to the MPI Toolkit. 
%s or %s cannot be found' %
-            (os.path.join(mpi_home, 'include'),
-             os.path.exists(os.path.join(mpi_home, 'lib'))))
-    return exists
-
-  _ = prompt_loop_or_load_from_env(
-      environ_cp,
-      var_name='MPI_HOME',
-      var_default=default_mpi_home,
-      ask_for_var='Please specify the MPI toolkit folder.',
-      check_success=valid_mpi_path,
-      error_msg='',
-      suppress_default_error=True)
+    """Set MPI_HOME."""
+
+    default_mpi_home = which('mpirun') or which('mpiexec') or ''
+    default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
+
+    def valid_mpi_path(mpi_home):
+        exists = (os.path.exists(os.path.join(mpi_home, 'include'))
+                  and os.path.exists(os.path.join(mpi_home, 'lib')))
+        if not exists:
+            print('Invalid path to the MPI Toolkit. %s or %s cannot be found' %
+                  (os.path.join(mpi_home, 'include'),
+                   os.path.join(mpi_home, 'lib')))
+        return exists
+
+    _ = prompt_loop_or_load_from_env(
+        environ_cp,
+        var_name='MPI_HOME',
+        var_default=default_mpi_home,
+        ask_for_var='Please specify the MPI toolkit folder.',
+        check_success=valid_mpi_path,
+        error_msg='',
+        suppress_default_error=True)


def set_other_mpi_vars(environ_cp):
-  """Set other MPI related variables."""
-  # Link the MPI header files
-  mpi_home = environ_cp.get('MPI_HOME')
-  symlink_force('%s/include/mpi.h' % mpi_home, 'third_party/mpi/mpi.h')
-
-  # Determine if we use OpenMPI or MVAPICH, these require different header files
-  # to be included here to make bazel dependency checker happy
-  if os.path.exists(os.path.join(mpi_home, 'include/mpi_portable_platform.h')):
-    symlink_force(
-        os.path.join(mpi_home, 'include/mpi_portable_platform.h'),
-        'third_party/mpi/mpi_portable_platform.h')
-    # TODO(gunan): avoid editing files in configure
-    sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=False',
-                 'MPI_LIB_IS_OPENMPI=True')
-  else:
-    # MVAPICH / MPICH
-    symlink_force(
-        os.path.join(mpi_home, 'include/mpio.h'), 'third_party/mpi/mpio.h')
-    symlink_force(
-        os.path.join(mpi_home, 'include/mpicxx.h'), 'third_party/mpi/mpicxx.h')
-    # TODO(gunan): avoid editing files in configure
-    sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=True',
-                 'MPI_LIB_IS_OPENMPI=False')
-
-  if os.path.exists(os.path.join(mpi_home, 'lib/libmpi.so')):
-    symlink_force(
-        os.path.join(mpi_home, 'lib/libmpi.so'), 'third_party/mpi/libmpi.so')
-  else:
-    raise ValueError('Cannot find the MPI library file in %s/lib' % mpi_home)
+    """Set other MPI related variables."""
+    # Link the MPI header files
+    mpi_home = environ_cp.get('MPI_HOME')
+    symlink_force('%s/include/mpi.h' % mpi_home, 'third_party/mpi/mpi.h')
+
+    # Determine if we use OpenMPI or MVAPICH; these require different header files
+    # to be included here to make the bazel dependency checker happy
+    if os.path.exists(
+            os.path.join(mpi_home, 'include/mpi_portable_platform.h')):
+        symlink_force(
+            os.path.join(mpi_home, 'include/mpi_portable_platform.h'),
+            'third_party/mpi/mpi_portable_platform.h')
+        # TODO(gunan): avoid editing files in configure
+        sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=False',
+                     'MPI_LIB_IS_OPENMPI=True')
+    else:
+        # MVAPICH / MPICH
+        symlink_force(
+            os.path.join(mpi_home, 'include/mpio.h'), 'third_party/mpi/mpio.h')
+        symlink_force(
+            os.path.join(mpi_home, 'include/mpicxx.h'),
+            'third_party/mpi/mpicxx.h')
+        # TODO(gunan): avoid editing files in configure
+        sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=True',
+                     'MPI_LIB_IS_OPENMPI=False')
+
+    if os.path.exists(os.path.join(mpi_home, 'lib/libmpi.so')):
+        symlink_force(
+
os.path.join(mpi_home, 'lib/libmpi.so'), + 'third_party/mpi/libmpi.so') + else: + raise ValueError( + 'Cannot find the MPI library file in %s/lib' % mpi_home) def set_grpc_build_flags(): - write_to_bazelrc('build --define grpc_no_ares=true') + write_to_bazelrc('build --define grpc_no_ares=true') def set_build_strip_flag(): - write_to_bazelrc('build --strip=always') + write_to_bazelrc('build --strip=always') def set_windows_build_flags(): - if is_windows(): - # The non-monolithic build is not supported yet - write_to_bazelrc('build --config monolithic') - # Suppress warning messages - write_to_bazelrc('build --copt=-w --host_copt=-w') - # Output more verbose information when something goes wrong - write_to_bazelrc('build --verbose_failures') + if is_windows(): + # The non-monolithic build is not supported yet + write_to_bazelrc('build --config monolithic') + # Suppress warning messages + write_to_bazelrc('build --copt=-w --host_copt=-w') + # Output more verbose information when something goes wrong + write_to_bazelrc('build --verbose_failures') def config_info_line(name, help_text): - """Helper function to print formatted help text for Bazel config options.""" - print('\t--config=%-12s\t# %s' % (name, help_text)) + """Helper function to print formatted help text for Bazel config options.""" + print('\t--config=%-12s\t# %s' % (name, help_text)) def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--workspace", - type=str, - default=_TF_WORKSPACE_ROOT, - help="The absolute path to your active Bazel workspace.") - args = parser.parse_args() - - # Make a copy of os.environ to be clear when functions and getting and setting - # environment variables. - environ_cp = dict(os.environ) - - check_bazel_version('0.10.0') - - reset_tf_configure_bazelrc(args.workspace) - cleanup_makefile() - setup_python(environ_cp) - - if is_windows(): - environ_cp['TF_NEED_AWS'] = '0' - environ_cp['TF_NEED_GCP'] = '0' - environ_cp['TF_NEED_HDFS'] = '0' - environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_KAFKA'] = '0' - environ_cp['TF_NEED_OPENCL_SYCL'] = '0' - environ_cp['TF_NEED_COMPUTECPP'] = '0' - environ_cp['TF_NEED_OPENCL'] = '0' - environ_cp['TF_CUDA_CLANG'] = '0' - environ_cp['TF_NEED_TENSORRT'] = '0' - # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on - # Windows. 
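
For context on the set_other_mpi_vars hunk above: the whole OpenMPI-versus-MPICH/MVAPICH decision keys off a single header, and mpi.bzl is then rewritten with sed_in_place to match. Below is a reduced sketch of just that detection rule, illustrative only; the real function also creates the symlinks shown above.

```
import os

def detect_mpi_flavor(mpi_home):
    # OpenMPI ships include/mpi_portable_platform.h; MPICH / MVAPICH do not.
    header = os.path.join(mpi_home, 'include/mpi_portable_platform.h')
    return 'openmpi' if os.path.exists(header) else 'mpich'

# '/usr/lib/openmpi' is a hypothetical install prefix, used only for illustration.
print(detect_mpi_flavor('/usr/lib/openmpi'))
```
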
- environ_cp['TF_DOWNLOAD_CLANG'] = '0' - - if is_macos(): - environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_TENSORRT'] = '0' - - set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', - 'with_jemalloc', True) - set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', - 'with_gcp_support', True, 'gcp') - set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', - 'with_hdfs_support', True, 'hdfs') - set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform', - 'with_aws_support', True, 'aws') - set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', - 'with_kafka_support', True, 'kafka') - set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', - False, 'xla') - set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', - False, 'gdr') - set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', - False, 'verbs') - - set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) - if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': - set_host_cxx_compiler(environ_cp) - set_host_c_compiler(environ_cp) - set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', True) - if environ_cp.get('TF_NEED_COMPUTECPP') == '1': - set_computecpp_toolkit_path(environ_cp) - else: - set_trisycl_include_dir(environ_cp) - - set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) - if (environ_cp.get('TF_NEED_CUDA') == '1' and - 'TF_CUDA_CONFIG_REPO' not in environ_cp): - set_tf_cuda_version(environ_cp) - set_tf_cudnn_version(environ_cp) - if is_linux(): - set_tf_tensorrt_install_path(environ_cp) - set_tf_nccl_install_path(environ_cp) - - set_tf_cuda_compute_capabilities(environ_cp) - if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( - 'LD_LIBRARY_PATH') != '1': - write_action_env_to_bazelrc('LD_LIBRARY_PATH', - environ_cp.get('LD_LIBRARY_PATH')) - - set_tf_cuda_clang(environ_cp) - if environ_cp.get('TF_CUDA_CLANG') == '1': - # Ask whether we should download the clang toolchain. - set_tf_download_clang(environ_cp) - if environ_cp.get('TF_DOWNLOAD_CLANG') != '1': - # Set up which clang we should use as the cuda / host compiler. - set_clang_cuda_compiler_path(environ_cp) + parser = argparse.ArgumentParser() + parser.add_argument( + "--workspace", + type=str, + default=_TF_WORKSPACE_ROOT, + help="The absolute path to your active Bazel workspace.") + args = parser.parse_args() + + # Make a copy of os.environ to be clear when functions and getting and setting + # environment variables. + environ_cp = dict(os.environ) + + check_bazel_version('0.10.0') + + reset_tf_configure_bazelrc(args.workspace) + cleanup_makefile() + setup_python(environ_cp) + + if is_windows(): + environ_cp['TF_NEED_AWS'] = '0' + environ_cp['TF_NEED_GCP'] = '0' + environ_cp['TF_NEED_HDFS'] = '0' + environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_KAFKA'] = '0' + environ_cp['TF_NEED_OPENCL_SYCL'] = '0' + environ_cp['TF_NEED_COMPUTECPP'] = '0' + environ_cp['TF_NEED_OPENCL'] = '0' + environ_cp['TF_CUDA_CLANG'] = '0' + environ_cp['TF_NEED_TENSORRT'] = '0' + # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on + # Windows. 
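
The set_build_var calls in this hunk tie each TF_NEED_* answer to a --define flag, optionally behind a named --config. The helper itself is defined earlier in configure.py and does not appear in this excerpt, so the sketch below is an inference from the with_ngraph_support config_setting added later in this patch, not the actual implementation:

```
def bazelrc_lines_for(option_name, enabled, bazel_config_name=None):
    # Enabled features switch the define on for every build; disabled-by-default
    # features stay reachable through `bazel build --config=<name>`.
    if enabled:
        return ['build --define %s=true' % option_name]
    if bazel_config_name:
        return ['build:%s --define %s=true' % (bazel_config_name, option_name)]
    return []

print(bazelrc_lines_for('with_ngraph_support', False, 'ngraph'))
# -> ['build:ngraph --define with_ngraph_support=true']
```
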
+ environ_cp['TF_DOWNLOAD_CLANG'] = '0' + + if is_macos(): + environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_TENSORRT'] = '0' + + set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', + 'with_jemalloc', True) + set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', + 'with_gcp_support', True, 'gcp') + set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', + 'with_hdfs_support', True, 'hdfs') + set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform', + 'with_aws_support', True, 'aws') + set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', + 'with_kafka_support', True, 'kafka') + set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', + False, 'xla') + set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', False, + 'gdr') + set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', + False, 'verbs') + set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', + 'with_ngraph_support', False, 'ngraph') + + set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + set_host_cxx_compiler(environ_cp) + set_host_c_compiler(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', + True) + if environ_cp.get('TF_NEED_COMPUTECPP') == '1': + set_computecpp_toolkit_path(environ_cp) + else: + set_trisycl_include_dir(environ_cp) + + set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) + if (environ_cp.get('TF_NEED_CUDA') == '1' + and 'TF_CUDA_CONFIG_REPO' not in environ_cp): + set_tf_cuda_version(environ_cp) + set_tf_cudnn_version(environ_cp) + if is_linux(): + set_tf_tensorrt_install_path(environ_cp) + set_tf_nccl_install_path(environ_cp) + + set_tf_cuda_compute_capabilities(environ_cp) + if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( + 'LD_LIBRARY_PATH') != '1': + write_action_env_to_bazelrc('LD_LIBRARY_PATH', + environ_cp.get('LD_LIBRARY_PATH')) + + set_tf_cuda_clang(environ_cp) + if environ_cp.get('TF_CUDA_CLANG') == '1': + # Ask whether we should download the clang toolchain. + set_tf_download_clang(environ_cp) + if environ_cp.get('TF_DOWNLOAD_CLANG') != '1': + # Set up which clang we should use as the cuda / host compiler. + set_clang_cuda_compiler_path(environ_cp) + else: + # Set up which gcc nvcc should use as the host compiler + # No need to set this on Windows + if not is_windows(): + set_gcc_host_compiler_path(environ_cp) + set_other_cuda_vars(environ_cp) else: - # Set up which gcc nvcc should use as the host compiler - # No need to set this on Windows - if not is_windows(): - set_gcc_host_compiler_path(environ_cp) - set_other_cuda_vars(environ_cp) - else: - # CUDA not required. Ask whether we should download the clang toolchain and - # use it for the CPU build. 
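
Condensing the CUDA branch above and the CPU fallthrough that follows, the toolchain choice reduces to a small decision tree. The function below is a readability aid written for this document, not code from the patch:

```
def compiler_setup(env):
    if env.get('TF_NEED_CUDA') != '1':
        # CUDA off: optionally fetch clang and use it for the CPU build.
        return 'download_clang' if env.get('TF_DOWNLOAD_CLANG') == '1' else 'default_cpu'
    if env.get('TF_CUDA_CLANG') == '1':
        # clang as the CUDA compiler, either downloaded or taken from a local path.
        return 'downloaded_clang' if env.get('TF_DOWNLOAD_CLANG') == '1' else 'local_clang'
    # Otherwise nvcc, with gcc as its host compiler (only prompted off-Windows).
    return 'nvcc_plus_gcc'

assert compiler_setup({'TF_NEED_CUDA': '1'}) == 'nvcc_plus_gcc'
assert compiler_setup({'TF_NEED_CUDA': '1', 'TF_CUDA_CLANG': '1'}) == 'local_clang'
```
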
- set_tf_download_clang(environ_cp) - if environ_cp.get('TF_DOWNLOAD_CLANG') == '1': - write_to_bazelrc('build --config=download_clang') - write_to_bazelrc('test --config=download_clang') - - set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) - if environ_cp.get('TF_NEED_MPI') == '1': - set_mpi_home(environ_cp) - set_other_mpi_vars(environ_cp) - - set_grpc_build_flags() - set_cc_opt_flags(environ_cp) - set_build_strip_flag() - set_windows_build_flags() - - if get_var( - environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', - False, - ('Would you like to interactively configure ./WORKSPACE for ' - 'Android builds?'), - 'Searching for NDK and SDK installations.', - 'Not configuring the WORKSPACE for Android builds.'): - create_android_ndk_rule(environ_cp) - create_android_sdk_rule(environ_cp) - - print('Preconfigured Bazel build configs. You can use any of the below by ' - 'adding "--config=<>" to your build command. See tools/bazel.rc for ' - 'more details.') - config_info_line('mkl', 'Build with MKL support.') - config_info_line('monolithic', 'Config for mostly static monolithic build.') + # CUDA not required. Ask whether we should download the clang toolchain and + # use it for the CPU build. + set_tf_download_clang(environ_cp) + if environ_cp.get('TF_DOWNLOAD_CLANG') == '1': + write_to_bazelrc('build --config=download_clang') + write_to_bazelrc('test --config=download_clang') + + set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) + if environ_cp.get('TF_NEED_MPI') == '1': + set_mpi_home(environ_cp) + set_other_mpi_vars(environ_cp) + + set_grpc_build_flags() + set_cc_opt_flags(environ_cp) + set_build_strip_flag() + set_windows_build_flags() + + if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', + False, + ('Would you like to interactively configure ./WORKSPACE for ' + 'Android builds?'), 'Searching for NDK and SDK installations.', + 'Not configuring the WORKSPACE for Android builds.'): + create_android_ndk_rule(environ_cp) + create_android_sdk_rule(environ_cp) + + print('Preconfigured Bazel build configs. You can use any of the below by ' + 'adding "--config=<>" to your build command. See tools/bazel.rc for ' + 'more details.') + config_info_line('mkl', 'Build with MKL support.') + config_info_line('monolithic', + 'Config for mostly static monolithic build.') + if __name__ == '__main__': - main() + main() diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 51eea94847..6d443eb9f2 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -24,6 +24,8 @@ load( "gen_api_init_files", # @unused ) +load("//third_party/ngraph:build_defs.bzl", "if_ngraph") + # Config setting for determining if we are building for Android. config_setting( name = "android", @@ -408,6 +410,14 @@ config_setting( visibility = ["//visibility:public"], ) +# This flag is set from the configure step when the user selects with nGraph option. 
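
The config_setting continued below matches the `--define with_ngraph_support=true` flag produced by the configure step, and the `if_ngraph()` macro added in third_party/ngraph/build_defs.bzl later in this patch selects on it. Here is a toy model of that select in plain Python, for intuition only; Bazel's actual matching logic is considerably richer.

```
def select_sketch(active_settings, branches):
    # Return the branch whose config_setting is active, else the default arm.
    for condition, value in branches.items():
        if condition != '//conditions:default' and condition in active_settings:
            return value
    return branches['//conditions:default']

branches = {
    '//tensorflow:with_ngraph_support': ['@ngraph_tf//:ngraph_tf'],
    '//conditions:default': [],
}
assert select_sketch({'//tensorflow:with_ngraph_support'}, branches) == ['@ngraph_tf//:ngraph_tf']
assert select_sketch(set(), branches) == []
```
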
+# By default it should be false +config_setting( + name = "with_ngraph_support", + values = {"define": "with_ngraph_support=true"}, + visibility = ["//visibility:public"], +) + package_group( name = "internal", packages = [ @@ -540,7 +550,7 @@ tf_cc_shared_object( "//tensorflow/c:version_script.lds", "//tensorflow/c/eager:c_api", "//tensorflow/core:tensorflow", - ], + ] ) tf_cc_shared_object( @@ -568,7 +578,7 @@ tf_cc_shared_object( "//tensorflow/cc:scope", "//tensorflow/cc/profiler", "//tensorflow/core:tensorflow", - ], + ] + if_ngraph(["@ngraph_tf//:ngraph_tf"]) ) exports_files( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index dbe87a6dbb..19060c5ce7 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2325,6 +2325,7 @@ tf_generate_proto_text_sources( ":lib_internal", ":protos_all_proto_cc", ], + visibility = ["//visibility:public"], ) cc_library( @@ -2435,6 +2436,7 @@ cc_header_only_library( deps = [ ":core_cpu_lib", ], + visibility = ["//visibility:public"], ) tf_cuda_library( @@ -2502,7 +2504,7 @@ tf_cuda_library( "//third_party/mkl:intel_binary_blob", "@mkl_dnn", ], - ), + ) , alwayslink = 1, ) @@ -2560,6 +2562,7 @@ tf_cuda_library( cc_library( name = "protos_cc", deps = ["//tensorflow/core/platform/default/build_config:protos_cc"], + visibility = ["//visibility:public"], ) # Library containing all of the graph construction code that is diff --git a/tensorflow/core/common_runtime/threadpool_device_factory.cc b/tensorflow/core/common_runtime/threadpool_device_factory.cc index 6a900c02c0..61a62621ba 100644 --- a/tensorflow/core/common_runtime/threadpool_device_factory.cc +++ b/tensorflow/core/common_runtime/threadpool_device_factory.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/public/session_options.h" namespace tensorflow { diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 28891320c4..9f6bc70f04 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -707,4 +707,4 @@ def tf_additional_binary_deps(): [ "//third_party/mkl:intel_binary_blob", ], - ) + ) \ No newline at end of file diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index d60d37df50..f2ab2f80e6 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -44,6 +44,7 @@ load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_ load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_mpi_deps") load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_gdr_deps") load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") +load("//third_party/ngraph:build_defs.bzl","if_ngraph") py_library( name = "python", @@ -3669,7 +3670,8 @@ tf_py_wrap_cc( tf_additional_plugin_deps() + tf_additional_verbs_deps() + tf_additional_mpi_deps() + - tf_additional_gdr_deps()), + tf_additional_gdr_deps())+ + if_ngraph(["@ngraph_tf//:ngraph_tf"]) ) # ** Targets for Windows build (start) ** diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index e4241667ad..5884870daa 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -24,6 +24,10 @@ load( "if_mkl", "if_mkl_lnx_x64" ) +load( + "//third_party/ngraph:build_defs.bzl", + "if_ngraph", +) def 
register_extension_info(**kwargs): pass @@ -214,6 +218,7 @@ def tf_copts(android_optimization_level_override="-O2", is_external=False): + if_cuda(["-DGOOGLE_CUDA=1"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"]) + if_mkl(["-DINTEL_MKL=1", "-DEIGEN_USE_VML"]) + + if_ngraph(["-DINTEL_NGRAPH=1"]) + if_mkl_lnx_x64(["-fopenmp"]) + if_android_arm(["-mfpu=neon"]) + if_linux_x86_64(["-msse3"]) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b712954d6d..8953edf8a6 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -803,6 +803,39 @@ def tf_workspace(path_prefix="", tf_repo_name=""): strip_prefix = "rules_android-0.1.1", ) + tf_http_archive( + name = "ngraph", + urls = [ + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", + "https://github.com/NervanaSystems/ngraph/archive/v0.5.0.tar.gz", + ], + sha256 = "cb35d3d98836f615408afd18371fb13e3400711247e0d822ba7f306c45e9bb2c", + strip_prefix = "ngraph-0.5.0", + build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), + ) + + tf_http_archive( + name = "nlohmann_json_lib", + urls = [ + "https://mirror.bazel.build/github.com/nlohmann/json/archive/v3.1.1.tar.gz", + "https://github.com/nlohmann/json/archive/v3.1.1.tar.gz", + ], + sha256 = "9f3549824af3ca7e9707a2503959886362801fb4926b869789d6929098a79e47", + strip_prefix = "json-3.1.1", + build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), + ) + + tf_http_archive( + name = "ngraph_tf", + urls = [ + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc0.tar.gz", + "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc0.tar.gz" + ], + sha256 = "c09a35d0a605afeeaf5aad81181a6abc7e9b9e39312e8fdfbae20cbd8eb58523", + strip_prefix = "ngraph-tf-0.3.0-rc0", + build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), + ) + ############################################################################## # BIND DEFINITIONS # diff --git a/third_party/ngraph/BUILD b/third_party/ngraph/BUILD new file mode 100644 index 0000000000..067771b43f --- /dev/null +++ b/third_party/ngraph/BUILD @@ -0,0 +1 @@ +licenses(["notice"]) # 3-Clause BSD diff --git a/third_party/ngraph/LICENSE b/third_party/ngraph/LICENSE new file mode 100644 index 0000000000..9c8f3ea087 --- /dev/null +++ b/third_party/ngraph/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/third_party/ngraph/NGRAPH_LICENSE b/third_party/ngraph/NGRAPH_LICENSE new file mode 100644 index 0000000000..9c8f3ea087 --- /dev/null +++ b/third_party/ngraph/NGRAPH_LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/third_party/ngraph/build_defs.bzl b/third_party/ngraph/build_defs.bzl new file mode 100644 index 0000000000..2c9027a6b8 --- /dev/null +++ b/third_party/ngraph/build_defs.bzl @@ -0,0 +1,16 @@ +def clean_dep(dep): + return str(Label(dep)) + +def if_ngraph(a): + """Shorthand for select()'ing on whether we're building with nGraph support. + + Returns a select statement which evaluates to if_true if we're building + with nGraph. Otherwise, the select statement evaluates to default. + + """ + ret_val = select({ + clean_dep("//tensorflow:with_ngraph_support"): a, + "//conditions:default": [] + }) + + return ret_val diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD new file mode 100644 index 0000000000..17710b2cb9 --- /dev/null +++ b/third_party/ngraph/ngraph.BUILD @@ -0,0 +1,45 @@ +licenses(["notice"]) # 3-Clause BSD + +exports_files(["license.txt"]) + +filegroup( + name = "LICENSE", + srcs = [ + "license.txt", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "ngraph_core", + srcs = glob([ + "src/ngraph/*.cpp", + "src/ngraph/autodiff/*.cpp", + "src/ngraph/builder/*.cpp", + "src/ngraph/descriptor/*.cpp", + "src/ngraph/descriptor/layout/*.cpp", + "src/ngraph/op/*.cpp", + "src/ngraph/op/util/*.cpp", + "src/ngraph/pattern/*.cpp", + "src/ngraph/pattern/*.hpp", + "src/ngraph/pass/*.cpp", + "src/ngraph/pass/*.hpp", + "src/ngraph/runtime/*.cpp", + "src/ngraph/type/*.cpp", + "src/ngraph/runtime/interpreter/*.cpp", + "src/ngraph/runtime/interpreter/*.hpp", + ]), + hdrs = glob(["src/ngraph/**/*.hpp"]), + deps = [ + "@eigen_archive//:eigen", + "@nlohmann_json_lib", + ], + copts = [ + "-I external/ngraph/src", + "-I external/nlohmann_json_lib/include/", + '-D SHARED_LIB_EXT=\\".so\\"', + '-D NGRAPH_VERSION=\\"0.5.0\\"', + ], + visibility = ["//visibility:public"], + alwayslink=1 +) diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD new file mode 100644 index 0000000000..bbac74db0f --- /dev/null +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -0,0 +1,96 @@ +licenses(["notice"]) # 3-Clause BSD + +exports_files(["license.txt"]) + +filegroup( + name = "LICENSE", + srcs = [ + "license.txt", + ], + visibility = ["//visibility:public"], +) + +load( + "@org_tensorflow//tensorflow:tensorflow.bzl", + "tf_cc_test" +) + +cc_library( + name = "ngraph_libs_linux", + srcs = [ + "lib/libiomp5.so", + "lib/libmklml_intel.so", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "ngraph_tf", + srcs = + [ + "src/ngraph_builder.h", + "src/ngraph_builder.cc", + "src/ngraph_cluster.h", + "src/ngraph_cluster.cc", + "src/ngraph_cluster_manager.h", + "src/ngraph_cluster_manager.cc", + "src/ngraph_confirm_pass.cc", + "src/ngraph_device.cc", + "src/ngraph_encapsulate_op.cc", + "src/ngraph_encapsulate_pass.cc", + "src/ngraph_freshness_tracker.h", + "src/ngraph_freshness_tracker.cc", + "src/ngraph_graph_rewrite_passes.cc", + "src/ngraph_liberate_pass.cc", + "src/ngraph_op_kernels.cc", + "src/ngraph_stub_ops.cc", + "src/ngraph_utils.h", + "src/ngraph_utils.cc", + "src/ngraph_send_recv_ops.cc", + "src/ngraph_variable_ops.cc", + "src/tf_graphcycles.cc", + "logging/ngraph_log.h", + "logging/ngraph_log.cc", + "logging/tf_graph_writer.h", + "logging/tf_graph_writer.cc", + ], + hdrs = [ + "src/tf_graphcycles.h" + ], + deps = [ + "@org_tensorflow//tensorflow/core:protos_all_proto_text", + "@org_tensorflow//tensorflow/core:framework_headers_lib", + "@org_tensorflow//tensorflow/core:core_cpu_headers_lib", + 
"@ngraph//:ngraph_core" + ], + copts = [ + "-I external/ngraph_tf/src", + "-I external/ngraph_tf/logging", + "-I external/ngraph/src", + "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", + ], + visibility = ["//visibility:public"], +) + +tf_cc_test( + name = "ngraph_tf_tests", + size = "small", + srcs = [ + "test/tf_exec.cpp", + "test/main.cpp", + ], + deps = [ + ":ngraph_tf", + "@com_google_googletest//:gtest", + "@org_tensorflow//tensorflow/cc:cc_ops", + "@org_tensorflow//tensorflow/cc:client_session", + "@org_tensorflow//tensorflow/core:tensorflow", + ], + extra_copts = [ + "-fexceptions ", + "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", + "-I external/ngraph_tf/src", + "-I external/ngraph_tf/logging", + "-I external/ngraph/src", + ], +) diff --git a/third_party/ngraph/nlohmann_json.BUILD b/third_party/ngraph/nlohmann_json.BUILD new file mode 100644 index 0000000000..396e158535 --- /dev/null +++ b/third_party/ngraph/nlohmann_json.BUILD @@ -0,0 +1,23 @@ +licenses(["notice"]) # 3-Clause BSD + +exports_files(["license.txt"]) + +filegroup( + name = "LICENSE", + srcs = [ + "license.txt", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "nlohmann_json_lib", + hdrs = glob([ + "include/nlohmann/**/*.hpp", + ]), + copts = [ + "-I external/nlohmann_json_lib", + ], + visibility = ["//visibility:public"], + alwayslink=1 +) -- cgit v1.2.3 From 1d4a8296b26150f7eabf5bbb981b9b2438a9fb2a Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 24 Jul 2018 23:40:39 -0700 Subject: merge upstream master; addressing review comments per changes upstream --- .../contrib/tensorrt/convert/convert_nodes.cc | 24 ++++++---------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 7782919566..9d881eda90 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2693,13 +2693,6 @@ tensorflow::Status ConvertGraphDefToEngine( VLOG(1) << "Converting op name=" << node_name << ", op=" << node_def.op(); if (tensorflow::str_util::StartsWith(node_name, kInputPHName) && (node_def.op() == "Placeholder")) { - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - auto type_status = - ConvertDType(node_def.attr().at("dtype").type(), &dtype); - if (type_status != tensorflow::Status::OK()) { - LOG(WARNING) << "Type conversion failed for " << node_name; - return type_status; - } int32 slot_number = -1; if (!tensorflow::strings::safe_strto32( node_name.c_str() + strlen(kInputPHName), &slot_number)) { @@ -2729,21 +2722,12 @@ tensorflow::Status ConvertGraphDefToEngine( #if NV_TENSORRT_MAJOR == 3 nvinfer1::DimsCHW input_dim; - // TRT 3.x only support 4 dimensional input tensor. - if (shape.dims() != 4) { - string err_str = "Require 4 dimensional input."; - StrAppend(&err_str, " Got ", shape.dims(), " ", - node_name); - return tensorflow::errors::Unimplemented(err_str); - } #elif NV_TENSORRT_MAJOR > 3 nvinfer1::Dims input_dim; #endif - for (int i = 1; i < shape.dims(); i++) { input_dim.d[i - 1] = shape.dim_size(i); } - input_dim.nbDims = shape.dims() - 1; nvinfer1::ITensor* input_tensor = converter.network()->addInput( node_name.c_str(), dtype, input_dim); @@ -2920,12 +2904,16 @@ bool InputEdgeValidator::operator()(const tensorflow::Edge* in_edge) const { << ": " << status; return false; } - if (shape.dims() < 3 && in_edge->src()->type_string() != "Const") { + +#if NV_TENSORRT_MAJOR == 3 + // TRT 3.x only support 4 dimensional input tensor. 
+ if (shape.dims() != 4 && in_edge->src()->type_string() != "Const") { VLOG(2) << "--> Need to remove input node " << in_edge->dst()->name() << " which has an input at port " << in_edge->dst_input() - << " with #dim<3 and is not a const: " << shape; + << " with #dim!=4 and is not a const: " << shape; return false; } +#endif return true; } -- cgit v1.2.3 From f88a6f93bee89c610fa8b399d037c7a33c1a0a3e Mon Sep 17 00:00:00 2001 From: Avijit Date: Tue, 24 Jul 2018 23:58:58 -0700 Subject: Updated the ngraph_tf hash and PEP8 for configure.py --- configure.py | 2627 +++++++++++++++++++++++----------------------- tensorflow/workspace.bzl | 2 +- 2 files changed, 1314 insertions(+), 1315 deletions(-) diff --git a/configure.py b/configure.py index 3bbbee7abf..e4495fb684 100644 --- a/configure.py +++ b/configure.py @@ -28,9 +28,9 @@ import sys # pylint: disable=g-import-not-at-top try: - from shutil import which + from shutil import which except ImportError: - from distutils.spawn import find_executable as which + from distutils.spawn import find_executable as which # pylint: enable=g-import-not-at-top _DEFAULT_CUDA_VERSION = '9.0' @@ -56,226 +56,226 @@ _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') class UserInputError(Exception): - pass + pass def is_windows(): - return platform.system() == 'Windows' + return platform.system() == 'Windows' def is_linux(): - return platform.system() == 'Linux' + return platform.system() == 'Linux' def is_macos(): - return platform.system() == 'Darwin' + return platform.system() == 'Darwin' def is_ppc64le(): - return platform.machine() == 'ppc64le' + return platform.machine() == 'ppc64le' def is_cygwin(): - return platform.system().startswith('CYGWIN_NT') + return platform.system().startswith('CYGWIN_NT') def get_input(question): + try: try: - try: - answer = raw_input(question) - except NameError: - answer = input(question) # pylint: disable=bad-builtin - except EOFError: - answer = '' - return answer + answer = raw_input(question) + except NameError: + answer = input(question) # pylint: disable=bad-builtin + except EOFError: + answer = '' + return answer def symlink_force(target, link_name): - """Force symlink, equivalent of 'ln -sf'. - - Args: - target: items to link to. - link_name: name of the link. - """ - try: - os.symlink(target, link_name) - except OSError as e: - if e.errno == errno.EEXIST: - os.remove(link_name) - os.symlink(target, link_name) - else: - raise e + """Force symlink, equivalent of 'ln -sf'. + +Args: + target: items to link to. + link_name: name of the link. +""" + try: + os.symlink(target, link_name) + except OSError as e: + if e.errno == errno.EEXIST: + os.remove(link_name) + os.symlink(target, link_name) + else: + raise e def sed_in_place(filename, old, new): - """Replace old string with new string in file. + """Replace old string with new string in file. - Args: - filename: string for filename. - old: string to replace. - new: new string to replace to. - """ - with open(filename, 'r') as f: - filedata = f.read() - newdata = filedata.replace(old, new) - with open(filename, 'w') as f: - f.write(newdata) +Args: + filename: string for filename. + old: string to replace. + new: new string to replace to. 
+""" + with open(filename, 'r') as f: + filedata = f.read() + newdata = filedata.replace(old, new) + with open(filename, 'w') as f: + f.write(newdata) def write_to_bazelrc(line): - with open(_TF_BAZELRC, 'a') as f: - f.write(line + '\n') + with open(_TF_BAZELRC, 'a') as f: + f.write(line + '\n') def write_action_env_to_bazelrc(var_name, var): - write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) + write_to_bazelrc('build --action_env %s="%s"' % (var_name, str(var))) def run_shell(cmd, allow_non_zero=False): - if allow_non_zero: - try: - output = subprocess.check_output(cmd) - except subprocess.CalledProcessError as e: - output = e.output - else: - output = subprocess.check_output(cmd) - return output.decode('UTF-8').strip() + if allow_non_zero: + try: + output = subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + output = e.output + else: + output = subprocess.check_output(cmd) + return output.decode('UTF-8').strip() def cygpath(path): - """Convert path from posix to windows.""" - return os.path.abspath(path).replace('\\', '/') + """Convert path from posix to windows.""" + return os.path.abspath(path).replace('\\', '/') def get_python_path(environ_cp, python_bin_path): - """Get the python site package paths.""" - python_paths = [] - if environ_cp.get('PYTHONPATH'): - python_paths = environ_cp.get('PYTHONPATH').split(':') - try: - library_paths = run_shell([ + """Get the python site package paths.""" + python_paths = [] + if environ_cp.get('PYTHONPATH'): + python_paths = environ_cp.get('PYTHONPATH').split(':') + try: + library_paths = run_shell([ + python_bin_path, '-c', + 'import site; print("\\n".join(site.getsitepackages()))' + ]).split('\n') + except subprocess.CalledProcessError: + library_paths = [ + run_shell([ python_bin_path, '-c', - 'import site; print("\\n".join(site.getsitepackages()))' - ]).split('\n') - except subprocess.CalledProcessError: - library_paths = [ - run_shell([ - python_bin_path, '-c', - 'from distutils.sysconfig import get_python_lib;' - 'print(get_python_lib())' - ]) - ] + 'from distutils.sysconfig import get_python_lib;' + 'print(get_python_lib())' + ]) + ] - all_paths = set(python_paths + library_paths) + all_paths = set(python_paths + library_paths) - paths = [] - for path in all_paths: - if os.path.isdir(path): - paths.append(path) - return paths + paths = [] + for path in all_paths: + if os.path.isdir(path): + paths.append(path) + return paths def get_python_major_version(python_bin_path): - """Get the python major version.""" - return run_shell( - [python_bin_path, '-c', 'import sys; print(sys.version[0])']) + """Get the python major version.""" + return run_shell( + [python_bin_path, '-c', 'import sys; print(sys.version[0])']) def setup_python(environ_cp): - """Setup python related env variables.""" - # Get PYTHON_BIN_PATH, default is the current running python. - default_python_bin_path = sys.executable - ask_python_bin_path = ( - 'Please specify the location of python. [Default is ' - '%s]: ') % default_python_bin_path - while True: - python_bin_path = get_from_env_or_user_or_default( - environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path, - default_python_bin_path) - # Check if the path is valid - if os.path.isfile(python_bin_path) and os.access( - python_bin_path, os.X_OK): - break - elif not os.path.exists(python_bin_path): - print('Invalid python path: %s cannot be found.' % python_bin_path) - else: - print('%s is not executable. Is it the python binary?' 
% - python_bin_path) - environ_cp['PYTHON_BIN_PATH'] = '' - - # Convert python path to Windows style before checking lib and version - if is_windows() or is_cygwin(): - python_bin_path = cygpath(python_bin_path) - - # Get PYTHON_LIB_PATH - python_lib_path = environ_cp.get('PYTHON_LIB_PATH') - if not python_lib_path: - python_lib_paths = get_python_path(environ_cp, python_bin_path) - if environ_cp.get('USE_DEFAULT_PYTHON_LIB_PATH') == '1': - python_lib_path = python_lib_paths[0] - else: - print('Found possible Python library paths:\n %s' % - '\n '.join(python_lib_paths)) - default_python_lib_path = python_lib_paths[0] - python_lib_path = get_input( - 'Please input the desired Python library path to use. ' - 'Default is [%s]\n' % python_lib_paths[0]) - if not python_lib_path: - python_lib_path = default_python_lib_path - environ_cp['PYTHON_LIB_PATH'] = python_lib_path - - python_major_version = get_python_major_version(python_bin_path) - - # Convert python path to Windows style before writing into bazel.rc - if is_windows() or is_cygwin(): - python_lib_path = cygpath(python_lib_path) - - # Set-up env variables used by python_configure.bzl - write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) - write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) - write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) - environ_cp['PYTHON_BIN_PATH'] = python_bin_path - - # Write tools/python_bin_path.sh - with open( - os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), - 'w') as f: - f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) + """Setup python related env variables.""" + # Get PYTHON_BIN_PATH, default is the current running python. + default_python_bin_path = sys.executable + ask_python_bin_path = ( + 'Please specify the location of python. [Default is ' + '%s]: ') % default_python_bin_path + while True: + python_bin_path = get_from_env_or_user_or_default( + environ_cp, 'PYTHON_BIN_PATH', ask_python_bin_path, + default_python_bin_path) + # Check if the path is valid + if os.path.isfile(python_bin_path) and os.access( + python_bin_path, os.X_OK): + break + elif not os.path.exists(python_bin_path): + print('Invalid python path: %s cannot be found.' % python_bin_path) + else: + print('%s is not executable. Is it the python binary?' % + python_bin_path) + environ_cp['PYTHON_BIN_PATH'] = '' + + # Convert python path to Windows style before checking lib and version + if is_windows() or is_cygwin(): + python_bin_path = cygpath(python_bin_path) + + # Get PYTHON_LIB_PATH + python_lib_path = environ_cp.get('PYTHON_LIB_PATH') + if not python_lib_path: + python_lib_paths = get_python_path(environ_cp, python_bin_path) + if environ_cp.get('USE_DEFAULT_PYTHON_LIB_PATH') == '1': + python_lib_path = python_lib_paths[0] + else: + print('Found possible Python library paths:\n %s' % + '\n '.join(python_lib_paths)) + default_python_lib_path = python_lib_paths[0] + python_lib_path = get_input( + 'Please input the desired Python library path to use. 
' + 'Default is [%s]\n' % python_lib_paths[0]) + if not python_lib_path: + python_lib_path = default_python_lib_path + environ_cp['PYTHON_LIB_PATH'] = python_lib_path + + python_major_version = get_python_major_version(python_bin_path) + + # Convert python path to Windows style before writing into bazel.rc + if is_windows() or is_cygwin(): + python_lib_path = cygpath(python_lib_path) + + # Set-up env variables used by python_configure.bzl + write_action_env_to_bazelrc('PYTHON_BIN_PATH', python_bin_path) + write_action_env_to_bazelrc('PYTHON_LIB_PATH', python_lib_path) + write_to_bazelrc('build --python_path=\"%s"' % python_bin_path) + environ_cp['PYTHON_BIN_PATH'] = python_bin_path + + # Write tools/python_bin_path.sh + with open( + os.path.join(_TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), + 'w') as f: + f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) def reset_tf_configure_bazelrc(workspace_path): - """Reset file that contains customized config settings.""" - open(_TF_BAZELRC, 'w').close() - bazelrc_path = os.path.join(workspace_path, '.bazelrc') - - data = [] - if os.path.exists(bazelrc_path): - with open(bazelrc_path, 'r') as f: - data = f.read().splitlines() - with open(bazelrc_path, 'w') as f: - for l in data: - if _TF_BAZELRC_FILENAME in l: - continue - f.write('%s\n' % l) - if is_windows(): - tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") - else: - tf_bazelrc_path = _TF_BAZELRC - f.write('import %s\n' % tf_bazelrc_path) + """Reset file that contains customized config settings.""" + open(_TF_BAZELRC, 'w').close() + bazelrc_path = os.path.join(workspace_path, '.bazelrc') + + data = [] + if os.path.exists(bazelrc_path): + with open(bazelrc_path, 'r') as f: + data = f.read().splitlines() + with open(bazelrc_path, 'w') as f: + for l in data: + if _TF_BAZELRC_FILENAME in l: + continue + f.write('%s\n' % l) + if is_windows(): + tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") + else: + tf_bazelrc_path = _TF_BAZELRC + f.write('import %s\n' % tf_bazelrc_path) def cleanup_makefile(): - """Delete any leftover BUILD files from the Makefile build. + """Delete any leftover BUILD files from the Makefile build. - These files could interfere with Bazel parsing. - """ - makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow', - 'contrib', 'makefile', 'downloads') - if os.path.isdir(makefile_download_dir): - for root, _, filenames in os.walk(makefile_download_dir): - for f in filenames: - if f.endswith('BUILD'): - os.remove(os.path.join(root, f)) +These files could interfere with Bazel parsing. +""" + makefile_download_dir = os.path.join(_TF_WORKSPACE_ROOT, 'tensorflow', + 'contrib', 'makefile', 'downloads') + if os.path.isdir(makefile_download_dir): + for root, _, filenames in os.walk(makefile_download_dir): + for f in filenames: + if f.endswith('BUILD'): + os.remove(os.path.join(root, f)) def get_var(environ_cp, @@ -285,81 +285,81 @@ def get_var(environ_cp, question=None, yes_reply=None, no_reply=None): - """Get boolean input from user. - - If var_name is not set in env, ask user to enable query_item or not. If the - response is empty, use the default. - - Args: - environ_cp: copy of the os.environ. - var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". - query_item: string for feature related to the variable, e.g. "Hadoop File - System". - enabled_by_default: boolean for default behavior. - question: optional string for how to ask for user input. - yes_reply: optional string for reply when feature is enabled. 
- no_reply: optional string for reply when feature is disabled. - - Returns: - boolean value of the variable. - - Raises: - UserInputError: if an environment variable is set, but it cannot be - interpreted as a boolean indicator, assume that the user has made a - scripting error, and will continue to provide invalid input. - Raise the error to avoid infinitely looping. - """ - if not question: - question = 'Do you wish to build TensorFlow with %s support?' % query_item - if not yes_reply: - yes_reply = '%s support will be enabled for TensorFlow.' % query_item - if not no_reply: - no_reply = 'No %s' % yes_reply - - yes_reply += '\n' - no_reply += '\n' - - if enabled_by_default: - question += ' [Y/n]: ' + """Get boolean input from user. + +If var_name is not set in env, ask user to enable query_item or not. If the +response is empty, use the default. + +Args: + environ_cp: copy of the os.environ. + var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". + query_item: string for feature related to the variable, e.g. "Hadoop File + System". + enabled_by_default: boolean for default behavior. + question: optional string for how to ask for user input. + yes_reply: optional string for reply when feature is enabled. + no_reply: optional string for reply when feature is disabled. + +Returns: + boolean value of the variable. + +Raises: + UserInputError: if an environment variable is set, but it cannot be + interpreted as a boolean indicator, assume that the user has made a + scripting error, and will continue to provide invalid input. + Raise the error to avoid infinitely looping. +""" + if not question: + question = 'Do you wish to build TensorFlow with %s support?' % query_item + if not yes_reply: + yes_reply = '%s support will be enabled for TensorFlow.' % query_item + if not no_reply: + no_reply = 'No %s' % yes_reply + + yes_reply += '\n' + no_reply += '\n' + + if enabled_by_default: + question += ' [Y/n]: ' + else: + question += ' [y/N]: ' + + var = environ_cp.get(var_name) + if var is not None: + var_content = var.strip().lower() + true_strings = ('1', 't', 'true', 'y', 'yes') + false_strings = ('0', 'f', 'false', 'n', 'no') + if var_content in true_strings: + var = True + elif var_content in false_strings: + var = False else: - question += ' [y/N]: ' - - var = environ_cp.get(var_name) - if var is not None: - var_content = var.strip().lower() - true_strings = ('1', 't', 'true', 'y', 'yes') - false_strings = ('0', 'f', 'false', 'n', 'no') - if var_content in true_strings: - var = True - elif var_content in false_strings: - var = False - else: - raise UserInputError( - 'Environment variable %s must be set as a boolean indicator.\n' - 'The following are accepted as TRUE : %s.\n' - 'The following are accepted as FALSE: %s.\n' - 'Current value is %s.' % (var_name, ', '.join(true_strings), - ', '.join(false_strings), var)) - - while var is None: - user_input_origin = get_input(question) - user_input = user_input_origin.strip().lower() - if user_input == 'y': - print(yes_reply) - var = True - elif user_input == 'n': - print(no_reply) - var = False - elif not user_input: - if enabled_by_default: - print(yes_reply) - var = True - else: - print(no_reply) - var = False - else: - print('Invalid selection: %s' % user_input_origin) - return var + raise UserInputError( + 'Environment variable %s must be set as a boolean indicator.\n' + 'The following are accepted as TRUE : %s.\n' + 'The following are accepted as FALSE: %s.\n' + 'Current value is %s.' 
% (var_name, ', '.join(true_strings), + ', '.join(false_strings), var)) + + while var is None: + user_input_origin = get_input(question) + user_input = user_input_origin.strip().lower() + if user_input == 'y': + print(yes_reply) + var = True + elif user_input == 'n': + print(no_reply) + var = False + elif not user_input: + if enabled_by_default: + print(yes_reply) + var = True + else: + print(no_reply) + var = False + else: + print('Invalid selection: %s' % user_input_origin) + return var def set_build_var(environ_cp, @@ -368,31 +368,31 @@ def set_build_var(environ_cp, option_name, enabled_by_default, bazel_config_name=None): - """Set if query_item will be enabled for the build. - - Ask user if query_item will be enabled. Default is used if no input is given. - Set subprocess environment variable and write to .bazelrc if enabled. - - Args: - environ_cp: copy of the os.environ. - var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". - query_item: string for feature related to the variable, e.g. "Hadoop File - System". - option_name: string for option to define in .bazelrc. - enabled_by_default: boolean for default behavior. - bazel_config_name: Name for Bazel --config argument to enable build feature. - """ - - var = str( - int(get_var(environ_cp, var_name, query_item, enabled_by_default))) - environ_cp[var_name] = var - if var == '1': - write_to_bazelrc('build --define %s=true' % option_name) - elif bazel_config_name is not None: - # TODO(mikecase): Migrate all users of configure.py to use --config Bazel - # options and not to set build configs through environment variables. - write_to_bazelrc( - 'build:%s --define %s=true' % (bazel_config_name, option_name)) + """Set if query_item will be enabled for the build. + +Ask user if query_item will be enabled. Default is used if no input is given. +Set subprocess environment variable and write to .bazelrc if enabled. + +Args: + environ_cp: copy of the os.environ. + var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". + query_item: string for feature related to the variable, e.g. "Hadoop File + System". + option_name: string for option to define in .bazelrc. + enabled_by_default: boolean for default behavior. + bazel_config_name: Name for Bazel --config argument to enable build feature. +""" + + var = str( + int(get_var(environ_cp, var_name, query_item, enabled_by_default))) + environ_cp[var_name] = var + if var == '1': + write_to_bazelrc('build --define %s=true' % option_name) + elif bazel_config_name is not None: + # TODO(mikecase): Migrate all users of configure.py to use --config Bazel + # options and not to set build configs through environment variables. + write_to_bazelrc( + 'build:%s --define %s=true' % (bazel_config_name, option_name)) def set_action_env_var(environ_cp, @@ -402,201 +402,201 @@ def set_action_env_var(environ_cp, question=None, yes_reply=None, no_reply=None): - """Set boolean action_env variable. + """Set boolean action_env variable. - Ask user if query_item will be enabled. Default is used if no input is given. - Set environment variable and write to .bazelrc. +Ask user if query_item will be enabled. Default is used if no input is given. +Set environment variable and write to .bazelrc. - Args: - environ_cp: copy of the os.environ. - var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". - query_item: string for feature related to the variable, e.g. "Hadoop File - System". - enabled_by_default: boolean for default behavior. 
- question: optional string for how to ask for user input. - yes_reply: optional string for reply when feature is enabled. - no_reply: optional string for reply when feature is disabled. - """ - var = int( - get_var(environ_cp, var_name, query_item, enabled_by_default, question, - yes_reply, no_reply)) +Args: + environ_cp: copy of the os.environ. + var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". + query_item: string for feature related to the variable, e.g. "Hadoop File + System". + enabled_by_default: boolean for default behavior. + question: optional string for how to ask for user input. + yes_reply: optional string for reply when feature is enabled. + no_reply: optional string for reply when feature is disabled. +""" + var = int( + get_var(environ_cp, var_name, query_item, enabled_by_default, question, + yes_reply, no_reply)) - write_action_env_to_bazelrc(var_name, var) - environ_cp[var_name] = str(var) + write_action_env_to_bazelrc(var_name, var) + environ_cp[var_name] = str(var) def convert_version_to_int(version): - """Convert a version number to a integer that can be used to compare. + """Convert a version number to a integer that can be used to compare. - Version strings of the form X.YZ and X.Y.Z-xxxxx are supported. The - 'xxxxx' part, for instance 'homebrew' on OS/X, is ignored. +Version strings of the form X.YZ and X.Y.Z-xxxxx are supported. The +'xxxxx' part, for instance 'homebrew' on OS/X, is ignored. - Args: - version: a version to be converted +Args: + version: a version to be converted - Returns: - An integer if converted successfully, otherwise return None. - """ - version = version.split('-')[0] - version_segments = version.split('.') - for seg in version_segments: - if not seg.isdigit(): - return None +Returns: + An integer if converted successfully, otherwise return None. +""" + version = version.split('-')[0] + version_segments = version.split('.') + for seg in version_segments: + if not seg.isdigit(): + return None - version_str = ''.join(['%03d' % int(seg) for seg in version_segments]) - return int(version_str) + version_str = ''.join(['%03d' % int(seg) for seg in version_segments]) + return int(version_str) def check_bazel_version(min_version): - """Check installed bazel version is at least min_version. - - Args: - min_version: string for minimum bazel version. - - Returns: - The bazel version detected. - """ - if which('bazel') is None: - print('Cannot find bazel. Please install bazel.') - sys.exit(0) - curr_version = run_shell( - ['bazel', '--batch', '--bazelrc=/dev/null', 'version']) - - for line in curr_version.split('\n'): - if 'Build label: ' in line: - curr_version = line.split('Build label: ')[1] - break - - min_version_int = convert_version_to_int(min_version) - curr_version_int = convert_version_to_int(curr_version) - - # Check if current bazel version can be detected properly. - if not curr_version_int: - print('WARNING: current bazel installation is not a release version.') - print('Make sure you are running at least bazel %s' % min_version) - return curr_version - - print('You have bazel %s installed.' % curr_version) - - if curr_version_int < min_version_int: - print( - 'Please upgrade your bazel installation to version %s or higher to ' - 'build TensorFlow!' % min_version) - sys.exit(0) + """Check installed bazel version is at least min_version. + +Args: + min_version: string for minimum bazel version. + +Returns: + The bazel version detected. +""" + if which('bazel') is None: + print('Cannot find bazel. 
Please install bazel.') + sys.exit(0) + curr_version = run_shell( + ['bazel', '--batch', '--bazelrc=/dev/null', 'version']) + + for line in curr_version.split('\n'): + if 'Build label: ' in line: + curr_version = line.split('Build label: ')[1] + break + + min_version_int = convert_version_to_int(min_version) + curr_version_int = convert_version_to_int(curr_version) + + # Check if current bazel version can be detected properly. + if not curr_version_int: + print('WARNING: current bazel installation is not a release version.') + print('Make sure you are running at least bazel %s' % min_version) return curr_version + print('You have bazel %s installed.' % curr_version) + + if curr_version_int < min_version_int: + print( + 'Please upgrade your bazel installation to version %s or higher to ' + 'build TensorFlow!' % min_version) + sys.exit(0) + return curr_version + def set_cc_opt_flags(environ_cp): - """Set up architecture-dependent optimization flags. - - Also append CC optimization flags to bazel.rc.. - - Args: - environ_cp: copy of the os.environ. - """ - if is_ppc64le(): - # gcc on ppc64le does not support -march, use mcpu instead - default_cc_opt_flags = '-mcpu=native' - elif is_windows(): - default_cc_opt_flags = '/arch:AVX' - else: - default_cc_opt_flags = '-march=native' - question = ( - 'Please specify optimization flags to use during compilation when' - ' bazel option "--config=opt" is specified [Default is %s]: ' - ) % default_cc_opt_flags - cc_opt_flags = get_from_env_or_user_or_default( - environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) - for opt in cc_opt_flags.split(): - write_to_bazelrc('build:opt --copt=%s' % opt) - # It should be safe on the same build host. - if not is_ppc64le() and not is_windows(): - write_to_bazelrc('build:opt --host_copt=-march=native') - write_to_bazelrc('build:opt --define with_default_optimizations=true') + """Set up architecture-dependent optimization flags. + +Also append CC optimization flags to bazel.rc.. + +Args: + environ_cp: copy of the os.environ. +""" + if is_ppc64le(): + # gcc on ppc64le does not support -march, use mcpu instead + default_cc_opt_flags = '-mcpu=native' + elif is_windows(): + default_cc_opt_flags = '/arch:AVX' + else: + default_cc_opt_flags = '-march=native' + question = ( + 'Please specify optimization flags to use during compilation when' + ' bazel option "--config=opt" is specified [Default is %s]: ' + ) % default_cc_opt_flags + cc_opt_flags = get_from_env_or_user_or_default( + environ_cp, 'CC_OPT_FLAGS', question, default_cc_opt_flags) + for opt in cc_opt_flags.split(): + write_to_bazelrc('build:opt --copt=%s' % opt) + # It should be safe on the same build host. + if not is_ppc64le() and not is_windows(): + write_to_bazelrc('build:opt --host_copt=-march=native') + write_to_bazelrc('build:opt --define with_default_optimizations=true') def set_tf_cuda_clang(environ_cp): - """set TF_CUDA_CLANG action_env. - - Args: - environ_cp: copy of the os.environ. - """ - question = 'Do you want to use clang as CUDA compiler?' - yes_reply = 'Clang will be used as CUDA compiler.' - no_reply = 'nvcc will be used as CUDA compiler.' - set_action_env_var( - environ_cp, - 'TF_CUDA_CLANG', - None, - False, - question=question, - yes_reply=yes_reply, - no_reply=no_reply) + """set TF_CUDA_CLANG action_env. + +Args: + environ_cp: copy of the os.environ. +""" + question = 'Do you want to use clang as CUDA compiler?' + yes_reply = 'Clang will be used as CUDA compiler.' + no_reply = 'nvcc will be used as CUDA compiler.' 
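+  # Illustrative note: get_var() (defined above) treats '1'/'t'/'true'/'y'/
+  # 'yes' as True and '0'/'f'/'false'/'n'/'no' as False, so a hypothetical
+  # non-interactive run could preset TF_CUDA_CLANG=1 to skip this prompt.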
+ set_action_env_var( + environ_cp, + 'TF_CUDA_CLANG', + None, + False, + question=question, + yes_reply=yes_reply, + no_reply=no_reply) def set_tf_download_clang(environ_cp): - """Set TF_DOWNLOAD_CLANG action_env.""" - question = 'Do you wish to download a fresh release of clang? (Experimental)' - yes_reply = 'Clang will be downloaded and used to compile tensorflow.' - no_reply = 'Clang will not be downloaded.' - set_action_env_var( - environ_cp, - 'TF_DOWNLOAD_CLANG', - None, - False, - question=question, - yes_reply=yes_reply, - no_reply=no_reply) + """Set TF_DOWNLOAD_CLANG action_env.""" + question = 'Do you wish to download a fresh release of clang? (Experimental)' + yes_reply = 'Clang will be downloaded and used to compile tensorflow.' + no_reply = 'Clang will not be downloaded.' + set_action_env_var( + environ_cp, + 'TF_DOWNLOAD_CLANG', + None, + False, + question=question, + yes_reply=yes_reply, + no_reply=no_reply) def get_from_env_or_user_or_default(environ_cp, var_name, ask_for_var, var_default): - """Get var_name either from env, or user or default. + """Get var_name either from env, or user or default. - If var_name has been set as environment variable, use the preset value, else - ask for user input. If no input is provided, the default is used. +If var_name has been set as environment variable, use the preset value, else +ask for user input. If no input is provided, the default is used. - Args: - environ_cp: copy of the os.environ. - var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". - ask_for_var: string for how to ask for user input. - var_default: default value string. +Args: + environ_cp: copy of the os.environ. + var_name: string for name of environment variable, e.g. "TF_NEED_HDFS". + ask_for_var: string for how to ask for user input. + var_default: default value string. - Returns: - string value for var_name - """ - var = environ_cp.get(var_name) - if not var: - var = get_input(ask_for_var) - print('\n') - if not var: - var = var_default - return var +Returns: + string value for var_name +""" + var = environ_cp.get(var_name) + if not var: + var = get_input(ask_for_var) + print('\n') + if not var: + var = var_default + return var def set_clang_cuda_compiler_path(environ_cp): - """Set CLANG_CUDA_COMPILER_PATH.""" - default_clang_path = which('clang') or '' - ask_clang_path = ( - 'Please specify which clang should be used as device and ' - 'host compiler. [Default is %s]: ') % default_clang_path - - while True: - clang_cuda_compiler_path = get_from_env_or_user_or_default( - environ_cp, 'CLANG_CUDA_COMPILER_PATH', ask_clang_path, - default_clang_path) - if os.path.exists(clang_cuda_compiler_path): - break - - # Reset and retry - print('Invalid clang path: %s cannot be found.' % - clang_cuda_compiler_path) - environ_cp['CLANG_CUDA_COMPILER_PATH'] = '' - - # Set CLANG_CUDA_COMPILER_PATH - environ_cp['CLANG_CUDA_COMPILER_PATH'] = clang_cuda_compiler_path - write_action_env_to_bazelrc('CLANG_CUDA_COMPILER_PATH', - clang_cuda_compiler_path) + """Set CLANG_CUDA_COMPILER_PATH.""" + default_clang_path = which('clang') or '' + ask_clang_path = ( + 'Please specify which clang should be used as device and ' + 'host compiler. [Default is %s]: ') % default_clang_path + + while True: + clang_cuda_compiler_path = get_from_env_or_user_or_default( + environ_cp, 'CLANG_CUDA_COMPILER_PATH', ask_clang_path, + default_clang_path) + if os.path.exists(clang_cuda_compiler_path): + break + + # Reset and retry + print('Invalid clang path: %s cannot be found.' 
% + clang_cuda_compiler_path) + environ_cp['CLANG_CUDA_COMPILER_PATH'] = '' + + # Set CLANG_CUDA_COMPILER_PATH + environ_cp['CLANG_CUDA_COMPILER_PATH'] = clang_cuda_compiler_path + write_action_env_to_bazelrc('CLANG_CUDA_COMPILER_PATH', + clang_cuda_compiler_path) def prompt_loop_or_load_from_env(environ_cp, @@ -607,986 +607,985 @@ def prompt_loop_or_load_from_env(environ_cp, error_msg, suppress_default_error=False, n_ask_attempts=_DEFAULT_PROMPT_ASK_ATTEMPTS): - """Loop over user prompts for an ENV param until receiving a valid response. - - For the env param var_name, read from the environment or verify user input - until receiving valid input. When done, set var_name in the environ_cp to its - new value. - - Args: - environ_cp: (Dict) copy of the os.environ. - var_name: (String) string for name of environment variable, e.g. "TF_MYVAR". - var_default: (String) default value string. - ask_for_var: (String) string for how to ask for user input. - check_success: (Function) function that takes one argument and returns a - boolean. Should return True if the value provided is considered valid. May - contain a complex error message if error_msg does not provide enough - information. In that case, set suppress_default_error to True. - error_msg: (String) String with one and only one '%s'. Formatted with each - invalid response upon check_success(input) failure. - suppress_default_error: (Bool) Suppress the above error message in favor of - one from the check_success function. - n_ask_attempts: (Integer) Number of times to query for valid input before - raising an error and quitting. - - Returns: - [String] The value of var_name after querying for input. - - Raises: - UserInputError: if a query has been attempted n_ask_attempts times without - success, assume that the user has made a scripting error, and will - continue to provide invalid input. Raise the error to avoid infinitely - looping. - """ - default = environ_cp.get(var_name) or var_default - full_query = '%s [Default is %s]: ' % ( - ask_for_var, - default, - ) - - for _ in range(n_ask_attempts): - val = get_from_env_or_user_or_default(environ_cp, var_name, full_query, - default) - if check_success(val): - break - if not suppress_default_error: - print(error_msg % val) - environ_cp[var_name] = '' - else: - raise UserInputError( - 'Invalid %s setting was provided %d times in a row. ' - 'Assuming to be a scripting mistake.' % (var_name, n_ask_attempts)) - - environ_cp[var_name] = val - return val + """Loop over user prompts for an ENV param until receiving a valid response. + +For the env param var_name, read from the environment or verify user input +until receiving valid input. When done, set var_name in the environ_cp to its +new value. + +Args: + environ_cp: (Dict) copy of the os.environ. + var_name: (String) string for name of environment variable, e.g. "TF_MYVAR". + var_default: (String) default value string. + ask_for_var: (String) string for how to ask for user input. + check_success: (Function) function that takes one argument and returns a + boolean. Should return True if the value provided is considered valid. May + contain a complex error message if error_msg does not provide enough + information. In that case, set suppress_default_error to True. + error_msg: (String) String with one and only one '%s'. Formatted with each + invalid response upon check_success(input) failure. + suppress_default_error: (Bool) Suppress the above error message in favor of + one from the check_success function. 
+ n_ask_attempts: (Integer) Number of times to query for valid input before + raising an error and quitting. + +Returns: + [String] The value of var_name after querying for input. + +Raises: + UserInputError: if a query has been attempted n_ask_attempts times without + success, assume that the user has made a scripting error, and will + continue to provide invalid input. Raise the error to avoid infinitely + looping. +""" + default = environ_cp.get(var_name) or var_default + full_query = '%s [Default is %s]: ' % ( + ask_for_var, + default, + ) + + for _ in range(n_ask_attempts): + val = get_from_env_or_user_or_default(environ_cp, var_name, full_query, + default) + if check_success(val): + break + if not suppress_default_error: + print(error_msg % val) + environ_cp[var_name] = '' + else: + raise UserInputError( + 'Invalid %s setting was provided %d times in a row. ' + 'Assuming to be a scripting mistake.' % (var_name, n_ask_attempts)) + + environ_cp[var_name] = val + return val def create_android_ndk_rule(environ_cp): - """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule.""" - if is_windows() or is_cygwin(): - default_ndk_path = cygpath( - '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA']) - elif is_macos(): - default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] - else: - default_ndk_path = '%s/Android/Sdk/ndk-bundle' % environ_cp['HOME'] - - def valid_ndk_path(path): - return (os.path.exists(path) - and os.path.exists(os.path.join(path, 'source.properties'))) - - android_ndk_home_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_NDK_HOME', - var_default=default_ndk_path, - ask_for_var='Please specify the home path of the Android NDK to use.', - check_success=valid_ndk_path, - error_msg=('The path %s or its child file "source.properties" ' - 'does not exist.')) - write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path) - write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL', - check_ndk_level(android_ndk_home_path)) + """Set ANDROID_NDK_HOME and write Android NDK WORKSPACE rule.""" + if is_windows() or is_cygwin(): + default_ndk_path = cygpath( + '%s/Android/Sdk/ndk-bundle' % environ_cp['APPDATA']) + elif is_macos(): + default_ndk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] + else: + default_ndk_path = '%s/Android/Sdk/ndk-bundle' % environ_cp['HOME'] + + def valid_ndk_path(path): + return (os.path.exists(path) + and os.path.exists(os.path.join(path, 'source.properties'))) + + android_ndk_home_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='ANDROID_NDK_HOME', + var_default=default_ndk_path, + ask_for_var='Please specify the home path of the Android NDK to use.', + check_success=valid_ndk_path, + error_msg=('The path %s or its child file "source.properties" ' + 'does not exist.')) + write_action_env_to_bazelrc('ANDROID_NDK_HOME', android_ndk_home_path) + write_action_env_to_bazelrc('ANDROID_NDK_API_LEVEL', + check_ndk_level(android_ndk_home_path)) def create_android_sdk_rule(environ_cp): - """Set Android variables and write Android SDK WORKSPACE rule.""" - if is_windows() or is_cygwin(): - default_sdk_path = cygpath('%s/Android/Sdk' % environ_cp['APPDATA']) - elif is_macos(): - default_sdk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] - else: - default_sdk_path = '%s/Android/Sdk' % environ_cp['HOME'] - - def valid_sdk_path(path): - return (os.path.exists(path) - and os.path.exists(os.path.join(path, 'platforms')) - and os.path.exists(os.path.join(path, 'build-tools'))) - - 
android_sdk_home_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_SDK_HOME', - var_default=default_sdk_path, - ask_for_var='Please specify the home path of the Android SDK to use.', - check_success=valid_sdk_path, - error_msg=('Either %s does not exist, or it does not contain the ' - 'subdirectories "platforms" and "build-tools".')) - - platforms = os.path.join(android_sdk_home_path, 'platforms') - api_levels = sorted(os.listdir(platforms)) - api_levels = [x.replace('android-', '') for x in api_levels] - - def valid_api_level(api_level): - return os.path.exists( - os.path.join(android_sdk_home_path, 'platforms', - 'android-' + api_level)) - - android_api_level = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_API_LEVEL', - var_default=api_levels[-1], - ask_for_var=('Please specify the Android SDK API level to use. ' - '[Available levels: %s]') % api_levels, - check_success=valid_api_level, - error_msg='Android-%s is not present in the SDK path.') - - build_tools = os.path.join(android_sdk_home_path, 'build-tools') - versions = sorted(os.listdir(build_tools)) - - def valid_build_tools(version): - return os.path.exists( - os.path.join(android_sdk_home_path, 'build-tools', version)) - - android_build_tools_version = prompt_loop_or_load_from_env( - environ_cp, - var_name='ANDROID_BUILD_TOOLS_VERSION', - var_default=versions[-1], - ask_for_var=('Please specify an Android build tools version to use. ' - '[Available versions: %s]') % versions, - check_success=valid_build_tools, - error_msg=('The selected SDK does not have build-tools version %s ' - 'available.')) - - write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION', - android_build_tools_version) - write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level) - write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path) + """Set Android variables and write Android SDK WORKSPACE rule.""" + if is_windows() or is_cygwin(): + default_sdk_path = cygpath('%s/Android/Sdk' % environ_cp['APPDATA']) + elif is_macos(): + default_sdk_path = '%s/library/Android/Sdk/ndk-bundle' % environ_cp['HOME'] + else: + default_sdk_path = '%s/Android/Sdk' % environ_cp['HOME'] + + def valid_sdk_path(path): + return (os.path.exists(path) + and os.path.exists(os.path.join(path, 'platforms')) + and os.path.exists(os.path.join(path, 'build-tools'))) + + android_sdk_home_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='ANDROID_SDK_HOME', + var_default=default_sdk_path, + ask_for_var='Please specify the home path of the Android SDK to use.', + check_success=valid_sdk_path, + error_msg=('Either %s does not exist, or it does not contain the ' + 'subdirectories "platforms" and "build-tools".')) + + platforms = os.path.join(android_sdk_home_path, 'platforms') + api_levels = sorted(os.listdir(platforms)) + api_levels = [x.replace('android-', '') for x in api_levels] + + def valid_api_level(api_level): + return os.path.exists( + os.path.join(android_sdk_home_path, 'platforms', + 'android-' + api_level)) + + android_api_level = prompt_loop_or_load_from_env( + environ_cp, + var_name='ANDROID_API_LEVEL', + var_default=api_levels[-1], + ask_for_var=('Please specify the Android SDK API level to use. 
' + '[Available levels: %s]') % api_levels, + check_success=valid_api_level, + error_msg='Android-%s is not present in the SDK path.') + + build_tools = os.path.join(android_sdk_home_path, 'build-tools') + versions = sorted(os.listdir(build_tools)) + + def valid_build_tools(version): + return os.path.exists( + os.path.join(android_sdk_home_path, 'build-tools', version)) + + android_build_tools_version = prompt_loop_or_load_from_env( + environ_cp, + var_name='ANDROID_BUILD_TOOLS_VERSION', + var_default=versions[-1], + ask_for_var=('Please specify an Android build tools version to use. ' + '[Available versions: %s]') % versions, + check_success=valid_build_tools, + error_msg=('The selected SDK does not have build-tools version %s ' + 'available.')) + + write_action_env_to_bazelrc('ANDROID_BUILD_TOOLS_VERSION', + android_build_tools_version) + write_action_env_to_bazelrc('ANDROID_SDK_API_LEVEL', android_api_level) + write_action_env_to_bazelrc('ANDROID_SDK_HOME', android_sdk_home_path) def check_ndk_level(android_ndk_home_path): - """Check the revision number of an Android NDK path.""" - properties_path = '%s/source.properties' % android_ndk_home_path - if is_windows() or is_cygwin(): - properties_path = cygpath(properties_path) - with open(properties_path, 'r') as f: - filedata = f.read() - - revision = re.search(r'Pkg.Revision = (\d+)', filedata) - if revision: - ndk_api_level = revision.group(1) - else: - raise Exception('Unable to parse NDK revision.') - if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS: - print( - 'WARNING: The API level of the NDK in %s is %s, which is not ' - 'supported by Bazel (officially supported versions: %s). Please use ' - 'another version. Compiling Android targets may result in confusing ' - 'errors.\n' % (android_ndk_home_path, ndk_api_level, - _SUPPORTED_ANDROID_NDK_VERSIONS)) - return ndk_api_level + """Check the revision number of an Android NDK path.""" + properties_path = '%s/source.properties' % android_ndk_home_path + if is_windows() or is_cygwin(): + properties_path = cygpath(properties_path) + with open(properties_path, 'r') as f: + filedata = f.read() + + revision = re.search(r'Pkg.Revision = (\d+)', filedata) + if revision: + ndk_api_level = revision.group(1) + else: + raise Exception('Unable to parse NDK revision.') + if int(ndk_api_level) not in _SUPPORTED_ANDROID_NDK_VERSIONS: + print( + 'WARNING: The API level of the NDK in %s is %s, which is not ' + 'supported by Bazel (officially supported versions: %s). Please use ' + 'another version. 
Compiling Android targets may result in confusing ' + 'errors.\n' % (android_ndk_home_path, ndk_api_level, + _SUPPORTED_ANDROID_NDK_VERSIONS)) + return ndk_api_level def set_gcc_host_compiler_path(environ_cp): - """Set GCC_HOST_COMPILER_PATH.""" - default_gcc_host_compiler_path = which('gcc') or '' - cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH') + """Set GCC_HOST_COMPILER_PATH.""" + default_gcc_host_compiler_path = which('gcc') or '' + cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH') - if os.path.islink(cuda_bin_symlink): - # os.readlink is only available in linux - default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink) + if os.path.islink(cuda_bin_symlink): + # os.readlink is only available in linux + default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink) - gcc_host_compiler_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='GCC_HOST_COMPILER_PATH', - var_default=default_gcc_host_compiler_path, - ask_for_var= - 'Please specify which gcc should be used by nvcc as the host compiler.', - check_success=os.path.exists, - error_msg='Invalid gcc path. %s cannot be found.', - ) + gcc_host_compiler_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='GCC_HOST_COMPILER_PATH', + var_default=default_gcc_host_compiler_path, + ask_for_var='Please specify which gcc should be used by nvcc as the host compiler.', + check_success=os.path.exists, + error_msg='Invalid gcc path. %s cannot be found.', + ) - write_action_env_to_bazelrc('GCC_HOST_COMPILER_PATH', - gcc_host_compiler_path) + write_action_env_to_bazelrc('GCC_HOST_COMPILER_PATH', + gcc_host_compiler_path) def reformat_version_sequence(version_str, sequence_count): - """Reformat the version string to have the given number of sequences. + """Reformat the version string to have the given number of sequences. - For example: - Given (7, 2) -> 7.0 - (7.0.1, 2) -> 7.0 - (5, 1) -> 5 - (5.0.3.2, 1) -> 5 +For example: +Given (7, 2) -> 7.0 + (7.0.1, 2) -> 7.0 + (5, 1) -> 5 + (5.0.3.2, 1) -> 5 - Args: - version_str: String, the version string. - sequence_count: int, an integer. - Returns: - string, reformatted version string. - """ - v = version_str.split('.') - if len(v) < sequence_count: - v = v + (['0'] * (sequence_count - len(v))) +Args: + version_str: String, the version string. + sequence_count: int, an integer. +Returns: + string, reformatted version string. +""" + v = version_str.split('.') + if len(v) < sequence_count: + v = v + (['0'] * (sequence_count - len(v))) - return '.'.join(v[:sequence_count]) + return '.'.join(v[:sequence_count]) def set_tf_cuda_version(environ_cp): - """Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION.""" - ask_cuda_version = ( - 'Please specify the CUDA SDK version you want to use. ' - '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - # Configure the Cuda SDK version to use. - tf_cuda_version = get_from_env_or_user_or_default( - environ_cp, 'TF_CUDA_VERSION', ask_cuda_version, - _DEFAULT_CUDA_VERSION) - tf_cuda_version = reformat_version_sequence(str(tf_cuda_version), 2) - - # Find out where the CUDA toolkit is installed - default_cuda_path = _DEFAULT_CUDA_PATH - if is_windows() or is_cygwin(): - default_cuda_path = cygpath( - environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN)) - elif is_linux(): - # If the default doesn't exist, try an alternative default. 
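-        # (For illustration: when the standard default path is missing but
-        # _DEFAULT_CUDA_PATH_LINUX exists on disk, the check below makes the
-        # alternative the suggested default shown in the prompt.)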
- if (not os.path.exists(default_cuda_path) - ) and os.path.exists(_DEFAULT_CUDA_PATH_LINUX): - default_cuda_path = _DEFAULT_CUDA_PATH_LINUX - ask_cuda_path = ('Please specify the location where CUDA %s toolkit is' - ' installed. Refer to README.md for more details. ' - '[Default is %s]: ') % (tf_cuda_version, - default_cuda_path) - cuda_toolkit_path = get_from_env_or_user_or_default( - environ_cp, 'CUDA_TOOLKIT_PATH', ask_cuda_path, default_cuda_path) - if is_windows() or is_cygwin(): - cuda_toolkit_path = cygpath(cuda_toolkit_path) - - if is_windows(): - cuda_rt_lib_path = 'lib/x64/cudart.lib' - elif is_linux(): - cuda_rt_lib_path = 'lib64/libcudart.so.%s' % tf_cuda_version - elif is_macos(): - cuda_rt_lib_path = 'lib/libcudart.%s.dylib' % tf_cuda_version - - cuda_toolkit_path_full = os.path.join(cuda_toolkit_path, - cuda_rt_lib_path) - if os.path.exists(cuda_toolkit_path_full): - break - - # Reset and retry - print('Invalid path to CUDA %s toolkit. %s cannot be found' % - (tf_cuda_version, cuda_toolkit_path_full)) - environ_cp['TF_CUDA_VERSION'] = '' - environ_cp['CUDA_TOOLKIT_PATH'] = '' + """Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION.""" + ask_cuda_version = ( + 'Please specify the CUDA SDK version you want to use. ' + '[Leave empty to default to CUDA %s]: ') % _DEFAULT_CUDA_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + # Configure the Cuda SDK version to use. + tf_cuda_version = get_from_env_or_user_or_default( + environ_cp, 'TF_CUDA_VERSION', ask_cuda_version, + _DEFAULT_CUDA_VERSION) + tf_cuda_version = reformat_version_sequence(str(tf_cuda_version), 2) + + # Find out where the CUDA toolkit is installed + default_cuda_path = _DEFAULT_CUDA_PATH + if is_windows() or is_cygwin(): + default_cuda_path = cygpath( + environ_cp.get('CUDA_PATH', _DEFAULT_CUDA_PATH_WIN)) + elif is_linux(): + # If the default doesn't exist, try an alternative default. + if (not os.path.exists(default_cuda_path) + ) and os.path.exists(_DEFAULT_CUDA_PATH_LINUX): + default_cuda_path = _DEFAULT_CUDA_PATH_LINUX + ask_cuda_path = ('Please specify the location where CUDA %s toolkit is' + ' installed. Refer to README.md for more details. ' + '[Default is %s]: ') % (tf_cuda_version, + default_cuda_path) + cuda_toolkit_path = get_from_env_or_user_or_default( + environ_cp, 'CUDA_TOOLKIT_PATH', ask_cuda_path, default_cuda_path) + if is_windows() or is_cygwin(): + cuda_toolkit_path = cygpath(cuda_toolkit_path) - else: - raise UserInputError( - 'Invalid TF_CUDA_SETTING setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) + if is_windows(): + cuda_rt_lib_path = 'lib/x64/cudart.lib' + elif is_linux(): + cuda_rt_lib_path = 'lib64/libcudart.so.%s' % tf_cuda_version + elif is_macos(): + cuda_rt_lib_path = 'lib/libcudart.%s.dylib' % tf_cuda_version + + cuda_toolkit_path_full = os.path.join(cuda_toolkit_path, + cuda_rt_lib_path) + if os.path.exists(cuda_toolkit_path_full): + break + + # Reset and retry + print('Invalid path to CUDA %s toolkit. 
%s cannot be found' % + (tf_cuda_version, cuda_toolkit_path_full)) + environ_cp['TF_CUDA_VERSION'] = '' + environ_cp['CUDA_TOOLKIT_PATH'] = '' - # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION - environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path - write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path) - environ_cp['TF_CUDA_VERSION'] = tf_cuda_version - write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) + else: + raise UserInputError( + 'Invalid TF_CUDA_SETTING setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) + + # Set CUDA_TOOLKIT_PATH and TF_CUDA_VERSION + environ_cp['CUDA_TOOLKIT_PATH'] = cuda_toolkit_path + write_action_env_to_bazelrc('CUDA_TOOLKIT_PATH', cuda_toolkit_path) + environ_cp['TF_CUDA_VERSION'] = tf_cuda_version + write_action_env_to_bazelrc('TF_CUDA_VERSION', tf_cuda_version) def set_tf_cudnn_version(environ_cp): - """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" - ask_cudnn_version = ( - 'Please specify the cuDNN version you want to use. ' - '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - tf_cudnn_version = get_from_env_or_user_or_default( - environ_cp, 'TF_CUDNN_VERSION', ask_cudnn_version, - _DEFAULT_CUDNN_VERSION) - tf_cudnn_version = reformat_version_sequence(str(tf_cudnn_version), 1) - - default_cudnn_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_cudnn_path = ( - r'Please specify the location where cuDNN %s library is ' - 'installed. Refer to README.md for more details. [Default' - ' is %s]:') % (tf_cudnn_version, default_cudnn_path) - cudnn_install_path = get_from_env_or_user_or_default( - environ_cp, 'CUDNN_INSTALL_PATH', ask_cudnn_path, - default_cudnn_path) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - cudnn_install_path = os.path.realpath( - os.path.expanduser(cudnn_install_path)) - if is_windows() or is_cygwin(): - cudnn_install_path = cygpath(cudnn_install_path) - - if is_windows(): - cuda_dnn_lib_path = 'lib/x64/cudnn.lib' - cuda_dnn_lib_alt_path = 'lib/x64/cudnn.lib' - elif is_linux(): - cuda_dnn_lib_path = 'lib64/libcudnn.so.%s' % tf_cudnn_version - cuda_dnn_lib_alt_path = 'libcudnn.so.%s' % tf_cudnn_version - elif is_macos(): - cuda_dnn_lib_path = 'lib/libcudnn.%s.dylib' % tf_cudnn_version - cuda_dnn_lib_alt_path = 'libcudnn.%s.dylib' % tf_cudnn_version - - cuda_dnn_lib_path_full = os.path.join(cudnn_install_path, - cuda_dnn_lib_path) - cuda_dnn_lib_alt_path_full = os.path.join(cudnn_install_path, - cuda_dnn_lib_alt_path) - if os.path.exists(cuda_dnn_lib_path_full) or os.path.exists( - cuda_dnn_lib_alt_path_full): - break - - # Try another alternative for Linux - if is_linux(): - ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) - cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', - cudnn_path_from_ldconfig) - if cudnn_path_from_ldconfig: - cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) - if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, - tf_cudnn_version)): - cudnn_install_path = os.path.dirname( - cudnn_path_from_ldconfig) - break - - # Reset and Retry - print( - 'Invalid path to cuDNN %s toolkit. 
None of the following files can be ' - 'found:' % tf_cudnn_version) - print(cuda_dnn_lib_path_full) - print(cuda_dnn_lib_alt_path_full) - if is_linux(): - print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)) - - environ_cp['TF_CUDNN_VERSION'] = '' - else: - raise UserInputError( - 'Invalid TF_CUDNN setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) + """Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""" + ask_cudnn_version = ( + 'Please specify the cuDNN version you want to use. ' + '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + tf_cudnn_version = get_from_env_or_user_or_default( + environ_cp, 'TF_CUDNN_VERSION', ask_cudnn_version, + _DEFAULT_CUDNN_VERSION) + tf_cudnn_version = reformat_version_sequence(str(tf_cudnn_version), 1) + + default_cudnn_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_cudnn_path = ( + r'Please specify the location where cuDNN %s library is ' + 'installed. Refer to README.md for more details. [Default' + ' is %s]:') % (tf_cudnn_version, default_cudnn_path) + cudnn_install_path = get_from_env_or_user_or_default( + environ_cp, 'CUDNN_INSTALL_PATH', ask_cudnn_path, + default_cudnn_path) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + cudnn_install_path = os.path.realpath( + os.path.expanduser(cudnn_install_path)) + if is_windows() or is_cygwin(): + cudnn_install_path = cygpath(cudnn_install_path) - # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION - environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path - write_action_env_to_bazelrc('CUDNN_INSTALL_PATH', cudnn_install_path) - environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version - write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) + if is_windows(): + cuda_dnn_lib_path = 'lib/x64/cudnn.lib' + cuda_dnn_lib_alt_path = 'lib/x64/cudnn.lib' + elif is_linux(): + cuda_dnn_lib_path = 'lib64/libcudnn.so.%s' % tf_cudnn_version + cuda_dnn_lib_alt_path = 'libcudnn.so.%s' % tf_cudnn_version + elif is_macos(): + cuda_dnn_lib_path = 'lib/libcudnn.%s.dylib' % tf_cudnn_version + cuda_dnn_lib_alt_path = 'libcudnn.%s.dylib' % tf_cudnn_version + + cuda_dnn_lib_path_full = os.path.join(cudnn_install_path, + cuda_dnn_lib_path) + cuda_dnn_lib_alt_path_full = os.path.join(cudnn_install_path, + cuda_dnn_lib_alt_path) + if os.path.exists(cuda_dnn_lib_path_full) or os.path.exists( + cuda_dnn_lib_alt_path_full): + break + + # Try another alternative for Linux + if is_linux(): + ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' + cudnn_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) + cudnn_path_from_ldconfig = re.search('.*libcudnn.so .* => (.*)', + cudnn_path_from_ldconfig) + if cudnn_path_from_ldconfig: + cudnn_path_from_ldconfig = cudnn_path_from_ldconfig.group(1) + if os.path.exists('%s.%s' % (cudnn_path_from_ldconfig, + tf_cudnn_version)): + cudnn_install_path = os.path.dirname( + cudnn_path_from_ldconfig) + break + + # Reset and Retry + print( + 'Invalid path to cuDNN %s toolkit. None of the following files can be ' + 'found:' % tf_cudnn_version) + print(cuda_dnn_lib_path_full) + print(cuda_dnn_lib_alt_path_full) + if is_linux(): + print('%s.%s' % (cudnn_path_from_ldconfig, tf_cudnn_version)) + + environ_cp['TF_CUDNN_VERSION'] = '' + else: + raise UserInputError( + 'Invalid TF_CUDNN setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' 
% + _DEFAULT_PROMPT_ASK_ATTEMPTS) + + # Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION + environ_cp['CUDNN_INSTALL_PATH'] = cudnn_install_path + write_action_env_to_bazelrc('CUDNN_INSTALL_PATH', cudnn_install_path) + environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version + write_action_env_to_bazelrc('TF_CUDNN_VERSION', tf_cudnn_version) def is_cuda_compatible(lib, cuda_ver, cudnn_ver): - """Check compatibility between given library and cudnn/cudart libraries.""" - ldd_bin = which('ldd') or '/usr/bin/ldd' - ldd_out = run_shell([ldd_bin, lib], True) - ldd_out = ldd_out.split(os.linesep) - cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') - cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') - cudnn = None - cudart = None - cudnn_ok = True # assume no cudnn dependency by default - cuda_ok = True # assume no cuda dependency by default - for line in ldd_out: - if 'libcudnn.so' in line: - cudnn = cudnn_pattern.search(line) - cudnn_ok = False - elif 'libcudart.so' in line: - cudart = cuda_pattern.search(line) - cuda_ok = False - if cudnn and len(cudnn.group(1)): - cudnn = convert_version_to_int(cudnn.group(1)) - if cudart and len(cudart.group(1)): - cudart = convert_version_to_int(cudart.group(1)) - if cudnn is not None: - cudnn_ok = (cudnn == cudnn_ver) - if cudart is not None: - cuda_ok = (cudart == cuda_ver) - return cudnn_ok and cuda_ok + """Check compatibility between given library and cudnn/cudart libraries.""" + ldd_bin = which('ldd') or '/usr/bin/ldd' + ldd_out = run_shell([ldd_bin, lib], True) + ldd_out = ldd_out.split(os.linesep) + cudnn_pattern = re.compile('.*libcudnn.so\\.?(.*) =>.*$') + cuda_pattern = re.compile('.*libcudart.so\\.?(.*) =>.*$') + cudnn = None + cudart = None + cudnn_ok = True # assume no cudnn dependency by default + cuda_ok = True # assume no cuda dependency by default + for line in ldd_out: + if 'libcudnn.so' in line: + cudnn = cudnn_pattern.search(line) + cudnn_ok = False + elif 'libcudart.so' in line: + cudart = cuda_pattern.search(line) + cuda_ok = False + if cudnn and len(cudnn.group(1)): + cudnn = convert_version_to_int(cudnn.group(1)) + if cudart and len(cudart.group(1)): + cudart = convert_version_to_int(cudart.group(1)) + if cudnn is not None: + cudnn_ok = (cudnn == cudnn_ver) + if cudart is not None: + cuda_ok = (cudart == cuda_ver) + return cudnn_ok and cuda_ok def set_tf_tensorrt_install_path(environ_cp): - """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. - - Adapted from code contributed by Sami Kama (https://github.com/samikama). - - Args: - environ_cp: copy of the os.environ. - - Raises: - ValueError: if this method was called under non-Linux platform. - UserInputError: if user has provided invalid input multiple times. - """ - if not is_linux(): - raise ValueError( - 'Currently TensorRT is only supported on Linux platform.') - - # Ask user whether to add TensorRT support. - if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', - False))) != '1': - return - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - ask_tensorrt_path = (r'Please specify the location where TensorRT is ' - 'installed. [Default is %s]:') % ( - _DEFAULT_TENSORRT_PATH_LINUX) - trt_install_path = get_from_env_or_user_or_default( - environ_cp, 'TENSORRT_INSTALL_PATH', ask_tensorrt_path, - _DEFAULT_TENSORRT_PATH_LINUX) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. 
- trt_install_path = os.path.realpath( - os.path.expanduser(trt_install_path)) - - def find_libs(search_path): - """Search for libnvinfer.so in "search_path".""" - fl = set() - if os.path.exists(search_path) and os.path.isdir(search_path): - fl.update([ - os.path.realpath(os.path.join(search_path, x)) - for x in os.listdir(search_path) if 'libnvinfer.so' in x - ]) - return fl - - possible_files = find_libs(trt_install_path) - possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) - possible_files.update( - find_libs(os.path.join(trt_install_path, 'lib64'))) - cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) - cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) - nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') - highest_ver = [0, None, None] - - for lib_file in possible_files: - if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): - matches = nvinfer_pattern.search(lib_file) - if len(matches.groups()) == 0: - continue - ver_str = matches.group(1) - ver = convert_version_to_int(ver_str) if len(ver_str) else 0 - if ver > highest_ver[0]: - highest_ver = [ver, ver_str, lib_file] - if highest_ver[1] is not None: - trt_install_path = os.path.dirname(highest_ver[2]) - tf_tensorrt_version = highest_ver[1] - break - - # Try another alternative from ldconfig. - ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' - ldconfig_output = run_shell([ldconfig_bin, '-p']) - search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)', - ldconfig_output) - if search_result: - libnvinfer_path_from_ldconfig = search_result.group(2) - if os.path.exists(libnvinfer_path_from_ldconfig): - if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver, - cudnn_ver): - trt_install_path = os.path.dirname( - libnvinfer_path_from_ldconfig) - tf_tensorrt_version = search_result.group(1) - break - - # Reset and Retry - if possible_files: - print( - 'TensorRT libraries found in one the following directories', - 'are not compatible with selected cuda and cudnn installations' - ) - print(trt_install_path) - print(os.path.join(trt_install_path, 'lib')) - print(os.path.join(trt_install_path, 'lib64')) - if search_result: - print(libnvinfer_path_from_ldconfig) - else: - print( - 'Invalid path to TensorRT. None of the following files can be found:' - ) - print(trt_install_path) - print(os.path.join(trt_install_path, 'lib')) - print(os.path.join(trt_install_path, 'lib64')) - if search_result: - print(libnvinfer_path_from_ldconfig) - + """Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION. + +Adapted from code contributed by Sami Kama (https://github.com/samikama). + +Args: + environ_cp: copy of the os.environ. + +Raises: + ValueError: if this method was called under non-Linux platform. + UserInputError: if user has provided invalid input multiple times. +""" + if not is_linux(): + raise ValueError( + 'Currently TensorRT is only supported on Linux platform.') + + # Ask user whether to add TensorRT support. + if str(int(get_var(environ_cp, 'TF_NEED_TENSORRT', 'TensorRT', + False))) != '1': + return + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + ask_tensorrt_path = (r'Please specify the location where TensorRT is ' + 'installed. [Default is %s]:') % ( + _DEFAULT_TENSORRT_PATH_LINUX) + trt_install_path = get_from_env_or_user_or_default( + environ_cp, 'TENSORRT_INSTALL_PATH', ask_tensorrt_path, + _DEFAULT_TENSORRT_PATH_LINUX) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. 
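+    # Illustrative sketch (assuming HOME=/home/me): os.path.expanduser turns
+    # '~/tensorrt' into '/home/me/tensorrt', and os.path.realpath then
+    # resolves any symlinks to a canonical absolute path.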
+ trt_install_path = os.path.realpath( + os.path.expanduser(trt_install_path)) + + def find_libs(search_path): + """Search for libnvinfer.so in "search_path".""" + fl = set() + if os.path.exists(search_path) and os.path.isdir(search_path): + fl.update([ + os.path.realpath(os.path.join(search_path, x)) + for x in os.listdir(search_path) if 'libnvinfer.so' in x + ]) + return fl + + possible_files = find_libs(trt_install_path) + possible_files.update(find_libs(os.path.join(trt_install_path, 'lib'))) + possible_files.update( + find_libs(os.path.join(trt_install_path, 'lib64'))) + cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) + cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) + nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') + highest_ver = [0, None, None] + + for lib_file in possible_files: + if is_cuda_compatible(lib_file, cuda_ver, cudnn_ver): + matches = nvinfer_pattern.search(lib_file) + if len(matches.groups()) == 0: + continue + ver_str = matches.group(1) + ver = convert_version_to_int(ver_str) if len(ver_str) else 0 + if ver > highest_ver[0]: + highest_ver = [ver, ver_str, lib_file] + if highest_ver[1] is not None: + trt_install_path = os.path.dirname(highest_ver[2]) + tf_tensorrt_version = highest_ver[1] + break + + # Try another alternative from ldconfig. + ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' + ldconfig_output = run_shell([ldconfig_bin, '-p']) + search_result = re.search('.*libnvinfer.so\\.?([0-9.]*).* => (.*)', + ldconfig_output) + if search_result: + libnvinfer_path_from_ldconfig = search_result.group(2) + if os.path.exists(libnvinfer_path_from_ldconfig): + if is_cuda_compatible(libnvinfer_path_from_ldconfig, cuda_ver, + cudnn_ver): + trt_install_path = os.path.dirname( + libnvinfer_path_from_ldconfig) + tf_tensorrt_version = search_result.group(1) + break + + # Reset and Retry + if possible_files: + print( + 'TensorRT libraries found in one the following directories', + 'are not compatible with selected cuda and cudnn installations' + ) + print(trt_install_path) + print(os.path.join(trt_install_path, 'lib')) + print(os.path.join(trt_install_path, 'lib64')) + if search_result: + print(libnvinfer_path_from_ldconfig) else: - raise UserInputError( - 'Invalid TF_TENSORRT setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) - - # Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION - environ_cp['TENSORRT_INSTALL_PATH'] = trt_install_path - write_action_env_to_bazelrc('TENSORRT_INSTALL_PATH', trt_install_path) - environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version - write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version) + print( + 'Invalid path to TensorRT. None of the following files can be found:' + ) + print(trt_install_path) + print(os.path.join(trt_install_path, 'lib')) + print(os.path.join(trt_install_path, 'lib64')) + if search_result: + print(libnvinfer_path_from_ldconfig) + + else: + raise UserInputError( + 'Invalid TF_TENSORRT setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) + + # Set TENSORRT_INSTALL_PATH and TF_TENSORRT_VERSION + environ_cp['TENSORRT_INSTALL_PATH'] = trt_install_path + write_action_env_to_bazelrc('TENSORRT_INSTALL_PATH', trt_install_path) + environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version + write_action_env_to_bazelrc('TF_TENSORRT_VERSION', tf_tensorrt_version) def set_tf_nccl_install_path(environ_cp): - """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION. 
- - Args: - environ_cp: copy of the os.environ. - - Raises: - ValueError: if this method was called under non-Linux platform. - UserInputError: if user has provided invalid input multiple times. - """ - if not is_linux(): - raise ValueError( - 'Currently NCCL is only supported on Linux platforms.') - - ask_nccl_version = ( - 'Please specify the NCCL version you want to use. ' - '[Leave empty to default to NCCL %s]: ') % _DEFAULT_NCCL_VERSION - - for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - tf_nccl_version = get_from_env_or_user_or_default( - environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, - _DEFAULT_NCCL_VERSION) - tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) - - if tf_nccl_version == '1': - break # No need to get install path, NCCL 1 is a GitHub repo. - - # TODO(csigg): Look with ldconfig first if we can find the library in paths - # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding - # include directory. This is where the NCCL .deb packages install them. - # Then ask the user if we should use that. Instead of a single - # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to - # nccl_configure.bzl - default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_nccl_path = ( - r'Please specify the location where NCCL %s library is ' - 'installed. Refer to README.md for more details. [Default ' - 'is %s]:') % (tf_nccl_version, default_nccl_path) - nccl_install_path = get_from_env_or_user_or_default( - environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) - - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - nccl_install_path = os.path.realpath( - os.path.expanduser(nccl_install_path)) - if is_windows() or is_cygwin(): - nccl_install_path = cygpath(nccl_install_path) - - if is_windows(): - nccl_lib_path = 'lib/x64/nccl.lib' - elif is_linux(): - nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version - elif is_macos(): - nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version - - nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) - nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') - nccl_license_path = os.path.join(nccl_install_path, 'NCCL-SLA.txt') - if os.path.exists(nccl_lib_path) and os.path.exists( - nccl_hdr_path) and os.path.exists(nccl_license_path): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) - break - - # Reset and Retry - print( - 'Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' - 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, - nccl_hdr_path)) - - environ_cp['TF_NCCL_VERSION'] = '' - else: - raise UserInputError( - 'Invalid TF_NCCL setting was provided %d ' - 'times in a row. Assuming to be a scripting mistake.' % - _DEFAULT_PROMPT_ASK_ATTEMPTS) + """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION. + +Args: + environ_cp: copy of the os.environ. + +Raises: + ValueError: if this method was called under non-Linux platform. + UserInputError: if user has provided invalid input multiple times. +""" + if not is_linux(): + raise ValueError( + 'Currently NCCL is only supported on Linux platforms.') + + ask_nccl_version = ( + 'Please specify the NCCL version you want to use. 
' + '[Leave empty to default to NCCL %s]: ') % _DEFAULT_NCCL_VERSION + + for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): + tf_nccl_version = get_from_env_or_user_or_default( + environ_cp, 'TF_NCCL_VERSION', ask_nccl_version, + _DEFAULT_NCCL_VERSION) + tf_nccl_version = reformat_version_sequence(str(tf_nccl_version), 1) + + if tf_nccl_version == '1': + break # No need to get install path, NCCL 1 is a GitHub repo. + + # TODO(csigg): Look with ldconfig first if we can find the library in paths + # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding + # include directory. This is where the NCCL .deb packages install them. + # Then ask the user if we should use that. Instead of a single + # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to + # nccl_configure.bzl + default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_nccl_path = ( + r'Please specify the location where NCCL %s library is ' + 'installed. Refer to README.md for more details. [Default ' + 'is %s]:') % (tf_nccl_version, default_nccl_path) + nccl_install_path = get_from_env_or_user_or_default( + environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) + + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. + nccl_install_path = os.path.realpath( + os.path.expanduser(nccl_install_path)) + if is_windows() or is_cygwin(): + nccl_install_path = cygpath(nccl_install_path) - # Set TF_NCCL_VERSION - environ_cp['TF_NCCL_VERSION'] = tf_nccl_version - write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) + if is_windows(): + nccl_lib_path = 'lib/x64/nccl.lib' + elif is_linux(): + nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version + elif is_macos(): + nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version + + nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) + nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') + nccl_license_path = os.path.join(nccl_install_path, 'NCCL-SLA.txt') + if os.path.exists(nccl_lib_path) and os.path.exists( + nccl_hdr_path) and os.path.exists(nccl_license_path): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) + break + + # Reset and Retry + print( + 'Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' + 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, + nccl_hdr_path)) + + environ_cp['TF_NCCL_VERSION'] = '' + else: + raise UserInputError( + 'Invalid TF_NCCL setting was provided %d ' + 'times in a row. Assuming to be a scripting mistake.' % + _DEFAULT_PROMPT_ASK_ATTEMPTS) + + # Set TF_NCCL_VERSION + environ_cp['TF_NCCL_VERSION'] = tf_nccl_version + write_action_env_to_bazelrc('TF_NCCL_VERSION', tf_nccl_version) def get_native_cuda_compute_capabilities(environ_cp): - """Get native cuda compute capabilities. - - Args: - environ_cp: copy of the os.environ. - Returns: - string of native cuda compute capabilities, separated by comma. 
- """ - device_query_bin = os.path.join( - environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery') - if os.path.isfile(device_query_bin) and os.access(device_query_bin, - os.X_OK): - try: - output = run_shell(device_query_bin).split('\n') - pattern = re.compile('[0-9]*\\.[0-9]*') - output = [pattern.search(x) for x in output if 'Capability' in x] - output = ','.join(x.group() for x in output if x is not None) - except subprocess.CalledProcessError: - output = '' - else: - output = '' - return output + """Get native cuda compute capabilities. + +Args: + environ_cp: copy of the os.environ. +Returns: + string of native cuda compute capabilities, separated by comma. +""" + device_query_bin = os.path.join( + environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery') + if os.path.isfile(device_query_bin) and os.access(device_query_bin, + os.X_OK): + try: + output = run_shell(device_query_bin).split('\n') + pattern = re.compile('[0-9]*\\.[0-9]*') + output = [pattern.search(x) for x in output if 'Capability' in x] + output = ','.join(x.group() for x in output if x is not None) + except subprocess.CalledProcessError: + output = '' + else: + output = '' + return output def set_tf_cuda_compute_capabilities(environ_cp): - """Set TF_CUDA_COMPUTE_CAPABILITIES.""" - while True: - native_cuda_compute_capabilities = get_native_cuda_compute_capabilities( - environ_cp) - if not native_cuda_compute_capabilities: - default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES - else: - default_cuda_compute_capabilities = native_cuda_compute_capabilities - - ask_cuda_compute_capabilities = ( - 'Please specify a list of comma-separated ' - 'Cuda compute capabilities you want to ' - 'build with.\nYou can find the compute ' - 'capability of your device at: ' - 'https://developer.nvidia.com/cuda-gpus.\nPlease' - ' note that each additional compute ' - 'capability significantly increases your ' - 'build time and binary size. [Default is: %s]' % - default_cuda_compute_capabilities) - tf_cuda_compute_capabilities = get_from_env_or_user_or_default( - environ_cp, 'TF_CUDA_COMPUTE_CAPABILITIES', - ask_cuda_compute_capabilities, default_cuda_compute_capabilities) - # Check whether all capabilities from the input is valid - all_valid = True - # Remove all whitespace characters before splitting the string - # that users may insert by accident, as this will result in error - tf_cuda_compute_capabilities = ''.join( - tf_cuda_compute_capabilities.split()) - for compute_capability in tf_cuda_compute_capabilities.split(','): - m = re.match('[0-9]+.[0-9]+', compute_capability) - if not m: - print('Invalid compute capability: ' % compute_capability) - all_valid = False - else: - ver = int(m.group(0).split('.')[0]) - if ver < 3: - print( - 'Only compute capabilities 3.0 or higher are supported.' 
- ) - all_valid = False - - if all_valid: - break - - # Reset and Retry - environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = '' - - # Set TF_CUDA_COMPUTE_CAPABILITIES - environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = tf_cuda_compute_capabilities - write_action_env_to_bazelrc('TF_CUDA_COMPUTE_CAPABILITIES', - tf_cuda_compute_capabilities) + """Set TF_CUDA_COMPUTE_CAPABILITIES.""" + while True: + native_cuda_compute_capabilities = get_native_cuda_compute_capabilities( + environ_cp) + if not native_cuda_compute_capabilities: + default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES + else: + default_cuda_compute_capabilities = native_cuda_compute_capabilities + + ask_cuda_compute_capabilities = ( + 'Please specify a list of comma-separated ' + 'Cuda compute capabilities you want to ' + 'build with.\nYou can find the compute ' + 'capability of your device at: ' + 'https://developer.nvidia.com/cuda-gpus.\nPlease' + ' note that each additional compute ' + 'capability significantly increases your ' + 'build time and binary size. [Default is: %s]' % + default_cuda_compute_capabilities) + tf_cuda_compute_capabilities = get_from_env_or_user_or_default( + environ_cp, 'TF_CUDA_COMPUTE_CAPABILITIES', + ask_cuda_compute_capabilities, default_cuda_compute_capabilities) + # Check whether all capabilities from the input is valid + all_valid = True + # Remove all whitespace characters before splitting the string + # that users may insert by accident, as this will result in error + tf_cuda_compute_capabilities = ''.join( + tf_cuda_compute_capabilities.split()) + for compute_capability in tf_cuda_compute_capabilities.split(','): + m = re.match('[0-9]+.[0-9]+', compute_capability) + if not m: + print('Invalid compute capability: ' % compute_capability) + all_valid = False + else: + ver = int(m.group(0).split('.')[0]) + if ver < 3: + print( + 'Only compute capabilities 3.0 or higher are supported.' + ) + all_valid = False + + if all_valid: + break + + # Reset and Retry + environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = '' + + # Set TF_CUDA_COMPUTE_CAPABILITIES + environ_cp['TF_CUDA_COMPUTE_CAPABILITIES'] = tf_cuda_compute_capabilities + write_action_env_to_bazelrc('TF_CUDA_COMPUTE_CAPABILITIES', + tf_cuda_compute_capabilities) def set_other_cuda_vars(environ_cp): - """Set other CUDA related variables.""" - # If CUDA is enabled, always use GPU during build and test. - if environ_cp.get('TF_CUDA_CLANG') == '1': - write_to_bazelrc('build --config=cuda_clang') - write_to_bazelrc('test --config=cuda_clang') - else: - write_to_bazelrc('build --config=cuda') - write_to_bazelrc('test --config=cuda') + """Set other CUDA related variables.""" + # If CUDA is enabled, always use GPU during build and test. + if environ_cp.get('TF_CUDA_CLANG') == '1': + write_to_bazelrc('build --config=cuda_clang') + write_to_bazelrc('test --config=cuda_clang') + else: + write_to_bazelrc('build --config=cuda') + write_to_bazelrc('test --config=cuda') def set_host_cxx_compiler(environ_cp): - """Set HOST_CXX_COMPILER.""" - default_cxx_host_compiler = which('g++') or '' + """Set HOST_CXX_COMPILER.""" + default_cxx_host_compiler = which('g++') or '' - host_cxx_compiler = prompt_loop_or_load_from_env( - environ_cp, - var_name='HOST_CXX_COMPILER', - var_default=default_cxx_host_compiler, - ask_for_var=('Please specify which C++ compiler should be used as the ' - 'host C++ compiler.'), - check_success=os.path.exists, - error_msg='Invalid C++ compiler path. 
%s cannot be found.', - ) + host_cxx_compiler = prompt_loop_or_load_from_env( + environ_cp, + var_name='HOST_CXX_COMPILER', + var_default=default_cxx_host_compiler, + ask_for_var=('Please specify which C++ compiler should be used as the ' + 'host C++ compiler.'), + check_success=os.path.exists, + error_msg='Invalid C++ compiler path. %s cannot be found.', + ) - write_action_env_to_bazelrc('HOST_CXX_COMPILER', host_cxx_compiler) + write_action_env_to_bazelrc('HOST_CXX_COMPILER', host_cxx_compiler) def set_host_c_compiler(environ_cp): - """Set HOST_C_COMPILER.""" - default_c_host_compiler = which('gcc') or '' + """Set HOST_C_COMPILER.""" + default_c_host_compiler = which('gcc') or '' - host_c_compiler = prompt_loop_or_load_from_env( - environ_cp, - var_name='HOST_C_COMPILER', - var_default=default_c_host_compiler, - ask_for_var=( - 'Please specify which C compiler should be used as the host ' - 'C compiler.'), - check_success=os.path.exists, - error_msg='Invalid C compiler path. %s cannot be found.', - ) + host_c_compiler = prompt_loop_or_load_from_env( + environ_cp, + var_name='HOST_C_COMPILER', + var_default=default_c_host_compiler, + ask_for_var=( + 'Please specify which C compiler should be used as the host ' + 'C compiler.'), + check_success=os.path.exists, + error_msg='Invalid C compiler path. %s cannot be found.', + ) - write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler) + write_action_env_to_bazelrc('HOST_C_COMPILER', host_c_compiler) def set_computecpp_toolkit_path(environ_cp): - """Set COMPUTECPP_TOOLKIT_PATH.""" - - def toolkit_exists(toolkit_path): - """Check if a computecpp toolkit path is valid.""" - if is_linux(): - sycl_rt_lib_path = 'lib/libComputeCpp.so' - else: - sycl_rt_lib_path = '' - - sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path) - exists = os.path.exists(sycl_rt_lib_path_full) - if not exists: - print('Invalid SYCL %s library path. %s cannot be found' % - (_TF_OPENCL_VERSION, sycl_rt_lib_path_full)) - return exists - - computecpp_toolkit_path = prompt_loop_or_load_from_env( - environ_cp, - var_name='COMPUTECPP_TOOLKIT_PATH', - var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH, - ask_for_var=( - 'Please specify the location where ComputeCpp for SYCL %s is ' - 'installed.' % _TF_OPENCL_VERSION), - check_success=toolkit_exists, - error_msg='Invalid SYCL compiler path. %s cannot be found.', - suppress_default_error=True) - - write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', - computecpp_toolkit_path) + """Set COMPUTECPP_TOOLKIT_PATH.""" + + def toolkit_exists(toolkit_path): + """Check if a computecpp toolkit path is valid.""" + if is_linux(): + sycl_rt_lib_path = 'lib/libComputeCpp.so' + else: + sycl_rt_lib_path = '' + + sycl_rt_lib_path_full = os.path.join(toolkit_path, sycl_rt_lib_path) + exists = os.path.exists(sycl_rt_lib_path_full) + if not exists: + print('Invalid SYCL %s library path. %s cannot be found' % + (_TF_OPENCL_VERSION, sycl_rt_lib_path_full)) + return exists + + computecpp_toolkit_path = prompt_loop_or_load_from_env( + environ_cp, + var_name='COMPUTECPP_TOOLKIT_PATH', + var_default=_DEFAULT_COMPUTECPP_TOOLKIT_PATH, + ask_for_var=( + 'Please specify the location where ComputeCpp for SYCL %s is ' + 'installed.' % _TF_OPENCL_VERSION), + check_success=toolkit_exists, + error_msg='Invalid SYCL compiler path. 
%s cannot be found.', + suppress_default_error=True) + + write_action_env_to_bazelrc('COMPUTECPP_TOOLKIT_PATH', + computecpp_toolkit_path) def set_trisycl_include_dir(environ_cp): - """Set TRISYCL_INCLUDE_DIR.""" + """Set TRISYCL_INCLUDE_DIR.""" - ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' - 'include directory. (Use --config=sycl_trisycl ' - 'when building with Bazel) ' - '[Default is %s]: ') % ( - _DEFAULT_TRISYCL_INCLUDE_DIR) + ask_trisycl_include_dir = ('Please specify the location of the triSYCL ' + 'include directory. (Use --config=sycl_trisycl ' + 'when building with Bazel) ' + '[Default is %s]: ') % ( + _DEFAULT_TRISYCL_INCLUDE_DIR) - while True: - trisycl_include_dir = get_from_env_or_user_or_default( - environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, - _DEFAULT_TRISYCL_INCLUDE_DIR) - if os.path.exists(trisycl_include_dir): - break + while True: + trisycl_include_dir = get_from_env_or_user_or_default( + environ_cp, 'TRISYCL_INCLUDE_DIR', ask_trisycl_include_dir, + _DEFAULT_TRISYCL_INCLUDE_DIR) + if os.path.exists(trisycl_include_dir): + break - print('Invalid triSYCL include directory, %s cannot be found' % - (trisycl_include_dir)) + print('Invalid triSYCL include directory, %s cannot be found' % + (trisycl_include_dir)) - # Set TRISYCL_INCLUDE_DIR - environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir - write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) + # Set TRISYCL_INCLUDE_DIR + environ_cp['TRISYCL_INCLUDE_DIR'] = trisycl_include_dir + write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir) def set_mpi_home(environ_cp): - """Set MPI_HOME.""" - - default_mpi_home = which('mpirun') or which('mpiexec') or '' - default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home)) - - def valid_mpi_path(mpi_home): - exists = (os.path.exists(os.path.join(mpi_home, 'include')) - and os.path.exists(os.path.join(mpi_home, 'lib'))) - if not exists: - print('Invalid path to the MPI Toolkit. %s or %s cannot be found' % - (os.path.join(mpi_home, 'include'), - os.path.exists(os.path.join(mpi_home, 'lib')))) - return exists - - _ = prompt_loop_or_load_from_env( - environ_cp, - var_name='MPI_HOME', - var_default=default_mpi_home, - ask_for_var='Please specify the MPI toolkit folder.', - check_success=valid_mpi_path, - error_msg='', - suppress_default_error=True) + """Set MPI_HOME.""" + + default_mpi_home = which('mpirun') or which('mpiexec') or '' + default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home)) + + def valid_mpi_path(mpi_home): + exists = (os.path.exists(os.path.join(mpi_home, 'include')) + and os.path.exists(os.path.join(mpi_home, 'lib'))) + if not exists: + print('Invalid path to the MPI Toolkit. 
%s or %s cannot be found' % + (os.path.join(mpi_home, 'include'), + os.path.exists(os.path.join(mpi_home, 'lib')))) + return exists + + _ = prompt_loop_or_load_from_env( + environ_cp, + var_name='MPI_HOME', + var_default=default_mpi_home, + ask_for_var='Please specify the MPI toolkit folder.', + check_success=valid_mpi_path, + error_msg='', + suppress_default_error=True) def set_other_mpi_vars(environ_cp): - """Set other MPI related variables.""" - # Link the MPI header files - mpi_home = environ_cp.get('MPI_HOME') - symlink_force('%s/include/mpi.h' % mpi_home, 'third_party/mpi/mpi.h') - - # Determine if we use OpenMPI or MVAPICH, these require different header files - # to be included here to make bazel dependency checker happy - if os.path.exists( - os.path.join(mpi_home, 'include/mpi_portable_platform.h')): - symlink_force( - os.path.join(mpi_home, 'include/mpi_portable_platform.h'), - 'third_party/mpi/mpi_portable_platform.h') - # TODO(gunan): avoid editing files in configure - sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=False', - 'MPI_LIB_IS_OPENMPI=True') - else: - # MVAPICH / MPICH - symlink_force( - os.path.join(mpi_home, 'include/mpio.h'), 'third_party/mpi/mpio.h') - symlink_force( - os.path.join(mpi_home, 'include/mpicxx.h'), - 'third_party/mpi/mpicxx.h') - # TODO(gunan): avoid editing files in configure - sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=True', - 'MPI_LIB_IS_OPENMPI=False') - - if os.path.exists(os.path.join(mpi_home, 'lib/libmpi.so')): - symlink_force( - os.path.join(mpi_home, 'lib/libmpi.so'), - 'third_party/mpi/libmpi.so') - else: - raise ValueError( - 'Cannot find the MPI library file in %s/lib' % mpi_home) + """Set other MPI related variables.""" + # Link the MPI header files + mpi_home = environ_cp.get('MPI_HOME') + symlink_force('%s/include/mpi.h' % mpi_home, 'third_party/mpi/mpi.h') + + # Determine if we use OpenMPI or MVAPICH, these require different header files + # to be included here to make bazel dependency checker happy + if os.path.exists( + os.path.join(mpi_home, 'include/mpi_portable_platform.h')): + symlink_force( + os.path.join(mpi_home, 'include/mpi_portable_platform.h'), + 'third_party/mpi/mpi_portable_platform.h') + # TODO(gunan): avoid editing files in configure + sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=False', + 'MPI_LIB_IS_OPENMPI=True') + else: + # MVAPICH / MPICH + symlink_force( + os.path.join(mpi_home, 'include/mpio.h'), 'third_party/mpi/mpio.h') + symlink_force( + os.path.join(mpi_home, 'include/mpicxx.h'), + 'third_party/mpi/mpicxx.h') + # TODO(gunan): avoid editing files in configure + sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI=True', + 'MPI_LIB_IS_OPENMPI=False') + + if os.path.exists(os.path.join(mpi_home, 'lib/libmpi.so')): + symlink_force( + os.path.join(mpi_home, 'lib/libmpi.so'), + 'third_party/mpi/libmpi.so') + else: + raise ValueError( + 'Cannot find the MPI library file in %s/lib' % mpi_home) def set_grpc_build_flags(): - write_to_bazelrc('build --define grpc_no_ares=true') + write_to_bazelrc('build --define grpc_no_ares=true') def set_build_strip_flag(): - write_to_bazelrc('build --strip=always') + write_to_bazelrc('build --strip=always') def set_windows_build_flags(): - if is_windows(): - # The non-monolithic build is not supported yet - write_to_bazelrc('build --config monolithic') - # Suppress warning messages - write_to_bazelrc('build --copt=-w --host_copt=-w') - # Output more verbose information when something goes wrong - write_to_bazelrc('build 
--verbose_failures') + if is_windows(): + # The non-monolithic build is not supported yet + write_to_bazelrc('build --config monolithic') + # Suppress warning messages + write_to_bazelrc('build --copt=-w --host_copt=-w') + # Output more verbose information when something goes wrong + write_to_bazelrc('build --verbose_failures') def config_info_line(name, help_text): - """Helper function to print formatted help text for Bazel config options.""" - print('\t--config=%-12s\t# %s' % (name, help_text)) + """Helper function to print formatted help text for Bazel config options.""" + print('\t--config=%-12s\t# %s' % (name, help_text)) def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--workspace", - type=str, - default=_TF_WORKSPACE_ROOT, - help="The absolute path to your active Bazel workspace.") - args = parser.parse_args() - - # Make a copy of os.environ to be clear when functions and getting and setting - # environment variables. - environ_cp = dict(os.environ) - - check_bazel_version('0.10.0') - - reset_tf_configure_bazelrc(args.workspace) - cleanup_makefile() - setup_python(environ_cp) - - if is_windows(): - environ_cp['TF_NEED_AWS'] = '0' - environ_cp['TF_NEED_GCP'] = '0' - environ_cp['TF_NEED_HDFS'] = '0' - environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_KAFKA'] = '0' - environ_cp['TF_NEED_OPENCL_SYCL'] = '0' - environ_cp['TF_NEED_COMPUTECPP'] = '0' - environ_cp['TF_NEED_OPENCL'] = '0' - environ_cp['TF_CUDA_CLANG'] = '0' - environ_cp['TF_NEED_TENSORRT'] = '0' - # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on - # Windows. - environ_cp['TF_DOWNLOAD_CLANG'] = '0' - - if is_macos(): - environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_TENSORRT'] = '0' - - set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', - 'with_jemalloc', True) - set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', - 'with_gcp_support', True, 'gcp') - set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', - 'with_hdfs_support', True, 'hdfs') - set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform', - 'with_aws_support', True, 'aws') - set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', - 'with_kafka_support', True, 'kafka') - set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', - False, 'xla') - set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', False, - 'gdr') - set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', - False, 'verbs') - set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', - 'with_ngraph_support', False, 'ngraph') - - set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) - if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': - set_host_cxx_compiler(environ_cp) - set_host_c_compiler(environ_cp) - set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', - True) - if environ_cp.get('TF_NEED_COMPUTECPP') == '1': - set_computecpp_toolkit_path(environ_cp) - else: - set_trisycl_include_dir(environ_cp) - - set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) - if (environ_cp.get('TF_NEED_CUDA') == '1' - and 'TF_CUDA_CONFIG_REPO' not in environ_cp): - set_tf_cuda_version(environ_cp) - set_tf_cudnn_version(environ_cp) - if is_linux(): - set_tf_tensorrt_install_path(environ_cp) - set_tf_nccl_install_path(environ_cp) - - set_tf_cuda_compute_capabilities(environ_cp) - if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( - 'LD_LIBRARY_PATH') != '1': - write_action_env_to_bazelrc('LD_LIBRARY_PATH', - 
environ_cp.get('LD_LIBRARY_PATH')) - - set_tf_cuda_clang(environ_cp) - if environ_cp.get('TF_CUDA_CLANG') == '1': - # Ask whether we should download the clang toolchain. - set_tf_download_clang(environ_cp) - if environ_cp.get('TF_DOWNLOAD_CLANG') != '1': - # Set up which clang we should use as the cuda / host compiler. - set_clang_cuda_compiler_path(environ_cp) - else: - # Set up which gcc nvcc should use as the host compiler - # No need to set this on Windows - if not is_windows(): - set_gcc_host_compiler_path(environ_cp) - set_other_cuda_vars(environ_cp) + parser = argparse.ArgumentParser() + parser.add_argument( + "--workspace", + type=str, + default=_TF_WORKSPACE_ROOT, + help="The absolute path to your active Bazel workspace.") + args = parser.parse_args() + + # Make a copy of os.environ to be clear when functions and getting and setting + # environment variables. + environ_cp = dict(os.environ) + + check_bazel_version('0.10.0') + + reset_tf_configure_bazelrc(args.workspace) + cleanup_makefile() + setup_python(environ_cp) + + if is_windows(): + environ_cp['TF_NEED_AWS'] = '0' + environ_cp['TF_NEED_GCP'] = '0' + environ_cp['TF_NEED_HDFS'] = '0' + environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_KAFKA'] = '0' + environ_cp['TF_NEED_OPENCL_SYCL'] = '0' + environ_cp['TF_NEED_COMPUTECPP'] = '0' + environ_cp['TF_NEED_OPENCL'] = '0' + environ_cp['TF_CUDA_CLANG'] = '0' + environ_cp['TF_NEED_TENSORRT'] = '0' + # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on + # Windows. + environ_cp['TF_DOWNLOAD_CLANG'] = '0' + + if is_macos(): + environ_cp['TF_NEED_JEMALLOC'] = '0' + environ_cp['TF_NEED_TENSORRT'] = '0' + + set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', + 'with_jemalloc', True) + set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', + 'with_gcp_support', True, 'gcp') + set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', + 'with_hdfs_support', True, 'hdfs') + set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform', + 'with_aws_support', True, 'aws') + set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', + 'with_kafka_support', True, 'kafka') + set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', + False, 'xla') + set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', False, + 'gdr') + set_build_var(environ_cp, 'TF_NEED_VERBS', 'VERBS', 'with_verbs_support', + False, 'verbs') + set_build_var(environ_cp, 'TF_NEED_NGRAPH', 'nGraph', + 'with_ngraph_support', False, 'ngraph') + + set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + set_host_cxx_compiler(environ_cp) + set_host_c_compiler(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_COMPUTECPP', 'ComputeCPP', + True) + if environ_cp.get('TF_NEED_COMPUTECPP') == '1': + set_computecpp_toolkit_path(environ_cp) + else: + set_trisycl_include_dir(environ_cp) + + set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) + if (environ_cp.get('TF_NEED_CUDA') == '1' + and 'TF_CUDA_CONFIG_REPO' not in environ_cp): + set_tf_cuda_version(environ_cp) + set_tf_cudnn_version(environ_cp) + if is_linux(): + set_tf_tensorrt_install_path(environ_cp) + set_tf_nccl_install_path(environ_cp) + + set_tf_cuda_compute_capabilities(environ_cp) + if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( + 'LD_LIBRARY_PATH') != '1': + write_action_env_to_bazelrc('LD_LIBRARY_PATH', + environ_cp.get('LD_LIBRARY_PATH')) + + set_tf_cuda_clang(environ_cp) + if 
environ_cp.get('TF_CUDA_CLANG') == '1': + # Ask whether we should download the clang toolchain. + set_tf_download_clang(environ_cp) + if environ_cp.get('TF_DOWNLOAD_CLANG') != '1': + # Set up which clang we should use as the cuda / host compiler. + set_clang_cuda_compiler_path(environ_cp) else: - # CUDA not required. Ask whether we should download the clang toolchain and - # use it for the CPU build. - set_tf_download_clang(environ_cp) - if environ_cp.get('TF_DOWNLOAD_CLANG') == '1': - write_to_bazelrc('build --config=download_clang') - write_to_bazelrc('test --config=download_clang') - - set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) - if environ_cp.get('TF_NEED_MPI') == '1': - set_mpi_home(environ_cp) - set_other_mpi_vars(environ_cp) - - set_grpc_build_flags() - set_cc_opt_flags(environ_cp) - set_build_strip_flag() - set_windows_build_flags() - - if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', - False, - ('Would you like to interactively configure ./WORKSPACE for ' - 'Android builds?'), 'Searching for NDK and SDK installations.', - 'Not configuring the WORKSPACE for Android builds.'): - create_android_ndk_rule(environ_cp) - create_android_sdk_rule(environ_cp) - - print('Preconfigured Bazel build configs. You can use any of the below by ' - 'adding "--config=<>" to your build command. See tools/bazel.rc for ' - 'more details.') - config_info_line('mkl', 'Build with MKL support.') - config_info_line('monolithic', - 'Config for mostly static monolithic build.') + # Set up which gcc nvcc should use as the host compiler + # No need to set this on Windows + if not is_windows(): + set_gcc_host_compiler_path(environ_cp) + set_other_cuda_vars(environ_cp) + else: + # CUDA not required. Ask whether we should download the clang toolchain and + # use it for the CPU build. + set_tf_download_clang(environ_cp) + if environ_cp.get('TF_DOWNLOAD_CLANG') == '1': + write_to_bazelrc('build --config=download_clang') + write_to_bazelrc('test --config=download_clang') + + set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) + if environ_cp.get('TF_NEED_MPI') == '1': + set_mpi_home(environ_cp) + set_other_mpi_vars(environ_cp) + + set_grpc_build_flags() + set_cc_opt_flags(environ_cp) + set_build_strip_flag() + set_windows_build_flags() + + if get_var(environ_cp, 'TF_SET_ANDROID_WORKSPACE', 'android workspace', + False, + ('Would you like to interactively configure ./WORKSPACE for ' + 'Android builds?'), 'Searching for NDK and SDK installations.', + 'Not configuring the WORKSPACE for Android builds.'): + create_android_ndk_rule(environ_cp) + create_android_sdk_rule(environ_cp) + + print('Preconfigured Bazel build configs. You can use any of the below by ' + 'adding "--config=<>" to your build command. 
See tools/bazel.rc for ' + 'more details.') + config_info_line('mkl', 'Build with MKL support.') + config_info_line('monolithic', + 'Config for mostly static monolithic build.') if __name__ == '__main__': - main() + main() diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 8953edf8a6..6cb8742df0 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -831,7 +831,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc0.tar.gz", "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc0.tar.gz" ], - sha256 = "c09a35d0a605afeeaf5aad81181a6abc7e9b9e39312e8fdfbae20cbd8eb58523", + sha256 = "b4f4a530c0eb7a406bf3c9f6965d74e9a371a7635a9e644e190ec54258bb70ac", strip_prefix = "ngraph-tf-0.3.0-rc0", build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), ) -- cgit v1.2.3 From 2f3e97cf0ee50ee0e55ab1a3795cc82537426e8c Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Wed, 25 Jul 2018 02:00:52 -0700 Subject: Upgrade the nGraph TensorFlow bridge. --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index f4eac0a9a1..fad96d11bf 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -848,11 +848,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "ngraph_tf", urls = [ - "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc0.tar.gz", - "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc0.tar.gz" + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz", + "https://github.com/NervanaSystems/ngraph-tf/archive/v0.3.0-rc1.tar.gz" ], - sha256 = "b4f4a530c0eb7a406bf3c9f6965d74e9a371a7635a9e644e190ec54258bb70ac", - strip_prefix = "ngraph-tf-0.3.0-rc0", + sha256 = "7919332cb15120101c3e05c1b969a5e029a6411581312583c8f80b6aaaa83072", + strip_prefix = "ngraph-tf-0.3.0-rc1", build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), ) -- cgit v1.2.3 From 5e07ab3ff51b06c3291789ac0e65499217f720d1 Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Fri, 27 Jul 2018 07:32:47 -0700 Subject: Added a normalization term to ctc_beam_search_decoder in order to get correct log probabilities. It solves https://github.com/tensorflow/tensorflow/issues/6034 --- tensorflow/core/util/ctc/ctc_beam_search.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/ctc/ctc_beam_search.h b/tensorflow/core/util/ctc/ctc_beam_search.h index 709c65fc96..fa58b5cedf 100644 --- a/tensorflow/core/util/ctc/ctc_beam_search.h +++ b/tensorflow/core/util/ctc/ctc_beam_search.h @@ -259,6 +259,16 @@ void CTCBeamSearchDecoder::Step( } else { max_coeff = raw_input.maxCoeff(); } + + // Get normalization term of softmax: log(sum(exp(logit[j]-max_coeff))). + float logsumexp = 0.0; + for (int j = 0; j < raw_input.size(); ++j) { + logsumexp += expf(raw_input(j) - max_coeff); + } + logsumexp = logf(logsumexp); + // Final normalization offset to get correct log probabilities. + float norm_offset = max_coeff + logsumexp; + const float label_selection_input_min = (label_selection_margin_ >= 0) ? 
(max_coeff - label_selection_margin_) : -std::numeric_limits::infinity(); @@ -290,10 +300,10 @@ void CTCBeamSearchDecoder::Step( beam_scorer_->GetStateExpansionScore(b->state, previous)); } // Plabel(l=abc @ t=6) *= P(c @ 6) - b->newp.label += raw_input(b->label) - max_coeff; + b->newp.label += raw_input(b->label) - norm_offset; } // Pblank(l=abc @ t=6) = P(l=abc @ t=5) * P(- @ 6) - b->newp.blank = b->oldp.total + raw_input(blank_index_) - max_coeff; + b->newp.blank = b->oldp.total + raw_input(blank_index_) - norm_offset; // P(l=abc @ t=6) = Plabel(l=abc @ t=6) + Pblank(l=abc @ t=6) b->newp.total = LogSumExp(b->newp.blank, b->newp.label); @@ -328,6 +338,8 @@ void CTCBeamSearchDecoder::Step( const float logit = top_k ? top_k_logits[ind] : raw_input(ind); // Perform label selection: if input for this label looks very // unpromising, never evaluate it with a scorer. + // We may compare logits instead of log probabilities, + // since the difference is the same in both cases. if (logit < label_selection_input_min) { continue; } @@ -341,7 +353,7 @@ void CTCBeamSearchDecoder::Step( // Plabel(l=abcd @ t=6) = P(l=abc @ t=5) * P(d @ 6) beam_scorer_->ExpandState(b->state, b->label, &c.state, c.label); float previous = (c.label == b->label) ? b->oldp.blank : b->oldp.total; - c.newp.label = logit - max_coeff + + c.newp.label = logit - norm_offset + beam_scorer_->GetStateExpansionScore(c.state, previous); // P(l=abcd @ t=6) = Plabel(l=abcd @ t=6) c.newp.total = c.newp.label; -- cgit v1.2.3 From 6d76eaaa751cc56a68dcf6e39c5d72191b9be26e Mon Sep 17 00:00:00 2001 From: Vitaly Lavrukhin Date: Fri, 27 Jul 2018 17:16:05 -0700 Subject: Fix log probabilities in test --- tensorflow/python/kernel_tests/ctc_decoder_ops_test.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py b/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py index e1920eb568..41ae0b456f 100644 --- a/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py +++ b/tensorflow/python/kernel_tests/ctc_decoder_ops_test.py @@ -188,11 +188,11 @@ class CTCGreedyDecoderTest(test.TestCase): ], dtype=np.float32) # Add arbitrary offset - this is fine - input_log_prob_matrix_0 = np.log(input_prob_matrix_0) + 2.0 + input_prob_matrix_0 = input_prob_matrix_0 + 2.0 # len max_time_steps array of batch_size x depth matrices inputs = ([ - input_log_prob_matrix_0[t, :][np.newaxis, :] for t in range(seq_len_0) + input_prob_matrix_0[t, :][np.newaxis, :] for t in range(seq_len_0) ] # Pad to max_time_steps = 8 + 2 * [np.zeros( (1, depth), dtype=np.float32)]) @@ -200,11 +200,11 @@ class CTCGreedyDecoderTest(test.TestCase): # batch_size length vector of sequence_lengths seq_lens = np.array([seq_len_0], dtype=np.int32) - # batch_size length vector of negative log probabilities + # batch_size length vector of log probabilities log_prob_truth = np.array( [ - 0.584855, # output beam 0 - 0.389139 # output beam 1 + -5.811451, # output beam 0 + -6.63339 # output beam 1 ], np.float32)[np.newaxis, :] @@ -215,11 +215,11 @@ class CTCGreedyDecoderTest(test.TestCase): [[0, 0], [0, 1]], dtype=np.int64), np.array( [1, 0], dtype=np.int64), np.array( [1, 2], dtype=np.int64)), - # beam 1, batch 0, three outputs decoded + # beam 1, batch 0, one output decoded (np.array( - [[0, 0], [0, 1], [0, 2]], dtype=np.int64), np.array( - [0, 1, 0], dtype=np.int64), np.array( - [1, 3], dtype=np.int64)), + [[0, 0]], dtype=np.int64), np.array( + [1], dtype=np.int64), np.array( + [1, 1], dtype=np.int64)), ] # Test 
correct decoding. -- cgit v1.2.3 From 35c81bb208622589abaebccb35c44da9148e2d14 Mon Sep 17 00:00:00 2001 From: Stefan Dyulgerov Date: Sun, 22 Jul 2018 17:07:34 +0300 Subject: ignore cmake build artifacts --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5afe375f46..4e526261c7 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ __pycache__ *.swp .vscode/ cmake_build/ +tensorflow/contrib/cmake/_build/ .idea/** /build/ [Bb]uild/ -- cgit v1.2.3 From 171b34a519ea2c888d0f9fd754ca8a8c5ed02587 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sun, 29 Jul 2018 09:21:29 +0800 Subject: PREP: use np.array to avoid copy behavior of index tensor --- tensorflow/python/ops/array_grad.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index a2b5f77f91..d709f6b36b 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from math import ceil +import numpy as np from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context @@ -774,17 +775,25 @@ def _ExtractImagePatchesGrad(op, grad): row_steps = range(0, rows_out * stride_r, stride_r) col_steps = range(0, cols_out * stride_h, stride_h) - idx = [] + idx = np.zeros((rows_out * cols_out * ksize_r * ksize_c, 2), + dtype=np.int64) + idx_len = 0 for i in range(rows_out): + r_low = row_steps[i] - pad_rows + r_high = r_low + ksize_r_eff + for j in range(cols_out): - r_low, c_low = row_steps[i] - pad_rows, col_steps[j] - pad_cols - r_high, c_high = r_low + ksize_r_eff, c_low + ksize_c_eff - - idx.extend([(r * (cols_in) + c, i * (cols_out * ksize_r * ksize_c) + j * - (ksize_r * ksize_c) + ri * (ksize_c) + ci) - for (ri, r) in enumerate(range(r_low, r_high, rate_r)) - for (ci, c) in enumerate(range(c_low, c_high, rate_c)) - if 0 <= r and r < rows_in and 0 <= c and c < cols_in]) + c_low = col_steps[j] - pad_cols + c_high = c_low + ksize_c_eff + + for (ri, r) in enumerate(range(r_low, r_high, rate_r)): + for (ci, c) in enumerate(range(c_low, c_high, rate_c)): + if 0 <= r and r < rows_in and 0 <= c and c < cols_in: + idx[idx_len][0] = r * (cols_in) + c + idx[idx_len][1] = (i * (cols_out * ksize_r * ksize_c) + + j * (ksize_r * ksize_c) + ri * (ksize_c) + ci) + idx_len += 1 + idx = idx[:idx_len] sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c) -- cgit v1.2.3 From 8e761899a7a8102334fc688b6b0fb69a23e93f92 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sun, 29 Jul 2018 17:11:45 +0800 Subject: PREP: faster method for construction idx array --- tensorflow/python/ops/array_grad.py | 86 ++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 50 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index d709f6b36b..4578639649 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -735,7 +735,6 @@ def _QuantizeAndDequantizeV3Grad(_, grad): @ops.RegisterGradient("ExtractImagePatches") def _ExtractImagePatchesGrad(op, grad): - batch_size, rows_in, cols_in, channels = [ dim.value for dim in op.inputs[0].get_shape() ] @@ -743,28 +742,44 @@ def _ExtractImagePatchesGrad(op, grad): batch_size = input_bhwc[0] channels = input_bhwc[3] + # Create indices matrix for input tensor. 
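# Editor's aside (not part of this patch): the core trick in this rewrite is
# to run a tensor of *indices* through extract_image_patches itself, so every
# output slot records which input pixel fed it. A toy 1-D NumPy analogue for
# intuition (the helper below is hypothetical, standing in for the real op):
import numpy as np
def toy_patches_1d(x, ksize):                  # stand-in for the patch op
  return np.stack([x[i:i + ksize] for i in range(len(x) - ksize + 1)])
inp_idx = np.arange(1, 6)                      # indices 1..5 (0 kept for pad)
print(toy_patches_1d(inp_idx, 3))              # row j = input sources of patch j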
+ # Note that 0 is preserved for padding location, + # so indice for input starts from 1 to 1 + rows_in * cols_in. + input_indices_num = 1 + rows_in * cols_in + input_idx = array_ops.reshape(math_ops.range(1, input_indices_num, + dtype=ops.dtypes.int64), + (1, rows_in, cols_in, 1)) + input_idx_patched = gen_array_ops.extract_image_patches( + input_idx, + op.get_attr("ksizes"), + op.get_attr("strides"), + op.get_attr("rates"), + op.get_attr("padding")) + + # Create indices matrix for output tensor. _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()] _, ksize_r, ksize_c, _ = op.get_attr("ksizes") - _, stride_r, stride_h, _ = op.get_attr("strides") - _, rate_r, rate_c, _ = op.get_attr("rates") - padding = op.get_attr("padding") - - ksize_r_eff = ksize_r + (ksize_r - 1) * (rate_r - 1) - ksize_c_eff = ksize_c + (ksize_c - 1) * (rate_c - 1) - - if padding == b"SAME": - rows_out = int(ceil(rows_in / stride_r)) - cols_out = int(ceil(cols_in / stride_h)) - pad_rows = ((rows_out - 1) * stride_r + ksize_r_eff - rows_in) // 2 - pad_cols = ((cols_out - 1) * stride_h + ksize_c_eff - cols_in) // 2 - - elif padding == b"VALID": - rows_out = int(ceil((rows_in - ksize_r_eff + 1) / stride_r)) - cols_out = int(ceil((cols_in - ksize_c_eff + 1) / stride_h)) - pad_rows = (rows_out - 1) * stride_r + ksize_r_eff - rows_in - pad_cols = (cols_out - 1) * stride_h + ksize_c_eff - cols_in - - pad_rows, pad_cols = max(0, pad_rows), max(0, pad_cols) + # Indice for output starts from 0. + output_indices_num = rows_out * cols_out * ksize_r * ksize_c + output_idx = array_ops.reshape(math_ops.range(output_indices_num, + dtype=ops.dtypes.int64), + (1, rows_out, cols_out, ksize_r * ksize_c)) + + # Construct mapping table for indices: input -> output. + idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), + array_ops.expand_dims(output_idx, axis=-1)], + axis=-1) + idx_map = array_ops.reshape(idx_matrix, (-1, 2)) + + sp_shape = (input_indices_num, output_indices_num) + sp_mat = sparse_tensor.SparseTensor( + idx_map, + array_ops.ones_like(idx_map[:, 0], dtype=grad.dtype), + sp_shape) + # Remove all padding locations: [0, :]. 
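# Editor's aside (not part of this patch): a small runnable picture of why the
# first row is sliced away below. With SAME padding, the patched index tensor
# holds 0 wherever a window read padding, so sparse row 0 collects exactly
# those contributions, which must not reach the input gradient.
import numpy as np
padded = np.array([0, 1, 2, 3, 0])             # hypothetical padded indices
windows = np.stack([padded[i:i + 3] for i in range(3)])
print((windows == 0).sum(), 'window slots map to padding, i.e. to row 0')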
+ sp_mat = sparse_ops.sparse_slice(sp_mat, + (1, 0), + (input_indices_num - 1, output_indices_num)) grad_expanded = array_ops.transpose( array_ops.reshape( @@ -772,35 +787,6 @@ def _ExtractImagePatchesGrad(op, grad): (1, 2, 3, 4, 0, 5)) grad_flat = array_ops.reshape(grad_expanded, (-1, batch_size * channels)) - row_steps = range(0, rows_out * stride_r, stride_r) - col_steps = range(0, cols_out * stride_h, stride_h) - - idx = np.zeros((rows_out * cols_out * ksize_r * ksize_c, 2), - dtype=np.int64) - idx_len = 0 - for i in range(rows_out): - r_low = row_steps[i] - pad_rows - r_high = r_low + ksize_r_eff - - for j in range(cols_out): - c_low = col_steps[j] - pad_cols - c_high = c_low + ksize_c_eff - - for (ri, r) in enumerate(range(r_low, r_high, rate_r)): - for (ci, c) in enumerate(range(c_low, c_high, rate_c)): - if 0 <= r and r < rows_in and 0 <= c and c < cols_in: - idx[idx_len][0] = r * (cols_in) + c - idx[idx_len][1] = (i * (cols_out * ksize_r * ksize_c) + - j * (ksize_r * ksize_c) + ri * (ksize_c) + ci) - idx_len += 1 - idx = idx[:idx_len] - - sp_shape = (rows_in * cols_in, rows_out * cols_out * ksize_r * ksize_c) - - sp_mat = sparse_tensor.SparseTensor( - array_ops.constant(idx, dtype=ops.dtypes.int64), - array_ops.ones((len(idx),), dtype=grad.dtype), sp_shape) - jac = sparse_ops.sparse_tensor_dense_matmul(sp_mat, grad_flat) grad_out = array_ops.reshape(jac, (rows_in, cols_in, batch_size, channels)) -- cgit v1.2.3 From c22b5c678a42474fbc9aab59345ac09eeb685c37 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sun, 29 Jul 2018 17:20:45 +0800 Subject: CLN: remove unused import --- tensorflow/python/ops/array_grad.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 4578639649..33c960e0dc 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -18,9 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from math import ceil -import numpy as np - from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.framework import constant_op -- cgit v1.2.3 From 0d49774a0487b26737b950b510605833671775d9 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sun, 29 Jul 2018 17:23:23 +0800 Subject: CLN: typo: indices --- tensorflow/python/ops/array_grad.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 33c960e0dc..b6f03144b1 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -741,7 +741,7 @@ def _ExtractImagePatchesGrad(op, grad): # Create indices matrix for input tensor. # Note that 0 is preserved for padding location, - # so indice for input starts from 1 to 1 + rows_in * cols_in. + # so indices for input start from 1 to 1 + rows_in * cols_in. input_indices_num = 1 + rows_in * cols_in input_idx = array_ops.reshape(math_ops.range(1, input_indices_num, dtype=ops.dtypes.int64), @@ -756,7 +756,7 @@ def _ExtractImagePatchesGrad(op, grad): # Create indices matrix for output tensor. _, rows_out, cols_out, _ = [dim.value for dim in op.outputs[0].get_shape()] _, ksize_r, ksize_c, _ = op.get_attr("ksizes") - # Indice for output starts from 0. + # Indices for output start from 0. 
output_indices_num = rows_out * cols_out * ksize_r * ksize_c output_idx = array_ops.reshape(math_ops.range(output_indices_num, dtype=ops.dtypes.int64), -- cgit v1.2.3 From 4f456bc6f19d667a6d32a7459742b3139e8fe617 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Sun, 29 Jul 2018 22:52:44 +0800 Subject: CLN: clean codes --- tensorflow/python/ops/array_grad.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index b6f03144b1..328b4f7d53 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -762,19 +762,19 @@ def _ExtractImagePatchesGrad(op, grad): dtype=ops.dtypes.int64), (1, rows_out, cols_out, ksize_r * ksize_c)) - # Construct mapping table for indices: input -> output. + # Construct mapping table for indices: (input -> output). idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), array_ops.expand_dims(output_idx, axis=-1)], axis=-1) idx_map = array_ops.reshape(idx_matrix, (-1, 2)) sp_shape = (input_indices_num, output_indices_num) - sp_mat = sparse_tensor.SparseTensor( + sp_mat_full = sparse_tensor.SparseTensor( idx_map, array_ops.ones_like(idx_map[:, 0], dtype=grad.dtype), sp_shape) - # Remove all padding locations: [0, :]. - sp_mat = sparse_ops.sparse_slice(sp_mat, + # Remove all padding locations [0, :]. + sp_mat = sparse_ops.sparse_slice(sp_mat_full, (1, 0), (input_indices_num - 1, output_indices_num)) -- cgit v1.2.3 From e6ae2664c5f72f09c9a6d102a89963c4a9bbf8f1 Mon Sep 17 00:00:00 2001 From: Johannes Schmitz Date: Tue, 31 Jul 2018 20:05:29 +0200 Subject: Improve readability of Tensor::CheckType error output --- tensorflow/core/framework/tensor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/tensor.cc b/tensorflow/core/framework/tensor.cc index 384a42fc11..2e5426712b 100644 --- a/tensorflow/core/framework/tensor.cc +++ b/tensorflow/core/framework/tensor.cc @@ -613,13 +613,13 @@ bool Tensor::IsInitialized() const { } void Tensor::CheckType(DataType expected_dtype) const { - CHECK_EQ(dtype(), expected_dtype) + CHECK_EQ(dtype(), expected_dtype) << " " << DataTypeString(expected_dtype) << " expected, got " << DataTypeString(dtype()); } void Tensor::CheckTypeAndIsAligned(DataType expected_dtype) const { - CHECK_EQ(dtype(), expected_dtype) + CHECK_EQ(dtype(), expected_dtype) << " " << DataTypeString(expected_dtype) << " expected, got " << DataTypeString(dtype()); CHECK(IsAligned()) << "ptr = " << base(); -- cgit v1.2.3 From 94e0c6bb67b82eb1a43135eb5edff6c6fe4ab638 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 18 Jul 2018 16:59:07 -0700 Subject: Add new Dockerfile assembler based on partials This change adds a new suite of TensorFlow dockerfiles. The dockerfiles come from an assembler controlled by a yaml spec, and are based on a set of re-usable partial dockerfiles. The assembler and spec include conveniences like spec validation, references to other images and specs for minimizing repetition, and arg expansion. 
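As a rough sketch of the assembly flow described above (an editor's
illustration, not code from this patch; the real logic lives in assembler.py
below and is driven by spec.yml), each Dockerfile is produced by
concatenating partial files and substituting documented ARG defaults:

    # Hypothetical, heavily simplified analogue of assembler.py.
    PARTIALS = {'ubuntu': 'FROM ubuntu:${UBUNTU_VERSION}\n',
                'python': 'RUN apt-get update && apt-get install -y python\n'}

    def assemble(partial_names, args):
      text = ''.join(PARTIALS[name] for name in partial_names)
      for arg, value in args.items():
        text = text.replace('${%s}' % arg, value)
      return text

    print(assemble(['ubuntu', 'python'], {'UBUNTU_VERSION': '16.04'}))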
--- tensorflow/tools/docker/README.md | 7 + tensorflow/tools/dockerfiles/Dockerfile | 11 + tensorflow/tools/dockerfiles/README.md | 38 ++ tensorflow/tools/dockerfiles/assembler.py | 528 +++++++++++++++++++++ tensorflow/tools/dockerfiles/bashrc | 33 ++ .../dockerfiles/cpu-devel-jupyter.Dockerfile | 85 ++++ .../dockerfiles/dockerfiles/cpu-devel.Dockerfile | 74 +++ .../dockerfiles/dockerfiles/cpu-jupyter.Dockerfile | 54 +++ .../tools/dockerfiles/dockerfiles/cpu.Dockerfile | 43 ++ .../dockerfiles/nvidia-devel-jupyter.Dockerfile | 105 ++++ .../dockerfiles/nvidia-devel.Dockerfile | 94 ++++ .../dockerfiles/nvidia-jupyter.Dockerfile | 75 +++ .../dockerfiles/dockerfiles/nvidia.Dockerfile | 64 +++ .../dockerfiles/partials/bazel.partial.Dockerfile | 13 + .../partials/jupyter.partial.Dockerfile | 8 + .../partials/nvidia-devel.partial.Dockerfile | 43 ++ .../dockerfiles/partials/nvidia.partial.Dockerfile | 23 + .../dockerfiles/partials/python.partial.Dockerfile | 12 + .../dockerfiles/partials/shell.partial.Dockerfile | 2 + .../partials/tensorflow.partial.Dockerfile | 2 + .../partials/ubuntu-devel.partial.Dockerfile | 24 + .../dockerfiles/partials/ubuntu.partial.Dockerfile | 2 + tensorflow/tools/dockerfiles/spec.yml | 177 +++++++ 23 files changed, 1517 insertions(+) create mode 100644 tensorflow/tools/dockerfiles/Dockerfile create mode 100644 tensorflow/tools/dockerfiles/README.md create mode 100644 tensorflow/tools/dockerfiles/assembler.py create mode 100644 tensorflow/tools/dockerfiles/bashrc create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia-jupyter.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/dockerfiles/nvidia.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/bazel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/nvidia-devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/nvidia.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/python.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/shell.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu-devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/spec.yml diff --git a/tensorflow/tools/docker/README.md b/tensorflow/tools/docker/README.md index 525f2995ce..41b8ffdf72 100644 --- a/tensorflow/tools/docker/README.md +++ b/tensorflow/tools/docker/README.md @@ -1,3 +1,10 @@ +# WARNING: THESE IMAGES ARE DEPRECATED. + +TensorFlow's Dockerfiles are now located in +[`tensorflow/tools/dockerfiles/`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/dockerfiles). + +This directory will eventually be removed. 
+ # Using TensorFlow via Docker This directory contains `Dockerfile`s to make it easy to get up and running with diff --git a/tensorflow/tools/dockerfiles/Dockerfile b/tensorflow/tools/dockerfiles/Dockerfile new file mode 100644 index 0000000000..e8ca012298 --- /dev/null +++ b/tensorflow/tools/dockerfiles/Dockerfile @@ -0,0 +1,11 @@ +FROM hadolint/hadolint:latest-debian +LABEL maintainer="Austin Anderson " + +RUN apt-get update && apt-get install -y python3 python3-pip bash +RUN pip3 install --upgrade pip setuptools pyyaml absl-py cerberus + +WORKDIR /tf +VOLUME ["/tf"] + +COPY bashrc /etc/bash.bashrc +RUN chmod 777 /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md new file mode 100644 index 0000000000..1fe51adb4a --- /dev/null +++ b/tensorflow/tools/dockerfiles/README.md @@ -0,0 +1,38 @@ +# TensorFlow Dockerfiles + +This directory houses TensorFlow's Dockerfiles. **DO NOT EDIT THE DOCKERFILES +MANUALLY!** They are maintained by `assembler.py`, which builds Dockerfiles from +the files in `partials/` and the rules in `spec.yml`. See [the Maintaining +section](#maintaining) for more information. + +## Building + +The Dockerfiles in the `dockerfiles` directory must have their build context set +to **the directory with this README.md** to copy in helper files. For example: + +```bash +$ docker build -f ./dockerfiles/cpu.Dockerfile -t tf-cpu . +``` + +Each Dockerfile has its own set of available `--build-arg`s which are documented +in the Dockerfile itself. + +## Maintaining + +To make changes to TensorFlow's Dockerfiles, you'll update `spec.yml` and the +`*.partial.Dockerfile` files in the `partials` directory, then run +`assembler.py` to re-generate the full Dockerfiles before creating a pull +request. + +You can use the `Dockerfile` in this directory to build an editing environment +that has all of the Python dependencies you'll need: + +```bash +$ docker build -t tf-assembler . + +# Set --user to set correct permissions on generated files +$ docker run --user $(id -u):$(id -g) -it -v $(pwd):/tf tf-assembler bash + +# In the container... +/tf $ python3 ./assembler.py -o dockerfiles -s spec.yml --validate +``` diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py new file mode 100644 index 0000000000..a33c42ace6 --- /dev/null +++ b/tensorflow/tools/dockerfiles/assembler.py @@ -0,0 +1,528 @@ +"""Assemble common TF Dockerfiles from many parts. + +TODO(angerson): DO NOT SUBMIT without a detailed description of assembler. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import errno +import os +import os.path +import re +import shutil +import textwrap + +from absl import app +from absl import flags +import cerberus +import yaml + +FLAGS = flags.FLAGS + +flags.DEFINE_boolean( + 'dry_run', False, 'Do not actually generate Dockerfiles', short_name='n') + +flags.DEFINE_string( + 'spec_file', + './spec.yml', + 'Path to a YAML specification file', + short_name='s') + +flags.DEFINE_string( + 'output_dir', + '.', ('Path to an output directory for Dockerfiles. 
' + 'Will be created if it doesn\'t exist.'), + short_name='o') + +flags.DEFINE_string( + 'partial_dir', + './partials', + 'Path to a directory containing foo.partial.Dockerfile partial files.', + short_name='p') + +flags.DEFINE_boolean( + 'quiet_dry_run', + True, + 'Do not print contents of dry run Dockerfiles.', + short_name='q') + +flags.DEFINE_boolean( + 'validate', True, 'Validate generated Dockerfiles', short_name='c') + +# Schema to verify the contents of spec.yml with Cerberus. +# Must be converted to a dict from yaml to work. +# Note: can add python references with e.g. +# !!python/name:builtins.str +# !!python/name:__main__.funcname +SCHEMA_TEXT = """ +header: + type: string + +partials: + type: dict + keyschema: + type: string + valueschema: + type: dict + schema: + desc: + type: string + args: + type: dict + keyschema: + type: string + valueschema: + anyof: + - type: [ boolean, number, string ] + - type: dict + schema: + default: + type: [ boolean, number, string ] + desc: + type: string + options: + type: list + schema: + type: string + +images: + keyschema: + type: string + valueschema: + type: dict + schema: + desc: + type: string + arg-defaults: + type: list + schema: + anyof: + - type: dict + keyschema: + type: string + arg_in_use: true + valueschema: + type: string + - type: string + isimage: true + create-dockerfile: + type: boolean + partials: + type: list + schema: + anyof: + - type: dict + keyschema: + type: string + regex: image + valueschema: + type: string + isimage: true + - type: string + ispartial: true +""" + + +class TfDockerValidator(cerberus.Validator): + """Custom Cerberus validator for TF dockerfile spec. + + Note that each custom validator's docstring must end with a segment describing + its own validation schema. + """ + + def _validate_ispartial(self, ispartial, field, value): + """Validate that a partial references an existing partial spec. + + Args: + ispartial: Value of the rule, a bool + field: The field being validated + value: The field's value + + The rule's arguments are validated against this schema: + {'type': 'boolean'} + """ + if ispartial and value not in self.root_document.get('partials', dict()): + self._error(field, '{} is not an existing partial.'.format(value)) + + def _validate_isimage(self, isimage, field, value): + """Validate that an image references an existing partial spec. + + Args: + isimage: Value of the rule, a bool + field: The field being validated + value: The field's value + + The rule's arguments are validated against this schema: + {'type': 'boolean'} + """ + if isimage and value not in self.root_document.get('images', dict()): + self._error(field, '{} is not an existing image.'.format(value)) + + def _validate_arg_in_use(self, arg_in_use, field, value): + """Validate that an arg references an existing partial spec's args. + + Args: + arg_in_use: Value of the rule, a bool + field: The field being validated + value: The field's value + + The rule's arguments are validated against this schema: + {'type': 'boolean'} + """ + if arg_in_use: + for partial in self.root_document.get('partials', dict()).values(): + if value in partial.get('args', tuple()): + return + self._error(field, '{} is not an arg used in any partial.'.format(value)) + + +def build_partial_description(partial_spec): + """Create the documentation lines for a specific partial. + + Generates something like this: + + # This is the partial's description, from spec.yml. + # --build-arg ARG_NAME=argdefault + # this is one of the args. 
+ # --build-arg ANOTHER_ARG=(some|choices) + # another arg. + + Args: + partial_spec: A dict representing one of the partials from spec.yml. Doesn't + include the name of the partial; is a dict like { desc: ..., args: ... }. + + Returns: + A commented string describing this partial. + """ + + # Start from linewrapped desc field + lines = [] + wrapper = textwrap.TextWrapper( + initial_indent='# ', subsequent_indent='# ', width=80) + description = wrapper.fill(partial_spec.get('desc', '( no comments )')) + lines.extend(['#', description]) + + # Document each arg + for arg, arg_data in partial_spec.get('args', dict()).items(): + + # Wrap arg description with comment lines + desc = arg_data.get('desc', '( no description )') + desc = textwrap.fill( + desc, + initial_indent='# ', + subsequent_indent='# ', + width=80, + drop_whitespace=False) + + # Document (each|option|like|this) + if 'options' in arg_data: + arg_options = ' ({})'.format('|'.join(arg_data['options'])) + else: + arg_options = '' + + # Add usage sample + arg_use = '# --build-arg {}={}{}'.format(arg, + arg_data.get('default', '(unset)'), + arg_options) + lines.extend([arg_use, desc]) + return '\n'.join(lines) + + +def construct_contents(partial_specs, image_spec): + """Assemble the dockerfile contents for an image spec. + + It assembles a concrete list of partial references into a single, large + string. + Also expands argument defaults, so that the resulting Dockerfile doesn't have + to be configured with --build-arg=... every time. That is, any ARG directive + will be updated with a new default value. + + Args: + partial_specs: The dict from spec.yml["partials"]. + image_spec: One of the dict values from spec.yml["images"]. + + Returns: + A string containing a valid Dockerfile based on the partials listed in + image_spec. + """ + processed_partial_strings = [] + for partial_name in image_spec['partials']: + + # Apply image arg-defaults to existing arg defaults + partial_spec = copy.deepcopy(partial_specs[partial_name]) + args = partial_spec.get('args', dict()) + for k_v in image_spec.get('arg-defaults', []): + arg, value = list(k_v.items())[0] + if arg in args: + args[arg]['default'] = value + + # Read partial file contents + filename = partial_spec.get('file', partial_name) + partial_path = os.path.join(FLAGS.partial_dir, + '{}.partial.Dockerfile'.format(filename)) + with open(partial_path, 'r') as f_partial: + partial_contents = f_partial.read() + + # Replace ARG FOO=BAR with ARG FOO=[new-default] + for arg, arg_data in args.items(): + if 'default' in arg_data and arg_data['default']: + default = '={}'.format(arg_data['default']) + else: + default = '' + partial_contents = re.sub(r'ARG {}.*'.format(arg), 'ARG {}{}'.format( + arg, default), partial_contents) + processed_partial_strings.append(partial_contents) + return '\n'.join(processed_partial_strings) + + +# Create a directory and its parents, even if it already exists +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + + +def construct_documentation(header, partial_specs, image_spec): + """Assemble all of the documentation for a single dockerfile. + + Builds explanations of included partials and available build args. + + Args: + header: The string from spec.yml["header"]; will be commented and wrapped. + partial_specs: The dict from spec.yml["partials"]. + image_spec: The spec for the dockerfile being built. + + Returns: + A string containing a commented header that documents the contents of the + dockerfile. 
+ + """ + # Comment and wrap header and image description + commented_header = '\n'.join(['# ' + l for l in header.splitlines()]) + commented_desc = '\n'.join( + ['# ' + l for l in image_spec.get('desc', '').splitlines()]) + partial_descriptions = [] + + # Build documentation for each partial in the image + for partial in image_spec['partials']: + + # Copy partial data for default args unique to this image + partial_spec = copy.deepcopy(partial_specs[partial]) + args = partial_spec.get('args', dict()) + + # Overwrite any existing arg defaults + for k_v in image_spec.get('arg-defaults', []): + arg, value = list(k_v.items())[0] + if arg in args: + args[arg]['default'] = value + + # Build the description from new args + partial_description = build_partial_description(partial_spec) + partial_descriptions.append(partial_description) + + contents = [commented_header, '#', commented_desc] + partial_descriptions + return '\n'.join(contents) + '\n' + + +def normalize_partial_args(partial_specs): + """Normalize the shorthand form of a partial's args specification. + + Turns this: + + partial: + args: + SOME_ARG: arg_value + + Into this: + + partial: + args: + SOME_ARG: + default: arg_value + + Args: + partial_specs: The dict from spec.yml["partials"]. This dict is modified in + place. + + Returns: + The modified contents of partial_specs. + + """ + for _, partial in partial_specs.items(): + args = partial.get('args', dict()) + for arg, value in args.items(): + if not isinstance(value, dict): + new_value = {'default': value} + args[arg] = new_value + return partial_specs + + +def flatten_args_references(image_specs): + """Resolve all default-args in each image spec to a concrete dict. + + Turns this: + + example-image: + arg-defaults: + - MY_ARG: ARG_VALUE + + another-example: + arg-defaults: + - ANOTHER_ARG: ANOTHER_VALUE + - example_image + + Into this: + + example-image: + arg-defaults: + - MY_ARG: ARG_VALUE + + another-example: + arg-defaults: + - ANOTHER_ARG: ANOTHER_VALUE + - MY_ARG: ARG_VALUE + + Args: + image_specs: A dict of image_spec dicts; should be the contents of the + "images" key in the global spec.yaml. This dict is modified in place and + then returned. + + Returns: + The modified contents of image_specs. + """ + for _, image_spec in image_specs.items(): + too_deep = 0 + while str in map(type, image_spec.get('arg-defaults', [])) and too_deep < 5: + new_args = [] + for arg in image_spec['arg-defaults']: + if isinstance(arg, str): + new_args.extend(image_specs[arg]['arg-defaults']) + else: + new_args.append(arg) + image_spec['arg-defaults'] = new_args + too_deep += 1 + return image_specs + + +def flatten_partial_references(image_specs): + """Resolve all partial references in each image spec to a concrete list. + + Turns this: + + example-image: + partials: + - foo + + another-example: + partials: + - bar + - image: example-image + - bat + + Into this: + + example-image: + partials: + - foo + + another-example: + partials: + - bar + - foo + - bat + Args: + image_specs: A dict of image_spec dicts; should be the contents of the + "images" key in the global spec.yaml. This dict is modified in place and + then returned. + + Returns: + The modified contents of image_specs. 
+ """ + for _, image_spec in image_specs.items(): + too_deep = 0 + while dict in map(type, image_spec['partials']) and too_deep < 5: + new_partials = [] + for partial in image_spec['partials']: + if isinstance(partial, str): + new_partials.append(partial) + else: + new_partials.extend(image_specs[partial['image']]['partials']) + image_spec['partials'] = new_partials + too_deep += 1 + return image_specs + + +def construct_dockerfiles(tf_spec): + """Generate a mapping of {"cpu": , ...}. + + Args: + tf_spec: The full spec.yml loaded as a python object. + + Returns: + A string:string dict of short names ("cpu-devel") to Dockerfile contents. + """ + names_to_contents = dict() + image_specs = tf_spec['images'] + image_specs = flatten_partial_references(image_specs) + image_specs = flatten_args_references(image_specs) + partial_specs = tf_spec['partials'] + partial_specs = normalize_partial_args(partial_specs) + + for name, image_spec in image_specs.items(): + if not image_spec.get('create-dockerfile', True): + continue + documentation = construct_documentation(tf_spec['header'], partial_specs, + image_spec) + contents = construct_contents(partial_specs, image_spec) + names_to_contents[name] = '\n'.join([documentation, contents]) + return names_to_contents + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + with open(FLAGS.spec_file, 'r') as spec_file: + tf_spec = yaml.load(spec_file) + + # Abort if spec.yaml is invalid + if FLAGS.validate: + schema = yaml.load(SCHEMA_TEXT) + v = TfDockerValidator(schema) + if not v.validate(tf_spec): + print('>> ERROR: {} is an invalid spec! The errors are:'.format( + FLAGS.spec_file)) + print(yaml.dump(v.errors, indent=2)) + exit(1) + else: + print('>> WARNING: Not validating {}'.format(FLAGS.spec_file)) + + # Generate mapping of { "cpu-devel": "", ... } + names_to_contents = construct_dockerfiles(tf_spec) + + # Write each completed Dockerfile + if not FLAGS.dry_run: + print('>> Emptying destination dir "{}"'.format(FLAGS.output_dir)) + shutil.rmtree(FLAGS.output_dir, ignore_errors=True) + mkdir_p(FLAGS.output_dir) + else: + print('>> Skipping creation of {} (dry run)'.format(FLAGS.output_dir)) + for name, contents in names_to_contents.items(): + path = os.path.join(FLAGS.output_dir, name + '.Dockerfile') + if FLAGS.dry_run: + print('>> Skipping writing contents of {} (dry run)'.format(path)) + print(contents) + else: + mkdir_p(FLAGS.output_dir) + print('>> Writing {}'.format(path)) + with open(path, 'w') as f: + f.write(contents) + + +if __name__ == '__main__': + app.run(main) diff --git a/tensorflow/tools/dockerfiles/bashrc b/tensorflow/tools/dockerfiles/bashrc new file mode 100644 index 0000000000..7f54609e78 --- /dev/null +++ b/tensorflow/tools/dockerfiles/bashrc @@ -0,0 +1,33 @@ +export PS1="\[\e[31m\]tf-docker\[\e[m\] \[\e[33m\]\w\[\e[m\] > " +export TERM=xterm-256color +alias grep="grep --color=auto" +alias ls="ls --color=auto" + +echo -e "\e[1;31m" +cat< + Start from Nvidia's Ubuntu base image with CUDA and CuDNN, with TF + development packages. + args: + UBUNTU_VERSION: 16.04 + + python: + desc: Python is required for TensorFlow and other libraries. + args: + USE_PYTHON_3_NOT_2: + default: true + desc: Install python 3 over Python 2 + + tensorflow: + desc: Install the TensorFlow Python package. 
+ args: + TF_PACKAGE: + default: tensorflow + options: + - tensorflow + - tensorflow-gpu + - tf-nightly + - tf-nightly-gpu + desc: The specific TensorFlow Python package to install + shell: + desc: Configure TensorFlow's shell prompt and login tools. + jupyter: + desc: Launch Jupyter on execution instead of a bash prompt. + +# =========== +# DOCKERFILES +# =========== +# Represent dockerfiles. +# Spec: +# +# name: the name of the image, referenced from other sections +# desc: A description, inserted later into the Dockerfile +# create-dockerfile: Create a dockerfile based on this. Useful for creating +# base images. Default is true +# partials: List of VALUEs, where a VALUE is either: +# - the name of a partial, which inserts that partial into this file +# - image: [name of another image], which inserts the partials from that +# image into this file +# arg-defaults: List of VALUEs, where a VALUE is either: +# - the name of another image, which loads the default args from that image +# - ARG_NAME: VALUE, which is exactly what you'd expect +images: + + nodev: + create-dockerfile: false + partials: + - python + - tensorflow + - shell + + dev: + create-dockerfile: false + partials: + - python + - bazel + - shell + + cpu: + desc: Ubuntu-based, CPU-only environment for using TensorFlow + partials: + - ubuntu + - image: nodev + + cpu-devel: + desc: > + Ubuntu-based, CPU-only environment for developing changes for + TensorFlow. + partials: + - ubuntu-devel + - image: dev + + nvidia: + desc: Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow. + arg-defaults: + - TF_PACKAGE: tensorflow-gpu + partials: + - nvidia + - image: nodev + + nvidia-devel: + desc: > + Ubuntu-based, Nvidia-GPU-enabled environment for developing changes + for TensorFlow. + arg-defaults: + - TF_PACKAGE: tensorflow-gpu + partials: + - nvidia-devel + - image: dev + + cpu-jupyter: + desc: > + Ubuntu-based, CPU-only environment for using TensorFlow, with Jupyter + included. + partials: + - image: cpu + - jupyter + + cpu-devel-jupyter: + desc: > + Ubuntu-based, CPU-only environment for developing changes for + TensorFlow, with Jupyter included. + partials: + - image: cpu-devel + - jupyter + + nvidia-jupyter: + desc: > + Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow, with + Jupyter included. + arg-defaults: + - nvidia + partials: + - image: nvidia + - jupyter + + nvidia-devel-jupyter: + desc: > + Ubuntu-based, Nvidia-GPU-enabled environment for developing changes for + TensorFlow, with Jupyter included. 
+    arg-defaults:
+        - nvidia-devel
+    partials:
+        - image: nvidia-devel
+        - jupyter
-- cgit v1.2.3

From 46c2eafd65fd55d0837a9a86e8843f7f6d615990 Mon Sep 17 00:00:00 2001
From: avijit-nervana
Date: Tue, 31 Jul 2018 23:08:07 -0700
Subject: Reverted the tensorflow whl that was added by mistake

---
 tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl | Bin 58906768 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl

diff --git a/tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl b/tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl
deleted file mode 100644
index 5784bb8134..0000000000
Binary files a/tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl and /dev/null differ
-- cgit v1.2.3

From 842cd17c1075de0ffca4244e43d8428d7f341420 Mon Sep 17 00:00:00 2001
From: Ruizhi
Date: Wed, 1 Aug 2018 16:55:33 +0800
Subject: Fix shapes in comments of nmt_with_attention.ipynb

It is misleading and confusing that the output shape of the decoder is
currently commented as `(batch_size * max_length, vocab)`. However, the
correct shape is `(batch_size * 1, vocab)`, since the input x of the
GRU layer has shape == `(batch_size, 1, embedding_dim + hidden_size)`.

---
 .../eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
index 1ab1b71bd0..0408ef01ca 100644
--- a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
+++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb
@@ -552,10 +552,10 @@
     "    # passing the concatenated vector to the GRU\n",
     "    output, state = self.gru(x)\n",
     "    \n",
-    "    # output shape == (batch_size * max_length, hidden_size)\n",
+    "    # output shape == (batch_size * 1, hidden_size)\n",
     "    output = tf.reshape(output, (-1, output.shape[2]))\n",
     "    \n",
-    "    # output shape == (batch_size * max_length, vocab)\n",
+    "    # output shape == (batch_size * 1, vocab)\n",
     "    x = self.fc(output)\n",
     "    \n",
     "    return x, state, attention_weights\n",
-- cgit v1.2.3

From 478c4161f2524f9e9a6b78f7de297dc7d194d37a Mon Sep 17 00:00:00 2001
From: Guozhong Zhuang
Date: Wed, 1 Aug 2018 09:35:31 -0700
Subject: Code changes based on Rasmus's code review suggestions on PR19403 and enhancing MklInputConversion for MKL-DNN v0.15 integration

---
 tensorflow/core/kernels/mkl_input_conversion_op.cc | 17 +++--
 tensorflow/core/kernels/mkl_relu_op.cc             | 73 +++++++++++-----------
 2 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc
index dc4da33a06..fee6c44cfe 100644
--- a/tensorflow/core/kernels/mkl_input_conversion_op.cc
+++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc
@@ -296,7 +296,9 @@ class MklInputConversionOp : public OpKernel {
       // implementation.
TensorShape tf_shape0 = input_shape_0.GetTfShape(); TensorShape tf_shape1 = input_shape_1.GetTfShape(); - if (tf_shape0 == tf_shape1) { + TensorShape tensor_shape0 = input_tensor_0.shape(); + TensorShape tensor_shape1 = input_tensor_1.shape(); + if (tf_shape0 == tf_shape1 && tensor_shape0 == tensor_shape1) { auto input0_md = input_shape_0.GetMklLayout(); auto input1_md = input_shape_1.GetMklLayout(); @@ -350,7 +352,8 @@ class MklInputConversionOp : public OpKernel { } // Sanity check - bool mkl_shapes_are_same = input_shape_0 == input_shape_1; + bool mkl_shapes_are_same = ((input_shape_0 == input_shape_1) && + (tensor_shape0 == tensor_shape1)); if (mkl_shapes_are_same) { CHECK(false) << "MklInputConversionOp: Unexpected: TF shapes are " "different but MKL shapes are same"; @@ -403,7 +406,8 @@ class MklInputConversionOp : public OpKernel { } // Broadcast is needed if the shapes are not the same - if (mkl_shape->GetTfShape().num_elements() == tf_tensor->shape().num_elements() ) { + if (mkl_shape->GetTfShape().num_elements() + == tf_tensor->shape().num_elements() ) { // Both shapes are same, convert the TF input to MKL VLOG(1) << "MklInputConversionOp: No broadcast needed."; VLOG(1) << "MklInputConversionOp: Converting input " << tf_tensor_index @@ -437,16 +441,17 @@ class MklInputConversionOp : public OpKernel { bool reordered = tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), tensor_out, &net); - if(!reordered) { + + if (!reordered) { // This is the case that the TF tensor has the same shape and format of // mkl tensor. However, tf_tensor can not be simply forwarded to the // output tensor since mkl data tensor is always one dimensional tensor. // Tensor::CopyFrom shares the buffer of the other tensor while set its // shape to the other tensor. 
CHECK(tensor_out->CopyFrom(*tf_tensor, tensor_out->shape())); - } - else + } else { stream(stream::kind::eager).submit(net).wait(); + } // -- The tensor in MKL format passes through -- ForwardMklTensorInToOut(context, mkl_tensor_index, mkl_tensor_index); diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 3d5a05be73..69f2e37b61 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -83,8 +83,9 @@ class MklEltwiseFwdPrimitive : public MklPrimitive { // Eltwise forward execute // src_data: input data buffer of src // dst_data: output data buffer of dst - void Execute(T* src_data, T* dst_data) { - context_.src_mem->set_data_handle(static_cast(src_data)); + void Execute(const T* src_data, T* dst_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); context_.dst_mem->set_data_handle(static_cast(dst_data)); context_.fwd_stream->submit(context_.fwd_primitives); @@ -261,10 +262,11 @@ class MklEltwiseBwdPrimitive : public MklPrimitive { // src_data: input data buffer of src // diff_dst_data: input data buffer of diff_dst // diff_src_data: output data buffer of diff_src - - void Execute(T* src_data, T* diff_dst_data, T* diff_src_data) { - context_.src_mem->set_data_handle(static_cast(src_data)); - context_.diff_dst_mem->set_data_handle(static_cast(diff_dst_data)); + void Execute(const T* src_data, const T* diff_dst_data, T* diff_src_data) { + context_.src_mem->set_data_handle( + static_cast(const_cast(src_data))); + context_.diff_dst_mem->set_data_handle( + static_cast(const_cast(diff_dst_data))); context_.diff_src_mem->set_data_handle(static_cast(diff_src_data)); context_.bwd_stream->submit(context_.bwd_primitives); @@ -810,17 +812,15 @@ class MklReluOpBase : public OpKernel { MklEltwiseFwdPrimitiveFactory::Get(fwdParams); // prepare for execuation - T* src_data = nullptr; + const T* src_data = src_tensor.flat().data(); // check wehther src need to reorder if (src_md.data.format != eltwise_fwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); auto src_target_pd = memory::primitive_desc({{src_dims}, MklDnnType(), eltwise_fwd->GetSrcMemoryFormat()}, cpu_engine); src.CheckReorderToOpMem(src_target_pd); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast( - const_cast(src_tensor.flat().data())); + src_data = const_cast( + reinterpret_cast(src.GetOpMem().get_data_handle())); } // allocate dst tensor, always set it as MKL-DNN layout @@ -836,20 +836,20 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetTfLayout(dnn_shape_src.GetDimension(), dnn_shape_src.GetSizesAsMklDnnDims(), dnn_shape_src.GetTfDataFormat()); - tf_shape_dst.AddDim(dst_pd.get_size()/sizeof(T)); + tf_shape_dst.AddDim(dst_pd.get_size() / sizeof(T)); } else { - // TODO(yli135): why relu's input is TF tensor in VGG16?? 
dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } Tensor* dst_tensor = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {src_index}, dst_index, tf_shape_dst, &dst_tensor)); + {static_cast(src_index)}, + static_cast(dst_index), + tf_shape_dst, &dst_tensor)); AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); - T* dst_data = static_cast(const_cast( - dst_tensor->flat().data())); + T* dst_data = dst_tensor->flat().data(); // execute eltwise eltwise_fwd->Execute(src_data, dst_data); @@ -874,8 +874,8 @@ class MklReluGradOpBase : public OpKernel { public: ~MklReluGradOpBase() {} - explicit MklReluGradOpBase(OpKernelConstruction* context) : - OpKernel(context) { + explicit MklReluGradOpBase(OpKernelConstruction* context) + : OpKernel(context) { } virtual void Compute_Scalar(OpKernelContext* context) = 0; @@ -964,41 +964,43 @@ class MklReluGradOpBase : public OpKernel { auto eltwise_bwd_pd = eltwise_bwd->GetEltwiseBwdPd(); // check whether need reorder for src / diff_dst - T* src_data; - T* diff_dst_data; + const T* src_data = src_tensor.flat().data(); if (src_md.data.format != eltwise_bwd->GetSrcMemoryFormat()) { src.SetUsrMem(src_md, &src_tensor); src.CheckReorderToOpMem( eltwise_bwd_pd.get()->diff_src_primitive_desc()); - src_data = static_cast(src.GetOpMem().get_data_handle()); - } else { - src_data = static_cast( - const_cast(src_tensor.flat().data())); + src_data = const_cast( + reinterpret_cast(src.GetOpMem().get_data_handle())); } + const T* diff_dst_data = diff_dst_tensor.flat().data(); if (diff_dst_md.data.format != eltwise_bwd->GetDiffDstMemoryFormat()) { diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); diff_dst.CheckReorderToOpMem( eltwise_bwd_pd.get()->diff_src_primitive_desc()); - diff_dst_data = static_cast( - diff_dst.GetOpMem().get_data_handle()); - } else { - diff_dst_data = static_cast(const_cast( - diff_dst_tensor.flat().data())); + diff_dst_data = const_cast( + reinterpret_cast(diff_dst.GetOpMem().get_data_handle())); } // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { auto diff_src_pd = eltwise_bwd_pd->diff_src_primitive_desc(); dnn_shape_diff_src.SetMklTensor(true); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); - tf_shape_diff_src.AddDim(diff_src_pd.get_size()/sizeof(T)); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + } else { + dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), + dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); + } + tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); tf_shape_diff_src = src_tensor.shape(); @@ -1009,8 +1011,7 @@ class MklReluGradOpBase : public OpKernel { &diff_src_tensor)); AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); - T* diff_src_data = static_cast(const_cast( - diff_src_tensor->flat().data())); + T* diff_src_data = diff_src_tensor->flat().data(); // execute eltwise bwd eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data); -- cgit v1.2.3 From 
04fb295a409b426ea44463934c4cec251990bc37 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 1 Aug 2018 15:23:10 -0700 Subject: Update readme --- tensorflow/tools/dockerfiles/README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md index 1fe51adb4a..4786f8ec81 100644 --- a/tensorflow/tools/dockerfiles/README.md +++ b/tensorflow/tools/dockerfiles/README.md @@ -11,12 +11,33 @@ The Dockerfiles in the `dockerfiles` directory must have their build context set to **the directory with this README.md** to copy in helper files. For example: ```bash -$ docker build -f ./dockerfiles/cpu.Dockerfile -t tf-cpu . +$ docker build -f ./dockerfiles/cpu.Dockerfile -t tf . ``` Each Dockerfile has its own set of available `--build-arg`s which are documented in the Dockerfile itself. +## Running + +After building the image with the tag `tf` (for example): + +```bash +# A volume mount is optional but highly recommended, especially for Jupyter + +# CPU-based images +$ docker run -u $(id -u):$(id -g) -v $(PWD):/my-devel -it tf + +# GPU-based images (set up nvidia-docker2 first) +$ docker run --runtime=nvidia -u $(id -u):$(id -g) -v $(PWD):/my-devel -it tf + +# Images with Jupyter run on port 8888, and needs a volume for notebooks +$ docker run --user $(id -u):$(id -g) -p 8888:8888 -v $(PWD):/notebooks -it tf + +# Development images +$ docker run --user $(id -u):$(id -g) -it tf +docker$ git clone https://github.com/tensorflow/tensorflow +``` + ## Maintaining To make changes to TensorFlow's Dockerfiles, you'll update `spec.yml` and the -- cgit v1.2.3 From 00869fc36a952418ffa75fd4fd5763b993251dd2 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 1 Aug 2018 15:32:36 -0700 Subject: Clean up some documentation --- tensorflow/tools/dockerfiles/assembler.py | 3 ++- tensorflow/tools/dockerfiles/spec.yml | 43 +++++++++++++++++-------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py index a33c42ace6..8e0e5923d6 100644 --- a/tensorflow/tools/dockerfiles/assembler.py +++ b/tensorflow/tools/dockerfiles/assembler.py @@ -1,6 +1,7 @@ """Assemble common TF Dockerfiles from many parts. -TODO(angerson): DO NOT SUBMIT without a detailed description of assembler. +This script constructs TF's Dockerfiles by aggregating partial +Dockerfiles. See README.md for usage examples. """ from __future__ import absolute_import diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 48a0cb772e..4d622c53d2 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -1,6 +1,7 @@ # ====== # HEADER # ====== +# # This is commented-out and prepended to each generated Dockerfile. header: | THIS IS A GENERATED DOCKERFILE. @@ -12,19 +13,20 @@ header: | # ======== # PARTIALS # ======== -# Represent and document pieces of a Dockerfile. -# Spec: +# +# Represent and document pieces of a Dockerfile. Spec: # -# name: the name of the partial, referenced from other sections +# name: the name of the partial, is referenced from the images section # desc: A description, inserted later into the Dockerfile -# file: Alternative file prefix, e.g. file.partial.Dockerfile (default = name) +# file: Alternative file prefix, e.g. file.partial.Dockerfile. The default is +# the name of the partial. 
 # args: A dict of ARGs in the Dockerfile; each entry has the format
-#   ARG_NAME: VALUE where VALUE is
-#     - a concrete value: becomes the default
+#   ARG_NAME: VALUE where VALUE is one of:
 #     - a dict:
-#         desc: Arg description
-#         default: Default value for the arg; is written to the Dockerfile
-#         options: List of strings, part of documentation
+#         desc: Documentation for the arg
+#         default: Default value for the arg; is written to the Dockerfile
+#         options: List of strings, part of documentation
+#     - a concrete value: the same as a dictionary with default: [value].
 
 partials:
 
   ubuntu:
@@ -75,23 +77,24 @@ partials:
   jupyter:
     desc: Launch Jupyter on execution instead of a bash prompt.
 
-# ===========
-# DOCKERFILES
-# ===========
-# Represent dockerfiles.
-# Spec:
+# ======
+# IMAGES
+# ======
+#
+# Represent Dockerfiles. Spec:
 #
-# name: the name of the image, referenced from other sections
+# name: the name of the image, possibly referenced by other images
 # desc: A description, inserted later into the Dockerfile
 # create-dockerfile: Create a dockerfile based on this. Useful for creating
-#   base images. Default is true
+#   extensible base images that don't need a file. Default is true.
 # partials: List of VALUEs, where a VALUE is either:
-#   - the name of a partial, which inserts that partial into this file
+#   - the name of a partial, which inserts that partial into this image
 #   - image: [name of another image], which inserts the partials from that
-#     image into this file
+#     image into this image
 # arg-defaults: List of VALUEs, where a VALUE is either:
-#   - the name of another image, which loads the default args from that image
-#   - ARG_NAME: VALUE, which is exactly what you'd expect
+#   - ARG_NAME: VALUE, which sets the ARG_NAME to VALUE wherever it appears
+#     in this image's partials
+#   - [name of another image], which loads the default args from that image
 images:
 
   nodev:
-- cgit v1.2.3

From f0b3a02cb76be13364d0247b0162c23482778f9c Mon Sep 17 00:00:00 2001
From: Ruizhi
Date: Thu, 2 Aug 2018 14:16:04 +0800
Subject: Removed redundant tf.exp on predictions in evaluate method

In the `evaluate` method, the use of `tf.exp` is redundant, since
`predictions` is already the logits output of a dense layer in the
decoder. Therefore, I removed `tf.exp` so that `tf.multinomial` is
applied to the logits directly, which agrees with the intended usage of
`tf.multinomial`.
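[Editor's note: a quick illustration of why this change is correct, not part of the patch. `tf.multinomial` interprets its first argument as unnormalized log-probabilities, so exponentiating the logits first skews the sampling distribution. The values below are made up for the demo and assume a TF 1.x eager setup like the notebook's.]

```python
import tensorflow as tf
tf.enable_eager_execution()

logits = tf.constant([[0.0, 1.0, 2.0]])

# Distribution the model intends: softmax(logits) ~ [0.09, 0.24, 0.67]
print(tf.nn.softmax(logits))

# What sampling from exp(logits) implies: softmax(exp(logits)) ~ [0.002, 0.009, 0.989]
print(tf.nn.softmax(tf.exp(logits)))

# Correct usage after this change: pass the logits straight through.
predicted_id = tf.multinomial(logits, num_samples=1)[0][0].numpy()
print(predicted_id)
```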
--- .../eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb index 0408ef01ca..0bc1c405ce 100644 --- a/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb @@ -753,7 +753,7 @@ " attention_weights = tf.reshape(attention_weights, (-1, ))\n", " attention_plot[t] = attention_weights.numpy()\n", "\n", - " predicted_id = tf.multinomial(tf.exp(predictions), num_samples=1)[0][0].numpy()\n", + " predicted_id = tf.multinomial(predictions, num_samples=1)[0][0].numpy()\n", "\n", " result += targ_lang.idx2word[predicted_id] + ' '\n", "\n", -- cgit v1.2.3 From ffc12e18fbf7acbaf67a11f1470dc54786e2cc17 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 7 Aug 2018 14:53:26 -0700 Subject: rebase mkl_util.h with master branch --- tensorflow/core/util/mkl_util.h | 107 +++++++++++++++++++++++++--------------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 21868fa2c0..a66b1215bd 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -17,9 +17,10 @@ limitations under the License. #define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ #ifdef INTEL_MKL -#include +#include #include #include +#include #ifdef INTEL_MKL_ML #include "mkl_dnn.h" @@ -34,11 +35,11 @@ limitations under the License. #include "tensorflow/core/graph/mkl_graph_util.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/util/padding.h" #include "tensorflow/core/util/tensor_format.h" - #ifndef INTEL_MKL_ML #include "mkldnn.hpp" #include "tensorflow/core/lib/core/stringpiece.h" @@ -1503,7 +1504,8 @@ class MklDnnData { /// Operations memory descriptor memory::desc* op_md_; - + /// Operations temp buffer + void* allocated_buffer_; /// CPU engine on which operation will be executed const engine* cpu_engine_; @@ -1512,6 +1514,7 @@ class MklDnnData { : user_memory_(nullptr), reorder_memory_(nullptr), op_md_(nullptr), + allocated_buffer_(nullptr), cpu_engine_(e) {} ~MklDnnData() { @@ -1652,6 +1655,14 @@ class MklDnnData { user_memory_->set_data_handle(GetTensorBuffer(tensor)); } + /// allocate function for data buffer + inline void AllocateBuffer(size_t size) { + const int64 kMemoryAlginment = 64; // For AVX512 memory alignment. + allocated_buffer_ = cpu_allocator()->AllocateRaw(kMemoryAlginment, size); + } + + inline void* GetAllocatedBuffer() { return allocated_buffer_; } + /// Get the memory primitive for input and output of an op. If inputs /// to an op require reorders, then this function returns memory primitive /// for reorder. Otherwise, it will return memory primitive for user memory. @@ -1873,7 +1884,6 @@ class MklDnnData { net.push_back(FindOrCreateReorder(reorder_memory_, user_memory_)); stream(stream::kind::eager).submit(net).wait(); } - }; /// Base class for operations with reuse of primitives @@ -1882,9 +1892,8 @@ class MklPrimitive { public: virtual ~MklPrimitive() {} - // Dummy data. 
Its size, hard-coded as 256 here, does - // not matter since MKL should never operate on this buffer. - unsigned char DummyData[256]; + // Dummy data which MKL DNN never operates on + unsigned char* DummyData = nullptr; }; const mkldnn::memory::dims NONE_DIMS = {}; @@ -1896,8 +1905,9 @@ class MklPrimitiveFactory { ~MklPrimitiveFactory() {} MklPrimitive* GetOp(const string& key) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); - if (stream_iter == MklPrimitiveFactory::GetHashMap().end()) { + auto& map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); + if (stream_iter == map.end()) { return nullptr; } else { CHECK(stream_iter->second != nullptr) << "nullptr present in map"; @@ -1906,7 +1916,8 @@ class MklPrimitiveFactory { } void SetOp(const string& key, MklPrimitive* op) { - auto stream_iter = MklPrimitiveFactory::GetHashMap().find(key); + auto& map = MklPrimitiveFactory::GetHashMap(); + auto stream_iter = map.find(key); CHECK(stream_iter == map.end()); @@ -1955,11 +1966,25 @@ class FactoryKeyCreator { } }; +static inline memory::format get_desired_format(int channel) { + memory::format fmt_desired = memory::format::any; + + if (port::TestCPUFeature(port::CPUFeature::AVX512F) && (channel % 16) == 0) { + fmt_desired = memory::format::nChw16c; + } else if (port::TestCPUFeature(port::CPUFeature::AVX2) && + (channel % 8) == 0) { + fmt_desired = memory::format::nChw8c; + } else { + fmt_desired = memory::format::nchw; + } + return fmt_desired; +} + class MklReorderPrimitive : public MklPrimitive { - public: - explicit MklReorderPrimitive(const memory* from, const memory* to) { - Setup(from, to); - } + public: + explicit MklReorderPrimitive(const memory* from, const memory* to) { + Setup(from, to); + } ~MklReorderPrimitive() {} std::shared_ptr GetPrimitive() { @@ -1971,7 +1996,7 @@ class MklReorderPrimitive : public MklPrimitive { context_.dst_mem->set_data_handle(to->get_data_handle()); } - private: + private: struct ReorderContext { std::shared_ptr src_mem; std::shared_ptr dst_mem; @@ -1995,28 +2020,27 @@ class MklReorderPrimitive : public MklPrimitive { template class MklReorderPrimitiveFactory : public MklPrimitiveFactory { - public: - static MklReorderPrimitive* Get(const memory* from, - const memory* to) { - auto reorderPrim = static_cast( + public: + static MklReorderPrimitive* Get(const memory* from, const memory* to) { + auto reorderPrim = static_cast( MklReorderPrimitiveFactory::GetInstance().GetReorder(from, to)); - if (reorderPrim == nullptr) { - reorderPrim = new MklReorderPrimitive(from, to); - MklReorderPrimitiveFactory::GetInstance().SetReorder( - from, to, reorderPrim); - } - reorderPrim->SetMemory(from, to); - return reorderPrim; + if (reorderPrim == nullptr) { + reorderPrim = new MklReorderPrimitive(from, to); + MklReorderPrimitiveFactory::GetInstance().SetReorder(from, to, + reorderPrim); } + reorderPrim->SetMemory(from, to); + return reorderPrim; + } static MklReorderPrimitiveFactory & GetInstance() { static MklReorderPrimitiveFactory instance_; return instance_; } - private: - MklReorderPrimitiveFactory() {}; - ~MklReorderPrimitiveFactory() {}; + private: + MklReorderPrimitiveFactory() {} + ~MklReorderPrimitiveFactory() {} static string CreateKey(const memory* from, const memory* to) { string prefix = "reorder"; @@ -2046,18 +2070,19 @@ class MklReorderPrimitiveFactory : public MklPrimitiveFactory { } }; - /// Fuction to find(or create) a reorder from memory pointed by from to memory pointed - /// by to, it will created primitive or 
get primitive from pool if it is cached. - /// Returns the primitive. - template - inline primitive FindOrCreateReorder(const memory* from, const memory* to) { - CHECK_NOTNULL(from); - CHECK_NOTNULL(to); - MklReorderPrimitive *reorder_prim = - MklReorderPrimitiveFactory::Get(from, to); - return *reorder_prim->GetPrimitive(); - } - +/// Fuction to find(or create) a reorder from memory pointed by +/// from to memory pointed by to, it will created primitive or +/// get primitive from pool if it is cached. +/// Returns the primitive. +template +inline primitive FindOrCreateReorder(const memory* from, const memory* to) { + CHECK_NOTNULL(from); + CHECK_NOTNULL(to); + MklReorderPrimitive* reorder_prim = + MklReorderPrimitiveFactory::Get(from, to); + return *reorder_prim->GetPrimitive(); +} + #endif // INTEL_MKL_DNN } // namespace tensorflow -- cgit v1.2.3 From 10ca9a8fb215e66d25a8469c5eeb5b8d6c02e05e Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Tue, 7 Aug 2018 15:29:24 -0700 Subject: RNN.call should get initial state from full input spec --- tensorflow/python/keras/layers/recurrent.py | 8 +++++++ tensorflow/python/keras/layers/wrappers.py | 26 +++++++++++++++++----- .../python/keras/utils/multi_gpu_utils_test.py | 17 ++++++++++++++ 3 files changed, 46 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/keras/layers/recurrent.py b/tensorflow/python/keras/layers/recurrent.py index a8bfdf25f2..85d0a70203 100644 --- a/tensorflow/python/keras/layers/recurrent.py +++ b/tensorflow/python/keras/layers/recurrent.py @@ -583,6 +583,14 @@ class RNN(Layer): # note that the .build() method of subclasses MUST define # self.input_spec and self.state_spec with complete input shapes. if isinstance(inputs, list): + # get initial_state from full input spec + # as they could be copied to multiple GPU. 
+ if self._num_constants is None: + initial_state = inputs[1:] + else: + initial_state = inputs[1:-self._num_constants] + if len(initial_state) == 0: + initial_state = None inputs = inputs[0] if initial_state is not None: pass diff --git a/tensorflow/python/keras/layers/wrappers.py b/tensorflow/python/keras/layers/wrappers.py index f0c1e76156..cf2e139fad 100644 --- a/tensorflow/python/keras/layers/wrappers.py +++ b/tensorflow/python/keras/layers/wrappers.py @@ -545,11 +545,27 @@ class Bidirectional(Wrapper): if initial_state is not None and generic_utils.has_arg( self.layer.call, 'initial_state'): - forward_state = initial_state[:len(initial_state) // 2] - backward_state = initial_state[len(initial_state) // 2:] - y = self.forward_layer.call(inputs, initial_state=forward_state, **kwargs) - y_rev = self.backward_layer.call( - inputs, initial_state=backward_state, **kwargs) + forward_inputs = [inputs[0]] + backward_inputs = [inputs[0]] + pivot = len(initial_state) // 2 + 1 + # add forward initial state + forward_state = inputs[1:pivot] + forward_inputs += forward_state + if self._num_constants is None: + # add backward initial state + backward_state = inputs[pivot:] + backward_inputs += backward_state + else: + # add backward initial state + backward_state = inputs[pivot:-self._num_constants] + backward_inputs += backward_state + # add constants for forward and backward layers + forward_inputs += inputs[-self._num_constants:] + backward_inputs += inputs[-self._num_constants:] + y = self.forward_layer.call(forward_inputs, + initial_state=forward_state, **kwargs) + y_rev = self.backward_layer.call(backward_inputs, + initial_state=backward_state, **kwargs) else: y = self.forward_layer.call(inputs, **kwargs) y_rev = self.backward_layer.call(inputs, **kwargs) diff --git a/tensorflow/python/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/utils/multi_gpu_utils_test.py index 77792d14f5..c7e94998b4 100644 --- a/tensorflow/python/keras/utils/multi_gpu_utils_test.py +++ b/tensorflow/python/keras/utils/multi_gpu_utils_test.py @@ -180,6 +180,23 @@ class TestMultiGPUModel(test.TestCase): target_tensors=[targets]) parallel_model.fit(epochs=1, steps_per_epoch=3) + def test_multi_gpu_with_multi_input_layers(self): + gpus = 2 + + if not check_if_compatible_devices(gpus=gpus): + return + + with self.test_session(): + inputs = keras.Input((4, 3)) + init_state = keras.Input((3,)) + outputs = keras.layers.SimpleRNN( + 3, return_sequences=True)(inputs, initial_state=init_state) + x = [np.random.randn(2, 4, 3), np.random.randn(2, 3)] + y = np.random.randn(2, 4, 3) + model = keras.Model([inputs, init_state], outputs) + parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus) + parallel_model.compile(loss='mean_squared_error', optimizer='adam') + parallel_model.train_on_batch(x, y) if __name__ == '__main__': test.main() -- cgit v1.2.3 From b2470ca8a93a7a4bd960ba7dff65be74779c4f62 Mon Sep 17 00:00:00 2001 From: Jonathan Date: Thu, 26 Jul 2018 23:03:33 +0800 Subject: modify _TopKGrad so that all operations can run on GPU for better performance --- tensorflow/python/ops/nn_grad.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index df23ac55ce..025ce7ce88 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -975,25 +975,31 @@ def _TopKGrad(op, grad, _): in_shape = array_ops.shape(op.inputs[0]) ind_shape = array_ops.shape(op.outputs[1]) - ind_lastdim = 
array_ops.gather(ind_shape, array_ops.size(ind_shape) - 1) + # int32 is not supported on GPU hence up-casting + ind_lastdim = array_ops.gather(math_ops.cast( + ind_shape, dtypes.int64), array_ops.size(ind_shape) - 1) # Flatten indices to 2D. ind_2d = array_ops.reshape(op.outputs[1], array_ops.stack([-1, ind_lastdim])) - in_lastdim = array_ops.gather(in_shape, array_ops.size(in_shape) - 1) + in_lastdim = array_ops.gather(math_ops.cast( + in_shape, dtypes.int64), array_ops.size(in_shape) - 1) outerdim = array_ops.shape(ind_2d)[0] # Compute linear indices (flattened to 1D). - ind = array_ops.reshape(ind_2d + array_ops.expand_dims( - math_ops.range(0, outerdim * in_lastdim, in_lastdim), -1), [-1]) + ind = array_ops.reshape(ind_2d + math_ops.cast(array_ops.expand_dims( + math_ops.range(0, math_ops.cast(outerdim, dtypes.int64) + * in_lastdim, in_lastdim), -1 + ), dtypes.int32), [-1]) # Substitute grad to appropriate locations and fill the rest with zeros, # finally reshaping it to the original input shape. return [ array_ops.reshape( - sparse_ops.sparse_to_dense( - ind, - array_ops.reshape(math_ops.reduce_prod(in_shape), [1]), + array_ops.scatter_nd( + array_ops.expand_dims(ind, -1), array_ops.reshape(grad, [-1]), - validate_indices=False), in_shape), + [math_ops.reduce_prod(in_shape)] + ), + in_shape), array_ops.zeros([], dtype=dtypes.int32) ] -- cgit v1.2.3 From 167487ebf7e50e13779fb344038b2002056e9b81 Mon Sep 17 00:00:00 2001 From: "weidan.kong" Date: Wed, 8 Aug 2018 11:50:11 -0700 Subject: elastic averaging SGD update: support partitioner & more optimizers --- .../python/training/elastic_average_optimizer.py | 183 ++++++++++++++++++--- .../training/elastic_average_optimizer_test.py | 107 ++++++++++-- 2 files changed, 246 insertions(+), 44 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py index 5763593b81..be72ef3767 100644 --- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py @@ -17,22 +17,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.ops import math_ops - -from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gen_nn_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import optimizer +from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import data_flow_ops -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import constant_op LOCAL_VARIABLE_NAME = 'local_center_variable' GLOBAL_VARIABLE_NAME = 'global_center_variable' +GLOBAL_SHARE_VARS = 'global_share_var' +GLOBAL_STEP = 'global_step' class ElasticAverageCustomGetter(object): @@ -52,16 +56,32 @@ class ElasticAverageCustomGetter(object): with tf.device( 
tf.train.replica_device_setter( worker_device=worker_device, - ps_device="/job:ps/cpu:0", + ps_device="/job:ps", cluster=cluster)), tf.variable_scope('',custom_getter=ea_custom_getter): - hid_w = tf.get_variable( - initializer=tf.truncated_normal( - [IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units], - stddev=1.0 / IMAGE_PIXELS), - name="hid_w") - hid_b = tf.get_variable(initializer=tf.zeros([FLAGS.hidden_units]), - name="hid_b") + ... + create your model here + ... + with tf.device(worker_device): + opt = tf.train.MomentumOptimizer(...) + optimizer = ElasticAverageOptimizer( + opt, + num_worker=2, + moving_rate=0.01, # or use default value + communication_period=20, + ea_custom_getter=ea_custom_getter) + ... + train_op = optimizer.apply_gradients( + grads_vars, + global_step=global_step) + ... + hooks = [optimizer.make_session_run_hook(is_chief, task_index)] + ... + with tf.train.MonitoredTrainingSession(master=server.target, + is_chief=is_chief, + checkpoint_dir=("...), + save_checkpoint_secs=600, + hooks=hooks) as mon_sess: """ def __init__(self, worker_device): @@ -83,24 +103,50 @@ class ElasticAverageCustomGetter(object): collections=[ops.GraphKeys.LOCAL_VARIABLES], *args, **kwargs) - global_center_variable = variable_scope.variable( + if kwargs['reuse'] == True: + return local_var + global_center_variable = getter( name='%s/%s' % (GLOBAL_VARIABLE_NAME, name), - initial_value=local_var.initialized_value(), trainable=False, - collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + *args, + **kwargs) with ops.device(self._worker_device): - local_center_variable = variable_scope.variable( + local_center_variable = getter( name='%s/%s' % (LOCAL_VARIABLE_NAME, name), - initial_value=local_var.initialized_value(), trainable=False, - collections=[ops.GraphKeys.LOCAL_VARIABLES]) - - self._local_map[local_var] = local_center_variable - self._global_map[local_var] = global_center_variable + collections=[ops.GraphKeys.LOCAL_VARIABLES], + *args, + **kwargs) + if kwargs['partitioner'] is None: + self._local_map[local_var] = local_center_variable + self._global_map[local_var] = global_center_variable + else: + v_list = list(local_var) + for i in range(len(v_list)): + self._local_map[v_list[i]] \ + = list(local_center_variable)[i] + self._global_map[v_list[i]] \ + = list(global_center_variable)[i] return local_var else: - return getter(name, trainable, collections, *args, **kwargs) + # 1. default to LOCAL_VARIABLES (instead of GLOBAL_VARIABLES) + # 2. put to global if explicitly defined (GLOBAL_SHARE_VARS) + # 3. 
other GLOBAL_VARIABLES put to LOCAL_VARIABLES + # exept global_step, which must be global + if collections is None or len(collections) == 0: + collections = [ops.GraphKeys.LOCAL_VARIABLES] + elif GLOBAL_SHARE_VARS in collections: + collections = list(collections) + if ops.GraphKeys.GLOBAL_VARIABLES not in collections: + collections.append(ops.GraphKeys.GLOBAL_VARIABLES) + elif ops.GraphKeys.GLOBAL_VARIABLES in collections \ + and GLOBAL_STEP not in name.split('/'): + collections = list(collections) + collections.append(ops.GraphKeys.LOCAL_VARIABLES) + collections.remove(ops.GraphKeys.GLOBAL_VARIABLES) + return getter(name, trainable=trainable, collections=collections, *args, **kwargs) class ElasticAverageOptimizer(optimizer.Optimizer): @@ -123,8 +169,9 @@ class ElasticAverageOptimizer(optimizer.Optimizer): ea_custom_getter, communication_period=10, moving_rate=None, - rho=None, + rho=0.0, use_locking=True, + sync_flag=False, name='ElasticAverageOptimizer'): """Construct a new gradient descent optimizer. @@ -139,6 +186,8 @@ class ElasticAverageOptimizer(optimizer.Optimizer): rho: the amount of exploration we allow ine the model. The default value is moving_rate/learning_rate use_locking: If True use locks for update operations. + sync_flag: Add_sync_queues_and_barrier or not, default to False, in case of + restarting a worker,the worker won't hung there. name: Optional name prefix for the operations created when applying gradients. Defaults to "ElasticAverageOptimizer". """ @@ -148,6 +197,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer): self._period = communication_period self._local_map = ea_custom_getter._local_map self._global_map = ea_custom_getter._global_map + self._sync_flag = sync_flag if moving_rate is None: self._moving_rate = self.BETA / communication_period / num_worker @@ -246,6 +296,25 @@ class ElasticAverageOptimizer(optimizer.Optimizer): local_update = state_ops.assign_add( self._local_step, 1, name='local_step_update').op + # this is for place the variables created by optimizer to local collection + # e.g., AdamOptimizer will create beta as global variables + def _adjust_optimizer_variable_collection(): + g = ops.get_default_graph() + # global to local & clear global + idx = 0 + for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])): + var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] + name = var.op.name + if GLOBAL_STEP not in name.split('/') \ + and var not in ops.get_collection(GLOBAL_SHARE_VARS) \ + and name.find(GLOBAL_VARIABLE_NAME) == -1: + ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var) + del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] + else: + idx += 1 + + _adjust_optimizer_variable_collection() + # update global variables. 
def _Update_global_variables(): local_vars = [v for g, v in grads_and_vars if g is not None] @@ -290,7 +359,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer): variables equal to the global center variables before the training begins""" def _Add_sync_queues_and_barrier(enqueue_after_list): - """Adds ops to enqueu on all worker queues""" + """Adds ops to enqueue on all worker queues""" sync_queues = [ data_flow_ops.FIFOQueue( self._num_worker, [dtypes.bool], @@ -324,6 +393,9 @@ class ElasticAverageOptimizer(optimizer.Optimizer): init_ops.append(state_ops.assign(lc_var, gc_var)) init_op = control_flow_ops.group(*(init_ops)) + if self._sync_flag == False: + return init_op + sync_queue_op = _Add_sync_queues_and_barrier([init_op]) return sync_queue_op @@ -331,6 +403,59 @@ class ElasticAverageOptimizer(optimizer.Optimizer): """Creates a hook to handle ElasticAverageOptimizerHook ops such as initialization.""" return _ElasticAverageOptimizerHook(self, is_chief, task_index) + def swapping_saver(self, var_list=None, name='swapping_saver', **kwargs): + """Create a saver that copies global_center_variable to trainable variables. + Please call this function after all your variables have been created with + EACustomGetter. For evaluation or inference, use this saver during training; + it will save the global_center_variable of the trained parameters under the + original parameter names. + Args: + var_list: List of variables to save, as per `Saver()`. + If set to None, will save all the trainable_variables that have been + created before this call. + name: The name of the saver. + **kwargs: Keyword arguments of `Saver()`. + Returns: + A `tf.train.Saver` object. + Raises: + RuntimeError: global_center_variable is empty; please make sure + this is called after the model is created and + EACustomGetter is used when declaring your model. + """ + if not self._global_map: + raise RuntimeError('global_center_variable is empty, please make sure ' + 'this is called after the model is created and ' + 'ElasticAverageCustomGetter is used when declaring your model') + + if var_list is None: + var_list = variables.trainable_variables() + if not isinstance(var_list, dict): + var_list = saver.BaseSaverBuilder.OpListToDict(var_list) + + swapped_var_list = {} + has_global_step = False + for key, var in var_list.items(): + tensor = var + if False == has_global_step\ + and GLOBAL_STEP in key.split('/'): + has_global_step = True + + if isinstance(var, list) == False: + for tvar in variables.trainable_variables(): + if tvar.op.name == var.op.name: + tensor = self._global_map.get(tvar, var) + break + else: # partitioned variable + tensor = [self._global_map.get(lvar, lvar) for lvar in var] + + swapped_var_list[key] = tensor + + # find global_step and add it if missing + if False == has_global_step: + for ele in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES): + if GLOBAL_STEP in ele.op.name.split('/'): + swapped_var_list[ele.op.name] = ele + + return saver.Saver(swapped_var_list, name=name, **kwargs) class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook): @@ -351,3 +476,7 @@ class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook): if self._is_chief: self._global_init_op = variables.global_variables_initializer() self._variable_init_op = self._ea_optimizer.get_init_op(self._task_index) + + def after_create_session(self, session, coord): + """Run initialization ops""" + session.run(self._variable_init_op) \ No newline at end of file diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py
b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py index 5ed8057b86..8a8f7ab080 100644 --- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py @@ -18,19 +18,23 @@ from __future__ import division from __future__ import print_function import portpicker +from tensorflow.python.client import session from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test +from tensorflow.python.training import device_setter from tensorflow.python.training import gradient_descent +from tensorflow.python.training import saver from tensorflow.python.training import server_lib from tensorflow.python.training import training from tensorflow.python.training import training_util -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import device_setter from tensorflow.contrib.opt.python.training.elastic_average_optimizer import \ - ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME + ElasticAverageOptimizer, ElasticAverageCustomGetter, GLOBAL_VARIABLE_NAME, GLOBAL_SHARE_VARS def create_local_cluster(num_workers, num_ps, protocol="grpc"): @@ -59,29 +63,49 @@ def create_local_cluster(num_workers, num_ps, protocol="grpc"): # Creates the workers and return their sessions, graphs, train_ops. # Chief worker will update at last -def _get_workers(num_workers, period, workers, moving_rate): +def _get_workers(num_workers, period, workers, moving_rate, num_ps=1): sessions = [] graphs = [] train_ops = [] + savers = [] for worker_id in range(num_workers): graph = ops.Graph() is_chief = (worker_id == 0) with graph.as_default(): worker_device = "/job:worker/task:%d/cpu:0" % (worker_id) - ea_coustom = ElasticAverageCustomGetter(worker_device=worker_device) + ea_custom = ElasticAverageCustomGetter(worker_device=worker_device) with variable_scope.variable_scope( - "", custom_getter=ea_coustom), ops.device( + "", custom_getter=ea_custom), ops.device( device_setter.replica_device_setter( worker_device=worker_device, ps_device="/job:ps/task:0/cpu:0", ps_tasks=1)): - global_step = variables.Variable(0, name="global_step", trainable=False) + global_step = training_util.get_or_create_global_step() + local_var = variable_scope.get_variable(initializer=0.0, trainable=False, + name='local_var', collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + global_var = variable_scope.get_variable(initializer=0.0, trainable=False, + name='global_var', collections=[GLOBAL_SHARE_VARS]) var_0 = variable_scope.get_variable(initializer=0.0, name="v0") var_1 = variable_scope.get_variable(initializer=1.0, name="v1") + if num_ps > 1: + with variable_scope.variable_scope("", + partitioner=partitioned_variables.fixed_size_partitioner(num_ps, axis=0), + custom_getter=ea_custom), ops.device( + device_setter.replica_device_setter( + worker_device=worker_device, + ps_device="/job:ps/task:0/cpu:0", + ps_tasks=num_ps)): + + partition_var = variable_scope.get_variable( + 'partition_var',shape=[2,4], initializer=init_ops.ones_initializer) + part_0 = list(partition_var)[0] + part_1 = list(partition_var)[1] with ops.device("/job:worker/task:" + str(worker_id)): grads_0 = constant_op.constant(-1.0) grads_1 = 
constant_op.constant(-1.0) + grads_part_0 = constant_op.constant([[-1., -1., -1., -1.]]) + grads_part_1 = constant_op.constant([[-1., -1., -1., -1.]]) sgd_opt = gradient_descent.GradientDescentOptimizer(1.0) opt = ElasticAverageOptimizer( @@ -89,12 +113,20 @@ def _get_workers(num_workers, period, workers, moving_rate): num_worker=num_workers, moving_rate=moving_rate, communication_period=period, - ea_custom_getter=ea_coustom) - train_op = [ + ea_custom_getter=ea_custom) + if num_ps == 1: + train_op = [ opt.apply_gradients(([grads_0, var_0], [grads_1, var_1]), global_step) - ] + ] + else: + train_op = [ + opt.apply_gradients(([grads_0, var_0], [grads_1, var_1], + [grads_part_0, part_0], [grads_part_1, part_1]), + global_step) + ] easgd_hook = opt.make_session_run_hook(is_chief, worker_id) + saver = opt.swapping_saver() # Creates MonitoredSession sess = training.MonitoredTrainingSession( workers[worker_id].target, hooks=[easgd_hook]) @@ -102,8 +134,9 @@ def _get_workers(num_workers, period, workers, moving_rate): sessions.append(sess) graphs.append(graph) train_ops.append(train_op) + savers.append(saver) - return sessions, graphs, train_ops + return sessions, graphs, train_ops, savers class ElasticAverageOptimizerTest(test.TestCase): @@ -118,7 +151,7 @@ class ElasticAverageOptimizerTest(test.TestCase): cluster, workers, _ = create_local_cluster( num_workers=num_workers, num_ps=num_ps) - sessions, graphs, train_ops = _get_workers( + sessions, graphs, train_ops, savers = _get_workers( num_workers, communication_period, workers, 1.0) var_0 = graphs[0].get_tensor_by_name("v0:0") @@ -150,6 +183,13 @@ class ElasticAverageOptimizerTest(test.TestCase): self.assertAllEqual(3.0, sessions[0].run(var_1_g)) self.assertAllEqual(1, sessions[0].run(global_step)) + # verify variables are set to right collection + with graphs[0].as_default(): + for ele in ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES): + self.assertNotEqual(ele.op.name, 'global_var') + for ele in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES): + self.assertNotEqual(ele.op.name, 'local_var') + # iteration 3 sessions[0].run(train_ops[0]) @@ -158,6 +198,20 @@ class ElasticAverageOptimizerTest(test.TestCase): self.assertAllEqual(2.0, sessions[0].run(var_0_g)) self.assertAllEqual(3.0, sessions[0].run(var_1_g)) self.assertAllEqual(1, sessions[0].run(global_step)) + sessions[0].run(train_ops[0]) + + # save, data will be global value + savers[0].save(sessions[0]._sess._sess._sess._sess, save_path='./model/model') + ops.reset_default_graph() # restore on a new graph + with session.Session() as sess: + v0 = variable_scope.get_variable(initializer=0.0, name="v0") + v1 = variable_scope.get_variable(initializer=1.0, name="v1") + sess.run(variables.local_variables_initializer()) + global_step = training_util.get_or_create_global_step() + saver_opt = saver.Saver(var_list=[v1, v0, global_step]) + saver_opt.restore(sess, './model/model') + self.assertAllEqual(2.0, sess.run(v0)) + self.assertAllEqual(3.0, sess.run(v1)) def test2Worker1Period(self): num_workers = 2 @@ -166,8 +220,8 @@ class ElasticAverageOptimizerTest(test.TestCase): cluster, workers, _ = create_local_cluster( num_workers=num_workers, num_ps=num_ps) - sessions, graphs, train_ops = _get_workers( - num_workers, communication_period, workers, 0.5) + sessions, graphs, train_ops, savers = _get_workers( + num_workers, communication_period, workers, 0.5, num_ps=2) var_0 = graphs[0].get_tensor_by_name("v0:0") var_1 = graphs[0].get_tensor_by_name("v1:0") @@ -177,6 +231,8 @@ class 
ElasticAverageOptimizerTest(test.TestCase): var_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v0:0") var_1_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/v1:0") + part_0_g = graphs[0].get_tensor_by_name(GLOBAL_VARIABLE_NAME + "/partition_var/part_0:0") + # Verify the initialized value. self.assertAllEqual(0.0, sessions[0].run(var_0)) self.assertAllEqual(1.0, sessions[0].run(var_1)) @@ -194,22 +250,39 @@ class ElasticAverageOptimizerTest(test.TestCase): self.assertAllEqual(1.75, sessions[0].run(var_1_g)) self.assertAllEqual(0.75, sessions[1].run(var_0_1)) self.assertAllEqual(1.75, sessions[1].run(var_1_1)) + # part_0 of global_center copy + part_0_g = sessions[0].run(part_0_g) + + savers[0].save(sessions[0]._sess._sess._sess._sess, save_path='./model/model') + + # verify restore of partitioned_variables + ops.reset_default_graph() # restore on a new graph + g = ops.get_default_graph() + with session.Session() as sess, g.as_default(): + with variable_scope.variable_scope("", + partitioner=partitioned_variables.fixed_size_partitioner(num_ps, axis=0)): + partition_var = variable_scope.get_variable( + 'partition_var',shape=[2,4], initializer=init_ops.ones_initializer) + s = saver.Saver(var_list=[partition_var]) + s.restore(sess, './model/model') + part_0 = g.get_tensor_by_name('partition_var/part_0:0') + self.assertAllEqual(part_0_g, sess.run(part_0)) def testPS2TasksWithClusterSpecClass(self): cluster_spec = server_lib.ClusterSpec({ "ps": ["ps0:2222", "ps1:2222"], "worker": ["worker0:2222", "worker1:2222", "worker2:2222"] }) - ea_coustom = ElasticAverageCustomGetter(worker_device="/job:worker/task:0") + ea_custom = ElasticAverageCustomGetter(worker_device="/job:worker/task:0") from tensorflow.python.training import device_setter with ops.device( device_setter.replica_device_setter(cluster=cluster_spec, worker_device="/job:worker/task:0", ps_device="/job:ps")), \ - variable_scope.variable_scope("", custom_getter=ea_coustom): + variable_scope.variable_scope("", custom_getter=ea_custom): v = variable_scope.get_variable(initializer=[1, 2], name="v") w = variable_scope.get_variable(initializer=[2, 1], name="w") - v_g, w_g = ea_coustom._global_map[v], ea_coustom._global_map[w] + v_g, w_g = ea_custom._global_map[v], ea_custom._global_map[w] self.assertDeviceEqual("/job:worker/task:0", v.device) self.assertDeviceEqual("job:ps/task:0", v_g.device) self.assertDeviceEqual("/job:worker/task:0", w.device) -- cgit v1.2.3 From 0e6fd0a53a8a16b9972eb1ad6ffbda99beeb2032 Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Wed, 8 Aug 2018 20:59:52 -0700 Subject: [Intel MKL] Tweaking URLs that didn't work with command line utilities. 
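The py3.6 wheel link in the table below used the storage.cloud.google.com form, which goes through a browser redirect and so fails for command line utilities such as curl or wget; storage.googleapis.com serves the object directly. A minimal sketch (not part of the patch; the wheel URL is one from the table below) of checking that a link is directly fetchable from a script:

    import urllib.request

    # One of the py2.7 wheel URLs from the README table below.
    WHEEL_URL = ('https://storage.googleapis.com/intel-optimized-tensorflow/'
                 'tensorflow-1.9.0-cp27-cp27mu-linux_x86_64.whl')

    # A HEAD request suffices: a directly served object answers with a binary
    # content type instead of redirecting to an HTML sign-in page.
    request = urllib.request.Request(WHEEL_URL, method='HEAD')
    with urllib.request.urlopen(request) as response:
        print(response.status, response.headers.get('Content-Type'))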
--- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1aaecae6de..6ace2a2ed4 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ The TensorFlow project strives to abide by generally accepted best practices in | **IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | | **IBM ppc64le GPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA | | **Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) | -| **Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br> **Linux CPU with Intel® MKL-DNN** Python 3.6| ![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)|[1.9.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.9.0-cp27-cp27mu-linux_x86_64.whl)<br> [1.9.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl)<br> [1.9.0 py3.6](https://storage.cloud.google.com/intel-optimized-tensorflow/tensorflow-1.9.0-cp36-cp36m-linux_x86_64.whl) | +| **Linux CPU with Intel® MKL-DNN** Python 2.7<br> **Linux CPU with Intel® MKL-DNN** Python 3.5<br> **Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild)|[1.9.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.9.0-cp27-cp27mu-linux_x86_64.whl)<br> [1.9.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.9.0-cp35-cp35m-linux_x86_64.whl)<br> [1.9.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.9.0-cp36-cp36m-linux_x86_64.whl) | ## For more information -- cgit v1.2.3 From eef787ed58abaa207745d21ae1a915e23af327f3 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Wed, 8 Aug 2018 23:39:04 -0700 Subject: Try to find an allocator when the engine is not assigned a device. --- tensorflow/contrib/tensorrt/BUILD | 1 + .../contrib/tensorrt/convert/convert_graph.cc | 72 ++++++++++++++-------- .../core/grappler/clusters/single_machine.cc | 1 + 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index fc0d22d112..03404c1bf3 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -280,6 +280,7 @@ tf_cuda_library( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core:framework_lite", + "//tensorflow/core:gpu_runtime", "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 21ec8b0b30..3dae0ea4e3 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -31,6 +31,9 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/resources/trt_resources.h" #include "tensorflow/contrib/tensorrt/segment/segment.h" #include "tensorflow/contrib/tensorrt/test/utils.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/graph_to_functiondef.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -772,33 +775,54 @@ std::pair GetDeviceAndAllocator( const ConversionParams& params, const EngineInfo& engine) { int cuda_device_id = -1; tensorflow::Allocator* dev_allocator = nullptr; - if (params.cluster) { - std::vector devices; - if (!engine.device.empty() && params.cluster->GetDeviceSet()) { - DeviceNameUtils::ParsedName parsed_name; - if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) && - parsed_name.has_id) { - params.cluster->GetDeviceSet()->FindMatchingDevices(parsed_name, - &devices); + if (params.cluster == nullptr || params.cluster->GetDeviceSet() == nullptr || + engine.device.empty()) { + // If device is not set, use the first found GPU device for the conversion. + for (int tf_gpu_id_value = 0; tf_gpu_id_value < 100; ++tf_gpu_id_value) { + TfGpuId tf_gpu_id(tf_gpu_id_value); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (s.ok()) { + VLOG(1) << "Found TF GPU " << tf_gpu_id.value() << " at cuda device " + << cuda_gpu_id.value(); + cuda_device_id = cuda_gpu_id.value(); + GPUOptions gpu_options; + // If the TF to Cuda gpu id mapping exists, the device and corresponding + // allocator must have been initialized already, so the + // GetGPUAllocator() call won't create a new allocator.
+ dev_allocator = GPUProcessState::singleton()->GetGPUAllocator( + gpu_options, tf_gpu_id, 1); + break; } + VLOG(2) << "TF GPU with id " << tf_gpu_id_value << " do not exist " << s; } - if (!devices.empty()) { - if (devices.size() > 1) { - string msg = "Found multiple matching devices using name '"; - StrAppend(&msg, engine.device, "': "); - for (auto d : devices) StrAppend(&msg, d->name(), ", "); - StrAppend(&msg, ". Will get the allocator from first one."); - LOG(WARNING) << msg; - } - tensorflow::AllocatorAttributes alloc_attr; - cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id; - dev_allocator = devices[0]->GetAllocator(alloc_attr); - VLOG(1) << "Using allocator " << dev_allocator->Name() - << " and cuda_device_id " << cuda_device_id; - } else { - LOG(WARNING) << "Cluster is set but device '" << engine.device - << "' is not found in the cluster"; + return std::make_pair(cuda_device_id, dev_allocator); + } + + // Use the device requested by the engine. + auto device_set = params.cluster->GetDeviceSet(); + std::vector devices; + DeviceNameUtils::ParsedName parsed_name; + if (DeviceNameUtils::ParseFullName(engine.device, &parsed_name) && + parsed_name.has_id) { + device_set->FindMatchingDevices(parsed_name, &devices); + } + if (!devices.empty()) { + if (devices.size() > 1) { + string msg = "Found multiple matching devices using name '"; + StrAppend(&msg, engine.device, "': "); + for (auto d : devices) StrAppend(&msg, d->name(), ", "); + StrAppend(&msg, ". Will get the allocator from first one."); + LOG(WARNING) << msg; } + tensorflow::AllocatorAttributes alloc_attr; + cuda_device_id = devices[0]->tensorflow_gpu_device_info()->gpu_id; + dev_allocator = devices[0]->GetAllocator(alloc_attr); + VLOG(1) << "Using allocator " << dev_allocator->Name() + << " and cuda_device_id " << cuda_device_id; + } else { + LOG(WARNING) << "Cluster is set but device '" << engine.device + << "' is not found in the cluster"; } return std::make_pair(cuda_device_id, dev_allocator); } diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index b97603c890..83fde4fe37 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -48,6 +48,7 @@ SingleMachine::SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus) (*options_.config.mutable_device_count())["CPU"] = 1; if (num_gpus > 0) { (*options_.config.mutable_device_count())["GPU"] = num_gpus; + options_.config.mutable_gpu_options()->set_allow_growth(true); } CHECK_GE(num_cpu_cores, 1); options_.config.set_intra_op_parallelism_threads(num_cpu_cores); -- cgit v1.2.3 From a90fce71faaa356b531157c2e00804046961b39d Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Thu, 9 Aug 2018 19:34:39 +0800 Subject: CLN: clang-format format cc codes --- tensorflow/cc/gradients/math_grad.cc | 9 +++++---- tensorflow/cc/gradients/math_grad_test.cc | 6 ++++-- tensorflow/core/kernels/cwise_op_div.cc | 4 ++-- tensorflow/core/ops/math_grad_test.cc | 23 ++++++++++------------- tensorflow/core/ops/math_ops.cc | 5 +++-- 5 files changed, 24 insertions(+), 23 deletions(-) diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 84552e7c5e..c6e60689fa 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -442,16 +442,17 @@ Status RealDivGrad(const Scope& scope, const Operation& op, REGISTER_GRADIENT_OP("RealDiv", RealDivGrad); Status UnsafeDivGrad(const Scope& 
scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { + const std::vector& grad_inputs, + std::vector* grad_outputs) { auto x_1 = ConjugateHelper(scope, op.input(0)); auto x_2 = ConjugateHelper(scope, op.input(1)); // y = x_1 / x_2 // dy/dx_1 = 1/x_2 // dy/dx_2 = -x_1/x_2^2 auto gx_1 = UnsafeDiv(scope, grad_inputs[0], x_2); - auto gx_2 = Mul(scope, grad_inputs[0], - UnsafeDiv(scope, UnsafeDiv(scope, Neg(scope, x_1), x_2), x_2)); + auto gx_2 = + Mul(scope, grad_inputs[0], + UnsafeDiv(scope, UnsafeDiv(scope, Neg(scope, x_1), x_2), x_2)); return BinaryGradCommon(scope, op, grad_outputs, gx_1, gx_2); } REGISTER_GRADIENT_OP("UnsafeDiv", UnsafeDivGrad); diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc index 330d1722af..12a19bcf28 100644 --- a/tensorflow/cc/gradients/math_grad_test.cc +++ b/tensorflow/cc/gradients/math_grad_test.cc @@ -860,7 +860,8 @@ TEST_F(NaryGradTest, UnsafeDiv) { const auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); // Test x / (1 + |x|) rather than x_1 / x_2 to avoid triggering large // division errors in the numeric estimator used by the gradient checker. - const auto y = UnsafeDiv(scope_, x, Add(scope_, Const(scope_, 1), Abs(scope_, x))); + const auto y = UnsafeDiv( + scope_, x, Add(scope_, Const(scope_, 1), Abs(scope_, x))); RunTest({x}, {x_shape}, {y}, {x_shape}); } { @@ -873,7 +874,8 @@ TEST_F(NaryGradTest, UnsafeDiv) { TF_EXPECT_OK(AddSymbolicGradients(scope_, {y}, {x}, &grad_outputs)); ClientSession session(scope_); std::vector grad_result; - TF_EXPECT_OK(session.Run({{x, {-3.0f, 0.0f, 3.0f}}}, grad_outputs, &grad_result)); + TF_EXPECT_OK( + session.Run({{x, {-3.0f, 0.0f, 3.0f}}}, grad_outputs, &grad_result)); EXPECT_EQ(grad_result.size(), 1); EXPECT_EQ(grad_result[0].NumElements(), 3); EXPECT_EQ(grad_result[0].flat()(0), 0.0f); diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc index e6d458925c..d6a2403816 100644 --- a/tensorflow/core/kernels/cwise_op_div.cc +++ b/tensorflow/core/kernels/cwise_op_div.cc @@ -24,8 +24,8 @@ REGISTER5(BinaryOp, CPU, "TruncateDiv", functor::safe_div, uint8, uint16, int16, int32, int64); REGISTER6(BinaryOp, CPU, "RealDiv", functor::div, float, Eigen::half, double, bfloat16, complex64, complex128); -REGISTER5(BinaryOp, CPU, "UnsafeDiv", functor::unsafe_div, - float, double, int16, int32, int64); +REGISTER5(BinaryOp, CPU, "UnsafeDiv", functor::unsafe_div, float, double, int16, + int32, int64); #if GOOGLE_CUDA REGISTER9(BinaryOp, GPU, "Div", functor::div, float, Eigen::half, double, uint8, diff --git a/tensorflow/core/ops/math_grad_test.cc b/tensorflow/core/ops/math_grad_test.cc index f05297d234..b0d1595c31 100644 --- a/tensorflow/core/ops/math_grad_test.cc +++ b/tensorflow/core/ops/math_grad_test.cc @@ -754,10 +754,8 @@ TEST_F(MathGradTest, Div) { } TEST_F(MathGradTest, UnsafeDiv) { - auto x = test::AsTensor({0.f, -3.f, -2.f, - -1.f, 0.f, 1.f, - 2.f, 3.f, 0.f}, - TensorShape({3, 3})); + auto x = test::AsTensor( + {0.f, -3.f, -2.f, -1.f, 0.f, 1.f, 2.f, 3.f, 0.f}, TensorShape({3, 3})); auto y = test::AsTensor({-10.f, 0.f, 10.f}, TensorShape({3, 1})); Tensor dx; Tensor dy; @@ -781,8 +779,7 @@ TEST_F(MathGradTest, UnsafeDiv) { auto g = [](float x, float y) { if (y == 0.f) { return 0.f; - } - else { + } else { return -x / (y * y); } }; @@ -804,18 +801,18 @@ TEST_F(MathGradTest, UnsafeDiv) { return 1.f / y; } }; - test::ExpectClose(dy, test::AsTensor( - {g(-10.f, 0.f) + g(-10.f, -3.f) + g(-10.f, 
-2.f), - g(0.f, -1.f) + g(0.f, 0.f) + g(0.f, 1.f), - g(10.f, 2.f) + g(10.f, 3.f) + g(10.f, 0.f)}, - TensorShape({3, 1}))); + test::ExpectClose(dy, + test::AsTensor( + {g(-10.f, 0.f) + g(-10.f, -3.f) + g(-10.f, -2.f), + g(0.f, -1.f) + g(0.f, 0.f) + g(0.f, 1.f), + g(10.f, 2.f) + g(10.f, 3.f) + g(10.f, 0.f)}, + TensorShape({3, 1}))); } { auto g = [](float x, float y) { if (y == 0.f) { return 0.f; - } - else { + } else { return -x / (y * y); } }; diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 4716f4341e..49646f1f3a 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -392,8 +392,9 @@ Returns x * y element-wise. REGISTER_OP("Div").BINARY_MORE().SetShapeFn( shape_inference::BroadcastBinaryOpShapeFn); -REGISTER_OP("UnsafeDiv").BINARY_MORE().SetShapeFn( - shape_inference::BroadcastBinaryOpShapeFn); +REGISTER_OP("UnsafeDiv") + .BINARY_MORE() + .SetShapeFn(shape_inference::BroadcastBinaryOpShapeFn); REGISTER_OP("FloorDiv") .BINARY_MORE() -- cgit v1.2.3 From 7baf484688b950e74d7b75caed8f3b4cd06b4fcf Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Thu, 9 Aug 2018 10:51:37 -0700 Subject: Add test to reproduce the error. --- tensorflow/contrib/tensorrt/BUILD | 1 + .../tensorrt/test/no_device_assignment_test.py | 72 ++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 tensorflow/contrib/tensorrt/test/no_device_assignment_test.py diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 03404c1bf3..cb60dcbb0c 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -399,6 +399,7 @@ cuda_py_tests( # "test/vgg_block_nchw_test.py", # "test/vgg_block_test.py", "test/memory_alignment_test.py", + "test/no_device_assignment_test.py", ], additional_deps = [ ":tf_trt_integration_test_base", diff --git a/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py b/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py new file mode 100644 index 0000000000..a06a422860 --- /dev/null +++ b/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py @@ -0,0 +1,72 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Basic tests for TF-TensorRT integration.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.tensorrt.python import trt_convert +# pylint: disable=unused-import +from tensorflow.contrib.tensorrt.python.ops import trt_engine_op +# pylint: enable=unused-import +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import test +from tensorflow.python.platform import googletest + + +class NoDeviceAssignmentTest(googletest.TestCase): + + def testNoDeviceAssignment(self): + """Test that conversion should succeed when device is not specified.""" + sess = session.Session() # By default this will consume all the gpu memory. + used_bytes = 0 + for device in sess.list_devices(): + if 'GPU:0' in device.name: + used_bytes = device.memory_limit_bytes + self.assertGreater(used_bytes, 0) + + input_dims = [100, 24, 24, 2] + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtypes.float32, shape=input_dims, name='input') + for i in range(2): + mul = inp * inp + inp = mul + inp + array_ops.squeeze(inp, name='output') + + trt_gdef = trt_convert.create_inference_graph( + input_graph_def=g.as_graph_def(), + outputs=['output'], + max_batch_size=input_dims[0], + # Use a quarter of the allocated memory. It will fail if the converter + # falls back to native cudaMalloc(), so this tests that the converter + # doesn't fall back. + max_workspace_size_bytes=used_bytes // 4, + minimum_segment_size=2, + is_dynamic_op=False) + self.assertEqual(1, + sum([node.op == 'TRTEngineOp' for node in trt_gdef.node])) + + +if __name__ == '__main__': + test.main() -- cgit v1.2.3 From 8945e0f1fb1cdc026ce7cf91b339b0b6a21f6dc6 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Thu, 9 Aug 2018 12:54:15 -0700 Subject: Fix rank_two_test.
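The rewrite below names every op explicitly and replaces the flat engine list with a mapping from each expected engine to the node names it should absorb, so a failure points at concrete nodes. As a hedged sketch (the helper is illustrative, not part of the patch), checking which engines a converted GraphDef actually contains reduces to scanning for TRTEngineOp nodes:

    from tensorflow.core.framework import graph_pb2

    def trt_engine_names(gdef):
        # Names of all TensorRT engine ops in a converted GraphDef; these are
        # the keys ("my_trt_op_0", ...) that expected_engines is matched
        # against in the tests of this series.
        return sorted(node.name for node in gdef.node if node.op == 'TRTEngineOp')

    # Usage sketch: gdef would come from trt_convert.create_inference_graph();
    # an empty graph trivially contains no engines.
    assert trt_engine_names(graph_pb2.GraphDef()) == []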
--- tensorflow/contrib/tensorrt/test/rank_two_test.py | 65 +++++++++++++---------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/tensorflow/contrib/tensorrt/test/rank_two_test.py b/tensorflow/contrib/tensorrt/test/rank_two_test.py index a0c18da265..fbed1ac4e8 100644 --- a/tensorflow/contrib/tensorrt/test/rank_two_test.py +++ b/tensorflow/contrib/tensorrt/test/rank_two_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -33,40 +34,46 @@ class RankTwoTest(trt_test.TfTrtIntegrationTestBase): def GetParams(self): """Test for rank 2 input in TF-TRT.""" - dtype = dtypes.float32 - input_name = "input" - input_dims = [12, 5] - input2_name = "input2" - input2_dims = [12, 5, 2, 2] + input_names = ["input", "input2"] + input_dims = [[12, 5], [12, 5, 2, 2]] g = ops.Graph() with g.as_default(): - # path 1 with rank 2 input - x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name) - q = x + 1.0 - q = math_ops.abs(q) - q = q + 2.2 - q = math_ops.abs(q) - q = q + 3.0 - q = array_ops.expand_dims(q, -1) - q = array_ops.expand_dims(q, -1) - a = gen_math_ops.reciprocal(q) - # path 2 with rank 4 input - x = array_ops.placeholder(dtype=dtype, shape=input2_dims, name=input2_name) - q = x + 1.0 - q = math_ops.abs(q) - q = q + 2.2 - q = math_ops.abs(q) - q = q + 3.0 - b = gen_math_ops.reciprocal(q) - # combine path 1 & 2 - q = a + b + # Path 1 with rank 2 input + outputs = [] + for i in range(2): + x = array_ops.placeholder( + dtype=dtypes.float32, shape=input_dims[i], name=input_names[i]) + c = constant_op.constant(1.0, name="c%d_1" % i) + q = math_ops.add(x, c, name="add%d_1" % i) + q = math_ops.abs(q, name="abs%d_1" % i) + c = constant_op.constant(2.2, name="c%d_2" % i) + q = math_ops.add(q, c, name="add%d_2" % i) + q = math_ops.abs(q, name="abs%d_2" % i) + c = constant_op.constant(3.0, name="c%d_3" % i) + q = math_ops.add(q, c, name="add%d_3" % i) + if i == 0: + for j in range(2): + q = array_ops.expand_dims(q, -1, name="expand%d_%d" % (i, j)) + q = gen_math_ops.reciprocal(q, name="reciprocal%d" % i) + outputs.append(q) + # Combine path 1 & 2 + q = math_ops.add(outputs[0], outputs[1], name="add") array_ops.squeeze(q, name=self.output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), - input_names=[input_name, input2_name], - input_dims=[input_dims, input2_dims], - num_expected_engines=2, - expected_output_dims=(12, 5, 2, 2), + input_names=input_names, + input_dims=input_dims, + expected_engines={ + "my_trt_op_0": [ + "add0_1", "add0_2", "add0_3", "c0_1", "c0_2", "c0_3", "abs0_1", + "abs0_2" + ], + "my_trt_op_1": [ + "add", "add1_1", "add1_2", "add1_3", "c1_1", "c1_2", "c1_3", + "abs1_1", "abs1_2", "reciprocal0", "reciprocal1" + ], + }, + expected_output_dims=tuple(input_dims[1]), allclose_atol=1.e-03, allclose_rtol=1.e-03) -- cgit v1.2.3 From 0483e03d5e0abf053cd8440752d96d486c9cd692 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Thu, 9 Aug 2018 14:14:00 -0700 Subject: Fix broken test --- tensorflow/contrib/tensorrt/test/no_device_assignment_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py 
b/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py index a06a422860..1d54ff3a36 100644 --- a/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py +++ b/tensorflow/contrib/tensorrt/test/no_device_assignment_test.py @@ -18,14 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - from tensorflow.contrib.tensorrt.python import trt_convert # pylint: disable=unused-import from tensorflow.contrib.tensorrt.python.ops import trt_engine_op # pylint: enable=unused-import from tensorflow.python.client import session -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -37,6 +34,8 @@ class NoDeviceAssignmentTest(googletest.TestCase): def testNoDeviceAssignment(self): """Test that conversion should succeed when device is not specified.""" + if not trt_convert.is_tensorrt_enabled(): + return sess = session.Session() # By default this will consume all the gpu memory. used_bytes = 0 for device in sess.list_devices(): -- cgit v1.2.3 From dbfa330c963f9e015cc66ad4aebdd7985651c024 Mon Sep 17 00:00:00 2001 From: drpngx Date: Fri, 10 Aug 2018 13:19:23 -0700 Subject: Address ebrevdo --- tensorflow/python/ops/nn_grad.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 025ce7ce88..59ba0091c8 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -987,8 +987,7 @@ def _TopKGrad(op, grad, _): # Compute linear indices (flattened to 1D). ind = array_ops.reshape(ind_2d + math_ops.cast(array_ops.expand_dims( math_ops.range(0, math_ops.cast(outerdim, dtypes.int64) - * in_lastdim, in_lastdim), -1 - ), dtypes.int32), [-1]) + * in_lastdim, in_lastdim), -1), dtypes.int32), [-1]) # Substitute grad to appropriate locations and fill the rest with zeros, # finally reshaping it to the original input shape. -- cgit v1.2.3 From 9a50467e57d81ce1e91089732eb482a9fe0ca68f Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 10 Aug 2018 15:48:01 +0000 Subject: Update Kafka to 0.11.5 This fix updates librdkafka to 0.11.5. 
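The sha256 and strip_prefix in the hunk below have to match the new release tarball. A small sketch (not part of the patch) of computing the checksum that tf_http_archive verifies, using the GitHub URL from the diff:

    import hashlib
    import urllib.request

    URL = 'https://github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz'

    # Fetch the archive and print its sha256; the hex digest is what goes
    # into the sha256 attribute in tensorflow/workspace.bzl.
    data = urllib.request.urlopen(URL).read()
    print(hashlib.sha256(data).hexdigest())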
Signed-off-by: Yong Tang --- tensorflow/workspace.bzl | 8 ++++---- third_party/kafka/BUILD | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c21e5ebc9e..4c36eda209 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -576,11 +576,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "kafka", urls = [ - "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.4.tar.gz", - "https://github.com/edenhill/librdkafka/archive/v0.11.4.tar.gz", + "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz", + "https://github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz", ], - sha256 = "9d8f1eb7b0e29e9ab1168347c939cb7ae5dff00a39cef99e7ef033fd8f92737c", - strip_prefix = "librdkafka-0.11.4", + sha256 = "cc6ebbcd0a826eec1b8ce1f625ffe71b53ef3290f8192b6cae38412a958f4fd3", + strip_prefix = "librdkafka-0.11.5", build_file = clean_dep("//third_party:kafka/BUILD"), patch_file = clean_dep("//third_party/kafka:config.patch"), ) diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index 3c50b8cf52..6c89a71837 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -50,6 +50,7 @@ cc_library( "src/rdkafka.h", "src/rdkafka_assignor.c", "src/rdkafka_assignor.h", + "src/rdkafka_background.c", "src/rdkafka_broker.c", "src/rdkafka_broker.h", "src/rdkafka_buf.c", -- cgit v1.2.3 From 211f164edc633087c0c000c08f6e92343bc525bd Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 10 Aug 2018 20:17:04 +0000 Subject: Update boringssl to 7f634429a04abc48e2eb041c81c5235816c96514 This fix updates boringssl to 7f634429a04abc48e2eb041c81c5235816c96514 This update is necessary as it contains OpenSSL API SSL[_CTX]_set1_sigalgs[_list]: https://boringssl-review.googlesource.com/c/boringssl/+/30304 which is necessary to build librdkafka without additional patching: https://github.com/edenhill/librdkafka/issues/1896 (same issue with previous version of openssl and libressl) Signed-off-by: Yong Tang --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 4c36eda209..496a2488f8 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -521,11 +521,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "boringssl", urls = [ - "https://mirror.bazel.build/github.com/google/boringssl/archive/45c4a87ae97eb95a8fc2906c035d6a8d0e02e1b8.tar.gz", - "https://github.com/google/boringssl/archive/45c4a87ae97eb95a8fc2906c035d6a8d0e02e1b8.tar.gz", + "https://mirror.bazel.build/github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz", + "https://github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz", ], - sha256 = "972e8d8a9d1daf9892fff7155312b1af46b4754446575a7b285e62f917424c78", - strip_prefix = "boringssl-45c4a87ae97eb95a8fc2906c035d6a8d0e02e1b8", + sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45", + strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514", ) tf_http_archive( -- cgit v1.2.3 From abc066e230aaf037e82d97b6f993ea5ecb07c3e7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 10 Aug 2018 21:25:31 +0000 Subject: Fix missing include file in kafka/BUILD Signed-off-by: Yong Tang --- third_party/kafka/BUILD | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index
6c89a71837..11ec50069a 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -48,8 +48,12 @@ cc_library( "src/rdinterval.h", "src/rdkafka.c", "src/rdkafka.h", + "src/rdkafka_admin.c", + "src/rdkafka_admin.h", "src/rdkafka_assignor.c", "src/rdkafka_assignor.h", + "src/rdkafka_aux.c", + "src/rdkafka_aux.h", "src/rdkafka_background.c", "src/rdkafka_broker.c", "src/rdkafka_broker.h", @@ -59,6 +63,7 @@ cc_library( "src/rdkafka_cgrp.h", "src/rdkafka_conf.c", "src/rdkafka_conf.h", + "src/rdkafka_confval.h", "src/rdkafka_event.h", "src/rdkafka_feature.c", "src/rdkafka_feature.h", -- cgit v1.2.3 From be645259c251e9b81e2d36efdd7b403bedaffe03 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Fri, 10 Aug 2018 16:01:08 -0700 Subject: Fix rank_two_test and reenable all other tests. --- tensorflow/contrib/tensorrt/BUILD | 17 +- tensorflow/contrib/tensorrt/test/base_test.py | 126 +++++------ .../contrib/tensorrt/test/batch_matmul_test.py | 38 +++- .../contrib/tensorrt/test/biasadd_matmul_test.py | 54 ++++- .../test/binary_tensor_weight_broadcast_test.py | 43 ++-- .../contrib/tensorrt/test/concatenation_test.py | 9 +- .../contrib/tensorrt/test/const_broadcast_test.py | 17 +- tensorflow/contrib/tensorrt/test/manual_test.py | 125 +++++++++++ .../contrib/tensorrt/test/memory_alignment_test.py | 17 +- .../test/multi_connection_neighbor_engine_test.py | 9 +- .../tensorrt/test/neighboring_engine_test.py | 15 +- tensorflow/contrib/tensorrt/test/rank_two_test.py | 37 ++-- .../tensorrt/test/tf_trt_integration_test_base.py | 238 ++++++++++++++------- tensorflow/contrib/tensorrt/test/unary_test.py | 15 +- .../contrib/tensorrt/test/vgg_block_nchw_test.py | 17 +- tensorflow/contrib/tensorrt/test/vgg_block_test.py | 17 +- 16 files changed, 548 insertions(+), 246 deletions(-) create mode 100644 tensorflow/contrib/tensorrt/test/manual_test.py diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 5b54cb76b4..26236a0435 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -387,18 +387,19 @@ cuda_py_tests( name = "tf_trt_integration_test", srcs = [ "test/base_test.py", - # "test/batch_matmul_test.py", - # "test/biasadd_matmul_test.py", - # "test/binary_tensor_weight_broadcast_test.py", # Blocked by trt4 installation - # "test/concatenation_test.py", # Blocked by trt4 installation + "test/batch_matmul_test.py", + "test/biasadd_matmul_test.py", + "test/binary_tensor_weight_broadcast_test.py", + "test/concatenation_test.py", "test/const_broadcast_test.py", + "test/manual_test.py", + "test/memory_alignment_test.py", "test/multi_connection_neighbor_engine_test.py", "test/neighboring_engine_test.py", "test/rank_two_test.py", - # "test/unary_test.py", # Blocked by trt4 installation - # "test/vgg_block_nchw_test.py", - # "test/vgg_block_test.py", - "test/memory_alignment_test.py", + "test/unary_test.py", + "test/vgg_block_nchw_test.py", + "test/vgg_block_test.py", ], additional_deps = [ ":tf_trt_integration_test_base", diff --git a/tensorflow/contrib/tensorrt/test/base_test.py b/tensorflow/contrib/tensorrt/test/base_test.py index 8ea5a63735..8453807a50 100644 --- a/tensorflow/contrib/tensorrt/test/base_test.py +++ b/tensorflow/contrib/tensorrt/test/base_test.py @@ -67,14 +67,15 @@ class SimpleSingleEngineTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which - # breaks the 
connection check, fix it. - # - my_trt_op_0 should have ["weights", "conv", "bias", "bias_add", - # "relu", "identity", "max_pool"] - expected_engines=["my_trt_op_0"], - expected_output_dims=(100, 6, 6, 6), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(100, 6, 6, 6)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which + # breaks the connection check, fix it. + # - my_trt_op_0 should have ["weights", "conv", "bias", "bias_add", + # "relu", "identity", "max_pool"] + return ["my_trt_op_0"] class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase): @@ -120,15 +121,16 @@ class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which - # breaks the connection check, fix it. - # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1", - # "add", "sub1"]; - # - my_trt_op_1 should have ["weights","conv", "div"] - expected_engines=["my_trt_op_0", "my_trt_op_1"], - expected_output_dims=(100, 12, 12, 6), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(100, 12, 12, 6)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + # TODO(aaroey): LayoutOptimizer adds additional nodes to the graph which + # breaks the connection check, fix it. + # - my_trt_op_0 should have ["mul", "sub", "div1", "mul1", "add1", + # "add", "sub1"]; + # - my_trt_op_1 should have ["weights","conv", "div"] + return ["my_trt_op_0", "my_trt_op_1"] class PartiallyConvertedTestA(trt_test.TfTrtIntegrationTestBase): @@ -166,13 +168,14 @@ class PartiallyConvertedTestA(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines={ - # Only the first engine is built. - "my_trt_op_0": ["c0", "c1", "add0", "add1", "mul0", "mul1"] - }, - expected_output_dims=tuple(input_dims), - allclose_atol=1.e-06, - allclose_rtol=1.e-06) + expected_output_dims=tuple(input_dims)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + # Only the first engine is built. + "my_trt_op_0": ["c0", "c1", "add0", "add1", "mul0", "mul1"] + } class PartiallyConvertedTestB(PartiallyConvertedTestA): @@ -184,13 +187,12 @@ class PartiallyConvertedTestB(PartiallyConvertedTestA): trt_convert.clear_test_values("") trt_convert.add_test_value("my_trt_op_0:CreateTRTNode", "fail") - def GetParams(self): - """Create a graph containing two segment.""" - return super(PartiallyConvertedTestB, self).GetParams()._replace( - expected_engines={ - # Only the second engine is built. - "my_trt_op_1": ["c2", "c3", "add2", "add3", "mul2", "mul3"] - }) + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + # Only the second engine is built. 
+ "my_trt_op_1": ["c2", "c3", "add2", "add3", "mul2", "mul3"] + } class ConstInputTest(trt_test.TfTrtIntegrationTestBase): @@ -226,13 +228,14 @@ class ConstInputTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines={ - "my_trt_op_0": ["add", "add1", "mul"], - "my_trt_op_1": ["add2", "add3", "mul1"] - }, - expected_output_dims=tuple(input_dims), - allclose_atol=1.e-06, - allclose_rtol=1.e-06) + expected_output_dims=tuple(input_dims)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + "my_trt_op_0": ["add", "add1", "mul"], + "my_trt_op_1": ["add2", "add3", "mul1"] + } class ConstDataInputSingleEngineTest(trt_test.TfTrtIntegrationTestBase): @@ -256,10 +259,11 @@ class ConstDataInputSingleEngineTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines={"my_trt_op_0": ["c", "add", "add1", "mul"]}, - expected_output_dims=tuple(input_dims), - allclose_atol=1.e-06, - allclose_rtol=1.e-06) + expected_output_dims=tuple(input_dims)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return {"my_trt_op_0": ["c", "add", "add1", "mul"]} class ConstDataInputMultipleEnginesTest(trt_test.TfTrtIntegrationTestBase): @@ -287,17 +291,18 @@ class ConstDataInputMultipleEnginesTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines={ - "my_trt_op_0": ["add2", "add3", "mul1"], - # Why segment ["add", "add1", "mul"] was assigned segment id 1 - # instead of 0: the parent node of this segment is actually const - # node 'c', but it's removed later since it's const output of the - # segment which is not allowed. - "my_trt_op_1": ["add", "add1", "mul"] - }, - expected_output_dims=tuple(input_dims), - allclose_atol=1.e-06, - allclose_rtol=1.e-06) + expected_output_dims=tuple(input_dims)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + "my_trt_op_0": ["add2", "add3", "mul1"], + # Why segment ["add", "add1", "mul"] was assigned segment id 1 + # instead of 0: the parent node of this segment is actually const + # node 'c', but it's removed later since it's const output of the + # segment which is not allowed. 
+ "my_trt_op_1": ["add", "add1", "mul"] + } class ControlDependencyTest(trt_test.TfTrtIntegrationTestBase): @@ -333,13 +338,14 @@ class ControlDependencyTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines={ - "my_trt_op_0": ["c1", "add", "add1", "mul"], - "my_trt_op_1": ["c2", "add2", "add3", "mul1"] - }, - expected_output_dims=tuple(input_dims), - allclose_atol=1.e-06, - allclose_rtol=1.e-06) + expected_output_dims=tuple(input_dims)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + "my_trt_op_0": ["c1", "add", "add1", "mul"], + "my_trt_op_1": ["c2", "add2", "add3", "mul1"] + } if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/batch_matmul_test.py b/tensorflow/contrib/tensorrt/test/batch_matmul_test.py index 2e1107e303..070a30557d 100644 --- a/tensorflow/contrib/tensorrt/test/batch_matmul_test.py +++ b/tensorflow/contrib/tensorrt/test/batch_matmul_test.py @@ -66,10 +66,40 @@ class BatchMatMulTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name, w1_name, w2_name], input_dims=[input_dims, w1_dims, w2_dims], - expected_engines=["my_trt_op_0"], - expected_output_dims=(12, 5, 8, 7), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(12, 5, 8, 7)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + if (run_params.dynamic_engine and + not trt_test.IsQuantizationMode(run_params.precision_mode)): + return ["my_trt_op_0", "my_trt_op_1"] + return ["my_trt_op_1"] + + def ExpectedEnginesToRun(self, run_params): + """Return the expected engines to run.""" + return ["my_trt_op_1"] + + def ShouldRunTest(self, run_params): + """Whether to run the test.""" + # TODO(aaroey): Trt library will fail like: + # + # ../builder/cudnnBuilder2.cpp:685: + # virtual std::vector> + # nvinfer1::builder::Node::getSupportedFormats( + # const nvinfer1::query::Ports&, + # const nvinfer1::cudnn::HardwareContext&, + # nvinfer1::builder::Format::Type, + # const nvinfer1::builder::FormatTypeHack&) const: + # Assertion `sf' failed. + # + # To reproduce, run: + # bazel test -c opt --copt=-mavx \ + # --test_arg=BatchMatMulTest.testTfTrt_ToolConversion_INT8_DynamicEngine \ + # tensorflow/contrib/tensorrt:batch_matmul_test + # + # Investigate and fix it. 
+ return not trt_test.IsQuantizationMode(run_params.precision_mode) if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py index 8be32f59b4..3e30acc231 100644 --- a/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py +++ b/tensorflow/contrib/tensorrt/test/biasadd_matmul_test.py @@ -102,13 +102,53 @@ class BiasaddMatMulTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=[ - "my_trt_op_0", "my_trt_op_1", "my_trt_op_2", "my_trt_op_3", - "my_trt_op_4", "my_trt_op_5", "my_trt_op_6" - ], - expected_output_dims=(48, 89), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(48, 89)) + + def GetConversionParams(self, run_params): + """Return a ConversionParams for test.""" + return super(BiasaddMatMulTest, + self).GetConversionParams(run_params)._replace( + max_batch_size=48, maximum_cached_engines=2) + + def _ValidEngines(self): + """Engines expected to build and run.""" + return [ + "my_trt_op_0", "my_trt_op_1", "my_trt_op_2", "my_trt_op_6", + "my_trt_op_7", "my_trt_op_8", "my_trt_op_9" + ] + + def _InvalidEngines(self): + """Engines that will cause conversion error at building time.""" + return ["my_trt_op_3", "my_trt_op_4", "my_trt_op_5"] + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + # In dynamic engine mode the engines are built at execution time, not at + # conversion time, so build errors occur later. Here three of the engines + # will fail to build, but the corresponding engine ops are still created. + # TODO(aaroey, jjsjann123): fix this. + if (run_params.dynamic_engine and + not trt_test.IsQuantizationMode(run_params.precision_mode)): + return self._ValidEngines() + self._InvalidEngines() + return self._ValidEngines() + + def ExpectedEnginesToRun(self, run_params): + """Return the expected engines to run.""" + return self._ValidEngines() + + def ShouldRunTest(self, run_params): + """Whether to run the test.""" + # TODO(aaroey): Trt 4.0 forbids conversion for tensors with rank <3 in int8 + # mode, which is a bug. Re-enable this when the trt library is fixed.
+ return not trt_test.IsQuantizationMode(run_params.precision_mode) + + def ExpectedAbsoluteTolerance(self, run_params): + """The absolute tolerance to compare floating point results.""" + return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03 + + def ExpectedRelativeTolerance(self, run_params): + """The relative tolerance to compare floating point results.""" + return 1.e-05 if run_params.precision_mode == "FP32" else 1.e-03 if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py b/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py index 9316b14da0..89ef6a5baf 100644 --- a/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py +++ b/tensorflow/contrib/tensorrt/test/binary_tensor_weight_broadcast_test.py @@ -109,27 +109,28 @@ class BinaryTensorWeightBroadcastTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=[ - "my_trt_op_0", - "my_trt_op_1", - "my_trt_op_2", - "my_trt_op_3", - "my_trt_op_4", - "my_trt_op_5", - "my_trt_op_6", - "my_trt_op_7", - "my_trt_op_8", - "my_trt_op_9", - "my_trt_op_10", - "my_trt_op_11", - "my_trt_op_12", - "my_trt_op_13", - "my_trt_op_14", - "my_trt_op_15", - ], - expected_output_dims=(5, 23040), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(5, 23040)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return [ + "my_trt_op_0", + "my_trt_op_1", + "my_trt_op_2", + "my_trt_op_3", + "my_trt_op_4", + "my_trt_op_5", + "my_trt_op_6", + "my_trt_op_7", + "my_trt_op_8", + "my_trt_op_9", + "my_trt_op_10", + "my_trt_op_11", + "my_trt_op_12", + "my_trt_op_13", + "my_trt_op_14", + "my_trt_op_15", + ] if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/concatenation_test.py b/tensorflow/contrib/tensorrt/test/concatenation_test.py index 1874b9dd45..c670b759dc 100644 --- a/tensorflow/contrib/tensorrt/test/concatenation_test.py +++ b/tensorflow/contrib/tensorrt/test/concatenation_test.py @@ -73,10 +73,11 @@ class ConcatenationTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=["my_trt_op_0"], - expected_output_dims=(2, 126), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(2, 126)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ["my_trt_op_0"] if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/const_broadcast_test.py b/tensorflow/contrib/tensorrt/test/const_broadcast_test.py index 8c59000b70..d2d1d0e6dd 100644 --- a/tensorflow/contrib/tensorrt/test/const_broadcast_test.py +++ b/tensorflow/contrib/tensorrt/test/const_broadcast_test.py @@ -58,10 +58,19 @@ class ConstBroadcastTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=['my_trt_op_0'], - expected_output_dims=(5, 12, 12, 1), - allclose_atol=1.e-02, - allclose_rtol=1.e-02) + expected_output_dims=(5, 12, 12, 1)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ['my_trt_op_0'] + + def ExpectedAbsoluteTolerance(self, run_params): + """The absolute tolerance to compare floating point results.""" + return 1.e-04 if run_params.precision_mode == 'FP32' else 1.e-02 + + def ExpectedRelativeTolerance(self, run_params): + """The relative 
tolerance to compare floating point results.""" + return 1.e-04 if run_params.precision_mode == 'FP32' else 1.e-02 if __name__ == '__main__': diff --git a/tensorflow/contrib/tensorrt/test/manual_test.py b/tensorflow/contrib/tensorrt/test/manual_test.py new file mode 100644 index 0000000000..60607681eb --- /dev/null +++ b/tensorflow/contrib/tensorrt/test/manual_test.py @@ -0,0 +1,125 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Basic tests for TF-TensorRT integration.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ast +import numpy as np +import os + +from tensorflow.contrib.tensorrt.python import trt_convert +from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test +from tensorflow.python.platform import gfile + + +class ManualTest(trt_test.TfTrtIntegrationTestBase): + + def __init__(self, methodName='runTest'): # pylint: disable=invalid-name + super(ManualTest, self).__init__(methodName) + self._params_map = None + + def _GetEnv(self): + """Get an environment variable specifying the manual test parameters. 
+ + The value of the environment variable is the string representation of a dict + which should contain the following keys: + - 'graph_path': the file path to the serialized frozen graphdef + - 'input_names': TfTrtIntegrationTestParams.input_names + - 'input_dims': TfTrtIntegrationTestParams.input_dims + - 'expected_output_dims': TfTrtIntegrationTestParams.expected_output_dims + - 'output_name': the name of op to fetch + - 'expected_engines_to_run': ExpectedEnginesToRun() will return this + - 'expected_engines_to_build': ExpectedEnginesToBuild() will return this + - 'max_batch_size': ConversionParams.max_batch_size + """ + return os.getenv('TRT_MANUAL_TEST_PARAMS', '') + + def _GetParamsMap(self): + """Parse the environment variable as a dict and return it.""" + if self._params_map is None: + self._params_map = ast.literal_eval(self._GetEnv()) + return self._params_map + + @property + def output_name(self): + return self._GetParamsMap()['output_name'] + + def GetParams(self): + """Testing conversion of manually provided frozen graph.""" + params_map = self._GetParamsMap() + gdef = graph_pb2.GraphDef() + with gfile.Open(params_map['graph_path'], 'rb') as f: + gdef.ParseFromString(f.read()) + return trt_test.TfTrtIntegrationTestParams( + gdef=gdef, + input_names=params_map['input_names'], + input_dims=params_map['input_dims'], + expected_output_dims=params_map['expected_output_dims']) + + def GetConversionParams(self, run_params): + """Return a ConversionParams for test.""" + conversion_params = super(ManualTest, self).GetConversionParams(run_params) + params_map = self._GetParamsMap() + if 'max_batch_size' in params_map: + conversion_params = conversion_params._replace( + max_batch_size=params_map['max_batch_size']) + return conversion_params + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return self._GetParamsMap()['expected_engines_to_build'] + + def ExpectedEnginesToRun(self, run_params): + """Return the expected engines to run.""" + params_map = self._GetParamsMap() + if 'expected_engines_to_run' in params_map: + return params_map['expected_engines_to_run'] + return self.ExpectedEnginesToBuild(run_params) + + def ExpectedAbsoluteTolerance(self, run_params): + """The absolute tolerance to compare floating point results.""" + params_map = self._GetParamsMap() + if 'atol' in params_map: + return params_map['atol'] + return 1.e-3 + + def ExpectedRelativeTolerance(self, run_params): + """The relative tolerance to compare floating point results.""" + params_map = self._GetParamsMap() + if 'rtol' in params_map: + return params_map['rtol'] + return 1.e-3 + + def ShouldRunTest(self, run_params): + """Whether to run the test.""" + return len(self._GetEnv()) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/tensorrt/test/memory_alignment_test.py b/tensorflow/contrib/tensorrt/test/memory_alignment_test.py index 66eb6be757..fd2c165f35 100644 --- a/tensorflow/contrib/tensorrt/test/memory_alignment_test.py +++ b/tensorflow/contrib/tensorrt/test/memory_alignment_test.py @@ -62,10 +62,19 @@ class MemoryAlignmentTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=["my_trt_op_0"], - expected_output_dims=(2, 15, 15, 10), - allclose_atol=1.e-02, - allclose_rtol=1.e-02) + expected_output_dims=(2, 15, 15, 10)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ["my_trt_op_0"] + + def 
ExpectedAbsoluteTolerance(self, run_params): + """The absolute tolerance to compare floating point results.""" + return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-02 + + def ExpectedRelativeTolerance(self, run_params): + """The relative tolerance to compare floating point results.""" + return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-02 if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/multi_connection_neighbor_engine_test.py b/tensorflow/contrib/tensorrt/test/multi_connection_neighbor_engine_test.py index fd55b8cd99..13fdbcc5ad 100644 --- a/tensorflow/contrib/tensorrt/test/multi_connection_neighbor_engine_test.py +++ b/tensorflow/contrib/tensorrt/test/multi_connection_neighbor_engine_test.py @@ -77,10 +77,11 @@ class MultiConnectionNeighborEngineTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=["my_trt_op_0", "my_trt_op_1"], - expected_output_dims=(2, 4, 5, 4), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(2, 4, 5, 4)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ["my_trt_op_0", "my_trt_op_1"] if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py b/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py index 51c905a50b..d83f7278fc 100644 --- a/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py +++ b/tensorflow/contrib/tensorrt/test/neighboring_engine_test.py @@ -59,13 +59,14 @@ class NeighboringEngineTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines={ - "my_trt_op_0": ["bias", "mul", "sub"], - "my_trt_op_1": ["weights", "conv"] - }, - expected_output_dims=(2, 4, 5, 4), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(2, 4, 5, 4)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + "my_trt_op_0": ["bias", "mul", "sub"], + "my_trt_op_1": ["weights", "conv"] + } if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/rank_two_test.py b/tensorflow/contrib/tensorrt/test/rank_two_test.py index fbed1ac4e8..9a9c919fca 100644 --- a/tensorflow/contrib/tensorrt/test/rank_two_test.py +++ b/tensorflow/contrib/tensorrt/test/rank_two_test.py @@ -35,10 +35,10 @@ class RankTwoTest(trt_test.TfTrtIntegrationTestBase): def GetParams(self): """Test for rank 2 input in TF-TRT.""" input_names = ["input", "input2"] + # Two paths: first with rank 2 input, second with rank 4 input. 
input_dims = [[12, 5], [12, 5, 2, 2]] g = ops.Graph() with g.as_default(): - # Path 1 with rank 2 input outputs = [] for i in range(2): x = array_ops.placeholder( @@ -56,26 +56,33 @@ class RankTwoTest(trt_test.TfTrtIntegrationTestBase): q = array_ops.expand_dims(q, -1, name="expand%d_%d" % (i, j)) q = gen_math_ops.reciprocal(q, name="reciprocal%d" % i) outputs.append(q) - # Combine path 1 & 2 + # Combine both paths q = math_ops.add(outputs[0], outputs[1], name="add") array_ops.squeeze(q, name=self.output_name) return trt_test.TfTrtIntegrationTestParams( gdef=g.as_graph_def(), input_names=input_names, input_dims=input_dims, - expected_engines={ - "my_trt_op_0": [ - "add0_1", "add0_2", "add0_3", "c0_1", "c0_2", "c0_3", "abs0_1", - "abs0_2" - ], - "my_trt_op_1": [ - "add", "add1_1", "add1_2", "add1_3", "c1_1", "c1_2", "c1_3", - "abs1_1", "abs1_2", "reciprocal0", "reciprocal1" - ], - }, - expected_output_dims=tuple(input_dims[1]), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=tuple(input_dims[1])) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return { + "my_trt_op_0": [ + "add0_1", "add0_2", "add0_3", "c0_1", "c0_2", "c0_3", "abs0_1", + "abs0_2" + ], + "my_trt_op_1": [ + "add", "add1_1", "add1_2", "add1_3", "c1_1", "c1_2", "c1_3", + "abs1_1", "abs1_2", "reciprocal0", "reciprocal1" + ], + } + + def ShouldRunTest(self, run_params): + """Whether to run the test.""" + # TODO(aaroey): Trt 4.0 forbids conversion for tensors with rank <3 in int8 + # mode, which is a bug. Re-enable this when trt library is fixed. + return not trt_test.IsQuantizationMode(run_params.precision_mode) if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py index 6f85ada464..fc20950e45 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test_base.py @@ -38,19 +38,24 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import math_ops from tensorflow.python.platform import tf_logging as logging -TfTrtIntegrationTestParams = namedtuple("TfTrtIntegrationTestParams", [ - "gdef", "input_names", "input_dims", "expected_engines", - "expected_output_dims", "allclose_atol", "allclose_rtol" -]) +TfTrtIntegrationTestParams = namedtuple( + "TfTrtIntegrationTestParams", + ["gdef", "input_names", "input_dims", "expected_output_dims"]) RunParams = namedtuple( "RunParams", ["use_optimizer", "precision_mode", "dynamic_engine", "test_name"]) +ConversionParams = namedtuple("ConversionParams", [ + "max_batch_size", "max_workspace_size_bytes", "precision_mode", + "minimum_segment_size", "is_dynamic_op", "maximum_cached_engines", + "cached_engine_batches" +]) + PRECISION_MODES = ["FP32", "FP16", "INT8"] -def _IsQuantizationMode(mode): +def IsQuantizationMode(mode): return mode == "INT8" @@ -112,6 +117,10 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): super(TfTrtIntegrationTestBase, cls).setUpClass() trt_convert.enable_test_value() + def __init__(self, methodName="runTest"): # pylint: disable=invalid-name + super(TfTrtIntegrationTestBase, self).__init__(methodName) + self._trt_test_params = None + def setUp(self): """Setup method.""" super(TfTrtIntegrationTestBase, self).setUp() @@ -122,43 +131,96 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): """Return a TfTrtIntegrationTestParams for test, implemented by 
subclass.""" raise NotImplementedError() - def _PrepareRun(self, params, graph_state): + def GetConversionParams(self, run_params): + """Return a ConversionParams for test.""" + return ConversionParams( + max_batch_size=max( + [dims[0] for dims in self._GetParamsCached().input_dims]), + max_workspace_size_bytes=1 << 25, + precision_mode=self._ToBytes(run_params.precision_mode), + minimum_segment_size=2, + is_dynamic_op=run_params.dynamic_engine, + maximum_cached_engines=1, + cached_engine_batches=None) + + def ShouldRunTest(self, run_params): + """Whether to run the test.""" + return True + + def VerifyRunForEngine(self, engine_name, graph_state, expect_run=True): + """Verify the state of a particular engine after sess.run().""" + if graph_state == GraphState.ORIGINAL: + self._ExpectCalibration(engine_name, "") + self._ExpectNativeSegment(engine_name, "") + self._ExpectTrtEngine(engine_name, "") + elif graph_state == GraphState.CALIBRATE: + self._ExpectCalibration(engine_name, "done") + self._ExpectNativeSegment(engine_name, "done") + self._ExpectTrtEngine(engine_name, "") + elif graph_state == GraphState.INFERENCE: + self._ExpectCalibration(engine_name, "") + if expect_run: + self._ExpectNativeSegment(engine_name, "") + self._ExpectTrtEngine(engine_name, "done") + else: + self._ExpectNativeSegment(engine_name, "done") + self._ExpectTrtEngine(engine_name, "") + + def VerifyRun(self, run_params, graph_state): + """Verify the state of all engines after sess.run().""" + for engine_name in self.ExpectedEnginesToBuild(run_params): + expect_run = (engine_name in self.ExpectedEnginesToRun(run_params)) + self.VerifyRunForEngine(engine_name, graph_state, expect_run) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build, implemented by subclass.""" + raise NotImplementedError() + + def ExpectedEnginesToRun(self, run_params): + """Return the expected engines to run.""" + return self.ExpectedEnginesToBuild(run_params) + + def ExpectedAbsoluteTolerance(self, run_params): + """The absolute tolerance to compare floating point results.""" + return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-03 + + def ExpectedRelativeTolerance(self, run_params): + """The relative tolerance to compare floating point results.""" + return 1.e-06 if run_params.precision_mode == "FP32" else 1.e-03 + + def _GetParamsCached(self): + if self._trt_test_params is None: + self._trt_test_params = self.GetParams() + return self._trt_test_params + + def _PrepareRun(self, graph_state): """Set up necessary testing environment before calling sess.run().""" # Clear test values added by TRTEngineOp. 
trt_convert.clear_test_values("my_trt_op_.*:ExecuteTrtEngine") trt_convert.clear_test_values("my_trt_op_.*:ExecuteCalibration") trt_convert.clear_test_values("my_trt_op_.*:ExecuteNativeSegment") - def _VerifyRun(self, params, graph_state): - """Verify the state after sess.run().""" - for engine_name in params.expected_engines: - if graph_state == GraphState.ORIGINAL: - self._ExpectCalibration(engine_name, "") - self._ExpectNativeSegment(engine_name, "") - self._ExpectTrtEngine(engine_name, "") - elif graph_state == GraphState.CALIBRATE: - self._ExpectCalibration(engine_name, "done") - self._ExpectNativeSegment(engine_name, "done") - self._ExpectTrtEngine(engine_name, "") - elif graph_state == GraphState.INFERENCE: - self._ExpectCalibration(engine_name, "") - self._ExpectNativeSegment(engine_name, "") - self._ExpectTrtEngine(engine_name, "done") - - def _GetConfigProto(self, params, run_params, graph_state): + def _GetConfigProto(self, run_params, graph_state): """Get config proto based on specific settings.""" if graph_state != GraphState.ORIGINAL and run_params.use_optimizer: rewriter_cfg = rewriter_config_pb2.RewriterConfig() rewriter_cfg.optimizers.extend(["constfold", "layout"]) custom_op = rewriter_cfg.custom_optimizers.add() custom_op.name = "TensorRTOptimizer" - custom_op.parameter_map["minimum_segment_size"].i = 2 - custom_op.parameter_map["max_batch_size"].i = max( - [dims[0] for dims in params.input_dims]) - custom_op.parameter_map["is_dynamic_op"].b = run_params.dynamic_engine - custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 25 - custom_op.parameter_map["precision_mode"].s = self._ToBytes( - run_params.precision_mode) + trt_params = self.GetConversionParams(run_params) + custom_op.parameter_map["max_batch_size"].i = trt_params.max_batch_size + custom_op.parameter_map["max_workspace_size_bytes"].i = ( + trt_params.max_workspace_size_bytes) + custom_op.parameter_map["precision_mode"].s = trt_params.precision_mode + custom_op.parameter_map["minimum_segment_size"].i = ( + trt_params.minimum_segment_size) + custom_op.parameter_map["is_dynamic_op"].b = trt_params.is_dynamic_op + custom_op.parameter_map["maximum_cached_engines"].i = ( + trt_params.maximum_cached_engines) + if trt_params.cached_engine_batches: + custom_op.parameter_map["cached_engine_batches"].list.i.extend( + trt_params.cached_engine_batches) + graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_cfg) else: graph_options = config_pb2.GraphOptions() @@ -190,9 +252,15 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): def _ExpectNativeSegment(self, engine_name, value): self._ExpectTestValue(engine_name, "ExecuteNativeSegment", value) - def _RunGraph(self, params, gdef, input_data, config, graph_state, + def _RunGraph(self, + run_params, + gdef, + input_data, + config, + graph_state, num_runs=2): """Run given graphdef multiple times.""" + params = self._GetParamsCached() assert len(params.input_names) == len(input_data) g = ops.Graph() with g.as_default(): @@ -208,35 +276,38 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): val = None # Defaults to 2 runs to verify result across multiple runs is same.
for _ in range(num_runs): - self._PrepareRun(params, graph_state) + self._PrepareRun(graph_state) new_val = sess.run(out, {inp[i]: input_data[i] for i in range(len(inp))}) self.assertEqual(params.expected_output_dims, new_val.shape) if val is not None: self.assertAllEqual(val, new_val) val = new_val - self._VerifyRun(params, graph_state) + self.VerifyRun(run_params, graph_state) return val # Use real data that is representative of the inference dataset # for calibration. For this test script it is random data. - def _RunCalibration(self, params, gdef, input_data, config): + def _RunCalibration(self, run_params, gdef, input_data, config): """Run calibration on given graph.""" return self._RunGraph( - params, gdef, input_data, config, GraphState.CALIBRATE, num_runs=5) + run_params, gdef, input_data, config, GraphState.CALIBRATE, num_runs=5) - def _GetTrtGraphDef(self, params, run_params, gdef): + def _GetTrtGraphDef(self, run_params, gdef): """Return trt converted graphdef.""" + trt_params = self.GetConversionParams(run_params) return trt_convert.create_inference_graph( input_graph_def=gdef, outputs=[self.output_name], - max_batch_size=max([dims[0] for dims in params.input_dims]), - max_workspace_size_bytes=1 << 25, - precision_mode=run_params.precision_mode, - minimum_segment_size=2, - is_dynamic_op=run_params.dynamic_engine) - - def _WriteGraph(self, params, run_params, gdef, graph_state): + max_batch_size=trt_params.max_batch_size, + max_workspace_size_bytes=trt_params.max_workspace_size_bytes, + precision_mode=trt_params.precision_mode, + minimum_segment_size=trt_params.minimum_segment_size, + is_dynamic_op=trt_params.is_dynamic_op, + maximum_cached_engines=trt_params.maximum_cached_engines, + cached_engine_batches=trt_params.cached_engine_batches) + + def _WriteGraph(self, run_params, gdef, graph_state): if graph_state == GraphState.ORIGINAL: label = "Original" elif graph_state == GraphState.CALIBRATE: @@ -250,12 +321,13 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): logging.info("Writing graph to %s/%s", temp_dir, graph_name) graph_io.write_graph(gdef, temp_dir, graph_name) - def _VerifyConnections(self, params, converted_gdef): + def _VerifyConnections(self, expected_engines, converted_gdef): + params = self._GetParamsCached() old_to_new_node_map = { self._ToString(node.name): self._ToString(node.name) for node in params.gdef.node } - for engine_name, node_names in params.expected_engines.items(): + for engine_name, node_names in expected_engines.items(): for node_name in node_names: old_to_new_node_map[node_name] = engine_name name_to_node_map = { @@ -310,14 +382,16 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): msg="expected:\n%s\nvs actual:\n%s" % (sorted( expected_input_map.items()), sorted(actual_input_map.items()))) - def _VerifyGraphDef(self, params, run_params, gdef, graph_state): - self._WriteGraph(params, run_params, gdef, graph_state) + def _VerifyGraphDef(self, run_params, gdef, graph_state): + self._WriteGraph(run_params, gdef, graph_state) + params = self._GetParamsCached() + expected_engines = self.ExpectedEnginesToBuild(run_params) num_engines = 0 for node in gdef.node: if node.op == "TRTEngineOp": num_engines += 1 - self.assertTrue(node.name in params.expected_engines) + self.assertTrue(node.name in expected_engines) self.assertTrue(len(node.attr["serialized_segment"].s)) self.assertTrue(len(node.attr["segment_funcdef_name"].s)) self.assertEqual( @@ -328,7 +402,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): 
self.assertEqual(run_params.dynamic_engine, is_dynamic_engine) has_calibration_data = len(node.attr["calibration_data"].s) - if (_IsQuantizationMode(run_params.precision_mode) and + if (IsQuantizationMode(run_params.precision_mode) and graph_state == GraphState.INFERENCE): self.assertTrue(has_calibration_data) else: @@ -336,71 +410,70 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase): if graph_state == GraphState.ORIGINAL: self.assertEqual(0, num_engines) else: - self.assertEqual(num_engines, len(params.expected_engines)) - if isinstance(params.expected_engines, dict): - self._VerifyConnections(params, gdef) + self.assertEqual(num_engines, len(expected_engines)) + if isinstance(expected_engines, dict): + self._VerifyConnections(expected_engines, gdef) # TODO(aaroey): consider verifying the corresponding TF function. - def RunTest(self, params, run_params): + def RunTest(self, run_params): + if not self.ShouldRunTest(run_params): + return assert run_params.precision_mode in PRECISION_MODES + params = self._GetParamsCached() input_data = [np.random.random_sample(dims) for dims in params.input_dims] input_gdef = params.gdef - self._VerifyGraphDef(params, run_params, input_gdef, GraphState.ORIGINAL) + self._VerifyGraphDef(run_params, input_gdef, GraphState.ORIGINAL) # Get reference result without running trt. - config_no_trt = self._GetConfigProto(params, run_params, - GraphState.ORIGINAL) + config_no_trt = self._GetConfigProto(run_params, GraphState.ORIGINAL) logging.info("Running original graph w/o trt, config:\n%s", str(config_no_trt)) - ref_result = self._RunGraph(params, input_gdef, input_data, config_no_trt, - GraphState.ORIGINAL) + ref_result = self._RunGraph(run_params, input_gdef, input_data, + config_no_trt, GraphState.ORIGINAL) # Run calibration if necessary. - if _IsQuantizationMode(run_params.precision_mode): + if IsQuantizationMode(run_params.precision_mode): - calib_config = self._GetConfigProto(params, run_params, - GraphState.CALIBRATE) + calib_config = self._GetConfigProto(run_params, GraphState.CALIBRATE) logging.info("Running calibration graph, config:\n%s", str(calib_config)) if run_params.use_optimizer: - result = self._RunCalibration(params, input_gdef, input_data, + result = self._RunCalibration(run_params, input_gdef, input_data, calib_config) else: - calib_gdef = self._GetTrtGraphDef(params, run_params, input_gdef) - self._VerifyGraphDef(params, run_params, calib_gdef, - GraphState.CALIBRATE) - result = self._RunCalibration(params, calib_gdef, input_data, + calib_gdef = self._GetTrtGraphDef(run_params, input_gdef) + self._VerifyGraphDef(run_params, calib_gdef, GraphState.CALIBRATE) + result = self._RunCalibration(run_params, calib_gdef, input_data, calib_config) - infer_gdef = trt_convert.calib_graph_to_infer_graph(calib_gdef) - self._VerifyGraphDef(params, run_params, infer_gdef, GraphState.INFERENCE) + infer_gdef = trt_convert.calib_graph_to_infer_graph( + calib_gdef, run_params.dynamic_engine) + self._VerifyGraphDef(run_params, infer_gdef, GraphState.INFERENCE) self.assertAllClose( ref_result, result, - atol=params.allclose_atol, - rtol=params.allclose_rtol) + atol=self.ExpectedAbsoluteTolerance(run_params), + rtol=self.ExpectedRelativeTolerance(run_params)) else: infer_gdef = input_gdef # Run inference. 
- infer_config = self._GetConfigProto(params, run_params, - GraphState.INFERENCE) + infer_config = self._GetConfigProto(run_params, GraphState.INFERENCE) logging.info("Running final inference graph, config:\n%s", str(infer_config)) if run_params.use_optimizer: - result = self._RunGraph(params, infer_gdef, input_data, infer_config, + result = self._RunGraph(run_params, infer_gdef, input_data, infer_config, GraphState.INFERENCE) else: - trt_infer_gdef = self._GetTrtGraphDef(params, run_params, infer_gdef) - self._VerifyGraphDef(params, run_params, trt_infer_gdef, - GraphState.INFERENCE) - result = self._RunGraph(params, trt_infer_gdef, input_data, infer_config, - GraphState.INFERENCE) + trt_infer_gdef = self._GetTrtGraphDef(run_params, infer_gdef) + self._VerifyGraphDef(run_params, trt_infer_gdef, GraphState.INFERENCE) + result = self._RunGraph(run_params, trt_infer_gdef, input_data, + infer_config, GraphState.INFERENCE) self.assertAllClose( ref_result, result, - atol=params.allclose_atol, - rtol=params.allclose_rtol) + atol=self.ExpectedAbsoluteTolerance(run_params), + rtol=self.ExpectedRelativeTolerance(run_params)) def testIdempotence(self): # Test that applying tensorrt optimizer or offline conversion tools multiple @@ -421,13 +494,12 @@ def _AddTests(test_class): """Gets a single test method based on the parameters.""" def _Test(self): - params = self.GetParams() logging.info( "Running test %s with parameters: use_optimizer=%s, " "precision_mode=%s, dynamic_engine=%s", "testTfTrt_" + run_params.test_name, run_params.use_optimizer, run_params.precision_mode, run_params.dynamic_engine) - self.RunTest(params, run_params) + self.RunTest(run_params) return _Test @@ -435,7 +507,7 @@ def _AddTests(test_class): dynamic_engine_options = [False, True] for (use_optimizer, precision_mode, dynamic_engine) in itertools.product( use_optimizer_options, PRECISION_MODES, dynamic_engine_options): - if _IsQuantizationMode(precision_mode): + if IsQuantizationMode(precision_mode): if use_optimizer: # TODO(aaroey): if use_optimizer is True we need to get the inference # graphdef using custom python wrapper class, which is not currently diff --git a/tensorflow/contrib/tensorrt/test/unary_test.py b/tensorflow/contrib/tensorrt/test/unary_test.py index 500057a36d..5036bd7aaa 100644 --- a/tensorflow/contrib/tensorrt/test/unary_test.py +++ b/tensorflow/contrib/tensorrt/test/unary_test.py @@ -100,13 +100,14 @@ class UnaryTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name, input2_name], input_dims=[input_dims, input2_dims], - expected_engines=[ - "my_trt_op_0", "my_trt_op_1", "my_trt_op_2", "my_trt_op_3", - "my_trt_op_4" - ], - expected_output_dims=(12, 5, 8, 12), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(12, 5, 8, 12)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return [ + "my_trt_op_0", "my_trt_op_1", "my_trt_op_2", "my_trt_op_3", + "my_trt_op_4" + ] if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/vgg_block_nchw_test.py b/tensorflow/contrib/tensorrt/test/vgg_block_nchw_test.py index ab4d224db4..12f29ceebf 100644 --- a/tensorflow/contrib/tensorrt/test/vgg_block_nchw_test.py +++ b/tensorflow/contrib/tensorrt/test/vgg_block_nchw_test.py @@ -42,11 +42,9 @@ class VGGBlockNCHWTest(trt_test.TfTrtIntegrationTestBase): with g.as_default(): x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name) x, _, _ = nn_impl.fused_batch_norm( - x, - 
np.random.randn(2).astype(np.float32), - np.random.randn(2).astype(np.float32), - mean=np.random.randn(2).astype(np.float32), - variance=np.random.randn(2).astype(np.float32), + x, [1.0, 1.0], [0.0, 0.0], + mean=[0.5, 0.5], + variance=[1.0, 1.0], data_format="NCHW", is_training=False) e = constant_op.constant( @@ -72,10 +70,11 @@ class VGGBlockNCHWTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=["my_trt_op_0"], - expected_output_dims=(5, 6, 2, 2), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(5, 6, 2, 2)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ["my_trt_op_0"] if __name__ == "__main__": diff --git a/tensorflow/contrib/tensorrt/test/vgg_block_test.py b/tensorflow/contrib/tensorrt/test/vgg_block_test.py index 56bdf848ea..129795bf98 100644 --- a/tensorflow/contrib/tensorrt/test/vgg_block_test.py +++ b/tensorflow/contrib/tensorrt/test/vgg_block_test.py @@ -42,11 +42,9 @@ class VGGBlockTest(trt_test.TfTrtIntegrationTestBase): with g.as_default(): x = array_ops.placeholder(dtype=dtype, shape=input_dims, name=input_name) x, _, _ = nn_impl.fused_batch_norm( - x, - np.random.randn(2).astype(np.float32), - np.random.randn(2).astype(np.float32), - mean=np.random.randn(2).astype(np.float32), - variance=np.random.randn(2).astype(np.float32), + x, [1.0, 1.0], [0.0, 0.0], + mean=[0.5, 0.5], + variance=[1.0, 1.0], is_training=False) e = constant_op.constant( np.random.randn(1, 1, 2, 6), name="weights", dtype=dtype) @@ -63,10 +61,11 @@ class VGGBlockTest(trt_test.TfTrtIntegrationTestBase): gdef=g.as_graph_def(), input_names=[input_name], input_dims=[input_dims], - expected_engines=["my_trt_op_0"], - expected_output_dims=(5, 2, 2, 6), - allclose_atol=1.e-03, - allclose_rtol=1.e-03) + expected_output_dims=(5, 2, 2, 6)) + + def ExpectedEnginesToBuild(self, run_params): + """Return the expected engines to build.""" + return ["my_trt_op_0"] if __name__ == "__main__": -- cgit v1.2.3 From 6b5be9a7f33462bd20bf14b0df9ca1fcb2da6bb3 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Fri, 10 Aug 2018 22:59:18 -0700 Subject: Revert grappler changes, the fix in convert_graph.cc is sufficient. --- tensorflow/core/grappler/clusters/single_machine.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index 83fde4fe37..b97603c890 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -48,7 +48,6 @@ SingleMachine::SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus) (*options_.config.mutable_device_count())["CPU"] = 1; if (num_gpus > 0) { (*options_.config.mutable_device_count())["GPU"] = num_gpus; - options_.config.mutable_gpu_options()->set_allow_growth(true); } CHECK_GE(num_cpu_cores, 1); options_.config.set_intra_op_parallelism_threads(num_cpu_cores); -- cgit v1.2.3 From 026004e05dc172d1639840055462013f95e56bbe Mon Sep 17 00:00:00 2001 From: Seb Bro Date: Sat, 11 Aug 2018 22:04:08 +0200 Subject: Fix latex text (docs). Escape special character and add text section for comments. 
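In MathJax math mode a bare underscore starts a subscript and plain words are set as italic math symbols, so the parenthetical comments need \text{...} and the underscore in learning_rate needs escaping. For example, after this change the initialization lines render as

    $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$

and the learning-rate line as

    $$lr_t := \text{learning\_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$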
--- tensorflow/python/training/adam.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index bcbe5907d6..86b854c0de 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -43,15 +43,15 @@ class AdamOptimizer(optimizer.Optimizer): Initialization: - $$m_0 := 0 (Initialize initial 1st moment vector)$$ - $$v_0 := 0 (Initialize initial 2nd moment vector)$$ - $$t := 0 (Initialize timestep)$$ + $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$ + $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$ + $$t := 0 \text{(Initialize timestep)}$$ The update rule for `variable` with gradient `g` uses an optimization described at the end of section2 of the paper: $$t := t + 1$$ - $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ + $$lr_t := \text{learning\_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -- cgit v1.2.3 From 62049835ce3064c191a8054ec1056b4701afb744 Mon Sep 17 00:00:00 2001 From: Seb Bro Date: Sat, 11 Aug 2018 22:19:44 +0200 Subject: Fix sqrt in lr formula. --- tensorflow/python/training/adam.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index 86b854c0de..704ad6d3fe 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -51,7 +51,7 @@ class AdamOptimizer(optimizer.Optimizer): described at the end of section2 of the paper: $$t := t + 1$$ - $$lr_t := \text{learning\_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ + $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -- cgit v1.2.3 From a4667873d90cf2c8530e8a8058e7d1c065639ce8 Mon Sep 17 00:00:00 2001 From: Seb Bro Date: Sat, 11 Aug 2018 22:23:21 +0200 Subject: Fix formula. --- tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt b/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt index b90f5473c8..6341eeda32 100644 --- a/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ApplyAdam.pbtxt @@ -82,7 +82,7 @@ END } summary: "Update \'*var\' according to the Adam algorithm." description: < Date: Sat, 11 Aug 2018 22:23:52 +0200 Subject: Fix formula. --- tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt index ad0aeac004..2dcd136ae3 100644 --- a/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ResourceApplyAdam.pbtxt @@ -76,7 +76,7 @@ END } summary: "Update \'*var\' according to the Adam algorithm." description: < Date: Sat, 11 Aug 2018 22:26:40 +0200 Subject: Fix formula and text rendering. 
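With the sqrt fix applied consistently, the documented update rule in adam.py, the ApplyAdam/ResourceApplyAdam api_defs, and optimizer_v2 all converge on

    $$t := t + 1$$
    $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$
    $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$
    $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$

which matches the bias-corrected step size alpha_t = alpha * sqrt(1 - beta_2^t) / (1 - beta_1^t) given at the end of section 2 of the Adam paper.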
--- tensorflow/contrib/optimizer_v2/adam.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/optimizer_v2/adam.py b/tensorflow/contrib/optimizer_v2/adam.py index 631d4f44df..04b1552b61 100644 --- a/tensorflow/contrib/optimizer_v2/adam.py +++ b/tensorflow/contrib/optimizer_v2/adam.py @@ -40,15 +40,14 @@ class AdamOptimizer(optimizer_v2.OptimizerV2): Initialization: - $$m_0 := 0 (Initialize initial 1st moment vector)$$ - $$v_0 := 0 (Initialize initial 2nd moment vector)$$ - $$t := 0 (Initialize timestep)$$ - + $$m_0 := 0 \text{(Initialize initial 1st moment vector)}$$ + $$v_0 := 0 \text{(Initialize initial 2nd moment vector)}$$ + $$t := 0 \text{(Initialize timestep)}$$ The update rule for `variable` with gradient `g` uses an optimization described at the end of section2 of the paper: $$t := t + 1$$ - $$lr_t := \text{learning_rate} * \sqrt{(1 - beta_2^t) / (1 - beta_1^t)}$$ + $$lr_t := \text{learning\_rate} * \sqrt{1 - beta_2^t} / (1 - beta_1^t)$$ $$m_t := beta_1 * m_{t-1} + (1 - beta_1) * g$$ $$v_t := beta_2 * v_{t-1} + (1 - beta_2) * g * g$$ -- cgit v1.2.3 From 93e950c308071071f35d6dcb35b9f91b8a34876c Mon Sep 17 00:00:00 2001 From: Avijit <30507445+avijit-nervana@users.noreply.github.com> Date: Sun, 12 Aug 2018 14:35:19 -0700 Subject: Updated based on PR feedback. --- WORKSPACE | 2 +- tensorflow/BUILD | 3 +-- tensorflow/core/BUILD | 2 +- tensorflow/core/platform/default/build_config.bzl | 1 - tensorflow/python/BUILD | 2 +- tensorflow/tensorflow.bzl | 1 + tensorflow/workspace.bzl | 1 + third_party/ngraph/ngraph_tf.BUILD | 1 + 8 files changed, 7 insertions(+), 6 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 02a1dc1626..17961829a6 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -78,4 +78,4 @@ new_http_archive( "http://storage.googleapis.com/download.tensorflow.org/models/speech_commands_v0.01.zip", "http://download.tensorflow.org/models/speech_commands_v0.01.zip", ], -) \ No newline at end of file +) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 161e5f80d4..5c13ebacfc 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -23,6 +23,7 @@ load( "//tensorflow/python/tools/api/generator:api_gen.bzl", "gen_api_init_files", # @unused ) +load("//third_party/ngraph:build_defs.bzl", "if_ngraph") # Config setting used when building for products # which requires restricted licenses to be avoided. 
@@ -441,8 +442,6 @@ load( "if_mkl", ) -load("//third_party/ngraph:build_defs.bzl", "if_ngraph") - filegroup( name = "intel_binary_blob", data = if_mkl( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 385a14eb44..189f512400 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2523,7 +2523,7 @@ tf_cuda_library( "//third_party/mkl:intel_binary_blob", "@mkl_dnn", ], - ), + ), alwayslink = 1, ) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index e15aae5488..28891320c4 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -708,4 +708,3 @@ def tf_additional_binary_deps(): "//third_party/mkl:intel_binary_blob", ], ) - \ No newline at end of file diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 456f007348..eb3f50f922 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3809,7 +3809,7 @@ tf_py_wrap_cc( tf_additional_plugin_deps() + tf_additional_verbs_deps() + tf_additional_mpi_deps() + - tf_additional_gdr_deps())+ + tf_additional_gdr_deps()) + if_ngraph(["@ngraph_tf//:ngraph_tf"]) ) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 443c582360..12ac8dfa0e 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -29,6 +29,7 @@ load( "//third_party/mkl_dnn:build_defs.bzl", "if_mkl_open_source_only", ) + load( "//third_party/ngraph:build_defs.bzl", "if_ngraph", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 702698abed..4aa0172e22 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -6,6 +6,7 @@ load("//third_party:nccl/nccl_configure.bzl", "nccl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") load("//third_party/git:git_configure.bzl", "git_configure") load("//third_party/py:python_configure.bzl", "python_configure") + load("//third_party/sycl:sycl_configure.bzl", "sycl_configure") load("//third_party/systemlibs:syslibs_configure.bzl", "syslibs_configure") load("//third_party/toolchains/clang6:repo.bzl", "clang6_configure") diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD index 838f022222..f36532449c 100644 --- a/third_party/ngraph/ngraph_tf.BUILD +++ b/third_party/ngraph/ngraph_tf.BUILD @@ -68,6 +68,7 @@ cc_library( "-I external/ngraph/src", "-D NGRAPH_EMBEDDED_IN_TENSORFLOW=1", ], + alwayslink=1, visibility = ["//visibility:public"], ) -- cgit v1.2.3 From 135ac89cae38464a9c6ea21af244e4a1bda255ed Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 13 Aug 2018 15:52:43 -0700 Subject: enable pooling3D op --- tensorflow/core/graph/mkl_layout_pass.cc | 28 ++++- tensorflow/core/graph/mkl_tfconversion_pass.cc | 12 +- tensorflow/core/kernels/mkl_aggregate_ops.cc | 20 +++- tensorflow/core/kernels/mkl_avgpooling_op.cc | 51 ++++++--- tensorflow/core/kernels/mkl_maxpooling_op.cc | 59 +++++++--- tensorflow/core/kernels/mkl_pooling_ops_common.cc | 129 +++++++++++++++------ tensorflow/core/kernels/mkl_pooling_ops_common.h | 132 ++++++++++++++++------ tensorflow/core/ops/nn_ops.cc | 98 ++++++++++++++++ tensorflow/core/util/mkl_util.h | 114 ++++++++++++++++--- 9 files changed, 519 insertions(+), 124 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 5683944e46..30e48d3860 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -334,6 +334,7 @@ class 
MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.conv2d_grad_input, mkl_op_registry::GetMklOpName(csinfo_.conv2d_grad_input), CopyAttrsConv2D, AlwaysRewrite, nullptr}); + rinfo_.push_back({csinfo_.fused_batch_norm, mkl_op_registry::GetMklOpName(csinfo_.fused_batch_norm), CopyAttrsFusedBatchNorm, AlwaysRewrite, nullptr}); @@ -546,14 +547,14 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If Op has been specifically assigned to a non-CPU device, then No. if (!n->assigned_device_name().empty() && - !str_util::StrContains(n->assigned_device_name(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->assigned_device_name(), kCPUDeviceSubStr)) { result = false; reason = "Op has been assigned a runtime device that is not CPU."; } // If user has specifically assigned this op to a non-CPU device, then No. if (!n->def().device().empty() && - !str_util::StrContains(n->def().device(),kCPUDeviceSubStr)) { + !str_util::StrContains(n->def().device(), kCPUDeviceSubStr)) { result = false; reason = "User has assigned a device that is not CPU."; } @@ -2408,6 +2409,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.addn = "AddN"; csinfo_.avg_pool = "AvgPool"; csinfo_.avg_pool_grad = "AvgPoolGrad"; + csinfo_.avg_pool3d = "AvgPool3D"; + csinfo_.avg_pool3d_grad = "AvgPool3DGrad"; csinfo_.bias_add = "BiasAdd"; csinfo_.bias_add_grad = "BiasAddGrad"; csinfo_.concat = "Concat"; @@ -2426,6 +2429,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.matmul = "MatMul"; csinfo_.max_pool = "MaxPool"; csinfo_.max_pool_grad = "MaxPoolGrad"; + csinfo_.max_pool3d = "MaxPool3D"; + csinfo_.max_pool3d_grad = "MaxPool3DGrad"; csinfo_.mkl_conv2d = "_MklConv2D"; csinfo_.mkl_conv2d_grad_input = "_MklConv2DBackpropInput"; csinfo_.mkl_conv2d_grad_filter = "_MklConv2DBackpropFilter"; @@ -2460,6 +2465,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.avg_pool_grad, mkl_op_registry::GetMklOpName(csinfo_.avg_pool_grad), CopyAttrsPooling, AlwaysRewrite}); + rinfo_.push_back({csinfo_.avg_pool3d, + mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d), + CopyAttrsPooling, AlwaysRewrite}); + rinfo_.push_back({csinfo_.avg_pool3d_grad, + mkl_op_registry::GetMklOpName(csinfo_.avg_pool3d_grad), + CopyAttrsPooling, AlwaysRewrite}); rinfo_.push_back({csinfo_.concat, mkl_op_registry::GetMklOpName(csinfo_.concat), CopyAttrsConcat, AlwaysRewrite}); @@ -2501,7 +2512,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.max_pool_grad, mkl_op_registry::GetMklOpName(csinfo_.max_pool_grad), CopyAttrsPooling, MaxpoolGradRewrite}); - + rinfo_.push_back({csinfo_.max_pool3d, + mkl_op_registry::GetMklOpName(csinfo_.max_pool3d), + CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); + rinfo_.push_back({csinfo_.max_pool3d_grad, + mkl_op_registry::GetMklOpName(csinfo_.max_pool3d_grad), + CopyAttrsPooling, AlwaysRewrite}); rinfo_.push_back({csinfo_.maximum, mkl_op_registry::GetMklOpName(csinfo_.maximum), CopyAttrsDataType, AlwaysRewrite}); @@ -2538,6 +2554,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Add info about which ops to add workspace edge to and the slots. 
wsinfo_.push_back({csinfo_.lrn, csinfo_.lrn_grad, 0, 2, 1, 3}); wsinfo_.push_back({csinfo_.max_pool, csinfo_.max_pool_grad, 0, 1, 1, 3}); + wsinfo_.push_back + ({csinfo_.max_pool3d, csinfo_.max_pool3d_grad, 0, 1, 1, 3}); // Add a rule for merging nodes minfo_.push_back({csinfo_.conv2d, csinfo_.bias_add, @@ -2605,6 +2623,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string add; string avg_pool; string avg_pool_grad; + string avg_pool3d; + string avg_pool3d_grad; string bias_add; string bias_add_grad; string concat; @@ -2622,6 +2642,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string matmul; string max_pool; string max_pool_grad; + string max_pool3d; + string max_pool3d_grad; string maximum; string mkl_conv2d; string mkl_conv2d_grad_input; diff --git a/tensorflow/core/graph/mkl_tfconversion_pass.cc b/tensorflow/core/graph/mkl_tfconversion_pass.cc index aa39af637f..b67a321fc1 100644 --- a/tensorflow/core/graph/mkl_tfconversion_pass.cc +++ b/tensorflow/core/graph/mkl_tfconversion_pass.cc @@ -175,7 +175,11 @@ Status MklToTfConversionPass::InsertConversionNodeOnEdge( .Finalize(&**g, &conversion_node)); CHECK_NOTNULL(conversion_node); - if (GetNodeAttr(src->def(), "data_format", &data_format) == Status::OK()) { + // TODO(Intel-tf) MklToTf accepts only NHWC or NCHW, but doesn't seem to be + // using data_format. This code might be redundant. + if (GetNodeAttr(src->def(), "data_format", &data_format) == Status::OK() && + (data_format == ToString(FORMAT_NHWC) || + data_format == ToString(FORMAT_NCHW))) { conversion_node->AddAttr("data_format", data_format); } @@ -254,9 +258,13 @@ Status MklToTfConversionPass::InsertInputConversionNode( } } + // TODO(Intel-tf) MklInputConversion accepts only NHWC or NCHW, but doesn't + // seem to be using data_format. This code might be redundant. string data_format; if (GetNodeAttr(edges[0]->src()->def(), "data_format", &data_format) == - Status::OK()) { + Status::OK() && + (data_format == ToString(FORMAT_NHWC) || + data_format == ToString(FORMAT_NCHW))) { conversion_node->AddAttr("data_format", data_format); } diff --git a/tensorflow/core/kernels/mkl_aggregate_ops.cc b/tensorflow/core/kernels/mkl_aggregate_ops.cc index 28edf51546..20aa1f7ea1 100644 --- a/tensorflow/core/kernels/mkl_aggregate_ops.cc +++ b/tensorflow/core/kernels/mkl_aggregate_ops.cc @@ -392,16 +392,28 @@ class MklAddNOp : public OpKernel { memory::format src1_mkl_data_format = src1_mkl_shape.GetTfDataFormat(); auto src1_tf_data_format = MklDnnDataFormatToTFDataFormat(src1_mkl_data_format); - auto src2_dims = - TFShapeToMklDnnDimsInNCHW(src2_tensor.shape(), src1_tf_data_format); + memory::dims src2_dims; + if (src2_tensor.dims() == 4) { + src2_dims = TFShapeToMklDnnDimsInNCHW(src2_tensor.shape(), + src1_tf_data_format); + } else { + src2_dims = TFShapeToMklDnnDimsInNCDHW(src2_tensor.shape(), + src1_tf_data_format); + } md2 = memory::desc(src2_dims, MklDnnType(), src1_mkl_data_format); } else if (input2_in_mkl_format && !input1_in_mkl_format) { // Same comment as above. 
memory::format src2_mkl_data_format = src2_mkl_shape.GetTfDataFormat(); auto src2_tf_data_format = MklDnnDataFormatToTFDataFormat(src2_mkl_data_format); - auto src1_dims = - TFShapeToMklDnnDimsInNCHW(src1_tensor.shape(), src2_tf_data_format); + memory::dims src1_dims; + if (src1_tensor.dims() == 4) { + src1_dims = TFShapeToMklDnnDimsInNCHW(src1_tensor.shape(), + src2_tf_data_format); + } else { + src1_dims = TFShapeToMklDnnDimsInNCDHW(src1_tensor.shape(), + src2_tf_data_format); + } md1 = memory::desc(src1_dims, MklDnnType(), src2_mkl_data_format); md2 = src2_mkl_shape.GetMklLayout(); diff --git a/tensorflow/core/kernels/mkl_avgpooling_op.cc b/tensorflow/core/kernels/mkl_avgpooling_op.cc index 969baecc51..749b2a1838 100644 --- a/tensorflow/core/kernels/mkl_avgpooling_op.cc +++ b/tensorflow/core/kernels/mkl_avgpooling_op.cc @@ -453,6 +453,8 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; + // check whether pooling is 2D or 3D + bool isPool2D = (this->ksize_.size() == 4); // Get the input tensor and initialize the pooling parameters TensorShape input_tensor_shape = input_tensor.shape(); this->InitMklPoolParameters(context, &pool_params, dnn_shape_input, @@ -473,23 +475,22 @@ class MklAvgPoolingOp : public MklPoolingForwardOpBase { } memory::dims filter_dims, strides, padding_left, padding_right; + // Get src/filter/stride/padding information this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); // Get the input memory descriptor - memory::desc input_md = - dnn_shape_input.IsMklTensor() - ? dnn_shape_input.GetMklLayout() - : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, - this->data_format_tf_), - MklDnnType(), this->data_format_mkldnn_); - - // Get src/filter/stride/padding information memory::dims src_dims = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), - this->data_format_tf_); + : isPool2D ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(input_tensor.shape(), + this->data_format_tf_); + memory::desc input_md = dnn_shape_input.IsMklTensor() + ? dnn_shape_input.GetMklLayout() + : memory::desc(src_dims, MklDnnType(), + this->data_format_mkldnn_); // Get an average pooling primitive from the op pool MklPoolingFwdPrimitive* pooling_fwd = nullptr; @@ -562,24 +563,30 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { for (int i = 0; i < orig_input_tensor.NumElements(); i++) { orig_input_shape.AddDim(shape_vec(i)); } + + bool isPool2D = (this->ksize_.size() == 4); this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, orig_input_shape); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); memory::dims orig_input_dims_mkl_order = orig_input_mkl_shape.IsMklTensor() ? orig_input_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(orig_input_shape, - this->data_format_tf_); + : isPool2D ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(orig_input_shape, + this->data_format_tf_); memory::dims diff_dst_dims = grad_mkl_shape.IsMklTensor() ? 
grad_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), - this->data_format_tf_); + : isPool2D ? TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(grad_tensor.shape(), + this->data_format_tf_); memory::dims output_dims_mkl_order; this->GetOutputDims(pool_params, &output_dims_mkl_order); @@ -664,6 +671,18 @@ class MklAvgPoolingGradOp : public MklPoolingBackwardOpBase { } }; // MklAvgPoolingGradOp +REGISTER_KERNEL_BUILDER(Name("_MklAvgPool3D") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklAvgPoolingOp); + +REGISTER_KERNEL_BUILDER(Name("_MklAvgPool3DGrad") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklAvgPoolingGradOp); + #endif // INTEL_MKL_ML_ONLY REGISTER_KERNEL_BUILDER(Name("_MklAvgPool") diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index e149f003e5..aa7c0d9b7f 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -524,6 +524,8 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { // initialize variables for the pooling op MklPoolParameters pool_params; + // check whether pooling is 2D or 3D + bool isPool2D = (this->ksize_.size() == 4); // Get the input tensor and initialize the pooling parameters TensorShape input_tensor_shape = input_tensor.shape(); this->InitMklPoolParameters(context, &pool_params, dnn_shape_input, @@ -547,20 +549,26 @@ class MklMaxPoolingOp : public MklPoolingForwardOpBase { memory::desc input_md = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetMklLayout() - : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, - this->data_format_tf_), - MklDnnType(), this->data_format_mkldnn_); + : isPool2D ? memory::desc( + TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + this->data_format_tf_), + MklDnnType(), this->data_format_mkldnn_) + : memory::desc( + TFShapeToMklDnnDimsInNCDHW( + input_tensor_shape, this->data_format_tf_), + MklDnnType(), this->data_format_mkldnn_); // Get src/filter/stride/padding information memory::dims src_dims = dnn_shape_input.IsMklTensor() ? dnn_shape_input.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), - this->data_format_tf_); - + : isPool2D ? TFShapeToMklDnnDimsInNCHW(input_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(input_tensor.shape(), + this->data_format_tf_); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); // Get a pooling op from the cached pool MklPoolingFwdPrimitive* pooling_fwd = nullptr; @@ -663,23 +671,30 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { MklPoolParameters pool_params; TensorShape orig_input_shape = orig_input_tensor.shape(); + + bool isPool2D = (this->ksize_.size() == 4); this->InitMklPoolParameters(context, &pool_params, orig_input_mkl_shape, orig_input_shape); memory::dims filter_dims, strides, padding_left, padding_right; this->PoolParamsToDims(&pool_params, &filter_dims, &strides, - &padding_left, &padding_right); + &padding_left, &padding_right, isPool2D); - memory::dims diff_dst_dims = - grad_mkl_shape.IsMklTensor() - ? 
grad_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), - this->data_format_tf_); memory::dims orig_input_dims_mkl_order = orig_input_mkl_shape.IsMklTensor() ? orig_input_mkl_shape.GetSizesAsMklDnnDims() - : TFShapeToMklDnnDimsInNCHW(orig_input_shape, - this->data_format_tf_); + : isPool2D ? TFShapeToMklDnnDimsInNCHW(orig_input_shape, + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(orig_input_shape, + this->data_format_tf_); + + memory::dims diff_dst_dims = + grad_mkl_shape.IsMklTensor() + ? grad_mkl_shape.GetSizesAsMklDnnDims() + : isPool2D ? TFShapeToMklDnnDimsInNCHW(grad_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(grad_tensor.shape(), + this->data_format_tf_); memory::dims output_dims_mkl_order; this->GetOutputDims(pool_params, &output_dims_mkl_order); @@ -715,7 +730,7 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { void* ws_data = static_cast( const_cast(workspace_tensor.flat().data())); - ; + auto ws_md = pooling_bwd->GetPoolingFwdPd()->workspace_primitive_desc().desc(); if (ws_md.data.format != pooling_bwd->GetWorkspaceFormat()) { @@ -817,6 +832,18 @@ class MklMaxPoolingGradOp : public MklPoolingBackwardOpBase { } }; // MklMaxPoolingGradOp +REGISTER_KERNEL_BUILDER(Name("_MklMaxPool3D") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklMaxPoolingOp); + +REGISTER_KERNEL_BUILDER(Name("_MklMaxPool3DGrad") + .Device(DEVICE_CPU) + .TypeConstraint("T") + .Label(mkl_op_registry::kMklOpLabel), + MklMaxPoolingGradOp); + #endif // INTEL_MKL_ML_ONLY REGISTER_KERNEL_BUILDER(Name("_MklMaxPool") diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.cc b/tensorflow/core/kernels/mkl_pooling_ops_common.cc index d7ad3f9dcd..5d02ceea12 100644 --- a/tensorflow/core/kernels/mkl_pooling_ops_common.cc +++ b/tensorflow/core/kernels/mkl_pooling_ops_common.cc @@ -24,7 +24,7 @@ limitations under the License. namespace tensorflow { -#ifndef INTEL_MKL_ML +#ifndef INTEL_MKL_ML_ONLY using mkldnn::pooling_avg; using mkldnn::pooling_avg_exclude_padding; @@ -46,9 +46,10 @@ void MklPoolingFwdPrimitive::Setup(const MklPoolingParams& fwdParams) { // so src format is currently hard-coded. 
// A utility function is used to do this, // which may be broken with future CPU architectures + bool is_2d = (fwdParams.src_dims.size() == 4); context_.src_md.reset( new memory::desc({fwdParams.src_dims}, MklDnnType(), - get_desired_format(fwdParams.src_dims[1]))); + get_desired_format(fwdParams.src_dims[1], is_2d))); context_.dst_md.reset(new memory::desc({fwdParams.dst_dims}, MklDnnType(), memory::format::any)); @@ -61,7 +62,7 @@ void MklPoolingFwdPrimitive::Setup(const MklPoolingParams& fwdParams) { new pooling_forward::primitive_desc(*context_.fwd_desc, cpu_engine_)); // store expected primitive format - context_.src_fmt = get_desired_format(fwdParams.src_dims[1]); + context_.src_fmt = get_desired_format(fwdParams.src_dims[1], is_2d); context_.dst_fmt = static_cast( context_.fwd_pd.get()->dst_primitive_desc().desc().data.format); @@ -126,12 +127,14 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { } context_.alg_kind = bwdParams.alg_kind; + // check whether it is 2d or 3d + bool is_2d = (bwdParams.dst_dims.size() == 4); // Create memory desc context_.diff_src_md.reset(new memory::desc( {bwdParams.src_dims}, MklDnnType(), memory::format::any)); context_.diff_dst_md.reset( new memory::desc({bwdParams.dst_dims}, MklDnnType(), - get_desired_format(bwdParams.dst_dims[1]))); + get_desired_format(bwdParams.dst_dims[1], is_2d))); context_.bwd_desc.reset(new pooling_backward::desc( bwdParams.alg_kind, *context_.diff_src_md, *context_.diff_dst_md, bwdParams.strides, bwdParams.filter_dims, bwdParams.padding_left, @@ -151,7 +154,7 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { // store expected primitive format context_.diff_src_fmt = static_cast( context_.bwd_pd.get()->diff_src_primitive_desc().desc().data.format); - context_.diff_dst_fmt = get_desired_format(bwdParams.dst_dims[1]); + context_.diff_dst_fmt = get_desired_format(bwdParams.dst_dims[1], is_2d); // create MKL-DNN internal memory object with dummy data context_.diff_src_mem.reset( @@ -165,7 +168,7 @@ void MklPoolingBwdPrimitive::Setup(const MklPoolingParams& bwdParams) { if (bwdParams.alg_kind == pooling_max) { auto ws_pd = context_.fwd_pd.get()->workspace_primitive_desc().desc().data; context_.ws_dims.assign(ws_pd.dims, ws_pd.dims + ws_pd.ndims); - context_.ws_fmt = get_desired_format(context_.ws_dims[1]); + context_.ws_fmt = get_desired_format(context_.ws_dims[1], is_2d); context_.ws_dt = static_cast(ws_pd.data_type); context_.ws_mem.reset(new memory( {{{context_.ws_dims}, context_.ws_dt, context_.ws_fmt}, cpu_engine}, @@ -211,13 +214,22 @@ void MklPoolParameters::Init(OpKernelContext* context, const std::vector& stride, Padding padding, TensorFormat data_format, const TensorShape& tensor_in_shape) { - // For maxpooling, tensor_in should have 4 dimensions. - OP_REQUIRES(context, tensor_in_shape.dims() == 4, - errors::InvalidArgument("tensor_in must be 4-dimensional")); + // For maxpooling, tensor_in should have 4 or 5 dimensions. 
+ OP_REQUIRES(context, + tensor_in_shape.dims() == 4 || tensor_in_shape.dims() == 5, + errors::InvalidArgument("tensor_in must be 4 or 5-dimensional")); depth = GetTensorDim(tensor_in_shape, data_format, 'C'); - tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W'); - tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H'); + if (tensor_in_shape.dims() == 4) { + // Pool2D + tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, 'W'); + tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, 'H'); + } else { + // Pool3D + tensor_in_planes = GetTensorDim(tensor_in_shape, data_format, '0'); + tensor_in_rows = GetTensorDim(tensor_in_shape, data_format, '1'); + tensor_in_cols = GetTensorDim(tensor_in_shape, data_format, '2'); + } tensor_in_batch = GetTensorDim(tensor_in_shape, data_format, 'N'); Init(context, ksize, stride, padding, data_format); @@ -246,10 +258,20 @@ void MklPoolParameters::Init(OpKernelContext* context, TensorFormat data_format, const MklDnnShape* mklInputShape) { // Get the input sizes - depth = mklInputShape->GetDimension('C'); - tensor_in_cols = mklInputShape->GetDimension('W'); - tensor_in_rows = mklInputShape->GetDimension('H'); - tensor_in_batch = mklInputShape->GetDimension('N'); + if (ksize.size() == 4) { + // Pool2D + depth = mklInputShape->GetDimension('C'); + tensor_in_cols = mklInputShape->GetDimension('W'); + tensor_in_rows = mklInputShape->GetDimension('H'); + tensor_in_batch = mklInputShape->GetDimension('N'); + } else { + // Pool3D + depth = mklInputShape->GetDimension3D('C'); + tensor_in_cols = mklInputShape->GetDimension3D('W'); + tensor_in_rows = mklInputShape->GetDimension3D('H'); + tensor_in_planes = mklInputShape->GetDimension3D('D'); + tensor_in_batch = mklInputShape->GetDimension3D('N'); + } Init(context, ksize, stride, padding, data_format); } @@ -262,25 +284,58 @@ void MklPoolParameters::Init(OpKernelContext* context, // Get the data format this->data_format = data_format; - // Get the output sizes - window_rows = GetTensorDim(ksize, data_format, 'H'); - window_cols = GetTensorDim(ksize, data_format, 'W'); - depth_window = GetTensorDim(ksize, data_format, 'C'); - - // Get the strides - row_stride = GetTensorDim(stride, data_format, 'H'); - col_stride = GetTensorDim(stride, data_format, 'W'); - depth_stride = GetTensorDim(stride, data_format, 'C'); + bool isPool2D = (ksize.size() == 4); + if (isPool2D) { + // Pool2D + // Get the output sizes + window_rows = GetTensorDim(ksize, data_format, 'H'); + window_cols = GetTensorDim(ksize, data_format, 'W'); + depth_window = GetTensorDim(ksize, data_format, 'C'); + + // Get the strides + row_stride = GetTensorDim(stride, data_format, 'H'); + col_stride = GetTensorDim(stride, data_format, 'W'); + depth_stride = GetTensorDim(stride, data_format, 'C'); + + // We only support 2D pooling across width/height and depthwise + // pooling, not a combination. 
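
Addressing the spatial dimensions by character ('H'/'W' in the 2D branch above, '0'/'1'/'2' for planes/rows/cols in the 3D branch below) keeps this code format-agnostic. A minimal Python sketch of that lookup, assuming the standard NDHWC/NCDHW format strings; get_tensor_dim here is an illustrative stand-in for the C++ GetTensorDim helper, not a TensorFlow API:

    # Illustrative mirror of the dimension lookup used above.
    def get_tensor_dim(shape, data_format, char):
        # '0', '1', '2' address the spatial dimensions in order
        # (planes, rows, cols); 'N' and 'C' keep their usual meaning.
        spatial = [c for c in data_format if c not in ('N', 'C')]
        if char.isdigit():
            char = spatial[int(char)]
        return shape[data_format.index(char)]

    shape = (8, 16, 32, 64, 3)  # batch, planes, rows, cols, channels
    assert get_tensor_dim(shape, 'NDHWC', 'N') == 8
    assert get_tensor_dim(shape, 'NDHWC', '0') == 16  # planes (D)
    assert get_tensor_dim(shape, 'NDHWC', '2') == 64  # cols (W)
    assert get_tensor_dim(shape, 'NDHWC', 'C') == 3
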
+      OP_REQUIRES(context,
+                  (depth_window == 1 || (window_rows == 1 && window_cols == 1)),
+                  errors::Unimplemented(
+                      "MaxPooling supports exactly one of pooling across depth "
+                      "or pooling across width/height."));
+    } else {
+      // Pool3D
+      // Get the output sizes
+      window_planes = GetTensorDim(ksize, data_format, '0');
+      window_rows = GetTensorDim(ksize, data_format, '1');
+      window_cols = GetTensorDim(ksize, data_format, '2');
+      depth_window = GetTensorDim(ksize, data_format, 'C');
+
+      // Get the strides
+      planes_stride = GetTensorDim(stride, data_format, '0');
+      row_stride = GetTensorDim(stride, data_format, '1');
+      col_stride = GetTensorDim(stride, data_format, '2');
+      depth_stride = GetTensorDim(stride, data_format, 'C');
+
+      // We only support 3D pooling across depth/width/height and depthwise
+      // pooling, not a combination.
+      OP_REQUIRES(context,
+                  (depth_window == 1 ||
+                   (window_rows == 1 && window_cols == 1 && window_planes == 1)),
+                  errors::Unimplemented(
+                      "Pooling3D supports exactly one of pooling across depth "
+                      "or pooling across depth/width/height."));
+    }
 
-  // We only support 2D pooling across width/height and depthwise
-  // pooling, not a combination.
-  OP_REQUIRES(context,
-              (depth_window == 1 || (window_rows == 1 && window_cols == 1)),
-              errors::Unimplemented(
-                  "MaxPooling supports exactly one of pooling across depth "
-                  "or pooling across width/height."));
+  if (depth_window == 1) {  // we are pooling in the D (Pool3D only), H and W
+    if (!isPool2D) {
+      OP_REQUIRES_OK(
+          context, GetWindowedOutputSizeVerbose(tensor_in_planes, window_planes,
+                                                planes_stride, padding,
+                                                &out_planes, &pad_P1, &pad_P2));
+    }
 
-  if (depth_window == 1) {  // we are pooling in the H and W
     OP_REQUIRES_OK(context, GetWindowedOutputSizeVerbose(
                                 tensor_in_rows, window_rows, row_stride,
                                 padding, &out_height, &pad_top, &pad_bottom));
@@ -290,7 +345,14 @@ void MklPoolParameters::Init(OpKernelContext* context,
                                 padding, &out_width, &pad_left, &pad_right));
 #ifndef INTEL_MKL_ML_ONLY
     // TF can work with int64, but mkldnn only supports int32
-    // Fail if the height or width are greater than MAX_INT
+    // Fail if the depth, height or width are greater than MAX_INT
+    // We check depth only for the 3D pooling case
+
+    if (!isPool2D) {
+      OP_REQUIRES(context,
+                  FastBoundsCheck(out_planes, std::numeric_limits<int32>::max()),
+                  errors::InvalidArgument("output depth/planes is too large"));
+    }
 
     OP_REQUIRES(context,
                 FastBoundsCheck(out_height, std::numeric_limits<int32>::max()),
@@ -299,7 +361,6 @@ void MklPoolParameters::Init(OpKernelContext* context,
     OP_REQUIRES(context,
                 FastBoundsCheck(out_width, std::numeric_limits<int32>::max()),
                 errors::InvalidArgument("output width is too large"));
-
 #endif
     out_depth = depth;  // output will have the same depth as the input
   } else {  // we are pooling in the depth dimension
diff --git a/tensorflow/core/kernels/mkl_pooling_ops_common.h b/tensorflow/core/kernels/mkl_pooling_ops_common.h
index ec7af5092d..ea7458062c 100644
--- a/tensorflow/core/kernels/mkl_pooling_ops_common.h
+++ b/tensorflow/core/kernels/mkl_pooling_ops_common.h
@@ -19,6 +19,7 @@ limitations under the License.
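
Each GetWindowedOutputSizeVerbose call above yields the output extent of one dimension together with its explicit before/after padding (pad_P1/pad_P2 for planes, pad_top/pad_bottom for rows, and so on). A simplified Python rendering of that arithmetic, ignoring dilation:

    # Simplified sketch of the windowed-output-size computation (no dilation).
    def windowed_output_size(in_size, window, stride, padding):
        if padding == 'VALID':
            out = (in_size - window) // stride + 1
            pad_before = pad_after = 0
        else:  # 'SAME'
            out = (in_size + stride - 1) // stride  # ceil(in_size / stride)
            pad_needed = max(0, (out - 1) * stride + window - in_size)
            pad_before = pad_needed // 2
            pad_after = pad_needed - pad_before
        return out, pad_before, pad_after

    # e.g. the planes dimension of a 3D pool: 10 planes, window 3, stride 2
    assert windowed_output_size(10, 3, 2, 'SAME') == (5, 0, 1)
    assert windowed_output_size(10, 3, 2, 'VALID') == (4, 0, 0)
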
#ifdef INTEL_MKL #include #include +#include #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/padding.h" @@ -32,7 +33,7 @@ using mkldnn::stream; namespace tensorflow { -#ifndef INTEL_MKL_ML +#ifndef INTEL_MKL_ML_ONLY using mkldnn::memory; using mkldnn::pooling_avg; @@ -357,22 +358,28 @@ typedef Eigen::ThreadPoolDevice CPUDevice; struct MklPoolParameters { int depth; + int tensor_in_planes; // Pool3D int tensor_in_cols; int tensor_in_rows; int tensor_in_batch; + int window_planes; // Pool3D int window_rows; int window_cols; int depth_window; + int planes_stride; // Pool3D int row_stride; int col_stride; int depth_stride; + int64 out_planes; // Pool3D int64 out_height; int64 out_width; int out_depth; + int64 pad_P1; // Pool3D + int64 pad_P2; // Pool3D int64 pad_left; int64 pad_right; int64 pad_top; @@ -382,18 +389,24 @@ struct MklPoolParameters { TensorFormat data_format; MklPoolParameters() : depth(0), + tensor_in_planes(0), tensor_in_cols(0), tensor_in_rows(0), tensor_in_batch(0), + window_planes(0), window_rows(0), window_cols(0), depth_window(0), + planes_stride(0), row_stride(0), col_stride(0), depth_stride(0), + out_planes(0), out_height(0), out_width(0), out_depth(0), + pad_P1(0), + pad_P2(0), pad_left(0), pad_right(0), pad_top(0), @@ -433,20 +446,22 @@ class MklPoolingOpBase : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); OP_REQUIRES(context, FormatFromString(data_format, &this->data_format_tf_), errors::InvalidArgument("Invalid data format")); - this->data_format_mkldnn_ = - TFDataFormatToMklDnnDataFormat(this->data_format_tf_); OP_REQUIRES_OK(context, context->GetAttr("ksize", &this->ksize_)); - OP_REQUIRES(context, this->ksize_.size() == 4, + OP_REQUIRES(context, this->ksize_.size() == 4 || this->ksize_.size() == 5, errors::InvalidArgument("Sliding window ksize field must " - "specify 4 dimensions")); + "specify 4 or 5 dimensions")); OP_REQUIRES_OK(context, context->GetAttr("strides", &this->stride_)); - OP_REQUIRES(context, this->stride_.size() == 4, + OP_REQUIRES(context, this->stride_.size() == 4 || this->stride_.size() == 5, errors::InvalidArgument("Sliding window strides field must " - "specify 4 dimensions")); + "specify 4 or 5 dimensions")); OP_REQUIRES_OK(context, context->GetAttr("padding", &this->padding_)); OP_REQUIRES(context, this->ksize_[0] == 1 && this->stride_[0] == 1, errors::Unimplemented("Pooling is not yet supported on the " "batch dimension.")); + bool isPool2D = (this->ksize_.size() == 4); + this->data_format_mkldnn_ = + isPool2D ? TFDataFormatToMklDnnDataFormat(this->data_format_tf_) + : TFDataFormatToMklDnn3DDataFormat(this->data_format_tf_); // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this @@ -457,17 +472,26 @@ class MklPoolingOpBase : public OpKernel { protected: // Calculate output shape of pooling op in MKL-DNN and TensorFlow order. - // MKL-DNN uses NCHW for output order. But TensorFlow output will be in - // NHWC or NCHW format depending on data format. Function expects - // output height and output width to have already been int32 - // bounds-checked + // MKL-DNN uses NCHW(Pool2D) or NCDHW(Pool3D) for output order. + // But TensorFlow output will be in NHWC/NCHW(Pool2D) or + // NDHWC/NCDHW(Pool3D) format depending on data format. Function expects + // output height and width to have already been int32 bounds-checked. 
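
The comment above describes the layout contract that GetOutputDims implements: TensorFlow shapes arrive in data-format order (e.g. NDHWC), while MKL-DNN wants the output dims N-first, C-second. A small Python sketch of that reordering for the 3D case, assuming an NDHWC input shape (the helper name is illustrative only, not a TensorFlow API):

    # Reorder a TF data-format shape into MKL-DNN's NCDHW order.
    def tf_shape_to_mkl_ncdhw(shape, data_format='NDHWC'):
        return [shape[data_format.index(c)] for c in 'NCDHW']

    assert tf_shape_to_mkl_ncdhw([8, 16, 32, 64, 3]) == [8, 3, 16, 32, 64]
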
void GetOutputDims(const MklPoolParameters& mkl_pool_params, memory::dims* output_dims_mkl_order) { - // MKL-DNN always needs output in NCHW format. - *output_dims_mkl_order = {mkl_pool_params.tensor_in_batch, - mkl_pool_params.out_depth, - static_cast(mkl_pool_params.out_height), - static_cast(mkl_pool_params.out_width)}; + if (this->ksize_.size() == 4) { + // Pooling2D: MKL-DNN always needs output in NCHW format. + *output_dims_mkl_order = {mkl_pool_params.tensor_in_batch, + mkl_pool_params.out_depth, + static_cast(mkl_pool_params.out_height), + static_cast(mkl_pool_params.out_width)}; + } else { + // Pooling3D: MKL-DNN always needs output in NCDHW format. + *output_dims_mkl_order = {mkl_pool_params.tensor_in_batch, + mkl_pool_params.out_depth, + static_cast(mkl_pool_params.out_planes), + static_cast(mkl_pool_params.out_height), + static_cast(mkl_pool_params.out_width)}; + } } void InitMklPoolParameters(OpKernelContext* context, @@ -485,14 +509,34 @@ class MklPoolingOpBase : public OpKernel { void PoolParamsToDims(const MklPoolParameters* pool_params, memory::dims* filter_dims, memory::dims* strides, - memory::dims* padding_left, - memory::dims* padding_right) { - *filter_dims = {pool_params->window_rows, pool_params->window_cols}; - *strides = {pool_params->row_stride, pool_params->col_stride}; - *padding_left = {static_cast(pool_params->pad_top), - static_cast(pool_params->pad_left)}; - *padding_right = {static_cast(pool_params->pad_bottom), - static_cast(pool_params->pad_right)}; + memory::dims* padding_left, memory::dims* padding_right, + bool isPool2D) { + if (isPool2D) { + // Pool2D + *filter_dims = + memory::dims({pool_params->window_rows, pool_params->window_cols}); + *strides = + memory::dims({pool_params->row_stride, pool_params->col_stride}); + *padding_left = memory::dims({static_cast(pool_params->pad_top), + static_cast(pool_params->pad_left)}); + *padding_right = memory::dims({static_cast(pool_params->pad_bottom), + static_cast(pool_params->pad_right)}); + } else { + // Pool3D + *filter_dims = + memory::dims({pool_params->window_planes, pool_params->window_rows, + pool_params->window_cols}); + *strides = + memory::dims({pool_params->planes_stride, pool_params->row_stride, + pool_params->col_stride}); + + *padding_left = memory::dims({static_cast(pool_params->pad_P1), + static_cast(pool_params->pad_top), + static_cast(pool_params->pad_left)}); + *padding_right = memory::dims({static_cast(pool_params->pad_P2), + static_cast(pool_params->pad_bottom), + static_cast(pool_params->pad_right)}); + } } void AllocateEmptyOutputTensor(OpKernelContext* context, @@ -556,12 +600,27 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { TensorShape input_tensor_shape = input_tensor.shape(); if (input_tensor.NumElements() != 0) { memory::desc input_md = - input_mkl_shape.IsMklTensor() - ? input_mkl_shape.GetMklLayout() - : memory::desc(TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + input_mkl_shape.IsMklTensor() + ? input_mkl_shape.GetMklLayout() + : memory::desc( + (this->ksize_.size() == 4) + ? 
TFShapeToMklDnnDimsInNCHW(input_tensor_shape, + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW(input_tensor_shape, this->data_format_tf_), - MklDnnType(), this->data_format_mkldnn_); + MklDnnType(), this->data_format_mkldnn_); dnn_data_input->SetUsrMem(input_md, &input_tensor); + + if (this->ksize_.size() == 5) { + // Pool3D + std::vector mkldnn_sizes(5, -1); + mkldnn_sizes[MklDnnDims3D::Dim3d_N] = input_md.data.dims[0]; + mkldnn_sizes[MklDnnDims3D::Dim3d_C] = input_md.data.dims[1]; + mkldnn_sizes[MklDnnDims3D::Dim3d_D] = input_md.data.dims[2]; + mkldnn_sizes[MklDnnDims3D::Dim3d_H] = input_md.data.dims[3]; + mkldnn_sizes[MklDnnDims3D::Dim3d_W] = input_md.data.dims[4]; + dnn_data_input->SetOpMemDesc(mkldnn_sizes, this->data_format_mkldnn_); + } } this->InitMklPoolParameters(context, pool_params, input_mkl_shape, input_tensor_shape); @@ -593,12 +652,13 @@ class MklPoolingForwardOpBase : public MklPoolingOpBase { void SanityCheckInput(OpKernelContext* context, const Tensor& input_tensor, const MklDnnShape& input_mkl_shape) { if (!input_mkl_shape.IsMklTensor()) { - OP_REQUIRES(context, input_tensor.dims() == 4, - errors::InvalidArgument("Input must be 4-dimensional")); + OP_REQUIRES(context, input_tensor.dims() == 4 || input_tensor.dims() == 5, + errors::InvalidArgument("Input must be 4 or 5-dimensional")); } else { - OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4, + OP_REQUIRES(context, input_mkl_shape.GetDimension() == 4 || + input_mkl_shape.GetDimension() == 5, errors::InvalidArgument("Input shape must be " - "4-dimensional")); + "4 or 5-dimensional")); } } // .Input("value: T") @@ -649,8 +709,12 @@ class MklPoolingBackwardOpBase : public MklPoolingOpBase { input_gradient_mkl_shape.IsMklTensor() ? input_gradient_mkl_shape.GetMklLayout() : memory::desc( - TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(), - this->data_format_tf_), + (this->ksize_.size() == 4) + ? TFShapeToMklDnnDimsInNCHW(input_gradient_tensor.shape(), + this->data_format_tf_) + : TFShapeToMklDnnDimsInNCDHW( + input_gradient_tensor.shape(), + this->data_format_tf_), MklDnnType(), this->data_format_mkldnn_); input_gradient_dnn_data->SetUsrMem(original_input_grad_md, diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e0f25fb4ef..a01413f2a7 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1943,6 +1943,104 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is expected to invoke these operators. )doc"); +REGISTER_OP("_MklAvgPool3D") + .Input("value: T") + .Input("mkl_input: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {float, half, double}") + .SetShapeFn(shape_inference::Pool3DShape) + .Doc(R"doc( +MKL version of AvgPool3D operator. Uses MKL DNN APIs to perform average pooling +on the input. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. 
+)doc"); + + +REGISTER_OP("_MklAvgPool3DGrad") + .Input("orig_input_shape: int32") + .Input("grad: T") + .Input("mkl_orig_input: uint8") + .Input("mkl_grad: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {float, half, double}") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle s; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); + TF_RETURN_IF_ERROR(c->WithRank(s, 5, &s)); + c->set_output(0, s); + return Status::OK(); + }) + .Doc(R"doc( +MKL version of AvgPool3DGrad operator. Uses MKL DNN APIs to compute gradients +of AvgPool function. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + +REGISTER_OP("_MklMaxPool3D") + .Input("input: T") + .Input("mkl_input: uint8") + .Output("output: T") + .Output("workspace: uint8") + .Output("mkl_output: uint8") + .Output("mkl_workspace: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {half, bfloat16, float}") + .Attr("workspace_enabled: bool = false") + .SetShapeFn(shape_inference::Pool3DShape) + .Doc(R"doc( +MKL version of MaxPool3D operator. Uses MKL DNN APIs to perform average pooling +on the input. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + +REGISTER_OP("_MklMaxPool3DGrad") + .Input("orig_input: TInput") + .Input("orig_output: TInput") + .Input("grad: T") + .Input("workspace: uint8") + .Input("mkl_orig_input: uint8") + .Input("mkl_orig_output: uint8") + .Input("mkl_grad: uint8") + .Input("mkl_workspace: uint8") + .Output("output: T") + .Output("mkl_output: uint8") + .Attr("ksize: list(int) >= 5") + .Attr("strides: list(int) >= 5") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnet3dDataFormatAttrString()) + .Attr("T: {half, bfloat16, float} = DT_FLOAT") + .Attr("TInput: {half, bfloat16, float} = DT_FLOAT") + .Attr("workspace_enabled: bool = false") + .SetShapeFn([](InferenceContext* c) { + return UnchangedShapeWithRank(c, 5); + }) + .Doc(R"doc( +MKL version of MklPool3DGrad operator. Uses MKL DNN APIs to compute gradients +of MklPool function. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. 
+)doc"); + REGISTER_OP("_MklLRN") .Input("input: T") .Input("mkl_input: uint8") diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 159a787d05..79fc7500fc 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -66,7 +66,6 @@ using mkldnn::reorder; typedef unsigned int uint; #endif - namespace tensorflow { // The file contains a number of utility classes and functions used by MKL @@ -87,6 +86,17 @@ typedef enum { Dim_I = 1 } MklDnnDims; +typedef enum { + Dim3d_N = 0, + Dim3d_C = 1, + Dim3d_D = 2, + Dim3d_H = 3, + Dim3d_W = 4, + Dim3d_O = 0, + Dim3d_I = 1 +} MklDnnDims3D; + + #ifdef INTEL_MKL_ML_ONLY class MklShape { public: @@ -453,6 +463,14 @@ class MklDnnShape { return this->DimSize(index); } + inline size_t GetDimension3D(char dimension) const { + int index = GetMklDnnTensor3DDimIndex(dimension); + CHECK(index >= 0 && index < this->GetDimension()) + << "Invalid index from the dimension: " << index << ", " << dimension; + return this->DimSize(index); + } + + inline int32 GetMklDnnTensorDimIndex(char dimension) const { switch (dimension) { case 'N': @@ -469,6 +487,24 @@ class MklDnnShape { } } + inline int32 GetMklDnnTensor3DDimIndex(char dimension) const { + switch (dimension) { + case 'N': + return MklDnnDims3D::Dim3d_N; + case 'C': + return MklDnnDims3D::Dim3d_C; + case 'D': + return MklDnnDims3D::Dim3d_D; + case 'H': + return MklDnnDims3D::Dim3d_H; + case 'W': + return MklDnnDims3D::Dim3d_W; + default: + LOG(FATAL) << "Invalid dimension: " << dimension; + return -1; // Avoid compiler warning about missing return value + } + } + inline size_t GetDimension() const { return data_.dimension_; } inline const int* GetSizes() const { return reinterpret_cast(&data_.sizes_[0]); @@ -587,15 +623,29 @@ class MklDnnShape { } inline void SetTfDimOrder(const size_t dimension, TensorFormat data_format) { - // TODO(nhasabni): Why do we restrict this to 4D? 
- CHECK_EQ(dimension, 4); - CHECK(dimension == data_.dimension_); - data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; - data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; - data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; - data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + if (dimension == 5) { + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<3>(data_format, '0')] = + MklDnnDims3D::Dim3d_D; + data_.map_[GetTensorDimIndex<3>(data_format, '1')] = + MklDnnDims3D::Dim3d_H; + data_.map_[GetTensorDimIndex<3>(data_format, '2')] = + MklDnnDims3D::Dim3d_W; + data_.map_[GetTensorDimIndex<3>(data_format, 'C')] = + MklDnnDims3D::Dim3d_C; + data_.map_[GetTensorDimIndex<3>(data_format, 'N')] = + MklDnnDims3D::Dim3d_N; + } else { + CHECK_EQ(dimension, 4); + CHECK(dimension == data_.dimension_); + data_.map_[GetTensorDimIndex<2>(data_format, 'W')] = MklDnnDims::Dim_W; + data_.map_[GetTensorDimIndex<2>(data_format, 'H')] = MklDnnDims::Dim_H; + data_.map_[GetTensorDimIndex<2>(data_format, 'C')] = MklDnnDims::Dim_C; + data_.map_[GetTensorDimIndex<2>(data_format, 'N')] = MklDnnDims::Dim_N; + } } + inline void SetTfDimOrder(const size_t dimension, memory::format format) { TensorFormat data_format = MklDnnDataFormatToTFDataFormat(format); SetTfDimOrder(dimension, data_format); @@ -1329,6 +1379,19 @@ memory::data_type MklDnnType() { return memory::data_type::f32; } +/// Map TensorFlow's data format into MKL-DNN 3D data format +/// @input: TensorFlow data format +/// @return: memory::format corresponding to TensorFlow data format; +/// Fails with an error if invalid data format. +inline memory::format TFDataFormatToMklDnn3DDataFormat(TensorFormat format) { + if (format == FORMAT_NHWC) + return memory::format::ndhwc; + else if (format == FORMAT_NCHW) + return memory::format::ncdhw; + TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); + return memory::format::format_undef; +} + /// Map TensorFlow's data format into MKL-DNN data format /// /// @input: TensorFlow data format @@ -1350,9 +1413,9 @@ inline memory::format TFDataFormatToMklDnnDataFormat(TensorFormat format) { /// @return: Tensorflow data format corresponding to memory::format /// Fails with an error if invalid data format. inline TensorFormat MklDnnDataFormatToTFDataFormat(memory::format format) { - if (format == memory::format::nhwc) + if (format == memory::format::nhwc || format == memory::format::ndhwc) return FORMAT_NHWC; - else if (format == memory::format::nchw) + else if (format == memory::format::nchw || format == memory::format::ncdhw) return FORMAT_NCHW; TF_CHECK_OK(Status(error::Code::INVALID_ARGUMENT, "Unsupported data format")); @@ -1402,6 +1465,23 @@ inline memory::dims TFShapeToMklDnnDimsInNCHW(const TensorShape& shape, return memory::dims({n, c, h, w}); } +inline memory::dims TFShapeToMklDnnDimsInNCDHW(const TensorShape& shape, + TensorFormat format) { + // Check validity of format. + CHECK_NE(TFDataFormatToMklDnn3DDataFormat(format), + memory::format::format_undef); + + int n = shape.dim_size(GetTensorDimIndex<3>(format, 'N')); + int c = shape.dim_size(GetTensorDimIndex<3>(format, 'C')); + int d = shape.dim_size(GetTensorDimIndex<3>(format, '0')); + int h = shape.dim_size(GetTensorDimIndex<3>(format, '1')); + int w = shape.dim_size(GetTensorDimIndex<3>(format, '2')); + + // MKL-DNN requires dimensions in NCDHW format. 
+  return memory::dims({n, c, d, h, w});
+}
+
+
 /// Overloaded version of function above. Input parameters are
 /// self-explanatory.
 inline memory::dims MklDnnDimsInNCHW(const memory::dims& in_dims,
@@ -1976,16 +2056,20 @@ class FactoryKeyCreator {
   }
 };
 
-static inline memory::format get_desired_format(int channel) {
+
+static inline memory::format get_desired_format(int channel,
+                                                bool is_2d = true) {
   memory::format fmt_desired = memory::format::any;
-  if (port::TestCPUFeature(port::CPUFeature::AVX512F) && (channel % 16) == 0) {
-    fmt_desired = memory::format::nChw16c;
+  if (port::TestCPUFeature(port::CPUFeature::AVX512F)) {
+    fmt_desired = is_2d ? memory::format::nChw16c : memory::format::nCdhw16c;
   } else if (port::TestCPUFeature(port::CPUFeature::AVX2) &&
              (channel % 8) == 0) {
-    fmt_desired = memory::format::nChw8c;
+    fmt_desired = is_2d
+                      ? memory::format::nChw8c
+                      : memory::format::ncdhw;  // no AVX2-blocked format for 3D yet.
   } else {
-    fmt_desired = memory::format::nchw;
+    fmt_desired = is_2d ? memory::format::nchw : memory::format::ncdhw;
   }
   return fmt_desired;
 }
-- 
cgit v1.2.3


From 7b35aac2924b2dbd744ff5db9a24d8b05eb90f58 Mon Sep 17 00:00:00 2001
From: AG Ramesh
Date: Mon, 13 Aug 2018 18:00:58 -0700
Subject: Replaced INTEL_MKL_ML with new macro INTEL_MKL_ML_ONLY

---
 tensorflow/core/kernels/mkl_relu_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc
index 8db43b2a8d..99f8136f41 100644
--- a/tensorflow/core/kernels/mkl_relu_op.cc
+++ b/tensorflow/core/kernels/mkl_relu_op.cc
@@ -44,7 +44,7 @@ using mkldnn::memory;
 
 namespace tensorflow {
 
-#ifndef INTEL_MKL
+#ifndef INTEL_MKL_ML_ONLY
 
 template <typename T>
 class MklEltwiseFwdParams {
-- 
cgit v1.2.3


From 285273717d17c0609c49e020b4cc9220913d3558 Mon Sep 17 00:00:00 2001
From: Ben
Date: Mon, 13 Aug 2018 21:50:54 -0400
Subject: py37

---
 tensorflow/c/eager/c_api.cc               |  8 ++++----
 tensorflow/c/eager/c_api.h                |  4 ++--
 tensorflow/python/eager/pywrap_tfe_src.cc |  4 ++--
 tensorflow/workspace.bzl                  | 34 ++++++++++++++++++++-----------
 4 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index dfb1c9a376..ce5a3f29a4 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -244,8 +244,8 @@ void TFE_ContextOptionsSetConfig(TFE_ContextOptions* options, const void* proto,
 }
 
 void TFE_ContextOptionsSetAsync(TFE_ContextOptions* options,
-                                unsigned char async) {
-  options->async = async;
+                                unsigned char async_) {
+  options->async = async_;
 }
 void TFE_ContextOptionsSetDevicePlacementPolicy(
     TFE_ContextOptions* options, TFE_ContextDevicePlacementPolicy policy) {
@@ -253,9 +253,9 @@ void TFE_ContextOptionsSetDevicePlacementPolicy(
 }
 
 TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx,
-                                                        unsigned char async,
+                                                        unsigned char async_,
                                                         TF_Status* status) {
-  status->status = ctx->context.SetAsyncForThread(async);
+  status->status = ctx->context.SetAsyncForThread(async_);
 }
 
 void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; }
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
index a0ebc6fa0a..db0079b0de 100644
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -76,7 +76,7 @@ typedef enum TFE_ContextDevicePlacementPolicy {
 
 // Sets the default execution mode (sync/async). Note that this can be
 // overridden per thread using TFE_ContextSetAsyncForThread.
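
The async -> async_ rename in these C API hunks is not cosmetic: async (and await) became reserved keywords in Python 3.7, so generated bindings or keyword arguments that used async as a name stop parsing altogether. A minimal illustration, runnable on Python 3.7+:

    # 'async' is a hard keyword from Python 3.7 on, so it can no longer be
    # used as a parameter name:
    #
    #     def set_async(async):   # SyntaxError under Python 3.7+
    #         ...
    #
    # The trailing-underscore spelling adopted in this patch parses fine:
    def set_async(async_):
        return bool(async_)

    assert set_async(1) is True
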
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*, - unsigned char async); + unsigned char async_); TF_CAPI_EXPORT extern void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions*, TFE_ContextDevicePlacementPolicy); @@ -114,7 +114,7 @@ TFE_ContextGetDevicePlacementPolicy(TFE_Context*); // Overrides the execution mode (sync/async) for the current thread. TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*, - unsigned char async, + unsigned char async_, TF_Status* status); // A tensorflow.ServerDef specifies remote workers (in addition to the current diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 2d54555cd3..64cf36d079 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -216,7 +216,7 @@ bool ParseStringValue(const string& key, PyObject* py_value, TF_Status* status, #if PY_MAJOR_VERSION >= 3 if (PyUnicode_Check(py_value)) { Py_ssize_t size = 0; - char* buf = PyUnicode_AsUTF8AndSize(py_value, &size); + const char* buf = PyUnicode_AsUTF8AndSize(py_value, &size); if (buf == nullptr) return false; *value = tensorflow::StringPiece(buf, size); return true; @@ -825,7 +825,7 @@ int MaybeRaiseExceptionFromStatus(const tensorflow::Status& status, return -1; } -char* TFE_GetPythonString(PyObject* o) { +const char* TFE_GetPythonString(PyObject* o) { if (PyBytes_Check(o)) { return PyBytes_AsString(o); } diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index c21e5ebc9e..7138c0a452 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -362,11 +362,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "protobuf_archive", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz", - "https://github.com/google/protobuf/archive/v3.6.0.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", ], - sha256 = "50a5753995b3142627ac55cfd496cebc418a2e575ca0236e29033c67bd5665f4", - strip_prefix = "protobuf-3.6.0", + sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", + strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", ) # We need to import the protobuf library under the names com_google_protobuf @@ -375,21 +375,31 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_protobuf", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz", - "https://github.com/google/protobuf/archive/v3.6.0.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + "https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", ], - sha256 = "50a5753995b3142627ac55cfd496cebc418a2e575ca0236e29033c67bd5665f4", - strip_prefix = "protobuf-3.6.0", + sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", + strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", ) tf_http_archive( name = "com_google_protobuf_cc", urls = [ - "https://mirror.bazel.build/github.com/google/protobuf/archive/v3.6.0.tar.gz", - "https://github.com/google/protobuf/archive/v3.6.0.tar.gz", + "https://mirror.bazel.build/github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", + 
"https://github.com/google/protobuf/archive/0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7.tar.gz", ], - sha256 = "50a5753995b3142627ac55cfd496cebc418a2e575ca0236e29033c67bd5665f4", - strip_prefix = "protobuf-3.6.0", + sha256 = "4bb48bcc972ee9d40a8bb7e481522030b6e6771a7283ae83c896872115180d25", + strip_prefix = "protobuf-0a59054c30e4f0ba10f10acfc1d7f3814c63e1a7", + ) + + tf_http_archive( + name = "bazel_skylib", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/archive/2169ae1c374aab4a09aa90e65efe1a3aad4e279b.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/archive/2169ae1c374aab4a09aa90e65efe1a3aad4e279b.tar.gz" + ], + sha256 = "bbccf674aa441c266df9894182d80de104cabd19be98be002f6d478aaa31574d", + strip_prefix = "bazel-skylib-2169ae1c374aab4a09aa90e65efe1a3aad4e279b", ) tf_http_archive( -- cgit v1.2.3 From cd89c1bc76474cc0e5179ff647a81deb51bba25b Mon Sep 17 00:00:00 2001 From: weidankong Date: Mon, 13 Aug 2018 18:53:24 -0700 Subject: update according review comments --- .../python/training/elastic_average_optimizer.py | 36 +++++++++------------- .../training/elastic_average_optimizer_test.py | 3 +- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py index be72ef3767..0554c43c18 100644 --- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer.py @@ -169,7 +169,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer): ea_custom_getter, communication_period=10, moving_rate=None, - rho=0.0, + rho=None, use_locking=True, sync_flag=False, name='ElasticAverageOptimizer'): @@ -183,11 +183,16 @@ class ElasticAverageOptimizer(optimizer.Optimizer): communication_period: An int point value to controls the frequency of the communication between every worker and the ps. moving_rate: A floating point value to control the elastic difference. - rho: the amount of exploration we allow ine the model. The default + rho: the amount of exploration we allow in the model. The default value is moving_rate/learning_rate + rho=0.0 is suggested in async mode. use_locking: If True use locks for update operations. - sync_flag: Add_sync_queues_and_barrier or not, default to False, in case of - restarting a worker,the worker won't hung there. + sync_flag: Add_sync_queues_and_barrier or not. + True: all workers will wait for each other before start training + False: worker can start training when its initilization is done, + no need to wait for everyone is ready. + in case one worker is restarted, it can join and continue + training without being blocked. name: Optional name prefix for the operations created when applying gradients. Defaults to "ElasticAverageOptimizer". """ @@ -291,29 +296,28 @@ class ElasticAverageOptimizer(optimizer.Optimizer): TypeError: If `grads_and_vars` is malformed. ValueError: If none of the variables have gradients. 
""" + global_old = set(n.op.name for n in variables.global_variables()) apply_updates = self._opt.apply_gradients(grads_and_vars) + global_new = set(n.op.name for n in variables.global_variables()) with ops.control_dependencies([apply_updates]): local_update = state_ops.assign_add( self._local_step, 1, name='local_step_update').op # this is for place the variables created by optimizer to local collection # e.g., AdamOptimizer will create beta as global variables - def _adjust_optimizer_variable_collection(): + def _adjust_optimizer_variable_collection(opt_vars): g = ops.get_default_graph() - # global to local & clear global idx = 0 for _ in range(len(g._collections[ops.GraphKeys.GLOBAL_VARIABLES])): var = g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] name = var.op.name - if GLOBAL_STEP not in name.split('/') \ - and var not in ops.get_collection(GLOBAL_SHARE_VARS) \ - and name.find(GLOBAL_VARIABLE_NAME) == -1: + if name in opt_vars: ops.add_to_collection(ops.GraphKeys.LOCAL_VARIABLES, var) del g._collections[ops.GraphKeys.GLOBAL_VARIABLES][idx] else: idx += 1 - _adjust_optimizer_variable_collection() + _adjust_optimizer_variable_collection(global_new - global_old) # update global variables. def _Update_global_variables(): @@ -432,14 +436,10 @@ class ElasticAverageOptimizer(optimizer.Optimizer): var_list = saver.BaseSaverBuilder.OpListToDict(var_list) swapped_var_list = {} - has_global_step = False for key, var in var_list.items(): tensor = var - if False == has_global_step\ - and GLOBAL_STEP in key.split('/'): - has_global_step = True - if isinstance(var, list) == False: + if not isinstance(var, list): for tvar in variables.trainable_variables(): if tvar.op.name == var.op.name: tensor = self._global_map.get(tvar, var) @@ -449,12 +449,6 @@ class ElasticAverageOptimizer(optimizer.Optimizer): swapped_var_list[key] = tensor - # find global_step and add it if missing - if False == has_global_step: - for ele in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES): - if GLOBAL_STEP in ele.op.name.split('/'): - swapped_var_list[ele.op.name] = ele - return saver.Saver(swapped_var_list, name=name, **kwargs) class _ElasticAverageOptimizerHook(session_run_hook.SessionRunHook): diff --git a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py index 8a8f7ab080..acb663d628 100644 --- a/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/elastic_average_optimizer_test.py @@ -207,8 +207,7 @@ class ElasticAverageOptimizerTest(test.TestCase): v0 = variable_scope.get_variable(initializer=0.0, name="v0") v1 = variable_scope.get_variable(initializer=1.0, name="v1") sess.run(variables.local_variables_initializer()) - global_step = training_util.get_or_create_global_step() - saver_opt = saver.Saver(var_list=[v1, v0, global_step]) + saver_opt = saver.Saver(var_list=[v1, v0]) saver_opt.restore(sess, './model/model') self.assertAllEqual(2.0, sess.run(v0)) self.assertAllEqual(3.0, sess.run(v1)) -- cgit v1.2.3 From 33c10145490d113e125847142ce0d9f05d9775d3 Mon Sep 17 00:00:00 2001 From: Ruizhi Date: Tue, 14 Aug 2018 10:07:53 +0800 Subject: Remove unnecessary tf.exp --- .../eager/python/examples/generative_examples/text_generation.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb 
b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb index b173f856c6..38358b444a 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/text_generation.ipynb @@ -621,7 +621,7 @@ "\n", " # using a multinomial distribution to predict the word returned by the model\n", " predictions = predictions / temperature\n", - " predicted_id = tf.multinomial(tf.exp(predictions), num_samples=1)[0][0].numpy()\n", + " predicted_id = tf.multinomial(predictions, num_samples=1)[0][0].numpy()\n", " \n", " # We pass the predicted word as the next input to the model\n", " # along with the previous hidden state\n", -- cgit v1.2.3 From fee2c48d3ff8e0c307070804275318565cac788a Mon Sep 17 00:00:00 2001 From: Ruizhi Date: Tue, 14 Aug 2018 10:28:06 +0800 Subject: Remove unnecessary tf.exp --- .../examples/generative_examples/image_captioning_with_attention.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb index 1a5a186e7a..315d7a4893 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb @@ -1056,7 +1056,7 @@ "\n", " attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()\n", "\n", - " predicted_id = tf.multinomial(tf.exp(predictions), num_samples=1)[0][0].numpy()\n", + " predicted_id = tf.multinomial(predictions, num_samples=1)[0][0].numpy()\n", " result.append(index_word[predicted_id])\n", "\n", " if index_word[predicted_id] == '':\n", -- cgit v1.2.3 From 4aaab50552a3cdb4b785653f071ae6c7193992ca Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 14 Aug 2018 12:25:18 +0800 Subject: CLN: fix coding style --- tensorflow/python/ops/array_grad.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 328b4f7d53..2beb58d534 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -763,9 +763,10 @@ def _ExtractImagePatchesGrad(op, grad): (1, rows_out, cols_out, ksize_r * ksize_c)) # Construct mapping table for indices: (input -> output). 
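
The mapping table built here pairs each flattened input index with the flattened output index it contributes to. A toy NumPy rendering of the expand-dims/concat/reshape pattern used below (shapes are illustrative only):

    import numpy as np

    input_idx_patched = np.array([[0, 1], [2, 3]])
    output_idx = np.array([[10, 11], [12, 13]])
    # Stack the two index tensors along a new last axis, then flatten to
    # rows of (input_idx, output_idx) pairs.
    idx_matrix = np.concatenate([input_idx_patched[..., None],
                                 output_idx[..., None]], axis=-1)
    idx_map = idx_matrix.reshape(-1, 2)
    assert idx_map.tolist() == [[0, 10], [1, 11], [2, 12], [3, 13]]
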
- idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1), - array_ops.expand_dims(output_idx, axis=-1)], - axis=-1) + idx_matrix = array_ops.concat( + [array_ops.expand_dims(input_idx_patched, axis=-1), + array_ops.expand_dims(output_idx, axis=-1)], + axis=-1) idx_map = array_ops.reshape(idx_matrix, (-1, 2)) sp_shape = (input_indices_num, output_indices_num) -- cgit v1.2.3 From f982cfe9f943c9920cafeefff7818ea298d5b509 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 14 Aug 2018 12:41:36 +0800 Subject: TST: add benchmark --- .../kernel_tests/extract_image_patches_grad_test.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py index 60090a1510..e1f5a6b620 100644 --- a/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py +++ b/tensorflow/python/kernel_tests/extract_image_patches_grad_test.py @@ -25,6 +25,8 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed as random_seed_lib from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variable_scope from tensorflow.python.platform import test @@ -100,6 +102,24 @@ class ExtractImagePatchesGradTest(test.TestCase): print('extract_image_patches gradient err: %.4e' % err) self.assertLess(err, 1e-4) + def testConstructGradientWithLargeImages(self): + batch_size = 4 + height = 1024 + width = 1024 + ksize = 5 + images = variable_scope.get_variable('inputs', + (batch_size, height, width, 1)) + patches = array_ops.extract_image_patches(images, + ksizes=[1, ksize, ksize, 1], + strides=[1, 1, 1, 1], + rates=[1, 1, 1, 1], + padding='SAME') + # Github issue: #20146 + # tf.extract_image_patches() gradient very slow at graph construction time + gradients = gradients_impl.gradients(patches, images) + # Won't time out. 
+ self.assertIsNotNone(gradients) + if __name__ == '__main__': test.main() -- cgit v1.2.3 From 3c83ef9fbc8dc23ab0878cffa13ecbfd07ac70e5 Mon Sep 17 00:00:00 2001 From: "Yan Facai (颜发才)" Date: Tue, 14 Aug 2018 14:08:15 +0800 Subject: CLN: rename UnsafeDiv => DivNoNan --- tensorflow/cc/gradients/math_grad.cc | 15 +++++++-------- tensorflow/cc/gradients/math_grad_test.cc | 8 ++++---- tensorflow/core/api_def/base_api/api_def_UnsafeDiv.pbtxt | 4 ++-- .../core/api_def/python_api/api_def_UnsafeDiv.pbtxt | 2 +- tensorflow/core/kernels/cwise_op_div.cc | 2 +- tensorflow/core/ops/math_grad.cc | 8 ++++---- tensorflow/core/ops/math_grad_test.cc | 6 +++--- tensorflow/core/ops/math_ops.cc | 2 +- tensorflow/core/ops/math_ops_test.cc | 2 +- tensorflow/python/ops/math_grad.py | 6 +++--- tensorflow/python/ops/math_grad_test.py | 2 +- tensorflow/python/ops/math_ops_test.py | 2 +- 12 files changed, 29 insertions(+), 30 deletions(-) diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index c6e60689fa..cd215f740d 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -441,21 +441,20 @@ Status RealDivGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("RealDiv", RealDivGrad); -Status UnsafeDivGrad(const Scope& scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { +Status DivNoNanGrad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { auto x_1 = ConjugateHelper(scope, op.input(0)); auto x_2 = ConjugateHelper(scope, op.input(1)); // y = x_1 / x_2 // dy/dx_1 = 1/x_2 // dy/dx_2 = -x_1/x_2^2 - auto gx_1 = UnsafeDiv(scope, grad_inputs[0], x_2); - auto gx_2 = - Mul(scope, grad_inputs[0], - UnsafeDiv(scope, UnsafeDiv(scope, Neg(scope, x_1), x_2), x_2)); + auto gx_1 = DivNoNan(scope, grad_inputs[0], x_2); + auto gx_2 = Mul(scope, grad_inputs[0], + DivNoNan(scope, DivNoNan(scope, Neg(scope, x_1), x_2), x_2)); return BinaryGradCommon(scope, op, grad_outputs, gx_1, gx_2); } -REGISTER_GRADIENT_OP("UnsafeDiv", UnsafeDivGrad); +REGISTER_GRADIENT_OP("DivNoNan", DivNoNanGrad); Status SquaredDifferenceGrad(const Scope& scope, const Operation& op, const std::vector& grad_inputs, diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc index 12a19bcf28..147428cc39 100644 --- a/tensorflow/cc/gradients/math_grad_test.cc +++ b/tensorflow/cc/gradients/math_grad_test.cc @@ -33,6 +33,7 @@ using ops::AddN; using ops::BatchMatMul; using ops::Const; using ops::Div; +using ops::DivNoNan; using ops::MatMul; using ops::Max; using ops::Maximum; @@ -47,7 +48,6 @@ using ops::RealDiv; using ops::SquaredDifference; using ops::Sub; using ops::Sum; -using ops::UnsafeDiv; using ops::Where3; // TODO(andydavis) Test gradient function against numeric gradients output. @@ -854,13 +854,13 @@ TEST_F(NaryGradTest, RealDiv) { RunTest({x}, {x_shape}, {y}, {x_shape}); } -TEST_F(NaryGradTest, UnsafeDiv) { +TEST_F(NaryGradTest, DivNoNan) { { TensorShape x_shape({3, 2, 5}); const auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); // Test x / (1 + |x|) rather than x_1 / x_2 to avoid triggering large // division errors in the numeric estimator used by the gradient checker. 
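
The DivNoNanGrad registration above follows directly from y = x_1 / x_2: dy/dx_1 = 1/x_2 and dy/dx_2 = -x_1/x_2^2, with every division routed through DivNoNan so that a zero denominator yields zero gradients rather than NaN or Inf; that is exactly what the second test case below asserts. A plain-Python sketch of the same rule:

    # Plain-Python mirror of DivNoNanGrad: zero denominator -> zero grads.
    def div_no_nan(x, y):
        return 0.0 if y == 0.0 else x / y

    def div_no_nan_grad(x1, x2, g):
        gx1 = div_no_nan(g, x2)                        # g * (1 / x2)
        gx2 = g * div_no_nan(div_no_nan(-x1, x2), x2)  # g * (-x1 / x2^2)
        return gx1, gx2

    assert div_no_nan_grad(3.0, 2.0, 1.0) == (0.5, -0.75)
    assert div_no_nan_grad(3.0, 0.0, 1.0) == (0.0, 0.0)  # no NaN at x2 == 0
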
- const auto y = UnsafeDiv( + const auto y = DivNoNan( scope_, x, Add(scope_, Const(scope_, 1), Abs(scope_, x))); RunTest({x}, {x_shape}, {y}, {x_shape}); } @@ -868,7 +868,7 @@ TEST_F(NaryGradTest, UnsafeDiv) { // Return 0 gradient (rather than NaN) for division by zero. const auto x = Placeholder(scope_, DT_FLOAT); const auto zero = Const(scope_, 0.0); - const auto y = UnsafeDiv(scope_, x, zero); + const auto y = DivNoNan(scope_, x, zero); std::vector grad_outputs; TF_EXPECT_OK(AddSymbolicGradients(scope_, {y}, {x}, &grad_outputs)); diff --git a/tensorflow/core/api_def/base_api/api_def_UnsafeDiv.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsafeDiv.pbtxt index d8f76c4cf8..5604a1a89e 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsafeDiv.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsafeDiv.pbtxt @@ -1,9 +1,9 @@ op { - graph_op_name: "UnsafeDiv" + graph_op_name: "DivNoNan" summary: "Returns 0 if the denominator is zero." description: <