aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_graph.cc28
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_nodes.cc37
-rw-r--r--tensorflow/contrib/tensorrt/convert/convert_nodes.h10
-rw-r--r--tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc93
-rw-r--r--tensorflow/contrib/tensorrt/kernels/trt_engine_op.h26
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc13
-rw-r--r--tensorflow/contrib/tensorrt/resources/trt_resources.h26
7 files changed, 113 insertions, 120 deletions
diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
index 7dcd30b0b2..ba7d3b5f86 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc
@@ -424,31 +424,25 @@ tensorflow::Status CreateTRTNode(tensorflow::Graph* graph,
string segment_string;
if (info.engine_type == EngineInfo::EngineType::TRTStatic ||
info.precision_mode == INT8MODE) {
- // Create static engine and for int8 test validity of the engine. We can not
- // allow engine to fail at the calibration time. So we are constructing a
- // FP32 engine here to check its validity. If it is a valid engine then we
- // put the serialized graphdef to the op. Otherwise we skip node creation
- // for this engine.
+ // Create static engine for fp32/fp16 mode, and test validity of the engine
+ // for int8 mode. We don't want the engine to fail at calibration time.
+ // So we are constructing a FP32 engine here to check its validity, and if
+ // it is a valid engine then we put the serialized graphdef to the op.
+ // Otherwise we skip node creation for this engine.
Logger trt_logger;
- TrtUniquePtrType<nvinfer1::IBuilder> builder(
- nvinfer1::createInferBuilder(trt_logger));
- builder->setMaxBatchSize(max_batch_size);
- if (info.precision_mode == FP16MODE) builder->setHalf2Mode(true);
- builder->setMaxWorkspaceSize(info.max_workspace_size_bytes);
-#if NV_TENSORRT_MAJOR > 3
- builder->setGpuAllocator(alloc);
-#endif
TrtUniquePtrType<nvinfer1::ICudaEngine> engine;
// TODO(sami): What happens if 1st dim is not batch?
TF_RETURN_IF_ERROR(ConvertGraphDefToEngine(
- info.segment_graph_def, info.precision_mode, shapes, builder.get(),
- &engine, /*convert_successfully=*/nullptr));
+ info.segment_graph_def,
+ info.precision_mode == INT8MODE ? FP32MODE : info.precision_mode,
+ max_batch_size, info.max_workspace_size_bytes, shapes, &trt_logger,
+ alloc, /*calibrator=*/nullptr, &engine,
+ /*convert_successfully=*/nullptr));
TrtUniquePtrType<nvinfer1::IHostMemory> engine_data(engine->serialize());
segment_string =
string((const char*)engine_data->data(), engine_data->size());
if (info.precision_mode == INT8MODE) {
- // See above comment on the reason why not putting this inside the 'else'
- // branch.
+ // See above comment about why not putting this inside the 'else' branch.
segment_string = info.segment_graph_def.SerializeAsString();
}
} else {
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
index 5608761206..b5214b461a 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc
@@ -433,7 +433,7 @@ class Converter {
OpConverter plugin_converter_;
nvinfer1::INetworkDefinition* trt_network_;
std::list<std::vector<uint8_t>> temp_bufs_;
- tensorflow::tensorrt::TRTWeightStore* weight_store_;
+ TRTWeightStore* weight_store_;
bool fp16_;
void register_op_converters();
tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def,
@@ -475,11 +475,11 @@ class Converter {
public:
explicit Converter(nvinfer1::INetworkDefinition* trt_network,
- tensorflow::tensorrt::TRTWeightStore* ws, bool fp16)
+ TRTWeightStore* ws, bool fp16)
: trt_network_(trt_network), weight_store_(ws), fp16_(fp16) {
this->register_op_converters();
}
- tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; }
+ TRTWeightStore* weight_store() { return weight_store_; }
TRT_ShapedWeights get_temp_weights(tensorflow::DataType type,
nvinfer1::Dims shape) {
TRT_ShapedWeights weights(type, nullptr, shape);
@@ -2130,21 +2130,44 @@ void Converter::register_op_converters() {
} // namespace
tensorflow::Status ConvertGraphDefToEngine(
- const tensorflow::GraphDef& gdef, int precision_mode,
+ const tensorflow::GraphDef& gdef,
+ int precision_mode,
+ int max_batch_size,
+ size_t max_workspace_size_bytes,
const std::vector<tensorflow::PartialTensorShape>& input_shapes,
- nvinfer1::IBuilder* builder,
+ Logger* logger,
+ nvinfer1::IGpuAllocator* allocator,
+ TRTInt8Calibrator* calibrator,
TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
bool* convert_successfully) {
engine->reset();
if (convert_successfully) *convert_successfully = false;
+
+ // Create the builder.
+ TrtUniquePtrType<nvinfer1::IBuilder> builder(
+ nvinfer1::createInferBuilder(*logger));
+ builder->setMaxBatchSize(max_batch_size);
+ // TODO(aaroey): use the allocator to allocate the TRT workspace.
+ builder->setMaxWorkspaceSize(max_workspace_size_bytes);
+#if NV_TENSORRT_MAJOR > 3
+ builder->setGpuAllocator(allocator);
+#endif
+ if (precision_mode == FP16MODE) {
+ builder->setHalf2Mode(true);
+ } else if (precision_mode == INT8MODE) {
+ builder->setInt8Mode(true);
+ builder->setInt8Calibrator(calibrator);
+ }
+
+ // Create the network.
auto trt_network =
TrtUniquePtrType<nvinfer1::INetworkDefinition>(builder->createNetwork());
if (!trt_network) {
return tensorflow::errors::Internal(
"Failed to create TensorRT network object");
}
- auto ws = std::unique_ptr<tensorflow::tensorrt::TRTWeightStore>(
- new TRTWeightStore());
+ auto ws = std::unique_ptr<TRTWeightStore>(new TRTWeightStore());
+
// Build the network
VLOG(1) << "Starting engine conversion ";
Converter converter(trt_network.get(), ws.get(), precision_mode == FP16MODE);
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index b357da0d84..2da4edf7f5 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -24,6 +24,7 @@ limitations under the License.
#include "tensorflow/contrib/tensorrt/convert/utils.h"
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
+#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/grappler/costs/graph_properties.h"
@@ -119,9 +120,14 @@ tensorflow::Status ConvertSegmentToGraphDef(
// is successful. This is different than successfully building the engine:
// building can still fail afterwards.
tensorflow::Status ConvertGraphDefToEngine(
- const tensorflow::GraphDef& gdef, int precision_mode,
+ const tensorflow::GraphDef& gdef,
+ int precision_mode,
+ int max_batch_size,
+ size_t max_workspace_size_bytes,
const std::vector<tensorflow::PartialTensorShape>& input_shapes,
- nvinfer1::IBuilder* builder,
+ Logger* logger,
+ nvinfer1::IGpuAllocator* allocator,
+ TRTInt8Calibrator* calibrator,
TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
bool* convert_successfully);
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
index 4b45281f51..d12f738ac5 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc
@@ -36,7 +36,6 @@ namespace tensorflow {
namespace tensorrt {
static Logger logger;
using ::nvinfer1::IRuntime;
-using ::nvinfer1::Dims;
using ::tensorflow::strings::StrAppend;
using ::tensorflow::strings::StrCat;
@@ -441,6 +440,7 @@ TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size,
#if NV_TENSORRT_MAJOR > 3
auto allocator = GetAllocator(ctx);
if (allocator == nullptr) {
+ // GetAllocator already set the Status.
return null_pair;
};
infer->setGpuAllocator(allocator);
@@ -464,39 +464,27 @@ TRTEngineOp::EngineCtxPair& TRTEngineOp::GetEngine(int batch_size,
auto engine_it = engine_map_.find(batch_size);
if (engine_it == engine_map_.end() &&
engine_map_.size() < (size_t)max_cached_engines_) {
- TrtUniquePtrType<nvinfer1::IBuilder> builder(
- nvinfer1::createInferBuilder(logger));
+ nvinfer1::IGpuAllocator* allocator = nullptr;
#if NV_TENSORRT_MAJOR > 3
- auto allocator = GetAllocator(ctx);
+ allocator = GetAllocator(ctx);
if (allocator == nullptr) {
// GetAllocator already set the Status.
return null_pair;
}
- builder->setGpuAllocator(allocator);
#endif
- VLOG(0) << name() << " Constructing a new engine with batch size "
- << batch_size;
- builder->setMaxBatchSize(batch_size);
- if (precision_mode_ == convert::FP16MODE) {
- builder->setHalf2Mode(true);
- } else if (precision_mode_ == convert::INT8MODE) {
- builder->setInt8Mode(true);
- // Up to this point, calibrator_ can never be empty, since otherwise it
- // means calibration_mode_ is true and this path won't get executed.
- builder->setInt8Calibrator(calibrator_.get());
- }
- // TODO(aaroey): use the allocator to allocate the TRT workspace.
- builder->setMaxWorkspaceSize(workspace_size_);
std::vector<tensorflow::PartialTensorShape> shapes;
for (int i = 0; i < ctx->num_inputs(); ++i) {
shapes.emplace_back(ctx->input(i).shape());
}
TrtUniquePtrType<nvinfer1::ICudaEngine> engine;
bool convert_successfully = false;
- VLOG(1) << "Calling conversion for " << batch_size << " " << name();
+ VLOG(0) << name() << " Constructing a new engine with batch size "
+ << batch_size;
+ // Up to this point, calibrator_ can never be empty, since otherwise it
+ // means calibration_mode_ is true and this path won't get executed.
auto status = convert::ConvertGraphDefToEngine(
- segment_graph_, precision_mode_, shapes, builder.get(), &engine,
- &convert_successfully);
+ segment_graph_, precision_mode_, batch_size, workspace_size_, shapes,
+ &logger, allocator, calibrator_.get(), &engine, &convert_successfully);
if (!status.ok()) {
if (convert_successfully) {
// This means it fail to build the engine even when the network is built
@@ -522,9 +510,7 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources(
TRTCalibrationResource** cr) {
auto cres = new TRTCalibrationResource();
*cr = cres;
- cres->logger_ = new Logger();
-
-#if NV_TENSORRT_MAJOR > 3
+ // Get the allocator.
auto alloc = ctx->device()->GetAllocator(tensorflow::AllocatorAttributes());
if (!alloc) {
LOG(WARNING) << "Can't get device allocator will not be able to "
@@ -533,11 +519,10 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources(
} else {
cres->allocator_.reset(new TRTDeviceAllocator(alloc));
}
-#endif
- int batch_size = ctx->input(0).dim_size(0);
+ // Get the input shapes.
+ const int batch_size = ctx->input(0).dim_size(0);
+ const int num_inputs = ctx->num_inputs();
std::vector<tensorflow::PartialTensorShape> shapes;
- int num_inputs = ctx->num_inputs();
- // first run instantiate calibrator
dev_tensors_.resize(num_inputs);
VLOG(1) << " Constructing calibrator";
for (int i = 0; i < num_inputs; i++) {
@@ -557,51 +542,45 @@ tensorflow::Status TRTEngineOp::AllocateCalibrationResources(
StrCat(kInputPHName, i),
std::pair<void*, size_t>(device_address, device_tensor->TotalBytes()));
}
- cres->calibrator_ =
- new TRTInt8Calibrator(device_buffers_, batch_size, name());
- string label(name());
+ cres->calibrator_.reset(
+ new TRTInt8Calibrator(device_buffers_, batch_size, name()));
+ const string label(name());
auto segment_graph = &segment_graph_;
- int cuda_device = ctx->device()->tensorflow_gpu_device_info()->gpu_id;
- if (cuda_device < 0) {
+ const int cuda_gpu_id = ctx->device()->tensorflow_gpu_device_info()->gpu_id;
+ if (cuda_gpu_id < 0) {
LOG(ERROR) << "Can't get gpu_device_info from context->device()";
return tensorflow::errors::InvalidArgument(
"Context->device doesn't contain device info!");
}
- int workspace_size = workspace_size_;
- cres->thr_ = new std::thread([cres, label, segment_graph, shapes, cuda_device,
- batch_size, workspace_size]() {
- VLOG(0) << "Starting calibration thread on device " << cuda_device
+ const int64 workspace_size_bytes = workspace_size_;
+ cres->thr_.reset(new std::thread([cres, label, segment_graph, shapes,
+ cuda_gpu_id, workspace_size_bytes]() {
+ VLOG(0) << "Starting calibration thread on device " << cuda_gpu_id
<< ", Calibration Resource @ " << cres;
- auto err = cudaSetDevice(cuda_device);
+ auto err = cudaSetDevice(cuda_gpu_id);
if (err != cudaSuccess) {
- VLOG(0) << "Couldn't set cuda device to " << cuda_device
- << " in calibration thread";
+ // TODO(aaroey): should return error here.
+ LOG(ERROR) << "Couldn't set cuda device to " << cuda_gpu_id
+ << " in calibration thread";
}
- // initialize builder here
- cres->builder_.reset(nvinfer1::createInferBuilder(*(cres->logger_)));
- // TODO(aaroey): maybe setting the max batch size using the python
- // calibration wrapper class.
- cres->builder_->setMaxBatchSize(batch_size);
-#if NV_TENSORRT_MAJOR > 3
- cres->builder_->setGpuAllocator(cres->allocator_.get());
-#endif
- cres->builder_->setInt8Mode(true);
- cres->builder_->setMaxWorkspaceSize(workspace_size);
- cres->builder_->setInt8Calibrator(cres->calibrator_);
// ConvertGraphDefToEngine() will try to build the engine. This thread
// will loop inside buildCudaEngine() consuming the calibration data
// that is set by the TF op, and drive the builder until calibrator returns
// false. Engine is discarded after the calibration table is generated.
+ //
+ // TODO(aaroey): maybe set the max batch size using the python
+ // calibration wrapper class.
auto s = convert::ConvertGraphDefToEngine(
- *segment_graph, convert::INT8MODE, shapes, cres->builder_.get(),
- &cres->engine_, /*convert_successfully=*/nullptr);
+ *segment_graph, convert::INT8MODE, cres->calibrator_->getBatchSize(),
+ workspace_size_bytes, shapes, &cres->logger_, cres->allocator_.get(),
+ cres->calibrator_.get(), &cres->engine_,
+ /*convert_successfully=*/nullptr);
if (!s.ok()) {
- LOG(ERROR)
- << "Calibration failed. Engine will not be calibrated! Error is" << s;
- cres->calibrator_->setDone(); // ignore further pushes
+ LOG(ERROR) << "Calibration failed: " << s;
+ cres->calibrator_->setDone(); // Ignore further pushes
}
VLOG(1) << "Calibration loop terminated " << label;
- });
+ }));
VLOG(1) << "initialized calibrator resource";
return tensorflow::Status::OK();
}
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h
index cb43403130..0d2f9e8a9d 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h
+++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.h
@@ -20,6 +20,7 @@ limitations under the License.
#include <vector>
#include "tensorflow/contrib/tensorrt/convert/utils.h"
+#include "tensorflow/contrib/tensorrt/log/trt_logger.h"
#include "tensorflow/contrib/tensorrt/resources/trt_allocator.h"
#include "tensorflow/core/framework/function.h"
#include "tensorflow/core/framework/graph.pb.h"
@@ -46,25 +47,24 @@ class TRTEngineOp : public AsyncOpKernel {
explicit TRTEngineOp(OpKernelConstruction* context);
void ComputeAsync(OpKernelContext* context,
- tensorflow::AsyncOpKernel::DoneCallback done) override;
+ AsyncOpKernel::DoneCallback done) override;
~TRTEngineOp();
private:
// Execute calibration
- void ExecuteCalibration(tensorflow::OpKernelContext* ctx,
+ void ExecuteCalibration(OpKernelContext* ctx,
AsyncHelper* helper);
// Construct a function handle for executing native funcdef graph
- tensorflow::Status ConstructFunctionHandle(tensorflow::OpKernelContext* ctx);
+ Status ConstructFunctionHandle(OpKernelContext* ctx);
// Execute replaced native segment as function Op.
- void ExecuteNativeSegment(tensorflow::OpKernelContext* ctx,
+ void ExecuteNativeSegment(OpKernelContext* ctx,
AsyncHelper* helper);
// Allocate necessary resources for calibration
- tensorflow::Status AllocateCalibrationResources(
- tensorflow::OpKernelContext* ctx,
- tensorflow::tensorrt::TRTCalibrationResource** cr);
+ Status AllocateCalibrationResources(
+ OpKernelContext* ctx, TRTCalibrationResource** cr);
// TODO(samikama): context should go to a resource manager!
typedef std::pair<TrtUniquePtrType<nvinfer1::ICudaEngine>,
@@ -92,13 +92,13 @@ class TRTEngineOp : public AsyncOpKernel {
string funcdef_name_;
// GraphDef representation of the segment.
- tensorflow::GraphDef segment_graph_;
+ GraphDef segment_graph_;
// Lookup table for temporary staging areas of input tensors for calibration.
std::unordered_map<string, std::pair<void*, size_t>> device_buffers_;
// Temporary staging areas for calibration inputs.
- std::vector<tensorflow::PersistentTensor> dev_tensors_;
+ std::vector<PersistentTensor> dev_tensors_;
// Engine Precision mode.
int precision_mode_;
@@ -120,9 +120,11 @@ class TRTEngineOp : public AsyncOpKernel {
// Maximum number of cached engines
int max_cached_engines_;
- tensorflow::int64 workspace_size_;
- tensorflow::mutex engine_mutex_;
- tensorflow::FunctionLibraryRuntime::Handle native_func_;
+ int64 workspace_size_;
+ mutex engine_mutex_;
+ FunctionLibraryRuntime::Handle native_func_;
+
+ // The finalized calibrator for inference.
std::unique_ptr<TRTInt8Calibrator> calibrator_;
};
diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc
index 9c1c306947..59ae860bc0 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc
+++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc
@@ -51,8 +51,8 @@ TRTInt8Calibrator::TRTInt8Calibrator(const string& calib_data)
bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data,
const cudaStream_t stream) {
tensorflow::mutex_lock lock(cond_mtx_);
- while ((calib_running_ || batch_is_set_) &&
- !done_) { // wait while calibration is running
+ // wait while calibration is running.
+ while ((calib_running_ || batch_is_set_) && !done_) {
cond_.wait(lock);
}
if (done_) return false;
@@ -66,8 +66,6 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data,
}
const auto& d = devptr->second;
- // TODO(aaroey): we should not use sync copy on default stream. Make sure
- // stream->ThenMemcpy() is used in future PRs.
// TODO(sami,aaroey): Need to figure out a way to ensure synchronization
// between stream, perhaps using a tensor?
auto status = cudaMemcpyAsync(d.first, it.second, d.second,
@@ -91,12 +89,11 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names,
tensorflow::mutex_lock lock(cond_mtx_);
calib_running_ = false;
cond_.notify_all();
- while ((!batch_is_set_ && !done_)) { // wait until new batch arrives
+ // wait until new batch arrives
+ while ((!batch_is_set_ && !done_)) {
cond_.wait(lock);
}
- if (done_) {
- return false;
- }
+ if (done_) return false;
for (int i = 0; i < num_bindings; i++) {
auto it = dev_buffers_.find(names[i]);
diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h
index 43734bbdd8..76863503bd 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_resources.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h
@@ -38,11 +38,6 @@ namespace tensorrt {
class TRTCalibrationResource : public tensorflow::ResourceBase {
public:
- TRTCalibrationResource()
- : calibrator_(nullptr),
- logger_(nullptr),
- thr_(nullptr) {}
-
~TRTCalibrationResource() {
VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString();
builder_.reset();
@@ -50,9 +45,6 @@ class TRTCalibrationResource : public tensorflow::ResourceBase {
// We need to manually destroy the builder and engine before the allocator
// is destroyed.
allocator_.reset();
- delete thr_;
- delete logger_;
- delete calibrator_;
}
string DebugString() override {
@@ -60,22 +52,22 @@ class TRTCalibrationResource : public tensorflow::ResourceBase {
using std::hex;
using std::dec;
using std::endl;
- oss << " Calibrator = " << hex << calibrator_ << dec << endl
- << " Builder = " << hex << builder_.get() << dec << endl
- << " Engine = " << hex << engine_.get() << dec << endl
- << " Logger = " << hex << logger_ << dec << endl
- << " Allocator = " << hex << allocator_.get() << dec << endl
- << " Thread = " << hex << thr_ << dec << endl;
+ oss << " Calibrator = " << hex << calibrator_.get() << dec << endl
+ << " Builder = " << hex << builder_.get() << dec << endl
+ << " Engine = " << hex << engine_.get() << dec << endl
+ << " Logger = " << hex << &logger_ << dec << endl
+ << " Allocator = " << hex << allocator_.get() << dec << endl
+ << " Thread = " << hex << thr_.get() << dec << endl;
return oss.str();
}
- TRTInt8Calibrator* calibrator_;
+ std::unique_ptr<TRTInt8Calibrator> calibrator_;
TrtUniquePtrType<nvinfer1::IBuilder> builder_;
TrtUniquePtrType<nvinfer1::ICudaEngine> engine_;
std::unique_ptr<nvinfer1::IGpuAllocator> allocator_;
- tensorflow::tensorrt::Logger* logger_;
+ tensorflow::tensorrt::Logger logger_;
// TODO(sami): Use threadpool threads!
- std::thread* thr_;
+ std::unique_ptr<std::thread> thr_;
};
class TRTWeightStore {