author    | 2018-08-30 16:03:10 -0700
committer | 2018-08-30 16:07:27 -0700
commit    | 6f879f891abe2e267c5cf512d034d7c3641cfdb0 (patch)
tree      | 33dfda2aa13bdec06d3aa330dd5816441d449fa7 /tensorflow/compiler/xla/service
parent    | 5d5591fbd4624ff7e50f305464667315f2d41ebb (diff)
[XLA] Rename all (Mutable)ArraySlice to absl::Span.
PiperOrigin-RevId: 210998142
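A minimal sketch of the headline rename, for readers skimming the diff below: a read-only `tensorflow::gtl::ArraySlice<T>` becomes `absl::Span<const T>`, the same non-owning view under a new spelling. The `SumValues` helper is hypothetical; only the signature change mirrors this commit.

```cpp
#include <cstdint>
#include "absl/types/span.h"

// Before: int64 SumValues(tensorflow::gtl::ArraySlice<int64> values);
// After: the same read-only, non-owning view, spelled as absl::Span.
int64_t SumValues(absl::Span<const int64_t> values) {
  int64_t total = 0;
  for (int64_t v : values) total += v;
  return total;
}
```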
Diffstat (limited to 'tensorflow/compiler/xla/service')
133 files changed, 836 insertions, 1044 deletions
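Two corners of the mapping recur in the files below and are easy to misread. The mutable variant, `tensorflow::gtl::MutableArraySlice<T>`, becomes plain `absl::Span<T>` (see `CreateResultShapedBuffer` in cpu_executable.cc). And for pointer elements, the const lands on the elements themselves: `ArraySlice<HloInstruction*>` becomes `absl::Span<HloInstruction* const>`, a view whose slots cannot be reseated even though the pointees stay mutable. A sketch under those two rules; `ZeroAll` and `CountNull` are hypothetical helpers:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>
#include "absl/types/span.h"

// MutableArraySlice<int64> -> absl::Span<int64>: elements stay writable.
void ZeroAll(absl::Span<int64_t> values) {
  for (int64_t& v : values) v = 0;
}

struct HloInstruction;  // opaque stand-in for the XLA type

// ArraySlice<HloInstruction*> -> absl::Span<HloInstruction* const>:
// the view's slots are const (no reseating), the pointees are not.
std::size_t CountNull(absl::Span<HloInstruction* const> operands) {
  std::size_t n = 0;
  for (HloInstruction* op : operands) {
    if (op == nullptr) ++n;
    // operands[0] = nullptr;  // would not compile: element type is const
  }
  return n;
}

int main() {
  std::vector<int64_t> v = {1, 2, 3};
  ZeroAll(absl::MakeSpan(v));  // MakeSpan yields the mutable view
  std::vector<HloInstruction*> ops(4, nullptr);
  return CountNull(ops) == 4 ? 0 : 1;  // a vector converts implicitly
}
```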
diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 196865f333..a7a0044308 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -449,8 +449,7 @@ Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) { Status AlgebraicSimplifierVisitor::HandleConcatenate( HloInstruction* concatenate) { - tensorflow::gtl::ArraySlice<HloInstruction*> operands( - concatenate->operands()); + absl::Span<HloInstruction* const> operands(concatenate->operands()); if (operands.size() == 1) { // Unary concatenates are useless. ReplaceInstructionIfSameShape(concatenate, operands[0]); @@ -588,7 +587,7 @@ Status AlgebraicSimplifierVisitor::HandleSubtract(HloInstruction* sub) { namespace { template <typename T> Status InvertConstant(const HloInstruction& constant, Literal* result) { - return result->Populate<T>([&](tensorflow::gtl::ArraySlice<int64> indices) { + return result->Populate<T>([&](absl::Span<const int64> indices) { return T{1.0} / constant.literal().Get<T>(indices); }); } @@ -1249,8 +1248,7 @@ namespace { // // Precondition: input_dim_indices is sorted. absl::optional<std::vector<int64>> ReshapeLeavesDimensionsUnmodified( - const HloInstruction* hlo, - tensorflow::gtl::ArraySlice<int64> input_dim_indices) { + const HloInstruction* hlo, absl::Span<const int64> input_dim_indices) { CHECK_EQ(HloOpcode::kReshape, hlo->opcode()); CHECK(std::is_sorted(input_dim_indices.begin(), input_dim_indices.end())); @@ -1853,7 +1851,7 @@ Status AlgebraicSimplifierVisitor::HandleReduce(HloInstruction* reduce) { auto arg = reduce->mutable_operand(0); auto init_value = reduce->mutable_operand(1); - tensorflow::gtl::ArraySlice<int64> dimensions(reduce->dimensions()); + absl::Span<const int64> dimensions(reduce->dimensions()); HloComputation* function = reduce->to_apply(); if (ShapeUtil::IsZeroElementArray(arg->shape()) || ShapeUtil::IsZeroElementArray(reduce->shape())) { diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index cbce98ef13..182c581ad8 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2226,7 +2226,7 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) { auto out_dims = in_dims; out_dims[in_channel_idx] = options.f_output_channels; - auto make_shape = [](tensorflow::gtl::ArraySlice<int64> dims, + auto make_shape = [](absl::Span<const int64> dims, bool minor_to_major_layout) { if (minor_to_major_layout) { return ShapeUtil::MakeShapeWithLayout(F32, dims, {0, 1, 2, 3}); @@ -2838,8 +2838,8 @@ TEST_P(PadReduceWindowEffectiveBroadcastTest, DoIt) { // a and b are parallel bounds we can either turn into a B F S0 S1 or // `B S0 S1 F` kind of pattern. 
- auto decorate_spatials = [&param](tensorflow::gtl::ArraySlice<int64> spatials, - int64 a, int64 b) { + auto decorate_spatials = [&param](absl::Span<const int64> spatials, int64 a, + int64 b) { std::vector<int64> result; if (param.prepend_a) { result.push_back(a); diff --git a/tensorflow/compiler/xla/service/backend.cc b/tensorflow/compiler/xla/service/backend.cc index a6889cb171..5c180cbdd4 100644 --- a/tensorflow/compiler/xla/service/backend.cc +++ b/tensorflow/compiler/xla/service/backend.cc @@ -112,11 +112,11 @@ StatusOr<StreamPool::Ptr> Backend::BorrowStream(se::StreamExecutor* executor) { return stream_pools_.at(executor).BorrowStream(executor); } -Backend::Backend( - se::Platform* platform, Compiler* compiler, - tensorflow::gtl::ArraySlice<se::StreamExecutor*> stream_executors, - TransferManager* transfer_manager, ComputationPlacer* computation_placer, - int intra_op_parallelism_threads) +Backend::Backend(se::Platform* platform, Compiler* compiler, + absl::Span<se::StreamExecutor* const> stream_executors, + TransferManager* transfer_manager, + ComputationPlacer* computation_placer, + int intra_op_parallelism_threads) : platform_(platform), compiler_(compiler), transfer_manager_(transfer_manager), diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h index 4a6a78daf0..fdf8d9cab2 100644 --- a/tensorflow/compiler/xla/service/backend.h +++ b/tensorflow/compiler/xla/service/backend.h @@ -149,7 +149,7 @@ class Backend { private: struct EigenThreadPoolWrapper; Backend(se::Platform* platform, Compiler* compiler, - tensorflow::gtl::ArraySlice<se::StreamExecutor*> stream_executors, + absl::Span<se::StreamExecutor* const> stream_executors, TransferManager* transfer_manager, ComputationPlacer* computation_placer, int intra_op_parallelism_threads); diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index a6f77db3b0..b5cf245af6 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -69,8 +69,7 @@ class BFloat16NormalizationVisitor : public DfsHloVisitorWithDefault { // Inserts conversion HLOs to replace the called computations' BF16 // operands/outputs to F32. 
Status ConvertCalledComputations( - HloInstruction* hlo, - tensorflow::gtl::ArraySlice<HloComputation*> bf16_called_comps); + HloInstruction* hlo, absl::Span<HloComputation* const> bf16_called_comps); HloComputation* computation_; const BFloat16Support* bfloat16_support_; @@ -114,8 +113,7 @@ Status BFloat16NormalizationVisitor::InsertConvertBeforeOperand( } Status BFloat16NormalizationVisitor::ConvertCalledComputations( - HloInstruction* hlo, - tensorflow::gtl::ArraySlice<HloComputation*> bf16_called_comps) { + HloInstruction* hlo, absl::Span<HloComputation* const> bf16_called_comps) { std::map<HloComputation*, HloComputation*> cloned_computations; for (auto& comp : bf16_called_comps) { auto cloned = comp->parent()->AddEmbeddedComputation(comp->Clone()); diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 2fb401c428..545a6ecfb1 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -407,7 +407,7 @@ void BFloat16Propagation::AdjustCalledComputationParameters( HloInstruction* hlo) { auto adjust_computation = [this, hlo](HloComputation* computation, - tensorflow::gtl::ArraySlice<HloInstruction*> operands) { + absl::Span<HloInstruction* const> operands) { // Adjust parameters. CHECK_EQ(operands.size(), computation->num_parameters()); for (int64 i = 0; i < operands.size(); ++i) { diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index e9751cc269..8bd1533972 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -118,7 +118,7 @@ class BufferAssignmentTest : public HloVerifiedTestBase { std::unique_ptr<BufferAssignment> RunBufferAssignmentWithInstructionSequence( HloModule* module, - tensorflow::gtl::ArraySlice<const HloInstruction*> instruction_sequence, + absl::Span<const HloInstruction* const> instruction_sequence, int64 alignment = 1) { SequentialHloOrdering::HloModuleSequence module_sequence; module_sequence[module->entry_computation()] = diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 3079695e96..e5a6c28478 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -62,7 +62,7 @@ CompileOnlyService::CompileOnlyService(const ServiceOptions& options, StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>> CompileOnlyService::CompileAheadOfTime( - const tensorflow::gtl::ArraySlice<AotXlaComputationInstance> computations, + const absl::Span<const AotXlaComputationInstance> computations, const AotCompilationOptions& options, std::unique_ptr<AotCompilationMetadata>* metadata) { std::vector<std::unique_ptr<HloModule>> hlo_modules; diff --git a/tensorflow/compiler/xla/service/compile_only_service.h b/tensorflow/compiler/xla/service/compile_only_service.h index 1ac950bdd6..61136a3e11 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.h +++ b/tensorflow/compiler/xla/service/compile_only_service.h @@ -50,12 +50,12 @@ class CompileOnlyService : public Service { // |CompileOnlyClient::CompileAheadOfTime| for additional details. 
StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>> CompileAheadOfTime( - const tensorflow::gtl::ArraySlice<AotXlaComputationInstance> computations, + const absl::Span<const AotXlaComputationInstance> computations, const AotCompilationOptions& options); StatusOr<std::vector<std::unique_ptr<AotCompilationResult>>> CompileAheadOfTime( - const tensorflow::gtl::ArraySlice<AotXlaComputationInstance> computations, + const absl::Span<const AotXlaComputationInstance> computations, const AotCompilationOptions& options, std::unique_ptr<AotCompilationMetadata>* metadata); diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index 1b7a7b36ea..b65dfef9c9 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -479,7 +479,7 @@ class CopyRemover { // 'values' an entry is created in value_to_node which indicates the // respective ValueNode representing that value. void AddValueList( - tensorflow::gtl::ArraySlice<const HloValue*> values, + absl::Span<const HloValue* const> values, tensorflow::gtl::FlatMap<const HloValue*, ValueNode*>* value_to_node) { ValueNode* tail = nullptr; ValueNode* head = nullptr; diff --git a/tensorflow/compiler/xla/service/cpu/buffer_info_util.cc b/tensorflow/compiler/xla/service/cpu/buffer_info_util.cc index 408fe0f5bf..1942ea1a2a 100644 --- a/tensorflow/compiler/xla/service/cpu/buffer_info_util.cc +++ b/tensorflow/compiler/xla/service/cpu/buffer_info_util.cc @@ -40,7 +40,7 @@ std::vector<BufferInfo> CreateBufferInfosFromBufferAssignment( } std::vector<int32> CreateArgIndexTableFromBufferInfos( - tensorflow::gtl::ArraySlice<BufferInfo> buffer_infos) { + absl::Span<const BufferInfo> buffer_infos) { std::vector<int32> result; for (int64 i = 0; i < buffer_infos.size(); i++) { if (buffer_infos[i].is_entry_parameter()) { diff --git a/tensorflow/compiler/xla/service/cpu/buffer_info_util.h b/tensorflow/compiler/xla/service/cpu/buffer_info_util.h index 05de70c726..0c5a60f13f 100644 --- a/tensorflow/compiler/xla/service/cpu/buffer_info_util.h +++ b/tensorflow/compiler/xla/service/cpu/buffer_info_util.h @@ -34,7 +34,7 @@ CreateBufferInfosFromBufferAssignment( // If this function returns V then entry parameter i has buffer allocation index // V[i]. 
std::vector<int32> CreateArgIndexTableFromBufferInfos( - tensorflow::gtl::ArraySlice<::tensorflow::cpu_function_runtime::BufferInfo> + absl::Span<const ::tensorflow::cpu_function_runtime::BufferInfo> buffer_infos); } // namespace cpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc index bf2efc4d14..9b00f2eaa5 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.cc @@ -77,7 +77,7 @@ StatusOr<std::pair<std::vector<se::DeviceMemoryBase>, std::vector<OwningDeviceMemory>>> CpuExecutable::CreateTempArray( DeviceMemoryAllocator* memory_allocator, int device_ordinal, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) { + absl::Span<const ShapedBuffer* const> arguments) { std::vector<se::DeviceMemoryBase> unowning_buffers( assignment_->Allocations().size()); std::vector<OwningDeviceMemory> owning_buffers( @@ -136,7 +136,7 @@ CpuExecutable::CreateTempArray( Status CpuExecutable::ExecuteComputeFunction( const ExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers, + absl::Span<const se::DeviceMemoryBase> buffers, HloExecutionProfile* hlo_execution_profile) { // The calling convention for JITed functions is: // @@ -207,7 +207,7 @@ Status CpuExecutable::ExecuteComputeFunction( StatusOr<ScopedShapedBuffer> CpuExecutable::CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::MutableArraySlice<OwningDeviceMemory> buffers) { + absl::Span<OwningDeviceMemory> buffers) { se::Stream* stream = run_options->stream(); ScopedShapedBuffer result_buffer( /*on_host_shape=*/result_shape(), @@ -245,7 +245,7 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::CreateResultShapedBuffer( StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) { TF_ASSIGN_OR_RETURN( auto result, @@ -256,7 +256,7 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteOnStream( StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) { + absl::Span<const ShapedBuffer* const> arguments) { if (hlo_profiling_enabled()) { return Unimplemented( "Asynchronous execution on stream with hlo profiling is not yet " @@ -267,7 +267,7 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStream( StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) { if (GetRootPointsToSet().IsAmbiguous()) { return Unimplemented("Points-to set of root instruction is ambiguous"); @@ -299,7 +299,7 @@ StatusOr<ScopedShapedBuffer> CpuExecutable::ExecuteAsyncOnStreamImpl( // // We also need to change the types of some of the variables we capture: // run_options needs to change from a pointer to a value type, and arguments - // needs to change from an ArraySlice into a vector. We use a struct instead + // needs to change from a Span into a vector. We use a struct instead // of a lambda to make this explicit. 
struct AsyncRunTask { CpuExecutable* executable; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_executable.h b/tensorflow/compiler/xla/service/cpu/cpu_executable.h index 96e53de57e..236de8f14f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_executable.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_executable.h @@ -57,12 +57,12 @@ class CpuExecutable : public Executable { StatusOr<ScopedShapedBuffer> ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) override; StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override; + absl::Span<const ShapedBuffer* const> arguments) override; // This should be called after set_ir_module_string. const string& ir_module_string() const { return ir_module_string_; } @@ -92,7 +92,7 @@ class CpuExecutable : public Executable { // exists) must out-live the task. StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStreamImpl( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile); // Creates an array suitable for passing as the "temps" argument to the JIT @@ -112,21 +112,20 @@ class CpuExecutable : public Executable { StatusOr<std::pair<std::vector<se::DeviceMemoryBase>, std::vector<OwningDeviceMemory>>> CreateTempArray(DeviceMemoryAllocator* memory_allocator, int device_ordinal, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments); + absl::Span<const ShapedBuffer* const> arguments); // Calls the generated function performing the computation with the given // arguments using the supplied buffers. - Status ExecuteComputeFunction( - const ExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> buffers, - HloExecutionProfile* hlo_execution_profile); + Status ExecuteComputeFunction(const ExecutableRunOptions* run_options, + absl::Span<const se::DeviceMemoryBase> buffers, + HloExecutionProfile* hlo_execution_profile); // Creates a ScopedShapedBuffer for holding the result of the computation, // moving buffers out of allocated_buffers and into the result as appropriate. // The addresses are set according to buffer assignment. StatusOr<ScopedShapedBuffer> CreateResultShapedBuffer( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::MutableArraySlice<OwningDeviceMemory> buffers); + absl::Span<OwningDeviceMemory> buffers); // Returns the points-to set of the root instruction of the entry // computation. Uses points-to analysis from buffer assignment. diff --git a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc index 0df2abf001..5519a43b2f 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.cc @@ -179,7 +179,7 @@ Status CpuTransferManager::TransferLiteralFromOutfeed( int64 size = GetByteSizeRequirement(literal_shape); // Note: OSS build didn't like implicit conversion from // literal_shape.dimensions() to the array slice on 2017-07-10. 
- tensorflow::gtl::ArraySlice<int64> dimensions( + absl::Span<const int64> dimensions( tensorflow::bit_cast<const int64*>(literal_shape.dimensions().data()), literal_shape.dimensions().size()); TF_ASSIGN_OR_RETURN( @@ -225,7 +225,7 @@ Status CpuTransferManager::TransferLiteralFromOutfeed( StatusOr<Shape> CpuTransferManager::TransferTupleBuffersFromOutfeed( se::StreamExecutor* executor, - tensorflow::gtl::ArraySlice<std::pair<void*, int64>> buffer_data) { + absl::Span<const std::pair<void*, int64>> buffer_data) { return TransferBuffersFromOutfeedInternal(executor, buffer_data, /*is_tuple=*/true); } @@ -238,8 +238,7 @@ StatusOr<Shape> CpuTransferManager::TransferArrayBufferFromOutfeed( StatusOr<Shape> CpuTransferManager::TransferBuffersFromOutfeedInternal( se::StreamExecutor* executor, - tensorflow::gtl::ArraySlice<std::pair<void*, int64>> buffer_data, - bool is_tuple) { + absl::Span<const std::pair<void*, int64>> buffer_data, bool is_tuple) { std::vector<std::unique_ptr<CpuOutfeedBuffer>> buffers; for (auto b : buffer_data) { int64 size = b.second; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h index 7b938e9fd7..6927edff86 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_transfer_manager.h @@ -56,7 +56,7 @@ class CpuTransferManager : public GenericTransferManager { // Helper that transfers a tuple of element buffers from the device's outfeed. StatusOr<Shape> TransferTupleBuffersFromOutfeed( se::StreamExecutor* executor, - tensorflow::gtl::ArraySlice<std::pair<void*, int64>> buffer_data); + absl::Span<const std::pair<void*, int64>> buffer_data); // Helper that transfers an array buffer from the device's outfeed. StatusOr<Shape> TransferArrayBufferFromOutfeed(se::StreamExecutor* executor, @@ -68,8 +68,7 @@ class CpuTransferManager : public GenericTransferManager { // for the given buffers. StatusOr<Shape> TransferBuffersFromOutfeedInternal( se::StreamExecutor* executor, - tensorflow::gtl::ArraySlice<std::pair<void*, int64>> buffer_data, - bool is_tuple); + absl::Span<const std::pair<void*, int64>> buffer_data, bool is_tuple); TF_DISALLOW_COPY_AND_ASSIGN(CpuTransferManager); }; diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index dd060f54a2..99fa707c95 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -80,7 +80,7 @@ class MemoryTile { // `minor_dim_offset`}. // // Note: `major_dim_offset` is a parameter to the constructor. 
- void StoreTile(tensorflow::gtl::ArraySlice<llvm::Value*> tile, + void StoreTile(absl::Span<llvm::Value* const> tile, llvm::Value* minor_dim_offset) const { CHECK_EQ(tile.size(), pointers_.size()); for (int64 i = 0; i < pointers_.size(); i++) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 1c828cc02c..7839d97317 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -506,8 +506,7 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { llvm::Value* IrEmitter::EmitElementalMap( const HloMapInstruction& map_instr, - tensorflow::gtl::ArraySlice<llvm::Value*> elemental_operands, - absl::string_view name) { + absl::Span<llvm::Value* const> elemental_operands, absl::string_view name) { return EmitThreadLocalCall(*map_instr.to_apply(), elemental_operands, name); } @@ -1455,7 +1454,7 @@ IrEmitter::EmitInnerLoopForVectorizedReduction( const ReductionGenerator& reduction_generator, const llvm_ir::IrArray::Index& output_index, const ShardedVectorType& accumulator_type, HloInstruction* init_value, - HloInstruction* arg, gtl::ArraySlice<int64> dimensions, + HloInstruction* arg, absl::Span<const int64> dimensions, unsigned element_alignment) { ShardedVector accumulator; accumulator.reserve(accumulator_type.size()); @@ -1551,7 +1550,7 @@ void IrEmitter::EmitShardedVectorStore( StatusOr<bool> IrEmitter::EmitVectorizedReduce( HloInstruction* reduce, HloInstruction* arg, HloInstruction* init_value, - gtl::ArraySlice<int64> dimensions, HloComputation* function, + absl::Span<const int64> dimensions, HloComputation* function, string* failure_reason) { if (!ReductionPreservesLayout(*reduce)) { return false; @@ -1701,7 +1700,7 @@ StatusOr<llvm::Value*> IrEmitter::EmitTargetElementLoopBodyForReduce( HloReduceInstruction* reduce, const llvm_ir::IrArray::Index& index) { const HloInstruction* arg = reduce->mutable_operand(0); const HloInstruction* init_value = reduce->mutable_operand(1); - gtl::ArraySlice<int64> dimensions(reduce->dimensions()); + absl::Span<const int64> dimensions(reduce->dimensions()); // Initialize an accumulator with init_value. 
PrimitiveType accumulator_type = reduce->shape().element_type(); @@ -1758,7 +1757,7 @@ Status IrEmitter::HandleReduce(HloInstruction* reduce) { } auto arg = reduce->mutable_operand(0); auto init_value = reduce->mutable_operand(1); - gtl::ArraySlice<int64> dimensions(reduce->dimensions()); + absl::Span<const int64> dimensions(reduce->dimensions()); HloComputation* function = reduce->to_apply(); if (!options::VectorizedReduceDisabled(hlo_module_config_)) { string vectorization_failure_reason; @@ -2113,7 +2112,7 @@ Status IrEmitter::HandleCall(HloInstruction* call) { } Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) { - gtl::ArraySlice<HloInstruction*> operands(custom_call->operands()); + absl::Span<HloInstruction* const> operands(custom_call->operands()); absl::string_view custom_call_target(custom_call->custom_call_target()); llvm::Type* i8_ptr_type = b_.getInt8PtrTy(); llvm::AllocaInst* operands_alloca = @@ -2233,7 +2232,7 @@ Status IrEmitter::HandleWhile(HloInstruction* xla_while) { } StatusOr<bool> IrEmitter::EmitFastConcatenate( - HloInstruction* concatenate, gtl::ArraySlice<HloInstruction*> operands, + HloInstruction* concatenate, absl::Span<HloInstruction* const> operands, string* failure_reason) { if (ShouldEmitParallelLoopFor(*concatenate)) { *failure_reason = @@ -2369,7 +2368,7 @@ void IrEmitter::EmitTransferElements(llvm::Value* target, llvm::Value* source, } Status IrEmitter::HandleConcatenate(HloInstruction* concatenate) { - gtl::ArraySlice<HloInstruction*> operands(concatenate->operands()); + absl::Span<HloInstruction* const> operands(concatenate->operands()); string failure_reason; TF_ASSIGN_OR_RETURN( bool successful, @@ -2800,8 +2799,8 @@ Status IrEmitter::EmitMemcpy(const HloInstruction& source, Status IrEmitter::ElementTypesSameAndSupported( const HloInstruction& instruction, - gtl::ArraySlice<const HloInstruction*> operands, - gtl::ArraySlice<PrimitiveType> supported_types) { + absl::Span<const HloInstruction* const> operands, + absl::Span<const PrimitiveType> supported_types) { for (auto operand : operands) { TF_RET_CHECK( ShapeUtil::SameElementType(operands[0]->shape(), operand->shape())); @@ -2831,8 +2830,7 @@ Status IrEmitter::DefaultAction(HloInstruction* hlo) { } llvm::Value* IrEmitter::EmitThreadLocalCall( - const HloComputation& callee, - tensorflow::gtl::ArraySlice<llvm::Value*> parameters, + const HloComputation& callee, absl::Span<llvm::Value* const> parameters, absl::string_view name) { const Shape& return_shape = callee.root_instruction()->shape(); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.h b/tensorflow/compiler/xla/service/cpu/ir_emitter.h index f98891246b..015724b65d 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.h @@ -111,7 +111,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, // Emit code to map one element according to `map_instr`. llvm::Value* EmitElementalMap( const HloMapInstruction& map_instr, - tensorflow::gtl::ArraySlice<llvm::Value*> elemental_operands, + absl::Span<llvm::Value* const> elemental_operands, absl::string_view name); protected: @@ -252,10 +252,9 @@ class IrEmitter : public DfsHloVisitorWithDefault, // // `parameters` holds the *scalar values* that need to be passed to the // callee. The return value is the scalar returned by the callee. 
- llvm::Value* EmitThreadLocalCall( - const HloComputation& callee, - tensorflow::gtl::ArraySlice<llvm::Value*> parameters, - absl::string_view name); + llvm::Value* EmitThreadLocalCall(const HloComputation& callee, + absl::Span<llvm::Value* const> parameters, + absl::string_view name); // Emits a call to a "global" function (e.g. to the computation nested within // a kWhile or a kCall). Buffer assignment unabiguously assignes buffers to @@ -271,8 +270,8 @@ class IrEmitter : public DfsHloVisitorWithDefault, // match and are of one of the given supported types. Status ElementTypesSameAndSupported( const HloInstruction& instruction, - tensorflow::gtl::ArraySlice<const HloInstruction*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> supported_types); + absl::Span<const HloInstruction* const> operands, + absl::Span<const PrimitiveType> supported_types); // Emit IR to perform a computation for every element in the given target op. // This produces a series of nested loops (one for each dimension of the op's @@ -319,10 +318,12 @@ class IrEmitter : public DfsHloVisitorWithDefault, // concepts that generalize over other vectorizable operations. We should // consider pulling out these abstractions into a VectorizingIrEmitter or // something similar. - StatusOr<bool> EmitVectorizedReduce( - HloInstruction* reduce, HloInstruction* arg, HloInstruction* init_value, - tensorflow::gtl::ArraySlice<int64> dimensions, HloComputation* function, - string* failure_reason); + StatusOr<bool> EmitVectorizedReduce(HloInstruction* reduce, + HloInstruction* arg, + HloInstruction* init_value, + absl::Span<const int64> dimensions, + HloComputation* function, + string* failure_reason); // We'd like to keep one or two one cache-line's worth of data in registers // without generating IR with illegal (e.g. excessively large or @@ -372,16 +373,15 @@ class IrEmitter : public DfsHloVisitorWithDefault, const ReductionGenerator& reduction_generator, const llvm_ir::IrArray::Index& output_index, const ShardedVectorType& accumulator_type, HloInstruction* init_value, - HloInstruction* arg, tensorflow::gtl::ArraySlice<int64> dimensions, + HloInstruction* arg, absl::Span<const int64> dimensions, unsigned element_alignment); // Tries to emit a fast concatenate operation using memcpy. Returns true if // successful, and false on failure. On failure, sets "failure_reason" to a // string describing why it could not emit a fast concatenate. - StatusOr<bool> EmitFastConcatenate( - HloInstruction* concatenate, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, - string* failure_reason); + StatusOr<bool> EmitFastConcatenate(HloInstruction* concatenate, + absl::Span<HloInstruction* const> operands, + string* failure_reason); // Emits LLVM IR to transfer "element_count" elements of type "primitive_type" // from the address "source" to the address "target". diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.cc b/tensorflow/compiler/xla/service/cpu/ir_function.cc index 784045313d..3ecf4b69b7 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_function.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_function.cc @@ -200,10 +200,10 @@ llvm::Value* IrFunction::GetDynamicLoopBound(const int64 offset) { // Returns an array of compute function call arguments (including parameter // address buffer). 
std::vector<llvm::Value*> GetArrayFunctionCallArguments( - tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses, - llvm::IRBuilder<>* b, absl::string_view name, - llvm::Value* return_value_buffer, llvm::Value* exec_run_options_arg, - llvm::Value* temp_buffers_arg, llvm::Value* profile_counters_arg) { + absl::Span<llvm::Value* const> parameter_addresses, llvm::IRBuilder<>* b, + absl::string_view name, llvm::Value* return_value_buffer, + llvm::Value* exec_run_options_arg, llvm::Value* temp_buffers_arg, + llvm::Value* profile_counters_arg) { llvm::Value* parameter_addresses_buffer; if (parameter_addresses.empty()) { diff --git a/tensorflow/compiler/xla/service/cpu/ir_function.h b/tensorflow/compiler/xla/service/cpu/ir_function.h index ee7595f6e9..076ca219bc 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_function.h +++ b/tensorflow/compiler/xla/service/cpu/ir_function.h @@ -115,10 +115,10 @@ class IrFunction { // Returns an array of compute function call argument ir values. std::vector<llvm::Value*> GetArrayFunctionCallArguments( - tensorflow::gtl::ArraySlice<llvm::Value*> parameter_addresses, - llvm::IRBuilder<>* b, absl::string_view name, - llvm::Value* return_value_buffer, llvm::Value* exec_run_options_arg, - llvm::Value* temp_buffers_arg, llvm::Value* profile_counters_arg); + absl::Span<llvm::Value* const> parameter_addresses, llvm::IRBuilder<>* b, + absl::string_view name, llvm::Value* return_value_buffer, + llvm::Value* exec_run_options_arg, llvm::Value* temp_buffers_arg, + llvm::Value* profile_counters_arg); // Emits a call to a runtime fork/join function which dispatches parallel // calls to 'parallel_function' (and joins threads before returning). diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc index 962ea69c09..1bd4b59dd6 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.cc +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.cc @@ -428,7 +428,7 @@ std::vector<llvm::Value*> TileVariable::Get() const { return result; } -void TileVariable::Set(tensorflow::gtl::ArraySlice<llvm::Value*> value) { +void TileVariable::Set(absl::Span<llvm::Value* const> value) { CHECK_EQ(value.size(), storage_.size()); for (int64 i = 0, e = value.size(); i < e; i++) { storage_[i].Set(value[i]); diff --git a/tensorflow/compiler/xla/service/cpu/vector_support_library.h b/tensorflow/compiler/xla/service/cpu/vector_support_library.h index c728f6df0a..3dfe941a3a 100644 --- a/tensorflow/compiler/xla/service/cpu/vector_support_library.h +++ b/tensorflow/compiler/xla/service/cpu/vector_support_library.h @@ -324,7 +324,7 @@ class TileVariable { std::vector<llvm::Value*> initial_value); std::vector<llvm::Value*> Get() const; - void Set(tensorflow::gtl::ArraySlice<llvm::Value*> value); + void Set(absl::Span<llvm::Value* const> value); private: std::vector<VectorVariable> storage_; diff --git a/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc b/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc index 47543b2082..b9e47f5aad 100644 --- a/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc +++ b/tensorflow/compiler/xla/service/cpu/xfeed_manager.cc @@ -37,7 +37,7 @@ void XfeedQueueManager::Reset() { } void XfeedQueueManager::EnqueueBuffersAtomically( - tensorflow::gtl::ArraySlice<XfeedBuffer*> buffers) { + absl::Span<XfeedBuffer* const> buffers) { tensorflow::mutex_lock l(mu_); bool was_empty = enqueued_buffers_.empty(); for (XfeedBuffer* b : buffers) { diff --git 
a/tensorflow/compiler/xla/service/cpu/xfeed_manager.h b/tensorflow/compiler/xla/service/cpu/xfeed_manager.h index b4ace23260..fac1722b10 100644 --- a/tensorflow/compiler/xla/service/cpu/xfeed_manager.h +++ b/tensorflow/compiler/xla/service/cpu/xfeed_manager.h @@ -63,8 +63,7 @@ class XfeedQueueManager { // called when the buffer will no longer be accessed by the XfeedManager, // either as a result of a call to Reset or because the runtime has dequeued // and used the buffer. - void EnqueueBuffersAtomically( - tensorflow::gtl::ArraySlice<XfeedBuffer*> buffers); + void EnqueueBuffersAtomically(absl::Span<XfeedBuffer* const> buffers); // Blocks until the queue is non-empty, then returns the buffer at the head of // the queue. Sets the current buffer to be the returned buffer. It is an diff --git a/tensorflow/compiler/xla/service/device_memory_allocator.cc b/tensorflow/compiler/xla/service/device_memory_allocator.cc index 1d0297cfbf..edbcb25247 100644 --- a/tensorflow/compiler/xla/service/device_memory_allocator.cc +++ b/tensorflow/compiler/xla/service/device_memory_allocator.cc @@ -25,7 +25,7 @@ namespace xla { StreamExecutorMemoryAllocator::StreamExecutorMemoryAllocator( const se::Platform* platform, - tensorflow::gtl::ArraySlice<se::StreamExecutor*> stream_executors) + absl::Span<se::StreamExecutor* const> stream_executors) : DeviceMemoryAllocator(platform), stream_executors_(stream_executors.begin(), stream_executors.end()) {} diff --git a/tensorflow/compiler/xla/service/device_memory_allocator.h b/tensorflow/compiler/xla/service/device_memory_allocator.h index d87b86caf0..28a3539373 100644 --- a/tensorflow/compiler/xla/service/device_memory_allocator.h +++ b/tensorflow/compiler/xla/service/device_memory_allocator.h @@ -80,7 +80,7 @@ class StreamExecutorMemoryAllocator : public DeviceMemoryAllocator { public: StreamExecutorMemoryAllocator( const se::Platform* platform, - tensorflow::gtl::ArraySlice<se::StreamExecutor*> stream_executors); + absl::Span<se::StreamExecutor* const> stream_executors); StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size, bool retry_on_failure) override; diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index def42f9c77..4bb1e071d8 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -856,7 +856,7 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type, auto getFloat = [&](const float f) { return llvm::ConstantFP::get(b_->getFloatTy(), f); }; - auto multiply_add = [&](tensorflow::gtl::ArraySlice<float> coefficients, + auto multiply_add = [&](absl::Span<const float> coefficients, llvm::Value* w) { llvm::Value* p = getFloat(coefficients.front()); coefficients.remove_prefix(1); @@ -893,7 +893,7 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type, SetToFirstInsertPoint(if_data.true_block, b_); { llvm::Value* lw = FSub(w, getFloat(2.5f)); - tensorflow::gtl::ArraySlice<float> lq{ + absl::Span<const float> lq{ 2.81022636e-08f, 3.43273939e-07f, -3.5233877e-06f, -4.39150654e-06f, 0.00021858087f, -0.00125372503f, -0.00417768164f, 0.246640727f, 1.50140941f}; @@ -908,7 +908,7 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitErfInv(PrimitiveType prim_type, module_, llvm::Intrinsic::sqrt, {b_->getFloatTy()}); llvm::Value* gw = FSub(Call(sqrtf_fn, w), getFloat(3.0f)); - tensorflow::gtl::ArraySlice<float> gq{ + absl::Span<const float> gq{ 
-0.000200214257f, 0.000100950558f, 0.00134934322f, -0.00367342844f, 0.00573950773f, -0.0076224613f, 0.00943887047f, 1.00167406f, 2.83297682f}; diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc index 5ab0756219..1b3be199f6 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter_test.cc @@ -28,8 +28,7 @@ using absl::nullopt; class ElementalIrEmitterExecutionTest : public HloTestBase { protected: - void RunTest(const string& hlo_text, - tensorflow::gtl::ArraySlice<Literal*> args) { + void RunTest(const string& hlo_text, absl::Span<Literal* const> args) { HloModuleConfig config; config.set_debug_options(GetDebugOptionsForTest()); TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module, diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index 78edf918a4..47c56e2f7f 100644 --- a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -26,13 +26,12 @@ limitations under the License. #include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/platform/env.h" -using tensorflow::gtl::ArraySlice; namespace xla { StatusOr<std::vector<ScopedShapedBuffer>> Executable::ExecuteOnStreams( - ArraySlice<const ServiceExecutableRunOptions> run_options, - ArraySlice<ArraySlice<const ShapedBuffer*>> arguments) { + absl::Span<const ServiceExecutableRunOptions> run_options, + absl::Span<const absl::Span<const ShapedBuffer* const>> arguments) { TF_RET_CHECK(run_options.size() == arguments.size()); std::vector<ScopedShapedBuffer> return_values; @@ -63,7 +62,7 @@ StatusOr<std::vector<ScopedShapedBuffer>> Executable::ExecuteOnStreams( StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper( const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, - ArraySlice<const ShapedBuffer*> arguments) { + absl::Span<const ShapedBuffer* const> arguments) { se::Stream* stream = run_options->stream(); std::unique_ptr<se::Timer> timer; if (profile != nullptr) { diff --git a/tensorflow/compiler/xla/service/executable.h b/tensorflow/compiler/xla/service/executable.h index 6e055edc03..4b8d955b28 100644 --- a/tensorflow/compiler/xla/service/executable.h +++ b/tensorflow/compiler/xla/service/executable.h @@ -81,14 +81,14 @@ class Executable { // Returns a shaped buffer containing the result of the computation. virtual StatusOr<ScopedShapedBuffer> ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) = 0; // Same as ExecuteOnStream(), but this call is non-blocking and returns as // soon as all of the operations are enqueued for launch on the stream. virtual StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) = 0; + absl::Span<const ShapedBuffer* const> arguments) = 0; // Starts the given program executing on the given stream/executor. // @@ -119,11 +119,8 @@ class Executable { // run_options[i]->stream() and the returned value is at index i of the // returned vector. 
virtual StatusOr<std::vector<ScopedShapedBuffer>> ExecuteOnStreams( - tensorflow::gtl::ArraySlice<const ServiceExecutableRunOptions> - run_options, - tensorflow::gtl::ArraySlice< - tensorflow::gtl::ArraySlice<const ShapedBuffer*>> - arguments); + absl::Span<const ServiceExecutableRunOptions> run_options, + absl::Span<const absl::Span<const ShapedBuffer* const>> arguments); // Populates `hlo_execution_profile` from `executor`. This is implicit in any // Execute* API call that takes a hlo_execution_profile argument, but must be @@ -139,7 +136,7 @@ class Executable { // given ExecutionProfile if non-null. StatusOr<ScopedShapedBuffer> ExecuteOnStreamWrapper( const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments); + absl::Span<const ShapedBuffer* const> arguments); // Returns the ExecutionProfile from executing on the device. This includes // the number of cycles taken for the computation or the compilation time. diff --git a/tensorflow/compiler/xla/service/gather_expander.cc b/tensorflow/compiler/xla/service/gather_expander.cc index 3f1a881372..cb86c98579 100644 --- a/tensorflow/compiler/xla/service/gather_expander.cc +++ b/tensorflow/compiler/xla/service/gather_expander.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/compiler/xla/util.h" namespace xla { -using tensorflow::gtl::ArraySlice; static StatusOr<HloInstruction*> TransposeIndexVectorDimToLast( HloInstruction* start_indices, int64 index_vector_dim) { @@ -225,7 +224,7 @@ static StatusOr<std::vector<HloInstruction*>> GatherLoopBody( static StatusOr<HloInstruction*> CreateGatherLoopAccumulatorInitValue( HloComputation* computation, PrimitiveType element_type, - ArraySlice<int64> slice_sizes, int64 gather_loop_trip_count, + absl::Span<const int64> slice_sizes, int64 gather_loop_trip_count, const GatherDimensionNumbers& dim_numbers) { std::vector<int64> accumulator_state_shape_dims; accumulator_state_shape_dims.reserve(1 + slice_sizes.size()); @@ -244,7 +243,7 @@ static StatusOr<HloInstruction*> CreateGatherLoopAccumulatorInitValue( // are the major dimensions and the offset dimensions are the minor dimensions. // Fix this up with a transpose. 
static StatusOr<HloInstruction*> PermuteBatchAndOffsetDims( - HloInstruction* accumulator, ArraySlice<int64> offset_dims, + HloInstruction* accumulator, absl::Span<const int64> offset_dims, int64 output_rank) { std::vector<int64> permutation; permutation.reserve(output_rank); diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index 0ce2db907b..4ed91ef187 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -42,8 +42,7 @@ se::Platform::Id GenericTransferManager::PlatformId() const { } Status GenericTransferManager::WriteSingleTupleIndexTable( - se::Stream* stream, - tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> elements, + se::Stream* stream, absl::Span<const se::DeviceMemoryBase> elements, const Shape& shape, se::DeviceMemoryBase* region) { TF_RET_CHECK(elements.size() == ShapeUtil::TupleElementCount(shape)); @@ -163,7 +162,7 @@ Status GenericTransferManager::TransferLiteralFromOutfeed( } Status GenericTransferManager::ResetDevices( - tensorflow::gtl::ArraySlice<se::StreamExecutor*> + absl::Span<se::StreamExecutor* const> /*executors*/) { return Unimplemented( "Device reset is not yet supported on this platform (b/30481585)"); diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h index 6c1a21587a..86c8b1c145 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.h +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h @@ -55,15 +55,13 @@ class GenericTransferManager : public TransferManager { const Shape& literal_shape, MutableBorrowingLiteral literal) override; - Status ResetDevices( - tensorflow::gtl::ArraySlice<se::StreamExecutor*> executors) override; + Status ResetDevices(absl::Span<se::StreamExecutor* const> executors) override; int64 GetByteSizeRequirement(const Shape& shape) const override; protected: Status WriteSingleTupleIndexTable( - se::Stream* stream, - tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> elements, + se::Stream* stream, absl::Span<const se::DeviceMemoryBase> elements, const Shape& shape, se::DeviceMemoryBase* region) override; private: diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc index dbdf8e7a0e..2af31a52f9 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc @@ -204,9 +204,8 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( if (allocator_ != nullptr) { allocator = allocator_; } else { - se_allocator.emplace( - stream_exec_->platform(), - tensorflow::gtl::ArraySlice<se::StreamExecutor*>({stream_exec_})); + se_allocator.emplace(stream_exec_->platform(), + absl::Span<se::StreamExecutor* const>({stream_exec_})); allocator = &*se_allocator; } diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index 57a3a43a6f..c1aaa4bf04 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -74,10 +74,8 @@ GpuElementalIrEmitter::GpuElementalIrEmitter( compute_nested_(std::move(compute_nested)) {} StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitLibdeviceMathCall( - const string& callee_name, - 
tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type) { + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type) { // The libdevice math functions differentiate between "double" and "float" by // appending an 'f' to the function's name. libdevice doesn't have f16 math // functions, so we convert the operands to f32 before calling the function @@ -119,10 +117,8 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitLibdeviceMathCall( } StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitLlvmIntrinsicMathCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type) { + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type) { // llvm intrinsics differentiate between half/float/double functions via // the suffixes ".f16", ".f32" and ".f64". string munged_callee = callee_name; @@ -144,10 +140,8 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitLlvmIntrinsicMathCall( } StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitMathCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type) { + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type) { // Binary math functions transform are of type [T] -> T. for (PrimitiveType input_type : input_types) { if (output_type != input_type) { @@ -290,11 +284,9 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitTanh(PrimitiveType prim_type, } llvm::Value* GpuElementalIrEmitter::EmitDeviceFunctionCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type, - tensorflow::gtl::ArraySlice<llvm::Attribute::AttrKind> attributes) { + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type, + absl::Span<const llvm::Attribute::AttrKind> attributes) { std::vector<llvm::Type*> ir_input_types; for (PrimitiveType input_type : input_types) { ir_input_types.push_back( diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h index 91942785d2..43f1f208bf 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.h @@ -38,9 +38,9 @@ namespace gpu { class GpuElementalIrEmitter : public ElementalIrEmitter { public: // A NestedComputer computes an element of the output of the given computation - // given an ArraySlice of its input elements. + // given a Span of its input elements. using NestedComputer = std::function<StatusOr<llvm::Value*>( - const HloComputation&, tensorflow::gtl::ArraySlice<llvm::Value*>)>; + const HloComputation&, absl::Span<llvm::Value* const>)>; GpuElementalIrEmitter(const HloModuleConfig& hlo_module_config, llvm::Module* module, llvm::IRBuilder<>* b, @@ -96,37 +96,29 @@ class GpuElementalIrEmitter : public ElementalIrEmitter { // Emits IR to call a device function named "callee_name" on the given // operand. 
Returns the IR value that represents the return value. llvm::Value* EmitDeviceFunctionCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_type, - PrimitiveType output_type, - tensorflow::gtl::ArraySlice<llvm::Attribute::AttrKind> attributes); + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_type, PrimitiveType output_type, + absl::Span<const llvm::Attribute::AttrKind> attributes); // Emits IR to call an LLVM intrinsic of type [T] -> T. Adjusts // callee_name according to T. Returns the IR value that represents the // return value of the function. StatusOr<llvm::Value*> EmitLlvmIntrinsicMathCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type); + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type); // Emits IR to call a libdevice function of type [T] -> T. Adjusts // callee_name according to T. Returns the IR value that represents the // return value of the function. StatusOr<llvm::Value*> EmitLibdeviceMathCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type); + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type); // Emits IR to call a function of type [T] -> T. Does not munge callee_name. // Returns the IR value that represents the return value of the function. StatusOr<llvm::Value*> EmitMathCall( - const string& callee_name, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<PrimitiveType> input_types, - PrimitiveType output_type); + const string& callee_name, absl::Span<llvm::Value* const> operands, + absl::Span<const PrimitiveType> input_types, PrimitiveType output_type); const HloModuleConfig& hlo_module_config_; NestedComputer compute_nested_; diff --git a/tensorflow/compiler/xla/service/gpu/fft_thunk.cc b/tensorflow/compiler/xla/service/gpu/fft_thunk.cc index 11549cdac5..ca4a605af5 100644 --- a/tensorflow/compiler/xla/service/gpu/fft_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/fft_thunk.cc @@ -92,8 +92,7 @@ string FftTypeToString(se::fft::Type type) { } // namespace -FftThunk::FftThunk(FftType fft_type, - tensorflow::gtl::ArraySlice<int64> fft_length, +FftThunk::FftThunk(FftType fft_type, absl::Span<const int64> fft_length, const BufferAllocation::Slice& input_buffer, const BufferAllocation::Slice& output_buffer, const Shape& input_shape, const Shape& output_shape, diff --git a/tensorflow/compiler/xla/service/gpu/fft_thunk.h b/tensorflow/compiler/xla/service/gpu/fft_thunk.h index 4adec7ee54..2be50e08bd 100644 --- a/tensorflow/compiler/xla/service/gpu/fft_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/fft_thunk.h @@ -62,7 +62,7 @@ class FftThunk : public Thunk { public: // Constructs a thunk for launching an FFT on a stream. // Semantics of null hlo_instruction argument are as in Thunk. 
- FftThunk(FftType fft_type, tensorflow::gtl::ArraySlice<int64> fft_length, + FftThunk(FftType fft_type, absl::Span<const int64> fft_length, const BufferAllocation::Slice& input_buffer, const BufferAllocation::Slice& output_buffer, const Shape& input_shape, const Shape& output_shape, diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 71a02e70df..31a9f9b1be 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -234,7 +234,7 @@ GpuExecutable::ResolveConstantGlobals(se::StreamExecutor* executor) { StatusOr<ScopedShapedBuffer> GpuExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) { DeviceMemoryAllocator* memory_allocator = run_options->allocator(); @@ -325,7 +325,7 @@ StatusOr<ScopedShapedBuffer> GpuExecutable::ExecuteOnStream( StatusOr<ScopedShapedBuffer> GpuExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) { + absl::Span<const ShapedBuffer* const> arguments) { // TODO(b/30671675): Implement asynchronous execution mode. return Unimplemented( "Asynchronous execution on stream is not yet supported on GPU."); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.h b/tensorflow/compiler/xla/service/gpu/gpu_executable.h index 627a05e240..b3765adf5e 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.h @@ -78,12 +78,12 @@ class GpuExecutable : public Executable { // match the compute capability passed to this object's constructor. StatusOr<ScopedShapedBuffer> ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) override; StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override; + absl::Span<const ShapedBuffer* const> arguments) override; private: // If `block_host_until_done` is false, execution will not block the host diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc index 0e205b9c02..51627402b4 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.cc @@ -35,8 +35,8 @@ using absl::StrAppend; using absl::StrCat; void HloToIrBindings::EmitBasePointersForHlos( - tensorflow::gtl::ArraySlice<const HloInstruction*> io_hlos, - tensorflow::gtl::ArraySlice<const HloInstruction*> non_io_hlos) { + absl::Span<const HloInstruction* const> io_hlos, + absl::Span<const HloInstruction* const> non_io_hlos) { // I/O HLOs are bound to the arguments of the current IR function. 
I.e., // // void IrFunction(io_0, io_1, ..., io_{m-1}, temp_buffer_base) { diff --git a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h index eee40b0e91..5b05ed812e 100644 --- a/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h +++ b/tensorflow/compiler/xla/service/gpu/hlo_to_ir_bindings.h @@ -45,8 +45,8 @@ class HloToIrBindings { alias_analysis_(module, *buffer_assignment_, &b_->getContext()) {} void EmitBasePointersForHlos( - tensorflow::gtl::ArraySlice<const HloInstruction*> io_hlos, - tensorflow::gtl::ArraySlice<const HloInstruction*> non_io_hlos); + absl::Span<const HloInstruction* const> io_hlos, + absl::Span<const HloInstruction* const> non_io_hlos); // Rebinds the given HLO to the LLVM IR value that represent its address. void BindHloToIrValue(const HloInstruction& hlo, llvm::Value* ir_value, diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index f544bcc919..9c90f4d46b 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -216,7 +216,7 @@ bool IsReductionToVector(const HloInstruction& reduce) { // "i32 vprintf(i8* fmt, arguments_type* arguments)" in the driver; see // http://docs.nvidia.com/cuda/ptx-writers-guide-to-interoperability/index.html#system-calls llvm::Value* EmitPrintf(absl::string_view fmt, - tensorflow::gtl::ArraySlice<llvm::Value*> arguments, + absl::Span<llvm::Value* const> arguments, llvm::IRBuilder<>* builder) { std::vector<llvm::Type*> argument_types; for (auto argument : arguments) { diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index a35e250101..d242897e16 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -127,7 +127,7 @@ bool IsReductionToVector(const HloInstruction& reduce); // Emits call to "vprintf" with given format and arguments. llvm::Value* EmitPrintf(absl::string_view fmt, - tensorflow::gtl::ArraySlice<llvm::Value*> arguments, + absl::Span<llvm::Value* const> arguments, llvm::IRBuilder<>* builder); // Emits code to shuffle data between threads of a warp. 
This has the same diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index bdf6aadde6..ffca5d6549 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -141,7 +141,7 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) { Status IrEmitter::EmitCallToNestedComputation( const HloComputation& nested_computation, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, llvm::Value* output) { + absl::Span<llvm::Value* const> operands, llvm::Value* output) { TF_RET_CHECK(nested_computation.num_parameters() > 0); llvm::Function*& emitted_function = computation_to_ir_function_[&nested_computation]; @@ -633,7 +633,7 @@ Status IrEmitter::HandleReduce(HloInstruction* reduce) { } auto arg = reduce->mutable_operand(0); auto init_value = reduce->operand(1); - tensorflow::gtl::ArraySlice<int64> dimensions(reduce->dimensions()); + absl::Span<const int64> dimensions(reduce->dimensions()); HloComputation* function = reduce->to_apply(); return EmitTargetElementLoop( *reduce, @@ -748,7 +748,7 @@ Status IrEmitter::HandleBatchNormGrad(HloInstruction*) { StatusOr<llvm::Value*> IrEmitter::ComputeNestedElement( const HloComputation& computation, - tensorflow::gtl::ArraySlice<llvm::Value*> parameter_elements) { + absl::Span<llvm::Value* const> parameter_elements) { llvm::Value* return_buffer = llvm_ir::EmitAllocaAtFunctionEntry( llvm_ir::PrimitiveTypeToIrType( computation.root_instruction()->shape().element_type(), module_), diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.h b/tensorflow/compiler/xla/service/gpu/ir_emitter.h index 3673b9f58d..bc2b04ace5 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.h @@ -143,9 +143,9 @@ class IrEmitter : public DfsHloVisitorWithDefault, // Emits a call in IR to the given nested computation with the given operands // and output. If no IR function has been previously emitted for the // computation, also emits such a function. - Status EmitCallToNestedComputation( - const HloComputation& nested_computation, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, llvm::Value* output); + Status EmitCallToNestedComputation(const HloComputation& nested_computation, + absl::Span<llvm::Value* const> operands, + llvm::Value* output); // Emits an atomic operation that implements `nested_computation` in the // sequentially consistent memory model. `output_address` and `source_address` @@ -199,7 +199,7 @@ class IrEmitter : public DfsHloVisitorWithDefault, StatusOr<llvm::Value*> ComputeNestedElement( const HloComputation& computation, - tensorflow::gtl::ArraySlice<llvm::Value*> parameter_elements); + absl::Span<llvm::Value* const> parameter_elements); // Emits an atomic operation that implements `nested_computation` in the // sequentially consistent memory model. `output_address` and `source_address` diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 860dd0b50f..3ab79197e2 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -94,7 +94,6 @@ using absl::optional; using absl::StrCat; using llvm_ir::IrArray; using llvm_ir::IrName; -using tensorflow::gtl::ArraySlice; // If a dimension is smaller than this, untiled transposition may be more // efficient.
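Throughout the patch the substitution follows one fixed pattern: a read-only view of values becomes absl::Span<const T>, a read-only view of pointers becomes absl::Span<T* const> (the pointer elements themselves are const), and MutableArraySlice<T> becomes absl::Span<T>. A minimal standalone sketch of the call-site ergonomics, using std::int64_t in place of XLA's int64 alias (Sum and Demo are illustrative names, not part of this patch):

    #include <cstdint>
    #include <vector>
    #include "absl/types/span.h"

    // A read-only, non-owning view over contiguous int64_t values.
    std::int64_t Sum(absl::Span<const std::int64_t> xs) {
      std::int64_t total = 0;
      for (std::int64_t x : xs) total += x;
      return total;
    }

    void Demo() {
      std::vector<std::int64_t> v = {1, 2, 3};
      Sum(v);          // implicit conversion from std::vector
      Sum({4, 5, 6});  // Span<const T> also binds to an initializer list
    }

Like ArraySlice, a Span does not own its storage, so it must not outlive the container or buffer it views.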
@@ -176,7 +175,7 @@ Status IrEmitterUnnested::Postprocess(HloInstruction* hlo) { llvm::Function* IrEmitterUnnested::BuildKernelPrototype( const HloInstruction& inst, - tensorflow::gtl::ArraySlice<const BufferAllocation*> args) { + absl::Span<const BufferAllocation* const> args) { // Compute the kernel name. The opcode string may contain "-" which cannot be // in a PTX function name, so sanitize the name before uniquifying it. string kernel_name = ir_emitter_context_->name_uniquer()->GetUniqueName( @@ -556,10 +555,10 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { } VLOG(3) << "Emitting fused reduction to vector: " << fusion->ToString(); std::vector<std::unique_ptr<Thunk>> thunks; - ArraySlice<HloInstruction*> output_instructions = + absl::Span<HloInstruction* const> output_instructions = root->opcode() == HloOpcode::kTuple ? root->operands() - : ArraySlice<HloInstruction*>(&root, 1); + : absl::Span<HloInstruction* const>(&root, 1); // For multi-output fusion emit an initializer for each tuple element. // Otherwise it's sufficient to just initialize the single output. @@ -718,8 +717,7 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) { Status IrEmitterUnnested::EmitExtraOutputsForReduce( const HloInstruction* reduce, const IrArray::Index& index, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens) { for (int i = 0; i != extra_output_gens.size(); ++i) { const HloInstruction* output = reduce->parent()->FusionInstruction(); @@ -736,12 +734,11 @@ Status IrEmitterUnnested::EmitExtraOutputsForReduce( Status IrEmitterUnnested::EmitReductionToScalar( HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens) { // Number of elements processed by a single thread. constexpr int64 kTileSize = 16; @@ -951,12 +948,11 @@ Status IrEmitterUnnested::EmitReductionToScalar( Status IrEmitterUnnested::EmitColumnReduction( int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens) { // Divide the input matrix into tiles of size KxL. 
For example, when the // input matrix is 4x4, K=2, and L=1 the tiled matrix looks like @@ -1240,12 +1236,11 @@ static std::pair<int64, int64> ComputeTilingSchemeForReduction( Status IrEmitterUnnested::EmitRowReduction( int64 depth, int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens) { // A naive algorithm is: // 1. Divide the x dimension of the input tensor into tiles of size 1x1xX. @@ -1593,13 +1588,12 @@ Status IrEmitterUnnested::EmitRowReduction( // elementwise. Status IrEmitterUnnested::EmitReductionToVector( HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<const int64> dimensions_to_reduce, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens) { // This emission requires "reduce" to have an input layout. It is either set // by LayoutAssignment (for a top-level kReduce) or by InstructionFusion (for @@ -1694,7 +1688,7 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) { } auto input = reduce->operand(0); auto init_value = reduce->operand(1); - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce(reduce->dimensions()); + absl::Span<const int64> dimensions_to_reduce(reduce->dimensions()); HloComputation* reducer = reduce->to_apply(); // HandleReduce specializes reduction from a multi-dimensional array to a 1D // array. The specialized version requires an initializer thunk that @@ -2570,7 +2564,7 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildInitializerThunk( // Are all the bytes of this scalar equal to 0? If so, we can create a // MemzeroThunk. 
- ArraySlice<uint8> literal_bytes( + absl::Span<const uint8> literal_bytes( reinterpret_cast<const uint8*>(literal.untyped_data()), num_bytes); if (absl::c_all_of(literal_bytes, [](uint8 byte) { return byte == 0; })) { return {absl::make_unique<MemzeroThunk>(GetAllocationSlice(*hlo, index), @@ -2880,7 +2874,7 @@ int IrEmitterUnnested::ConstructIrArrayForInputs( int IrEmitterUnnested::ConstructOutputReducedShapeAndCastOutputIrArrayToShape( const HloInstruction& hlo, const std::vector<IrArray>& output_arrays, - tensorflow::gtl::ArraySlice<int64> reduced_output_dims, + absl::Span<const int64> reduced_output_dims, std::vector<Shape>* output_reduced_shapes, std::vector<IrArray>* output_in_reduced_shape_arrays) { int64 num_outputs = 1; @@ -2907,7 +2901,7 @@ int IrEmitterUnnested::ConstructOutputReducedShapeAndCastOutputIrArrayToShape( int IrEmitterUnnested::ConstructInputReducedShapeAndCastInputIrArrayToShape( const HloInstruction& hlo, const std::vector<IrArray>& param_arrays, const std::vector<llvm::Value*>& param_buffers, - tensorflow::gtl::ArraySlice<int64> reduced_output_dims, + absl::Span<const int64> reduced_output_dims, std::vector<Shape>* param_reduced_shapes, std::vector<IrArray>* param_in_reduced_shape_arrays) { int64 num_params = hlo.operands().size(); @@ -3048,8 +3042,8 @@ void EmitTiledElementalCodeWithBoundsCheck( // TODO(b/33320379): Here each block transposes 1 tile. It may be more efficient // to launch fewer blocks so each transposes many tiles. LaunchDimensions IrEmitterUnnested::EmitHlo021Tile( - HloInstruction* hlo, tensorflow::gtl::ArraySlice<int64> reduced_output_dims, - tensorflow::gtl::ArraySlice<int64> tiled_param_ids) { + HloInstruction* hlo, absl::Span<const int64> reduced_output_dims, + absl::Span<const int64> tiled_param_ids) { // Parameters for the tiling algorithm. constexpr int64 kTileSize = 32; constexpr int64 kNumRows = 4; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index 5254419907..084462330e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -105,13 +105,12 @@ class IrEmitterUnnested : public IrEmitter { // This kernel takes as arguments pointers to the given buffer allocations. llvm::Function* BuildKernelPrototype( const HloInstruction& inst, - tensorflow::gtl::ArraySlice<const BufferAllocation*> args); + absl::Span<const BufferAllocation* const> args); // Helper for writing extra outputs from inside a reduce kernel. 
Status EmitExtraOutputsForReduce( const HloInstruction* reduce, const llvm_ir::IrArray::Index& index, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens); // EmitColumnReduction and EmitRowReduction emit code for column and row @@ -127,12 +126,11 @@ class IrEmitterUnnested : public IrEmitter { Status EmitColumnReduction( int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens); // Emits code that reduces a 3D tensor of shape [depth x height x width] to a @@ -143,23 +141,21 @@ class IrEmitterUnnested : public IrEmitter { Status EmitRowReduction( int64 depth, int64 height, int64 width, HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens); // Emits code that reduces a tensor of arbitrary rank to a scalar. 
Status EmitReductionToScalar( HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens); // Figures out whether `reduce` is a row or column reduction, and which @@ -180,13 +176,12 @@ class IrEmitterUnnested : public IrEmitter { // Prerequisite: `IsReductionToVector(*reduce)` Status EmitReductionToVector( HloInstruction* reduce, const Shape& input_shape, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> input_gens, - tensorflow::gtl::ArraySlice<llvm_ir::ElementGenerator> init_value_gens, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, - tensorflow::gtl::ArraySlice<HloComputation*> reducers, - tensorflow::gtl::ArraySlice<ShapeIndex> reduce_output_shapes, - tensorflow::gtl::ArraySlice< - std::pair<llvm_ir::ElementGenerator, ShapeIndex>> + absl::Span<const llvm_ir::ElementGenerator> input_gens, + absl::Span<const llvm_ir::ElementGenerator> init_value_gens, + absl::Span<const int64> dimensions_to_reduce, + absl::Span<HloComputation* const> reducers, + absl::Span<const ShapeIndex> reduce_output_shapes, + absl::Span<const std::pair<llvm_ir::ElementGenerator, ShapeIndex>> extra_output_gens); // Returns true if a 0-2-1 tiling algorithm is already used to emit the kernel @@ -195,10 +190,9 @@ class IrEmitterUnnested : public IrEmitter { // Emits a kernel for the hlo instruction using a 0-2-1 tiling algorithm and // returns the launch dimensions for the kernel. This is a helper to support // the implementation of CheckAndEmitHloWithTile021. - LaunchDimensions EmitHlo021Tile( - HloInstruction* hlo, - tensorflow::gtl::ArraySlice<int64> reduced_output_dims, - tensorflow::gtl::ArraySlice<int64> tiled_param_ids); + LaunchDimensions EmitHlo021Tile(HloInstruction* hlo, + absl::Span<const int64> reduced_output_dims, + absl::Span<const int64> tiled_param_ids); // Generates the IrArray for each output of hlo and returns the number of // outputs. 
int ConstructIrArrayForOutputs(const HloInstruction& hlo, @@ -214,7 +208,7 @@ class IrEmitterUnnested : public IrEmitter { int ConstructOutputReducedShapeAndCastOutputIrArrayToShape( const HloInstruction& hlo, const std::vector<llvm_ir::IrArray>& output_arrays, - tensorflow::gtl::ArraySlice<int64> reduced_output_dims, + absl::Span<const int64> reduced_output_dims, std::vector<Shape>* output_reduced_shapes, std::vector<llvm_ir::IrArray>* output_in_reduced_shape_arrays); // For each input of the `hlo` instruction, checks its value in @@ -226,7 +220,7 @@ class IrEmitterUnnested : public IrEmitter { const HloInstruction& hlo, const std::vector<llvm_ir::IrArray>& param_arrays, const std::vector<llvm::Value*>& param_buffers, - tensorflow::gtl::ArraySlice<int64> reduced_output_dims, + absl::Span<const int64> reduced_output_dims, std::vector<Shape>* param_reduced_shapes, std::vector<llvm_ir::IrArray>* param_in_reduced_shape_arrays); diff --git a/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc b/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc index 3259eaa2a2..878b0b96a1 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/kernel_thunk.cc @@ -27,10 +27,10 @@ limitations under the License. namespace xla { namespace gpu { -KernelThunk::KernelThunk( - tensorflow::gtl::ArraySlice<const BufferAllocation*> args, - const string& kernel_name, const HloInstruction* hlo_instruction, - int unroll_factor) +KernelThunk::KernelThunk(absl::Span<const BufferAllocation* const> args, + const string& kernel_name, + const HloInstruction* hlo_instruction, + int unroll_factor) : Thunk(Kind::kKernel, hlo_instruction), args_(args.begin(), args.end()), kernel_name_(kernel_name), diff --git a/tensorflow/compiler/xla/service/gpu/kernel_thunk.h b/tensorflow/compiler/xla/service/gpu/kernel_thunk.h index d751de50ad..480f473037 100644 --- a/tensorflow/compiler/xla/service/gpu/kernel_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/kernel_thunk.h @@ -47,7 +47,7 @@ class KernelThunk : public Thunk { // Constructs a thunk for the given kernel. // // `hlo_instruction` is as in Thunk. Other arguments are as the class members. - KernelThunk(tensorflow::gtl::ArraySlice<const BufferAllocation*> args, + KernelThunk(absl::Span<const BufferAllocation* const> args, const string& kernel_name, const HloInstruction* hlo_instruction, int unroll_factor); KernelThunk(const KernelThunk&) = delete; diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc index 79f7d31816..fa84d77223 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc @@ -23,7 +23,6 @@ limitations under the License. namespace xla { namespace gpu { -using tensorflow::gtl::ArraySlice; // We want the input/output feature counts of an f16 conv to be multiples of 8, // because without this cudnn can't use tensor cores on the conv. @@ -42,7 +41,7 @@ static constexpr double kMaxBytesTouchedIncrease = 1.2; // Pads the given dimensions in the given shape up to a multiple of // kDesiredNumFeaturesFactor.
-static Shape PadShape(Shape s, ArraySlice<int64> dims) { +static Shape PadShape(Shape s, absl::Span<const int64> dims) { for (int64 dim : dims) { int64 dim_to_pad_size = s.dimensions(dim); int64 new_dim_to_pad_size = diff --git a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc index ca57cacb98..8154d75d23 100644 --- a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.cc @@ -40,7 +40,7 @@ ParallelLoopEmitter::ParallelLoopEmitter( ParallelLoopEmitter::ParallelLoopEmitter( const llvm_ir::ElementGenerator& target_element_generator, - tensorflow::gtl::ArraySlice<llvm_ir::IrArray> target_arrays, + absl::Span<const llvm_ir::IrArray> target_arrays, const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* b, int unroll_factor) : LoopEmitter(target_element_generator, target_arrays, b), diff --git a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h index cc7da2e73b..f32ea1ce4c 100644 --- a/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h +++ b/tensorflow/compiler/xla/service/gpu/parallel_loop_emitter.h @@ -47,11 +47,10 @@ class ParallelLoopEmitter : public llvm_ir::LoopEmitter { // // This is used in multi-output fusion. target_element_generator should // produce a struct with N elements, one for each of target_arrays. - ParallelLoopEmitter( - const llvm_ir::ElementGenerator& target_element_generator, - tensorflow::gtl::ArraySlice<llvm_ir::IrArray> target_arrays, - const LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* b, - int unroll_factor = 1); + ParallelLoopEmitter(const llvm_ir::ElementGenerator& target_element_generator, + absl::Span<const llvm_ir::IrArray> target_arrays, + const LaunchDimensions& launch_dimensions, + llvm::IRBuilder<>* b, int unroll_factor = 1); ParallelLoopEmitter(const ParallelLoopEmitter&) = delete; ParallelLoopEmitter& operator=(const ParallelLoopEmitter&) = delete; diff --git a/tensorflow/compiler/xla/service/gpu/tuple_thunk.h b/tensorflow/compiler/xla/service/gpu/tuple_thunk.h index 2d5735d6c4..a3a03b53f8 100644 --- a/tensorflow/compiler/xla/service/gpu/tuple_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/tuple_thunk.h @@ -34,8 +34,7 @@ namespace gpu { // issue (b/31336476). class TupleThunk : public Thunk { public: - TupleThunk(tensorflow::gtl::ArraySlice<BufferAllocation::Slice> - tuple_element_buffers, + TupleThunk(absl::Span<const BufferAllocation::Slice> tuple_element_buffers, const BufferAllocation::Slice& dest_buffer, const HloInstruction* hlo_instruction) : Thunk(Kind::kTuple, hlo_instruction), diff --git a/tensorflow/compiler/xla/service/hlo_buffer.h b/tensorflow/compiler/xla/service/hlo_buffer.h index 4873463b2e..a88c87e46c 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.h +++ b/tensorflow/compiler/xla/service/hlo_buffer.h @@ -84,7 +84,7 @@ class HloBuffer { return a->id() == b->id(); } - HloBuffer(Id id, tensorflow::gtl::ArraySlice<const HloValue*> values) + HloBuffer(Id id, absl::Span<const HloValue* const> values) : id_(id), values_(values.begin(), values.end()) {} // Return the unique identifier for this HloBuffer. 
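KernelThunk, TupleThunk, and HloBuffer above all take a Span in the constructor and immediately copy it into owned storage (for example values_(values.begin(), values.end())), because a Span parameter only borrows the caller's buffer for the duration of the call. A minimal sketch of that idiom, with hypothetical names:

    #include <cstdint>
    #include <vector>
    #include "absl/types/span.h"

    class Buffer {
     public:
      // The Span is only valid during the constructor call, so the elements
      // are copied into an owned vector before the view can dangle.
      explicit Buffer(absl::Span<const std::int64_t> values)
          : values_(values.begin(), values.end()) {}

     private:
      std::vector<std::int64_t> values_;
    };

Taking a Span rather than a const std::vector& lets callers pass any contiguous sequence without materializing a temporary vector.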
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index c2d0673f49..fe7f2be888 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -558,7 +558,7 @@ HloComputation::CreateFromProto( } void HloComputation::FuseInstructionsInto( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions_to_fuse, + absl::Span<HloInstruction* const> instructions_to_fuse, HloInstruction* fusion_instruction) { CHECK_EQ(HloOpcode::kFusion, fusion_instruction->opcode()); HloInstruction* root = instructions_to_fuse.front(); @@ -577,7 +577,7 @@ void HloComputation::FuseInstructionsInto( } HloInstruction* HloComputation::CreateFusionInstruction( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions_to_fuse, + absl::Span<HloInstruction* const> instructions_to_fuse, HloInstruction::FusionKind fusion_kind) { HloInstruction* root = instructions_to_fuse.front(); HloInstruction* fusion_instruction = AddInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 59016624f7..daafb711fd 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -237,7 +237,7 @@ class HloComputation { // removed if they have no uses after fusion (this is necessarily true for at // least the root). HloInstruction* CreateFusionInstruction( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions_to_fuse, + absl::Span<HloInstruction* const> instructions_to_fuse, HloInstruction::FusionKind fusion_kind); // Create a deep copy of the given instruction and return the instruction @@ -385,7 +385,7 @@ class HloComputation { // // Pre-condition: fusion_instruction's opcode is kFusion. void FuseInstructionsInto( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions_to_fuse, + absl::Span<HloInstruction* const> instructions_to_fuse, HloInstruction* fusion_instruction); // Internal helper for recursive copying of an instruction. 
Creates and diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc index 7cd1481a8a..07cd1efc12 100644 --- a/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_constant_folding_test.cc @@ -105,8 +105,8 @@ TEST_F(HloConstantFoldingTest, ConvertF32ArrayToS64Array) { TEST_F(HloConstantFoldingTest, Concatenate) { const struct TestConfig { int concat_dimension; - tensorflow::gtl::ArraySlice<int64> dimensions; - tensorflow::gtl::ArraySlice<int64> concat_sizes; + absl::Span<const int64> dimensions; + absl::Span<const int64> concat_sizes; } test_configs[] = { {1, {11, 0, 7, 5, 9}, {2, 5, 7, 11}}, {3, {1, 4, 17, 0, 8}, {1, 3, 9, 12}}, @@ -196,7 +196,7 @@ TEST_F(HloConstantFoldingTest, TransposeConstantFold) { using NativeT = typename primitive_util::PrimitiveTypeToNative<F32>::type; bool matched = true; root->literal().EachCell<NativeT>( - [&](tensorflow::gtl::ArraySlice<int64> indices, NativeT value) { + [&](absl::Span<const int64> indices, NativeT value) { std::vector<int64> rindexes = Permute(permutation, indices); matched = matched && (value == literal_clone->Get<NativeT>(rindexes)); }); diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc index 131846794d..19ffb465c0 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -24,7 +24,6 @@ limitations under the License. namespace xla { using absl::StrCat; -using tensorflow::gtl::ArraySlice; StatusOr<HloInstruction*> MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs, HloInstruction* rhs) { @@ -50,9 +49,9 @@ StatusOr<HloInstruction*> MakePadHlo(HloInstruction* operand, } StatusOr<HloInstruction*> MakeSliceHlo(HloInstruction* operand, - ArraySlice<int64> start_indices, - ArraySlice<int64> limit_indices, - ArraySlice<int64> strides) { + absl::Span<const int64> start_indices, + absl::Span<const int64> limit_indices, + absl::Span<const int64> strides) { HloComputation* computation = operand->parent(); TF_ASSIGN_OR_RETURN(Shape slice_shape, ShapeInference::InferSliceShape( operand->shape(), start_indices, @@ -74,7 +73,7 @@ StatusOr<HloInstruction*> MakeConvolveHlo( } StatusOr<HloInstruction*> MakeTransposeHlo(HloInstruction* operand, - ArraySlice<int64> dimensions) { + absl::Span<const int64> dimensions) { HloComputation* computation = operand->parent(); TF_ASSIGN_OR_RETURN( Shape transpose_shape, @@ -91,15 +90,15 @@ StatusOr<HloInstruction*> MakeReshapeHlo(const Shape& result_shape, } StatusOr<HloInstruction*> MakeReshapeHlo( - ArraySlice<int64> result_shape_dim_bounds, HloInstruction* operand) { + absl::Span<const int64> result_shape_dim_bounds, HloInstruction* operand) { Shape new_shape = ShapeUtil::MakeShape(operand->shape().element_type(), result_shape_dim_bounds); return MakeReshapeHlo(new_shape, operand); } -StatusOr<HloInstruction*> MakeDynamicSliceHlo(HloInstruction* operand, - HloInstruction* start_indices, - ArraySlice<int64> slice_sizes) { +StatusOr<HloInstruction*> MakeDynamicSliceHlo( + HloInstruction* operand, HloInstruction* start_indices, + absl::Span<const int64> slice_sizes) { HloComputation* computation = operand->parent(); CHECK_EQ(computation, start_indices->parent()); TF_ASSIGN_OR_RETURN( @@ -125,8 +124,8 @@ StatusOr<HloInstruction*> MakeDynamicUpdateSliceHlo( } StatusOr<HloInstruction*> MakeBroadcastHlo( - HloInstruction* operand, ArraySlice<int64> 
broadcast_dimensions, - ArraySlice<int64> result_shape_bounds) { + HloInstruction* operand, absl::Span<const int64> broadcast_dimensions, + absl::Span<const int64> result_shape_bounds) { HloComputation* computation = operand->parent(); Shape broadcast_shape = ShapeUtil::MakeShape(operand->shape().element_type(), result_shape_bounds); @@ -146,8 +145,8 @@ StatusOr<HloInstruction*> MakeGetTupleElementHlo(HloInstruction* operand, HloInstruction::CreateGetTupleElement(gte_shape, operand, index)); } -StatusOr<HloInstruction*> MakeConcatHlo(ArraySlice<HloInstruction*> operands, - int64 dimension) { +StatusOr<HloInstruction*> MakeConcatHlo( + absl::Span<HloInstruction* const> operands, int64 dimension) { CHECK_GT(operands.size(), 0); HloComputation* computation = operands[0]->parent(); @@ -176,9 +175,8 @@ StatusOr<HloInstruction*> MakeDotHlo(HloInstruction* lhs, HloInstruction* rhs, HloInstruction::CreateDot(dot_shape, lhs, rhs, dim_numbers)); } -StatusOr<HloInstruction*> MakeMapHlo( - tensorflow::gtl::ArraySlice<HloInstruction*> operands, - HloComputation* map_computation) { +StatusOr<HloInstruction*> MakeMapHlo(absl::Span<HloInstruction* const> operands, + HloComputation* map_computation) { CHECK(!operands.empty()) << "Map Hlo requires at least one operand."; HloComputation* computation = operands.front()->parent(); std::vector<const Shape*> operand_shapes; @@ -235,7 +233,7 @@ StatusOr<HloInstruction*> PrependDegenerateDims(HloInstruction* operand, } StatusOr<HloInstruction*> ExpandFirstDimIntoNDims( - HloInstruction* operand, ArraySlice<int64> expanded_dims) { + HloInstruction* operand, absl::Span<const int64> expanded_dims) { CHECK_GT(operand->shape().dimensions_size(), 0); CHECK_EQ(operand->shape().dimensions(0), Product(expanded_dims)); @@ -251,8 +249,8 @@ StatusOr<HloInstruction*> ExpandFirstDimIntoNDims( return MakeReshapeHlo(new_shape, operand); } -StatusOr<HloInstruction*> ElideDegenerateDims(HloInstruction* operand, - ArraySlice<int64> dims_to_elide) { +StatusOr<HloInstruction*> ElideDegenerateDims( + HloInstruction* operand, absl::Span<const int64> dims_to_elide) { CHECK(absl::c_is_sorted(dims_to_elide)); const Shape& input_shape = operand->shape(); @@ -277,7 +275,7 @@ StatusOr<HloInstruction*> ElideDegenerateDims(HloInstruction* operand, } StatusOr<HloInstruction*> InsertDegenerateDims( - HloInstruction* operand, ArraySlice<int64> dims_to_insert) { + HloInstruction* operand, absl::Span<const int64> dims_to_insert) { CHECK(absl::c_is_sorted(dims_to_insert)); const Shape& operand_shape = operand->shape(); @@ -327,7 +325,7 @@ StatusOr<HloInstruction*> PadVectorWithZeros(HloInstruction* operand, StatusOr<HloInstruction*> BroadcastZeros( HloComputation* computation, PrimitiveType element_type, - ArraySlice<int64> broadcast_dimensions) { + absl::Span<const int64> broadcast_dimensions) { HloInstruction* zero = computation->AddInstruction(HloInstruction::CreateConstant( absl::make_unique<Literal>(LiteralUtil::Zero(element_type)))); @@ -336,7 +334,7 @@ StatusOr<HloInstruction*> BroadcastZeros( } StatusOr<std::unique_ptr<HloComputation>> CreateComputationWithSignature( - ArraySlice<const Shape*> domain, const Shape& range, + absl::Span<const Shape* const> domain, const Shape& range, absl::string_view name) { HloComputation::Builder b{string(name)}; int64 param_idx = 0; diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h index 1bc6d09b45..a1c4b374d1 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils.h +++ 
b/tensorflow/compiler/xla/service/hlo_creation_utils.h @@ -40,10 +40,10 @@ StatusOr<HloInstruction*> MakePadHlo(HloInstruction* operand, // Creates a slice HLO instruction and adds it to the computation containing // `operand`. -StatusOr<HloInstruction*> MakeSliceHlo( - HloInstruction* operand, tensorflow::gtl::ArraySlice<int64> start_indices, - tensorflow::gtl::ArraySlice<int64> limit_indices, - tensorflow::gtl::ArraySlice<int64> strides); +StatusOr<HloInstruction*> MakeSliceHlo(HloInstruction* operand, + absl::Span<const int64> start_indices, + absl::Span<const int64> limit_indices, + absl::Span<const int64> strides); // Creates a convolution HLO instruction and adds it to the computation // containing `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation). @@ -53,8 +53,8 @@ StatusOr<HloInstruction*> MakeConvolveHlo( // Creates a transpose HLO instruction and adds it to the computation containing // `operand`. -StatusOr<HloInstruction*> MakeTransposeHlo( - HloInstruction* operand, tensorflow::gtl::ArraySlice<int64> dimensions); +StatusOr<HloInstruction*> MakeTransposeHlo(HloInstruction* operand, + absl::Span<const int64> dimensions); // Creates a reshape HLO instruction and adds it to the computation containing // `operand`. @@ -62,15 +62,14 @@ StatusOr<HloInstruction*> MakeReshapeHlo(const Shape& result_shape, HloInstruction* operand); StatusOr<HloInstruction*> MakeReshapeHlo( - tensorflow::gtl::ArraySlice<int64> result_shape_dim_bounds, - HloInstruction* operand); + absl::Span<const int64> result_shape_dim_bounds, HloInstruction* operand); // Creates a dynamic-slice HLO instruction and adds it to the computation // containing `operand` and `start_indices` (`operand` and `start_indices` must // be in the same computation). StatusOr<HloInstruction*> MakeDynamicSliceHlo( HloInstruction* operand, HloInstruction* start_indices, - tensorflow::gtl::ArraySlice<int64> slice_sizes); + absl::Span<const int64> slice_sizes); // Creates a dynamic-update-slice HLO instruction and adds it to the computation // containing `operand`, `update` and `start_indices` (`operand`, `update` and @@ -82,9 +81,8 @@ StatusOr<HloInstruction*> MakeDynamicUpdateSliceHlo( // Creates a broadcast HLO instruction and adds it to the computation containing // `operand`. StatusOr<HloInstruction*> MakeBroadcastHlo( - HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions, - tensorflow::gtl::ArraySlice<int64> result_shape_bounds); + HloInstruction* operand, absl::Span<const int64> broadcast_dimensions, + absl::Span<const int64> result_shape_bounds); // Creates a GetTupleElement HLO instruction and adds it to the computation // containing `operand`. @@ -95,7 +93,7 @@ StatusOr<HloInstruction*> MakeGetTupleElementHlo(HloInstruction* operand, // containing `operands` (`operands` must be non-empty and every element must be // contained in the same computation). StatusOr<HloInstruction*> MakeConcatHlo( - tensorflow::gtl::ArraySlice<HloInstruction*> operands, int64 dimension); + absl::Span<HloInstruction* const> operands, int64 dimension); // Creates a Dot HLO instruction and adds it to the computation containing `lhs` // and `rhs` (both must be in the same computation). @@ -104,9 +102,8 @@ StatusOr<HloInstruction*> MakeDotHlo(HloInstruction* lhs, HloInstruction* rhs, // Creates a Map HLO instruction and adds it to the computation containing the // operands. All operands must be in the same computation. 
-StatusOr<HloInstruction*> MakeMapHlo( - tensorflow::gtl::ArraySlice<HloInstruction*> operands, - HloComputation* map_computation); +StatusOr<HloInstruction*> MakeMapHlo(absl::Span<HloInstruction* const> operands, + HloComputation* map_computation); // ----------------------------------------------------------------------------- // Some other miscellaneous helpers to generate common HLO patterns. All of @@ -138,7 +135,7 @@ StatusOr<HloInstruction*> PrependDegenerateDims(HloInstruction* operand, // For instance if `operand` has shape f32[200,9,7] and expanded_dims is // {2,5,20} the result is `operand` reshaped to [2,5,20,9,7]. StatusOr<HloInstruction*> ExpandFirstDimIntoNDims( - HloInstruction* operand, tensorflow::gtl::ArraySlice<int64> expanded_dims); + HloInstruction* operand, absl::Span<const int64> expanded_dims); // Elides (via reshape) a set of degenerate dimensions (dimensions containing // exactly one element), `dims_to_elide` from `operand`. Every dimension in @@ -148,7 +145,7 @@ StatusOr<HloInstruction*> ExpandFirstDimIntoNDims( // For example if `operand` is of shape f32[19,1,20,1,7,1,9] and dims_to_elide // is {1,5} then the result is `operand` reshaped to [19,20,1,7,9]. StatusOr<HloInstruction*> ElideDegenerateDims( - HloInstruction* operand, tensorflow::gtl::ArraySlice<int64> dims_to_elide); + HloInstruction* operand, absl::Span<const int64> dims_to_elide); // Inserts (via reshape) a set of degenerate dimensions (dimensions containing // exactly one element), `dims_to_insert` into `operand`. The dimensions in @@ -158,7 +155,7 @@ StatusOr<HloInstruction*> ElideDegenerateDims( // For example, if `operand` is of shape f32[12,21,8,34] and dims_to_insert is // {0, 2}, then the result is `operand` reshaped to [1,12,1,21,8,34]. StatusOr<HloInstruction*> InsertDegenerateDims( - HloInstruction* operand, tensorflow::gtl::ArraySlice<int64> dims_to_insert); + HloInstruction* operand, absl::Span<const int64> dims_to_insert); // Pads `operand` (which must have rank 1) with `zeros_to_prepend` zeros in the // front and `zeros_to_append` zeros in the back. @@ -171,12 +168,12 @@ StatusOr<HloInstruction*> PadVectorWithZeros(HloInstruction* operand, // broadcast instruction is emitted into `computation`. StatusOr<HloInstruction*> BroadcastZeros( HloComputation* computation, PrimitiveType element_type, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions); + absl::Span<const int64> broadcast_dimensions); // Creates a HLO computation that takes arguments of type `domain` and produces // a value of type `range`. StatusOr<std::unique_ptr<HloComputation>> CreateComputationWithSignature( - tensorflow::gtl::ArraySlice<const Shape*> domain, const Shape& range, + absl::Span<const Shape* const> domain, const Shape& range, absl::string_view name); } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc b/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc index 662f008205..eb6affadc8 100644 --- a/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc +++ b/tensorflow/compiler/xla/service/hlo_creation_utils_test.cc @@ -24,15 +24,13 @@ limitations under the License. 
namespace xla { namespace { -using tensorflow::gtl::ArraySlice; class HloCreationUtilsTest : public HloVerifiedTestBase { protected: - HloModule* CreateModuleWithProgramShape(PrimitiveType primitive_type, - ArraySlice<int64> input_shape_dims, - ArraySlice<int64> output_shape_dims, - HloInstruction** param, - HloComputation** entry_computation) { + HloModule* CreateModuleWithProgramShape( + PrimitiveType primitive_type, absl::Span<const int64> input_shape_dims, + absl::Span<const int64> output_shape_dims, HloInstruction** param, + HloComputation** entry_computation) { Shape input_shape = ShapeUtil::MakeShape(primitive_type, input_shape_dims); Shape output_shape = ShapeUtil::MakeShape(primitive_type, output_shape_dims); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index 3376d170e6..6a63681996 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -46,8 +46,7 @@ namespace { // // In this case, we should be able to reuse p0 and output, although p0 has // multiple uses. -bool MultiDynamicSliceUseShareSameIndices( - tensorflow::gtl::ArraySlice<HloUse> uses) { +bool MultiDynamicSliceUseShareSameIndices(absl::Span<const HloUse> uses) { if (uses.empty()) { return false; } @@ -221,7 +220,7 @@ string HloDataflowAnalysis::ToString() const { bool HloDataflowAnalysis::Phi( HloInstruction* instruction, - tensorflow::gtl::ArraySlice<const InstructionValueSet*> inputs) { + absl::Span<const InstructionValueSet* const> inputs) { CHECK(ssa_form_); VLOG(4) << "Phi(" << instruction->name() << ")"; VLOG(5) << "instruction value set = " diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index a1678d4943..6d5c375d6d 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -202,7 +202,7 @@ class HloDataflowAnalysis { // the given instruction. If skip_top_level is true, then the top level of the // value set of 'instruction' is not modified. bool Phi(HloInstruction* instruction, - tensorflow::gtl::ArraySlice<const InstructionValueSet*> inputs); + absl::Span<const InstructionValueSet* const> inputs); // Updates the positions of the HloValues in the output of the given // instruction. 
This should be called after the instruction value set of diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index c25869f87b..d316645a0b 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -53,7 +53,6 @@ namespace xla { namespace { -using tensorflow::gtl::ArraySlice; template <typename OperandT> StatusOr<std::unique_ptr<Literal>> Compare(const Shape& shape, HloOpcode opcode, @@ -97,10 +96,11 @@ StatusOr<std::unique_ptr<Literal>> Compare(const Shape& shape, HloOpcode opcode, } auto result = absl::make_unique<Literal>(shape); - TF_RETURN_IF_ERROR(result->Populate<bool>([&](ArraySlice<int64> multi_index) { - return compare_op(lhs_literal.Get<OperandT>(multi_index), - rhs_literal.Get<OperandT>(multi_index)); - })); + TF_RETURN_IF_ERROR( + result->Populate<bool>([&](absl::Span<const int64> multi_index) { + return compare_op(lhs_literal.Get<OperandT>(multi_index), + rhs_literal.Get<OperandT>(multi_index)); + })); return std::move(result); } @@ -127,10 +127,11 @@ StatusOr<std::unique_ptr<Literal>> Compare<complex64>( } auto result = absl::make_unique<Literal>(shape); - TF_RETURN_IF_ERROR(result->Populate<bool>([&](ArraySlice<int64> multi_index) { - return compare_op(lhs_literal.Get<complex64>(multi_index), - rhs_literal.Get<complex64>(multi_index)); - })); + TF_RETURN_IF_ERROR( + result->Populate<bool>([&](absl::Span<const int64> multi_index) { + return compare_op(lhs_literal.Get<complex64>(multi_index), + rhs_literal.Get<complex64>(multi_index)); + })); return std::move(result); } @@ -194,7 +195,7 @@ HloEvaluator::HloEvaluator(int64 max_loop_iterations) template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate( - const HloModule& module, ArraySlice<LiteralPtr> arg_literals) { + const HloModule& module, absl::Span<const LiteralPtr> arg_literals) { XLA_VLOG_LINES(2, "HloEvaluator::Evaluate module:\n" + module.ToString()); evaluated_.clear(); @@ -211,7 +212,8 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate( template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate( - const HloComputation& computation, ArraySlice<LiteralPtr> arg_literals) { + const HloComputation& computation, + absl::Span<const LiteralPtr> arg_literals) { CHECK(computation.parent() != nullptr); XLA_VLOG_LINES( 2, "HloEvaluator::Evaluate computation:\n" + computation.ToString()); @@ -228,7 +230,7 @@ StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate( template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate( - HloInstruction* instruction, ArraySlice<LiteralPtr> arg_literals) { + HloInstruction* instruction, absl::Span<const LiteralPtr> arg_literals) { TF_RET_CHECK(hlo_query::AllOperandsAreParametersOrConstants(*instruction)); evaluated_.clear(); @@ -390,7 +392,7 @@ Status HloEvaluator::HandleTranspose(HloInstruction* transpose) { } Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) { - ArraySlice<HloInstruction*> operands(concatenate->operands()); + absl::Span<HloInstruction* const> operands(concatenate->operands()); // The result concatenate dimension is going to be the sum of all // concatenate dimensions of the operands taking part of the operation. 
const Shape& reference_shape = operands[0]->shape(); @@ -588,7 +590,7 @@ ShapeUtil::IndexIterationSpace IterationSpaceForOutputBatchIndices( // Return a ShapeUtil::IndexIterationSpace that iterates over the output slice // dimensions while keeping the rest of the output dimensions clamped to 0. ShapeUtil::IndexIterationSpace IterationSpaceForOutputOffsetIndices( - int64 output_rank, ArraySlice<int64> slice_sizes, + int64 output_rank, absl::Span<const int64> slice_sizes, const GatherDimensionNumbers& dim_numbers) { std::vector<int64> index_base(output_rank, 0); std::vector<int64> index_count(output_rank, 1); @@ -661,11 +663,12 @@ class OutputBatchIndexToInputIndex { // same storage for all invocations. // // This returns an arrayslice into memory owned by the class. - StatusOr<ArraySlice<int64>> operator()(ArraySlice<int64> output_index) { + StatusOr<absl::Span<const int64>> operator()( + absl::Span<const int64> output_index) { PropagateOutputIndexGatherDimsToIndexVectorIndex(output_index); TF_RETURN_IF_ERROR(FetchIndexVector()); PropagateIndexVectorToInputIndex(); - return ArraySlice<int64>(input_index_); + return absl::Span<const int64>(input_index_); } private: @@ -674,7 +677,7 @@ class OutputBatchIndexToInputIndex { // update the dim_numbers.index_vector_dim() dimension -- that's the dimension // we iterate over in FetchIndexVector. void PropagateOutputIndexGatherDimsToIndexVectorIndex( - ArraySlice<int64> output_index) { + absl::Span<const int64> output_index) { int64 index_vector_index_i = 0; for (int64 i = 0, e = output_index.size(); i < e; i++) { if (!output_dim_is_batch_dims_[i]) { @@ -729,7 +732,7 @@ class OutputBatchIndexToInputIndex { // The index vector fetched from start_indices_. std::vector<int64> index_vector_; - // The result computed by this functor. operator() returns an ArraySlice into + // The result computed by this functor. operator() returns a Span into // this vector. std::vector<int64> input_index_; @@ -779,9 +782,10 @@ class OutputOffsetIndexToInputIndex { // result (input_index_), mutating it in place. // // This returns an arrayslice into memory owned by the class. - StatusOr<ArraySlice<int64>> operator()(ArraySlice<int64> output_index) { + StatusOr<absl::Span<const int64>> operator()( + absl::Span<const int64> output_index) { PropagateOutputIndexWindowDimsToInputIndex(output_index); - return ArraySlice<int64>(input_index_); + return absl::Span<const int64>(input_index_); } // Returns for a given 'input_dim' the corresponding output dimension index, @@ -794,7 +798,7 @@ class OutputOffsetIndexToInputIndex { // Propagates window dimensions from the output index to input_index_ by // mutating input_index_ in place. void PropagateOutputIndexWindowDimsToInputIndex( - ArraySlice<int64> output_index) { + absl::Span<const int64> output_index) { for (int64 i = 0, e = input_index_.size(); i < e; i++) { if (input_dim_value_to_output_index_[i] != -1) { input_index_[i] = output_index[input_dim_value_to_output_index_[i]]; @@ -810,7 +814,7 @@ class OutputOffsetIndexToInputIndex { // PropagateOutputIndexWindowDimsToInputIndex. std::vector<int64> input_dim_value_to_output_index_; - // The result computed by this functor. operator() returns an ArraySlice into + // The result computed by this functor. operator() returns a Span into // this vector.
std::vector<int64> input_index_; }; @@ -872,11 +876,11 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) { const Shape& operand_shape = operand.shape(); auto gather_inner_loop_body = - [&](ArraySlice<int64> output_window_index, - ArraySlice<int64> input_gather_index, - ArraySlice<int64> output_gather_index) -> StatusOr<bool> { + [&](absl::Span<const int64> output_window_index, + absl::Span<const int64> input_gather_index, + absl::Span<const int64> output_gather_index) -> StatusOr<bool> { TF_ASSIGN_OR_RETURN( - ArraySlice<int64> input_window_index, + absl::Span<const int64> input_window_index, output_offset_index_to_input_index(output_window_index)); for (int i = 0, e = output_index.size(); i < e; i++) { output_index[i] = output_gather_index[i] + output_window_index[i]; @@ -909,8 +913,8 @@ Status HloEvaluator::HandleGather(HloInstruction* gather) { }; auto gather_outer_loop_body = - [&](ArraySlice<int64> output_gather_index) -> StatusOr<bool> { - TF_ASSIGN_OR_RETURN(ArraySlice<int64> input_gather_index, + [&](absl::Span<const int64> output_gather_index) -> StatusOr<bool> { + TF_ASSIGN_OR_RETURN(absl::Span<const int64> input_gather_index, output_batch_index_to_input_index(output_gather_index)); TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus( shape, offset_indices_iteration_space, @@ -1170,12 +1174,11 @@ StatusOr<std::unique_ptr<Literal>> EvaluateSortInternal( result_values.push_back(key_value.second); } auto result_keys_literal = absl::make_unique<Literal>(keys_literal.shape()); - result_keys_literal->PopulateR1( - tensorflow::gtl::ArraySlice<KeyType>(result_keys)); + result_keys_literal->PopulateR1(absl::Span<const KeyType>(result_keys)); auto result_values_literal = absl::make_unique<Literal>(values_literal.shape()); result_values_literal->PopulateR1( - tensorflow::gtl::ArraySlice<ValueType>(result_values)); + absl::Span<const ValueType>(result_values)); return std::make_pair(std::move(result_keys_literal), std::move(result_values_literal)); }; @@ -1311,26 +1314,27 @@ Status HloEvaluator::Postprocess(HloInstruction* hlo) { // Explicit instantiation of templatized Evaluate* methods. 
// template StatusOr<std::unique_ptr<Literal>> -HloEvaluator::Evaluate<const Literal*>(const HloModule& module, - ArraySlice<const Literal*> arg_literals); +HloEvaluator::Evaluate<const Literal*>( + const HloModule& module, absl::Span<const Literal* const> arg_literals); template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<std::unique_ptr<Literal>>( - const HloModule& module, ArraySlice<std::unique_ptr<Literal>> arg_literals); + const HloModule& module, + absl::Span<const std::unique_ptr<Literal>> arg_literals); -template StatusOr<std::unique_ptr<Literal>> -HloEvaluator::Evaluate<const Literal*>(const HloComputation& computation, - ArraySlice<const Literal*> arg_literals); +template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate< + const Literal*>(const HloComputation& computation, + absl::Span<const Literal* const> arg_literals); template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<std::unique_ptr<Literal>>( const HloComputation& computation, - ArraySlice<std::unique_ptr<Literal>> arg_literals); + absl::Span<const std::unique_ptr<Literal>> arg_literals); template StatusOr<std::unique_ptr<Literal>> -HloEvaluator::Evaluate<const Literal*>(HloInstruction* instruction, - ArraySlice<const Literal*> arg_literals); +HloEvaluator::Evaluate<const Literal*>( + HloInstruction* instruction, absl::Span<const Literal* const> arg_literals); template StatusOr<std::unique_ptr<Literal>> HloEvaluator::Evaluate<std::unique_ptr<Literal>>( HloInstruction* instruction, - ArraySlice<std::unique_ptr<Literal>> arg_literals); + absl::Span<const std::unique_ptr<Literal>> arg_literals); } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 980a7fb9fa..3feb4e626f 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -51,8 +51,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // type. template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> Evaluate( - const HloModule& module, - tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals); + const HloModule& module, absl::Span<const LiteralPtr> arg_literals); // Evaluates an HLO computation and an array of pointers to literals. // Returns the evaluated result as a literal if successful. @@ -75,7 +74,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> Evaluate( const HloComputation& computation, - tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals); + absl::Span<const LiteralPtr> arg_literals); // Evaluates a single HLO instruction and an array of pointers to literals. // Return the evaluated result as literal if successful. @@ -87,8 +86,7 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // type. template <typename LiteralPtr> StatusOr<std::unique_ptr<Literal>> Evaluate( - HloInstruction* instruction, - tensorflow::gtl::ArraySlice<LiteralPtr> arg_literals); + HloInstruction* instruction, absl::Span<const LiteralPtr> arg_literals); // Evaluates a single HLO instruction with constant operands. // Returns the evaluated result as literal if successful. 
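The explicit instantiations above show where the const lands for the two literal-pointer element types: ArraySlice<const Literal*> becomes absl::Span<const Literal* const> (const pointers to const Literals), while ArraySlice<std::unique_ptr<Literal>> becomes absl::Span<const std::unique_ptr<Literal>> (const unique_ptr elements). A sketch of both forms, with Literal as a stand-in type and hypothetical function names:

    #include <memory>
    #include <vector>
    #include "absl/types/span.h"

    struct Literal {};  // stand-in for xla::Literal

    // Span of raw pointers: the pointers and the pointees are both const.
    void UseArgs(absl::Span<const Literal* const> args) {
      for (const Literal* arg : args) { (void)arg; }
    }

    // Span of owning pointers: the unique_ptr elements are const.
    void UseOwned(absl::Span<const std::unique_ptr<Literal>> args) {
      for (const std::unique_ptr<Literal>& arg : args) { (void)arg; }
    }

    void Demo() {
      std::vector<const Literal*> ptrs;
      std::vector<std::unique_ptr<Literal>> owned;
      UseArgs(ptrs);    // both vectors convert implicitly
      UseOwned(owned);
    }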
@@ -229,8 +227,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { } auto result = absl::make_unique<Literal>(shape); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> multi_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> multi_index) { return unary_op(operand_literal.Get<NativeT>(multi_index)); })); return std::move(result); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index e3eb60a851..626daa527b 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -60,7 +60,7 @@ class HloEvaluatorTest : public ::testing::WithParamInterface<bool>, } std::unique_ptr<Literal> Evaluate( - tensorflow::gtl::ArraySlice<const Literal*> arg_literals = {}) { + absl::Span<const Literal* const> arg_literals = {}) { if (use_bfloat16_) { // In BF16 mode, we convert all F32 types to BF16 and evaluate the module. auto type_converter = HloElementTypeConverter(F32, BF16); @@ -344,7 +344,7 @@ TEST_P(HloEvaluatorTest, DoesReshape) { using NativeT = typename primitive_util::PrimitiveTypeToNative<F32>::type; result->EachCell<NativeT>( - [&](tensorflow::gtl::ArraySlice<int64> indices, NativeT value) { + [&](absl::Span<const int64> indices, NativeT value) { std::vector<int64> rindexes = Permute(permutation, indices); EXPECT_NEAR(value, literal_clone->Get<NativeT>(rindexes), 0.031250); }); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index d35163ebb8..980e343035 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -97,7 +97,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { typename NativeT, typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr> double GetAsDouble(const Literal& literal, - tensorflow::gtl::ArraySlice<int64> input_index) { + absl::Span<const int64> input_index) { return static_cast<double>(literal.Get<NativeT>(input_index)); } @@ -109,7 +109,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { typename NativeT, typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr> double GetAsDouble(const Literal& literal, - tensorflow::gtl::ArraySlice<int64> input_index) { + absl::Span<const int64> input_index) { LOG(FATAL) << "Trying to get complex literal as double: " << literal.ToString(); } @@ -980,8 +980,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); auto result = absl::make_unique<Literal>(result_shape); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> out_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> out_index) { std::vector<int64> from_index(out_index.begin(), out_index.end()); for (const int64 dim : reverse_dimensions) { from_index[dim] = result_shape.dimensions(dim) - 1 - out_index[dim]; @@ -1048,8 +1048,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto func = [&window_shape, &dnums, &lhs_shape, &rhs_shape, &window, &lhs_dim_multipliers, &rhs_dim_multipliers, lhs_literal_data, - rhs_literal_data]( - tensorflow::gtl::ArraySlice<int64> out_index) { + rhs_literal_data](absl::Span<const int64> out_index) { // Dimension
number applicable for input (lhs). const int64 input_batch_dim = dnums.input_batch_dimension(); const int64 input_z_dim = dnums.input_feature_dimension(); @@ -1211,8 +1210,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { } auto result = absl::make_unique<Literal>(dot->shape()); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> result_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> result_index) { ElementwiseT result_val = static_cast<ElementwiseT>(0); for (int64 i = 0; i < result_index.size(); i++) { @@ -1261,9 +1260,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { parent_->GetEvaluatedLiteralFor(pad->operand(1)).Get<ReturnT>({}); auto result = absl::make_unique<Literal>(pad->shape()); TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&scalar](tensorflow::gtl::ArraySlice<int64> multi_index) { - return scalar; - })); + [&scalar](absl::Span<const int64> multi_index) { return scalar; })); const Literal& evaluated_operand = parent_->GetEvaluatedLiteralFor(pad->operand(0)); @@ -1276,7 +1273,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // corresponding index of the resulting padded literal. const PaddingConfig& pad_config = pad->padding_config(); - auto func = [&](tensorflow::gtl::ArraySlice<int64> input_index) { + auto func = [&](absl::Span<const int64> input_index) { for (auto i = 0; i < input_index.size(); ++i) { // Interior padding occurs logically before edge padding, so in the case // of negative edge padding elements are removed from the @@ -1427,8 +1424,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto result = absl::make_unique<Literal>(map->shape()); HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> multi_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> multi_index) { std::vector<std::unique_ptr<Literal>> arg_literals; arg_literals.reserve(operands.size()); @@ -1539,8 +1536,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { return SafeLess<ReturnT>(a, b); }); auto result_literal = absl::make_unique<Literal>(keys_literal.shape()); - result_literal->PopulateR1( - tensorflow::gtl::ArraySlice<ReturnT>(result_data)); + result_literal->PopulateR1(absl::Span<const ReturnT>(result_data)); VLOG(3) << "HandleSort result_literal: " << result_literal->ToString(); return result_literal; }; @@ -1582,7 +1578,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { HloReduceInstruction* reduce = Cast<HloReduceInstruction>(hlo); int64 num_args = reduce->inputs().size(); bool has_tuple_output = ShapeUtil::IsTuple(reduce->shape()); - tensorflow::gtl::ArraySlice<int64> dimensions(reduce->dimensions()); + absl::Span<const int64> dimensions(reduce->dimensions()); HloComputation* function = reduce->to_apply(); absl::InlinedVector<const Shape*, 1> operand_shapes; @@ -1650,7 +1646,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { for (int64 input = 0; input < num_args; ++input) { TF_RETURN_IF_ERROR(results[input]->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> multi_index) { + [&](absl::Span<const int64> multi_index) { if (!eval_status.ok()) { return init_scalars[input]; } @@ -1668,7 +1664,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { IsScalarAdd(function)) { CHECK_EQ(num_args, 1); double 
computed_result = 0; - auto func = [&](tensorflow::gtl::ArraySlice<int64> input_index) { + auto func = [&](absl::Span<const int64> input_index) { computed_result += GetAsDouble<ReturnT>(*arg_literals[0], input_index); return true; @@ -1677,8 +1673,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { arg_dim_counts, arg_dim_steps, func); return static_cast<ReturnT>(computed_result); } - auto func = [&](tensorflow::gtl::ArraySlice<int64> input_index) - -> StatusOr<bool> { + auto func = + [&](absl::Span<const int64> input_index) -> StatusOr<bool> { absl::InlinedVector<ReturnT, 1> arg_values(num_args); for (int64 i = 0; i < num_args; ++i) { arg_values[i] = arg_literals[i]->Get<ReturnT>(input_index); @@ -1767,9 +1763,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // Initialize result array with the init value. TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> output_index) { - return init_scalar; - })); + [&](absl::Span<const int64> output_index) { return init_scalar; })); std::vector<int64> window_dimension_sizes; for (const auto& window_dimension : window.dimensions()) { @@ -1902,8 +1896,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); auto result = absl::make_unique<Literal>(reduce_window->shape()); // For each resulting dimension, calculate and assign computed value. - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> output_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> output_index) { ReturnT result_val = init_scalar; std::fill(window_index.begin(), window_index.end(), 0); @@ -2049,12 +2043,12 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // same storage for all invocations. // // This returns an arrayslice into memory owned by the class. - StatusOr<tensorflow::gtl::ArraySlice<int64>> operator()( - tensorflow::gtl::ArraySlice<int64> update_index) { + StatusOr<absl::Span<const int64>> operator()( + absl::Span<const int64> update_index) { PropagateUpdateIndexScatterDimsToIndexVectorIndex(update_index); TF_RETURN_IF_ERROR(FetchIndexVector()); PropagateIndexVectorToInputIndex(); - return tensorflow::gtl::ArraySlice<int64>(input_index_); + return absl::Span<const int64>(input_index_); } private: @@ -2063,7 +2057,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // update the dim_numbers.index_vector_dim() dimension -- that's the // dimension we iterate over in FetchIndexVector. void PropagateUpdateIndexScatterDimsToIndexVectorIndex( - tensorflow::gtl::ArraySlice<int64> update_index) { + absl::Span<const int64> update_index) { int64 index_vector_index_i = 0; for (int64 i = 0, e = update_index.size(); i < e; i++) { if (!update_dim_is_scatter_dims_[i]) { @@ -2118,7 +2112,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // The index vector fetched from scatter_indices_. std::vector<int64> index_vector_; - // The result computed by this functor. operator() returns an ArraySlice + // The result computed by this functor. operator() returns a Span // into this vector. std::vector<int64> input_index_; @@ -2172,10 +2166,10 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // result (input_index_), mutating it in place. // // This returns an arrayslice into memory owned by the class. 
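The "arrayslice into memory owned by the class" contract called out in this comment survives the rename: the returned absl::Span aliases the functor's input_index_ member and is invalidated by the next invocation. An illustrative stand-in for that shape of API (IndexMapper is a made-up name, not the XLA class):

    #include <cstdint>
    #include <vector>
    #include "absl/types/span.h"

    class IndexMapper {
     public:
      explicit IndexMapper(int64_t rank) : input_index_(rank, 0) {}

      // Returns a view into member storage; stale after the next call.
      absl::Span<const int64_t> operator()(
          absl::Span<const int64_t> update_index) {
        for (size_t i = 0;
             i < input_index_.size() && i < update_index.size(); ++i) {
          input_index_[i] = update_index[i];
        }
        return input_index_;
      }

     private:
      std::vector<int64_t> input_index_;
    };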
- StatusOr<tensorflow::gtl::ArraySlice<int64>> operator()( - tensorflow::gtl::ArraySlice<int64> update_index) { + StatusOr<absl::Span<const int64>> operator()( + absl::Span<const int64> update_index) { PropagateUpdateIndexWindowDimsToInputIndex(update_index); - return tensorflow::gtl::ArraySlice<int64>(input_index_); + return absl::Span<const int64>(input_index_); } // Returns for a given 'input_dim' the corresponding update dimension index, @@ -2188,7 +2182,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // Propagates window dimensions from the update index to input_index_ by // mutating input_index_ in place. void PropagateUpdateIndexWindowDimsToInputIndex( - tensorflow::gtl::ArraySlice<int64> update_index) { + absl::Span<const int64> update_index) { for (int64 i = 0, e = input_index_.size(); i < e; i++) { if (input_dim_value_to_update_index_[i] != -1) { input_index_[i] = update_index[input_dim_value_to_update_index_[i]]; @@ -2204,7 +2198,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // PropagateUpdateIndexWindowDimsToInputIndex. std::vector<int64> input_dim_value_to_update_index_; - // The result computed by this functor. operator() returns an ArraySlice + // The result computed by this functor. operator() returns a Span // into this vector. std::vector<int64> input_index_; }; @@ -2247,12 +2241,11 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { std::unique_ptr<Literal> result = operand.CloneToUnique(); HloEvaluator embedded_evaluator; auto scatter_inner_loop_body = - [&](tensorflow::gtl::ArraySlice<int64> update_window_index, - tensorflow::gtl::ArraySlice<int64> input_scatter_index, - tensorflow::gtl::ArraySlice<int64> update_scatter_index) - -> StatusOr<bool> { + [&](absl::Span<const int64> update_window_index, + absl::Span<const int64> input_scatter_index, + absl::Span<const int64> update_scatter_index) -> StatusOr<bool> { TF_ASSIGN_OR_RETURN( - tensorflow::gtl::ArraySlice<int64> input_window_index, + absl::Span<const int64> input_window_index, update_window_index_to_input_index(update_window_index)); for (int i = 0, e = update_index.size(); i < e; i++) { update_index[i] = update_scatter_index[i] + update_window_index[i]; @@ -2301,14 +2294,13 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { }; auto scatter_outer_loop_body = - [&](tensorflow::gtl::ArraySlice<int64> update_scatter_index) - -> StatusOr<bool> { + [&](absl::Span<const int64> update_scatter_index) -> StatusOr<bool> { TF_ASSIGN_OR_RETURN( - tensorflow::gtl::ArraySlice<int64> input_scatter_index, + absl::Span<const int64> input_scatter_index, update_scatter_index_to_input_index(update_scatter_index)); TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus( updates_shape, window_indices_iteration_space, - [&](tensorflow::gtl::ArraySlice<int64> update_window_index) { + [&](absl::Span<const int64> update_window_index) { return scatter_inner_loop_body( update_window_index, input_scatter_index, update_scatter_index); })); @@ -2336,7 +2328,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { const int64 rank = ShapeUtil::Rank(operand->shape()); const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - auto func = [&](tensorflow::gtl::ArraySlice<int64> out_index) { + auto func = [&](absl::Span<const int64> out_index) { DimensionVector operand_index(rank); for (int64 i = 0; i < rank; ++i) { operand_index[i] = @@ -2607,7 +2599,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // bound, 
call `f` with the base index. static void IterateThroughWindow( const Shape& window_shape, const Window& window, const Shape& base_shape, - const tensorflow::gtl::ArraySlice<int64>& window_count_index, + const absl::Span<const int64>& window_count_index, const std::function<void(const std::vector<int64>&)>& f) { const int64 rank = ShapeUtil::Rank(base_shape); DimensionVector window_index(rank); @@ -2647,8 +2639,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { std::vector<int64> operand_indices(start.size()); auto result = absl::make_unique<Literal>(result_shape); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> multi_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> multi_index) { for (int64 i = 0; i < operand_indices.size(); ++i) { CHECK_GE(multi_index[i] + start[i], 0); operand_indices[i] = multi_index[i] + start[i]; @@ -2679,7 +2671,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { } std::vector<int64> result_index(rank, 0); - auto func = [&](tensorflow::gtl::ArraySlice<int64> update_index) { + auto func = [&](absl::Span<const int64> update_index) { std::transform(update_index.begin(), update_index.end(), start.begin(), result_index.begin(), std::plus<int64>()); result->Set<ReturnT>(result_index, @@ -2733,8 +2725,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto result = absl::make_unique<Literal>(shape); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> multi_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> multi_index) { return ConvertBinaryFunction(binary_op)( lhs_literal.Get<ReturnT>(multi_index), rhs_literal.Get<ReturnT>(multi_index)); @@ -2770,8 +2762,8 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { auto result = absl::make_unique<Literal>(shape); - TF_RETURN_IF_ERROR(result->Populate<ReturnT>( - [&](tensorflow::gtl::ArraySlice<int64> multi_index) { + TF_RETURN_IF_ERROR( + result->Populate<ReturnT>([&](absl::Span<const int64> multi_index) { return ternary_op(lhs_literal.Get<LhsType>(multi_index), rhs_literal.Get<RhsType>(multi_index), ehs_literal.Get<EhsType>(multi_index)); diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b747a4ea5f..bd0b6af10d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -113,7 +113,7 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto( std::vector<int64> fft_length(proto.fft_length().begin(), proto.fft_length().end()); instruction = CreateFft(proto.shape(), operands(0), proto.fft_type(), - tensorflow::gtl::ArraySlice<int64>(fft_length)); + absl::Span<const int64>(fft_length)); break; } case HloOpcode::kSend: @@ -519,13 +519,13 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateRng( const Shape& shape, RandomDistribution distribution, - tensorflow::gtl::ArraySlice<HloInstruction*> parameters) { + absl::Span<HloInstruction* const> parameters) { return absl::make_unique<HloRngInstruction>(shape, distribution, parameters); } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateNary( const Shape& shape, HloOpcode opcode, - tensorflow::gtl::ArraySlice<HloInstruction*> operands) { + absl::Span<HloInstruction* const> operands) { if (opcode == HloOpcode::kCopy) { 
// It is impossible to copy an opaque shape, we don't know how big it is. CHECK(!ShapeUtil::IsOpaque(shape)); @@ -627,13 +627,13 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateVariadic( const Shape& shape, HloOpcode opcode, - tensorflow::gtl::ArraySlice<HloInstruction*> operands) { + absl::Span<HloInstruction* const> operands) { CHECK_EQ(HloOpcode::kTuple, opcode); return CreateNary(shape, opcode, operands); } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateMap( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* map_computation) { return absl::make_unique<HloMapInstruction>(shape, operands, map_computation); } @@ -648,7 +648,7 @@ HloInstruction::CreateGetTupleElement(const Shape& shape, /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateFft( const Shape& shape, HloInstruction* operand, FftType fft_type, - tensorflow::gtl::ArraySlice<int64> fft_length) { + absl::Span<const int64> fft_length) { return absl::make_unique<HloFftInstruction>(shape, operand, fft_type, fft_length); } @@ -692,7 +692,7 @@ HloInstruction::CreateReducePrecision(const Shape& shape, /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCrossReplicaSum( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* reduce_computation, const std::vector<ReplicaGroup>& replica_groups, absl::string_view barrier, const absl::optional<int64>& all_reduce_id) { @@ -702,7 +702,7 @@ HloInstruction::CreateCrossReplicaSum( } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateAllToAll( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, const std::vector<ReplicaGroup>& replica_groups) { return absl::make_unique<HloAllToAllInstruction>(shape, operands, replica_groups); @@ -764,12 +764,12 @@ HloInstruction::CreateCollectivePermute( /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateReverse( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions) { + absl::Span<const int64> dimensions) { return absl::make_unique<HloReverseInstruction>(shape, operand, dimensions); } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateAfterAll( - tensorflow::gtl::ArraySlice<HloInstruction*> operands) { + absl::Span<HloInstruction* const> operands) { CHECK(!operands.empty()); auto instruction = absl::WrapUnique( new HloInstruction(HloOpcode::kAfterAll, ShapeUtil::MakeTokenShape())); @@ -815,16 +815,15 @@ HloInstruction::CreateCollectivePermute( /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateSlice( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> start_indices, - tensorflow::gtl::ArraySlice<int64> limit_indices, - tensorflow::gtl::ArraySlice<int64> strides) { + absl::Span<const int64> start_indices, + absl::Span<const int64> limit_indices, absl::Span<const int64> strides) { return absl::make_unique<HloSliceInstruction>(shape, operand, start_indices, limit_indices, strides); } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateDynamicSlice( const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, - tensorflow::gtl::ArraySlice<int64> slice_sizes) { + absl::Span<const 
int64> slice_sizes) { return absl::make_unique<HloDynamicSliceInstruction>( shape, operand, start_indices, slice_sizes); } @@ -843,7 +842,7 @@ HloInstruction::CreateDynamicUpdateSlice(const Shape& shape, } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateConcatenate( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, int64 dimension) { return absl::make_unique<HloConcatenateInstruction>(shape, operands, dimension); @@ -868,7 +867,7 @@ HloInstruction::CreateBitcastConvert(const Shape& shape, /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateReduce( const Shape& shape, HloInstruction* operand, HloInstruction* init_value, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + absl::Span<const int64> dimensions_to_reduce, HloComputation* reduce_computation) { auto instruction = absl::WrapUnique(new HloReduceInstruction( shape, {operand, init_value}, dimensions_to_reduce, reduce_computation)); @@ -876,9 +875,9 @@ HloInstruction::CreateBitcastConvert(const Shape& shape, } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateReduce( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, - tensorflow::gtl::ArraySlice<HloInstruction*> init_values, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + const Shape& shape, absl::Span<HloInstruction* const> operands, + absl::Span<HloInstruction* const> init_values, + absl::Span<const int64> dimensions_to_reduce, HloComputation* reduce_computation) { std::vector<HloInstruction*> all_args; all_args.reserve(operands.size() * 2); @@ -936,7 +935,7 @@ HloInstruction::CreateSelectAndScatter( /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateBroadcast( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions) { + absl::Span<const int64> broadcast_dimensions) { return absl::make_unique<HloBroadcastInstruction>(shape, operand, broadcast_dimensions); } @@ -1014,7 +1013,7 @@ HloInstruction::CreateBroadcastSequence( /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateTranspose( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions) { + absl::Span<const int64> dimensions) { return absl::make_unique<HloTransposeInstruction>(shape, operand, dimensions); } @@ -1032,7 +1031,7 @@ HloInstruction::CreateBroadcastSequence( /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateFusion( const Shape& shape, FusionKind fusion_kind, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, + absl::Span<HloInstruction* const> operands, HloComputation* fusion_computation) { return absl::make_unique<HloFusionInstruction>(shape, fusion_kind, operands, fusion_computation); @@ -1090,7 +1089,7 @@ bool HloInstruction::HasSideEffect() const { } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCall( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* computation) { std::unique_ptr<HloInstruction> instruction = absl::WrapUnique(new HloInstruction(HloOpcode::kCall, shape)); @@ -1102,14 +1101,14 @@ bool HloInstruction::HasSideEffect() const { } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateCustomCall( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, 
absl::string_view custom_call_target) { return absl::make_unique<HloCustomCallInstruction>(shape, operands, custom_call_target); } /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateTuple( - tensorflow::gtl::ArraySlice<HloInstruction*> elements) { + absl::Span<HloInstruction* const> elements) { std::vector<Shape> element_shapes; for (auto element : elements) { element_shapes.push_back(element->shape()); @@ -1121,7 +1120,7 @@ bool HloInstruction::HasSideEffect() const { /* static */ std::unique_ptr<HloInstruction> HloInstruction::CreateGather( const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, const GatherDimensionNumbers& gather_dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes) { + absl::Span<const int64> slice_sizes) { return absl::make_unique<HloGatherInstruction>( shape, operand, start_indices, gather_dim_numbers, slice_sizes); } @@ -1149,8 +1148,7 @@ bool HloInstruction::HasSideEffect() const { } std::unique_ptr<HloInstruction> HloInstruction::CloneWithNewOperands( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { VLOG(3) << "CloneWithNewOperands:\n " << ToString(); VLOG(3) << " new operands:"; @@ -1501,7 +1499,7 @@ void HloInstruction::AppendOperand(HloInstruction* operand) { } void HloInstruction::RemoveOperandsAtAscendingIndices( - tensorflow::gtl::ArraySlice<int> ascending_indices) { + absl::Span<const int> ascending_indices) { if (ascending_indices.empty()) { return; } @@ -1997,7 +1995,7 @@ string HloInstruction::OperandsToStringWithCanonicalNameMap( const HloPrintOptions& options, CanonicalNameMap* canonical_name_map) const { string operands; - tensorflow::gtl::ArraySlice<HloInstruction*> slice(operands_); + absl::Span<HloInstruction* const> slice(operands_); const int64 kMaxOperandsToShowIfCompact = 4; if (options.compact_operands() && slice.size() > kMaxOperandsToShowIfCompact) { @@ -3310,7 +3308,7 @@ const GatherDimensionNumbers& HloInstruction::gather_dimension_numbers() const { return Cast<HloGatherInstruction>(this)->gather_dimension_numbers(); } -tensorflow::gtl::ArraySlice<int64> HloInstruction::gather_slice_sizes() const { +absl::Span<const int64> HloInstruction::gather_slice_sizes() const { return Cast<HloGatherInstruction>(this)->gather_slice_sizes(); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index f3fd287d88..88cb5d8acf 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -365,7 +365,7 @@ class HloInstruction { // random numbers from a given distribution. static std::unique_ptr<HloInstruction> CreateRng( const Shape& shape, RandomDistribution distribution, - tensorflow::gtl::ArraySlice<HloInstruction*> parameters); + absl::Span<HloInstruction* const> parameters); // Creates a unary instruction (one operand). // Precondition: opcode must be a legitimate unary operation. @@ -392,13 +392,13 @@ class HloInstruction { // Precondition: opcode must be a legitimate variadic operation. 
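In all of these factory signatures the const ends up on the element, not the pointee: absl::Span<HloInstruction* const> is a read-only view over an array of pointers, the exact counterpart of the old ArraySlice<HloInstruction*>. Writing absl::Span<const HloInstruction*> instead would declare a mutable view of pointer-to-const elements, which does not even convert from a std::vector<HloInstruction*>. A short sketch (CountOperands is illustrative, not part of the API):

    #include <vector>
    #include "absl/types/span.h"

    class HloInstruction;  // used only through pointers here

    int CountOperands(absl::Span<HloInstruction* const> operands) {
      // operands[0] = nullptr;  // would not compile: elements are const
      return static_cast<int>(operands.size());
    }

    void Demo(const std::vector<HloInstruction*>& v) {
      CountOperands(v);  // vector<T*> converts to Span<T* const> implicitly
    }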
static std::unique_ptr<HloInstruction> CreateVariadic( const Shape& shape, HloOpcode opcode, - tensorflow::gtl::ArraySlice<HloInstruction*> operands); + absl::Span<HloInstruction* const> operands); // Creates a map instruction, where the computation (given by the handle) is // applied element-wise to every element in operands (across the operands, // at a given index) static std::unique_ptr<HloInstruction> CreateMap( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* map_computation); // Creates a convolution op, where rhs is the convolutional filter @@ -412,7 +412,7 @@ class HloInstruction { // Creates an FFT op, of the type indicated by fft_type. static std::unique_ptr<HloInstruction> CreateFft( const Shape& shape, HloInstruction* operand, FftType fft_type, - tensorflow::gtl::ArraySlice<int64> fft_length); + absl::Span<const int64> fft_length); // Creates a dot op with operands 'lhs' and 'rhs' with contracting and batch // dimensions specified in 'dimension_numbers'. @@ -449,7 +449,7 @@ class HloInstruction { // // TODO(b/79737069): Rename this to AllReduce. static std::unique_ptr<HloInstruction> CreateCrossReplicaSum( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* reduce_computation, const std::vector<ReplicaGroup>& replica_groups, absl::string_view barrier, const absl::optional<int64>& all_reduce_id); @@ -468,7 +468,7 @@ class HloInstruction { // be concatenated in the order of 1, 2, 3; another AllToAll will be applied // within replica 4, 5, 0, and the concatenation order is 4, 5, 0. static std::unique_ptr<HloInstruction> CreateAllToAll( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, const std::vector<ReplicaGroup>& replica_groups); // Creates a communication instruction that permutes data across replicas. @@ -536,17 +536,15 @@ class HloInstruction { // start/limit indices. static std::unique_ptr<HloInstruction> CreateSlice( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> start_indices, - tensorflow::gtl::ArraySlice<int64> limit_indices, - tensorflow::gtl::ArraySlice<int64> strides); + absl::Span<const int64> start_indices, + absl::Span<const int64> limit_indices, absl::Span<const int64> strides); // Creates a slice instruction, where the first operand is sliced by // start indices specified in the second operand, and by size specified in // 'slice_sizes'. static std::unique_ptr<HloInstruction> CreateDynamicSlice( const Shape& shape, HloInstruction* operand, - HloInstruction* start_indices, - tensorflow::gtl::ArraySlice<int64> slice_sizes); + HloInstruction* start_indices, absl::Span<const int64> slice_sizes); // Creates a dynamic update slice instruction, which updates a slice // of 'operand' with 'update' and 'start_indices'. @@ -557,7 +555,7 @@ class HloInstruction { // Creates a concatenate instruction, where the operands are concatenated on // the provided dimension. static std::unique_ptr<HloInstruction> CreateConcatenate( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, int64 dimension); // Creates a reduce instruction, where the computation (given by the handle) @@ -569,7 +567,7 @@ class HloInstruction { // f(f(init, value0), value1), ...) 
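The fold described by this comment is an ordinary left fold over the reduced elements: the accumulator starts at init and is threaded through the computation once per value. A one-dimensional sketch, with f passed as a plain function pointer:

    #include "absl/types/span.h"

    // acc = f(acc, v) per element, as in f(f(init, value0), value1), ...
    float Reduce1D(absl::Span<const float> values, float init,
                   float (*f)(float, float)) {
      float acc = init;
      for (float v : values) acc = f(acc, v);
      return acc;
    }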
static std::unique_ptr<HloInstruction> CreateReduce( const Shape& shape, HloInstruction* operand, HloInstruction* init_value, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + absl::Span<const int64> dimensions_to_reduce, HloComputation* reduce_computation); // A more general, multiple-argument version of the above. @@ -584,9 +582,9 @@ class HloInstruction { // ... // TODO(b/112040122): Add support to this in HLO passes and in backends. static std::unique_ptr<HloInstruction> CreateReduce( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, - tensorflow::gtl::ArraySlice<HloInstruction*> init_values, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + const Shape& shape, absl::Span<HloInstruction* const> operands, + absl::Span<HloInstruction* const> init_values, + absl::Span<const int64> dimensions_to_reduce, HloComputation* reduce_computation); // Creates a reduce-window instruction, where the computation (given @@ -623,7 +621,7 @@ class HloInstruction { // Creates a broadcast instruction. static std::unique_ptr<HloInstruction> CreateBroadcast( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions); + absl::Span<const int64> broadcast_dimensions); // Creates a sequence of instructions that performs an explicit broadcast of // the operand to the target shape. @@ -653,7 +651,7 @@ class HloInstruction { // Creates a transpose instruction which permutes the operand dimensions. static std::unique_ptr<HloInstruction> CreateTranspose( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions); + absl::Span<const int64> dimensions); // Creates a sort op, with a keys operand, and an optional values operand. static std::unique_ptr<HloInstruction> CreateSort( @@ -679,7 +677,7 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, const GatherDimensionNumbers& gather_dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes); + absl::Span<const int64> slice_sizes); static std::unique_ptr<HloInstruction> CreateScatter( const Shape& shape, HloInstruction* operand, @@ -703,37 +701,37 @@ class HloInstruction { static std::unique_ptr<HloInstruction> CreateFusion( const Shape& shape, FusionKind fusion_kind, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, + absl::Span<HloInstruction* const> operands, HloComputation* fusion_computation); // Creates a call instruction that applies the given computation on the given // operands. "shape" is the resultant shape. static std::unique_ptr<HloInstruction> CreateCall( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* computation); // Creates a custom call instruction that applies the given custom call target // to the given operands. "shape" is the resultant shape. static std::unique_ptr<HloInstruction> CreateCustomCall( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, absl::string_view custom_call_target); // Creates a tuple instruction with the given elements. This is a convenience // wrapper around CreateVariadic. static std::unique_ptr<HloInstruction> CreateTuple( - tensorflow::gtl::ArraySlice<HloInstruction*> elements); + absl::Span<HloInstruction* const> elements); // Creates a reverse instruction, which reverses the order of the elements // in the specified dimensions. 
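In the evaluator (see the reverse hunk from hlo_evaluator_typed_visitor.h earlier in this diff), this is the index map from_index[dim] = dimensions(dim) - 1 - out_index[dim] on each reversed dimension. Collapsed to one dimension:

    #include <vector>

    // Output index i reads input index size - 1 - i.
    std::vector<float> Reverse1D(const std::vector<float>& in) {
      std::vector<float> out(in.size());
      for (size_t i = 0; i < in.size(); ++i) {
        out[i] = in[in.size() - 1 - i];
      }
      return out;
    }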
static std::unique_ptr<HloInstruction> CreateReverse( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions); + absl::Span<const int64> dimensions); // Creates an AfterAll instruction used for joining or creating new values of // token type which thread through side-effecting operations. Operands must // all be tokens, and there must be at least one operand. static std::unique_ptr<HloInstruction> CreateAfterAll( - tensorflow::gtl::ArraySlice<HloInstruction*> operands); + absl::Span<HloInstruction* const> operands); // Creates an AfterAll instruction which creates a token type out of thin air // (no operands). This is a separate method from CreateAfterAll to facilitate @@ -1124,8 +1122,7 @@ class HloInstruction { // Clones the HLO instruction as above but with new shape and operands. std::unique_ptr<HloInstruction> CloneWithNewOperands( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context = nullptr) const; // Returns the computations this instruction directly calls (if any). @@ -1505,7 +1502,7 @@ class HloInstruction { // Delegates to HloGatherInstruction::gather_dimension_numbers. const GatherDimensionNumbers& gather_dimension_numbers() const; // Delegates to HloGatherInstruction::gather_slice_sizes. - tensorflow::gtl::ArraySlice<int64> gather_slice_sizes() const; + absl::Span<const int64> gather_slice_sizes() const; // Delegates to HloScatterInstruction::scatter_dimension_numbers(). const ScatterDimensionNumbers& scatter_dimension_numbers() const; @@ -1531,7 +1528,7 @@ class HloInstruction { // Removes a list of operands with the given indices in ascending order. void RemoveOperandsAtAscendingIndices( - tensorflow::gtl::ArraySlice<int> ascending_indices); + absl::Span<const int> ascending_indices); void AppendComputation(HloComputation* computation) { called_computations_.push_back(computation); @@ -1561,8 +1558,7 @@ class HloInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. virtual std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { // TODO(b/80131774): This should be pure virtual. LOG(FATAL) << "Unimplemented method."; @@ -1608,7 +1604,7 @@ class HloInstruction { // Creates an n-ary elementwise operation. static std::unique_ptr<HloInstruction> CreateNary( const Shape& shape, HloOpcode opcode, - tensorflow::gtl::ArraySlice<HloInstruction*> operands); + absl::Span<HloInstruction* const> operands); // Adds a user for this instruction. 
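The RemoveOperandsAtAscendingIndices declaration a few hunks above requires its indices pre-sorted, which permits removal in a single left-to-right compaction pass. A sketch of such a pass under that assumption (not the XLA implementation):

    #include <utility>
    #include <vector>
    #include "absl/types/span.h"

    template <typename T>
    void RemoveAtAscendingIndices(std::vector<T>* v,
                                  absl::Span<const int> ascending_indices) {
      size_t next = 0;   // next index scheduled for removal
      size_t write = 0;  // destination of the next kept element
      for (size_t read = 0; read < v->size(); ++read) {
        if (next < ascending_indices.size() &&
            static_cast<size_t>(ascending_indices[next]) == read) {
          ++next;  // this position is dropped
          continue;
        }
        (*v)[write++] = std::move((*v)[read]);
      }
      v->resize(write);
    }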
void AddUser(HloInstruction* user); diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index e1c884d856..6871953755 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -91,8 +91,7 @@ HloBatchNormTrainingInstruction::HloBatchNormTrainingInstruction( std::unique_ptr<HloInstruction> HloBatchNormTrainingInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 3); return absl::make_unique<HloBatchNormTrainingInstruction>( @@ -113,8 +112,7 @@ HloBatchNormInferenceInstruction::HloBatchNormInferenceInstruction( std::unique_ptr<HloInstruction> HloBatchNormInferenceInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 5); return absl::make_unique<HloBatchNormInferenceInstruction>( @@ -135,8 +133,7 @@ HloBatchNormGradInstruction::HloBatchNormGradInstruction( std::unique_ptr<HloInstruction> HloBatchNormGradInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 5); return absl::make_unique<HloBatchNormGradInstruction>( @@ -144,9 +141,9 @@ HloBatchNormGradInstruction::CloneWithNewOperandsImpl( new_operands[4], epsilon(), feature_index()); } -HloFftInstruction::HloFftInstruction( - const Shape& shape, HloInstruction* operand, FftType fft_type, - tensorflow::gtl::ArraySlice<int64> fft_length) +HloFftInstruction::HloFftInstruction(const Shape& shape, + HloInstruction* operand, FftType fft_type, + absl::Span<const int64> fft_length) : HloInstruction(HloOpcode::kFft, shape), fft_type_(fft_type) { fft_length_.assign(fft_length.begin(), fft_length.end()); AppendOperand(operand); @@ -177,8 +174,7 @@ bool HloFftInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloFftInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloFftInstruction>(shape, new_operands[0], fft_type_, @@ -232,8 +228,7 @@ HloSendInstruction::HloSendInstruction(HloInstruction* operand, } std::unique_ptr<HloInstruction> HloSendInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloSendInstruction>( @@ -250,8 +245,7 @@ HloSendDoneInstruction::HloSendDoneInstruction(HloSendInstruction* operand, std::unique_ptr<HloInstruction> HloSendDoneInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloSendDoneInstruction>( @@ 
-271,8 +265,7 @@ HloRecvInstruction::HloRecvInstruction(const Shape& shape, } std::unique_ptr<HloInstruction> HloRecvInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloRecvInstruction>( @@ -293,8 +286,7 @@ HloRecvDoneInstruction::HloRecvDoneInstruction(HloRecvInstruction* operand, std::unique_ptr<HloInstruction> HloRecvDoneInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloRecvDoneInstruction>( @@ -303,7 +295,7 @@ HloRecvDoneInstruction::CloneWithNewOperandsImpl( HloCollectiveInstruction::HloCollectiveInstruction( HloOpcode opcode, const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, + absl::Span<HloInstruction* const> operands, const std::vector<ReplicaGroup>& replica_groups) : HloInstruction(opcode, shape), replica_groups_(replica_groups) { for (auto operand : operands) { @@ -344,7 +336,7 @@ bool HloCollectiveInstruction::IdenticalSlowPath( } HloAllReduceInstruction::HloAllReduceInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* reduce_computation, const std::vector<ReplicaGroup>& replica_groups, absl::string_view barrier, const absl::optional<int64>& all_reduce_id) @@ -392,8 +384,7 @@ bool HloAllReduceInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloAllReduceInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* /*context*/) const { return absl::make_unique<HloAllReduceInstruction>( shape, new_operands, to_apply(), replica_groups(), @@ -401,15 +392,14 @@ HloAllReduceInstruction::CloneWithNewOperandsImpl( } HloAllToAllInstruction::HloAllToAllInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, const std::vector<ReplicaGroup>& replica_groups) : HloCollectiveInstruction(HloOpcode::kAllToAll, shape, operands, replica_groups) {} std::unique_ptr<HloInstruction> HloAllToAllInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* /*context*/) const { return absl::make_unique<HloAllToAllInstruction>(shape, new_operands, replica_groups()); @@ -459,16 +449,15 @@ bool HloCollectivePermuteInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloCollectivePermuteInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* /*context*/) const { return absl::make_unique<HloCollectivePermuteInstruction>( shape, new_operands[0], source_target_pairs()); } -HloReverseInstruction::HloReverseInstruction( - const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions) 
+HloReverseInstruction::HloReverseInstruction(const Shape& shape, + HloInstruction* operand, + absl::Span<const int64> dimensions) : HloInstruction(HloOpcode::kReverse, shape), dimensions_(dimensions.begin(), dimensions.end()) { AppendOperand(operand); @@ -496,8 +485,7 @@ bool HloReverseInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloReverseInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloReverseInstruction>(shape, new_operands[0], @@ -505,7 +493,7 @@ std::unique_ptr<HloInstruction> HloReverseInstruction::CloneWithNewOperandsImpl( } HloConcatenateInstruction::HloConcatenateInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, int64 dimension) : HloInstruction(HloOpcode::kConcatenate, shape), dimensions_({dimension}) { for (auto operand : operands) { @@ -537,16 +525,15 @@ bool HloConcatenateInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloConcatenateInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { return absl::make_unique<HloConcatenateInstruction>(shape, new_operands, dimensions(0)); } HloReduceInstruction::HloReduceInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> args, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + const Shape& shape, absl::Span<HloInstruction* const> args, + absl::Span<const int64> dimensions_to_reduce, HloComputation* reduce_computation) : HloInstruction(HloOpcode::kReduce, shape), dimensions_(dimensions_to_reduce.begin(), dimensions_to_reduce.end()) { @@ -581,8 +568,7 @@ bool HloReduceInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloReduceInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size() % 2, 0); return absl::make_unique<HloReduceInstruction>(shape, new_operands, @@ -621,8 +607,7 @@ bool HloSortInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloSortInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { HloInstruction* keys = new_operands[0]; HloInstruction* values = new_operands.size() == 2 ? 
new_operands[1] : nullptr; @@ -632,7 +617,7 @@ std::unique_ptr<HloInstruction> HloSortInstruction::CloneWithNewOperandsImpl( HloTransposeInstruction::HloTransposeInstruction( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions) + absl::Span<const int64> dimensions) : HloInstruction(HloOpcode::kTranspose, shape), dimensions_(dimensions.begin(), dimensions.end()) { CHECK_EQ(shape.dimensions().size(), dimensions.size()); @@ -676,8 +661,7 @@ bool HloTransposeInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloTransposeInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloTransposeInstruction>(shape, new_operands[0], @@ -686,7 +670,7 @@ HloTransposeInstruction::CloneWithNewOperandsImpl( HloBroadcastInstruction::HloBroadcastInstruction( const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> broadcast_dimension) + absl::Span<const int64> broadcast_dimension) : HloInstruction(HloOpcode::kBroadcast, shape), dimensions_(broadcast_dimension.begin(), broadcast_dimension.end()) { AppendOperand(operand); @@ -715,17 +699,16 @@ bool HloBroadcastInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloBroadcastInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloBroadcastInstruction>(shape, new_operands[0], dimensions()); } -HloMapInstruction::HloMapInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, - HloComputation* map_computation) +HloMapInstruction::HloMapInstruction(const Shape& shape, + absl::Span<HloInstruction* const> operands, + HloComputation* map_computation) : HloInstruction(HloOpcode::kMap, shape) { for (auto operand : operands) { AppendOperand(operand); @@ -774,17 +757,16 @@ bool HloMapInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloMapInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { return absl::make_unique<HloMapInstruction>(shape, new_operands, to_apply()); } -HloSliceInstruction::HloSliceInstruction( - const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> start_indices, - tensorflow::gtl::ArraySlice<int64> limit_indices, - tensorflow::gtl::ArraySlice<int64> strides) +HloSliceInstruction::HloSliceInstruction(const Shape& shape, + HloInstruction* operand, + absl::Span<const int64> start_indices, + absl::Span<const int64> limit_indices, + absl::Span<const int64> strides) : HloInstruction(HloOpcode::kSlice, shape), slice_starts_(start_indices.begin(), start_indices.end()), slice_limits_(limit_indices.begin(), limit_indices.end()), @@ -835,8 +817,7 @@ bool HloSliceInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloSliceInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { 
CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloSliceInstruction>( @@ -889,8 +870,7 @@ bool HloConstantInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloConstantInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { return absl::make_unique<HloConstantInstruction>(literal_->CloneToUnique()); } @@ -947,8 +927,7 @@ bool HloTraceInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloTraceInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { LOG(FATAL) << "Not yet implemented, clone: " << HloOpcodeString(opcode()); } @@ -966,7 +945,7 @@ HloFusionInstruction::HloFusionInstruction(const Shape& shape, HloFusionInstruction::HloFusionInstruction( const Shape& shape, FusionKind fusion_kind, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, + absl::Span<HloInstruction* const> operands, HloComputation* fusion_computation) : HloInstruction(HloOpcode::kFusion, shape), fusion_kind_(fusion_kind) { for (auto operand : operands) { @@ -1373,8 +1352,7 @@ bool HloFusionInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloFusionInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { HloModule* module = context != nullptr ? context->module() : GetModule(); HloComputation* new_fused_computation = nullptr; @@ -1412,7 +1390,7 @@ Status HloFusionInstruction::DeduplicateFusionOperands() { HloRngInstruction::HloRngInstruction( const Shape& shape, RandomDistribution distribution, - tensorflow::gtl::ArraySlice<HloInstruction*> parameters) + absl::Span<HloInstruction* const> parameters) : HloInstruction(HloOpcode::kRng, shape), distribution_(distribution) { for (HloInstruction* param : parameters) { AppendOperand(param); @@ -1443,8 +1421,7 @@ bool HloRngInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloRngInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { return absl::make_unique<HloRngInstruction>(shape, distribution_, new_operands); @@ -1480,8 +1457,7 @@ bool HloParameterInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloParameterInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { return absl::make_unique<HloParameterInstruction>(parameter_number_, shape, name()); @@ -1516,8 +1492,7 @@ bool HloGetTupleElementInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloGetTupleElementInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloGetTupleElementInstruction>( @@ -1559,8 +1534,7 @@ bool 
HloReducePrecisionInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloReducePrecisionInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloReducePrecisionInstruction>( @@ -1600,8 +1574,7 @@ bool HloInfeedInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloInfeedInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 1); return absl::make_unique<HloInfeedInstruction>( @@ -1646,8 +1619,7 @@ bool HloOutfeedInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloOutfeedInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloOutfeedInstruction>( @@ -1717,8 +1689,7 @@ bool HloConvolutionInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloConvolutionInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloConvolutionInstruction>( @@ -1762,8 +1733,7 @@ bool HloReduceWindowInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloReduceWindowInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloReduceWindowInstruction>( @@ -1811,8 +1781,7 @@ bool HloSelectAndScatterInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloSelectAndScatterInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 3); return absl::make_unique<HloSelectAndScatterInstruction>( @@ -1821,7 +1790,7 @@ HloSelectAndScatterInstruction::CloneWithNewOperandsImpl( } HloCustomCallInstruction::HloCustomCallInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, absl::string_view custom_call_target) : HloInstruction(HloOpcode::kCustomCall, shape), custom_call_target_(custom_call_target.begin(), @@ -1887,8 +1856,7 @@ bool HloCustomCallInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloCustomCallInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { auto cloned = absl::make_unique<HloCustomCallInstruction>( shape, new_operands, custom_call_target()); @@ -1931,8 +1899,7 @@ bool HloPadInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> 
HloPadInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloPadInstruction>(shape, new_operands[0], @@ -1941,7 +1908,7 @@ std::unique_ptr<HloInstruction> HloPadInstruction::CloneWithNewOperandsImpl( HloDynamicSliceInstruction::HloDynamicSliceInstruction( const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, - tensorflow::gtl::ArraySlice<int64> slice_sizes) + absl::Span<const int64> slice_sizes) : HloInstruction(HloOpcode::kDynamicSlice, shape), dynamic_slice_sizes_(slice_sizes.begin(), slice_sizes.end()) { AppendOperand(operand); @@ -1971,8 +1938,7 @@ bool HloDynamicSliceInstruction::IdenticalSlowPath( std::unique_ptr<HloInstruction> HloDynamicSliceInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloDynamicSliceInstruction>( @@ -1982,7 +1948,7 @@ HloDynamicSliceInstruction::CloneWithNewOperandsImpl( HloGatherInstruction::HloGatherInstruction( const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, const GatherDimensionNumbers& gather_dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes) + absl::Span<const int64> slice_sizes) : HloInstruction(HloOpcode::kGather, shape) { AppendOperand(operand); AppendOperand(start_indices); @@ -2011,10 +1977,9 @@ string HloGatherInstruction::GatherDimensionNumbersToString() const { } /* static */ GatherDimensionNumbers HloGatherInstruction::MakeGatherDimNumbers( - tensorflow::gtl::ArraySlice<int64> offset_dims, - tensorflow::gtl::ArraySlice<int64> collapsed_slice_dims, - tensorflow::gtl::ArraySlice<int64> start_index_map, - int64 index_vector_dim) { + absl::Span<const int64> offset_dims, + absl::Span<const int64> collapsed_slice_dims, + absl::Span<const int64> start_index_map, int64 index_vector_dim) { GatherDimensionNumbers gather_dim_numbers; for (int64 output_window_dim : offset_dims) { gather_dim_numbers.add_offset_dims(output_window_dim); @@ -2057,8 +2022,7 @@ bool HloGatherInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloGatherInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 2); return absl::make_unique<HloGatherInstruction>( @@ -2102,9 +2066,9 @@ string HloScatterInstruction::ScatterDimensionNumbersToString() const { /* static */ ScatterDimensionNumbers HloScatterInstruction::MakeScatterDimNumbers( - tensorflow::gtl::ArraySlice<int64> update_window_dims, - tensorflow::gtl::ArraySlice<int64> inserted_window_dims, - tensorflow::gtl::ArraySlice<int64> scatter_dims_to_operand_dims, + absl::Span<const int64> update_window_dims, + absl::Span<const int64> inserted_window_dims, + absl::Span<const int64> scatter_dims_to_operand_dims, int64 index_vector_dim) { ScatterDimensionNumbers scatter_dim_numbers; for (int64 update_window_dim : update_window_dims) { @@ -2144,8 +2108,7 @@ bool HloScatterInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloScatterInstruction::CloneWithNewOperandsImpl( - const Shape& shape, 
- tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { CHECK_EQ(new_operands.size(), 3); return absl::make_unique<HloScatterInstruction>( @@ -2177,8 +2140,7 @@ bool HloIotaInstruction::IdenticalSlowPath( } std::unique_ptr<HloInstruction> HloIotaInstruction::CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const { return absl::make_unique<HloIotaInstruction>(shape, iota_dimension()); } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 4fe5144aca..45a648bbe4 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -67,8 +67,7 @@ class HloBatchNormTrainingInstruction : public HloBatchNormInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -82,8 +81,7 @@ class HloBatchNormInferenceInstruction : public HloBatchNormInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -97,8 +95,7 @@ class HloBatchNormGradInstruction : public HloBatchNormInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -106,7 +103,7 @@ class HloFftInstruction : public HloInstruction { public: explicit HloFftInstruction(const Shape& shape, HloInstruction* operand, FftType fft_type, - tensorflow::gtl::ArraySlice<int64> fft_length); + absl::Span<const int64> fft_length); FftType fft_type() const { return fft_type_; } const std::vector<int64>& fft_length() const { return fft_length_; } @@ -124,8 +121,7 @@ class HloFftInstruction : public HloInstruction { // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // Describes FFT type for an FFT instruction. @@ -174,8 +170,7 @@ class HloSendInstruction : public HloSendRecvInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -187,8 +182,7 @@ class HloSendDoneInstruction : public HloSendRecvInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. 
std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -200,8 +194,7 @@ class HloRecvInstruction : public HloSendRecvInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -213,8 +206,7 @@ class HloRecvDoneInstruction : public HloSendRecvInstruction { private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -227,7 +219,7 @@ class HloCollectiveInstruction : public HloInstruction { protected: explicit HloCollectiveInstruction( HloOpcode opcode, const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, + absl::Span<HloInstruction* const> operands, const std::vector<ReplicaGroup>& replica_groups); HloInstructionProto ToProto() const override; @@ -245,7 +237,7 @@ class HloCollectiveInstruction : public HloInstruction { class HloAllReduceInstruction : public HloCollectiveInstruction { public: explicit HloAllReduceInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, HloComputation* reduce_computation, const std::vector<ReplicaGroup>& replica_groups, absl::string_view barrier, const absl::optional<int64>& all_reduce_id); @@ -274,8 +266,7 @@ class HloAllReduceInstruction : public HloCollectiveInstruction { // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // The string representation of the barrier config used for CrossReplicaSum. @@ -290,14 +281,13 @@ class HloAllReduceInstruction : public HloCollectiveInstruction { class HloAllToAllInstruction : public HloCollectiveInstruction { public: explicit HloAllToAllInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, + const Shape& shape, absl::Span<HloInstruction* const> operands, const std::vector<ReplicaGroup>& replica_groups); private: // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; }; @@ -324,8 +314,7 @@ class HloCollectivePermuteInstruction : public HloInstruction { // Implementation for non-common logic of CloneWithNewOperands. 
std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; const std::vector<std::pair<int64, int64>> source_target_pairs_; @@ -334,7 +323,7 @@ class HloCollectivePermuteInstruction : public HloInstruction { class HloReverseInstruction : public HloInstruction { public: explicit HloReverseInstruction(const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions); + absl::Span<const int64> dimensions); // Returns the dimension sizes or numbers associated with this instruction. const std::vector<int64>& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -350,8 +339,7 @@ class HloReverseInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -359,9 +347,9 @@ class HloReverseInstruction : public HloInstruction { class HloConcatenateInstruction : public HloInstruction { public: - explicit HloConcatenateInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, - int64 dimension); + explicit HloConcatenateInstruction(const Shape& shape, + absl::Span<HloInstruction* const> operands, + int64 dimension); // Returns the dimension sizes or numbers associated with this instruction. const std::vector<int64>& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -379,8 +367,7 @@ class HloConcatenateInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -388,10 +375,10 @@ class HloConcatenateInstruction : public HloInstruction { class HloReduceInstruction : public HloInstruction { public: - explicit HloReduceInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> args, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, - HloComputation* reduce_computation); + explicit HloReduceInstruction(const Shape& shape, + absl::Span<HloInstruction* const> args, + absl::Span<const int64> dimensions_to_reduce, + HloComputation* reduce_computation); // Returns the dimension sizes or numbers associated with this instruction. const std::vector<int64>& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -403,12 +390,12 @@ class HloReduceInstruction : public HloInstruction { int64 input_count() const { return operand_count() / 2; } // Returns the input tensors to be reduced. 
- tensorflow::gtl::ArraySlice<HloInstruction*> inputs() const { + absl::Span<HloInstruction* const> inputs() const { return absl::MakeSpan(operands()).subspan(0, input_count()); } // Returns the init values of the reduction. - tensorflow::gtl::ArraySlice<HloInstruction*> init_values() const { + absl::Span<HloInstruction* const> init_values() const { return absl::MakeSpan(operands()).subspan(input_count(), operand_count()); } @@ -421,8 +408,7 @@ class HloReduceInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -450,8 +436,7 @@ class HloSortInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -459,9 +444,8 @@ class HloSortInstruction : public HloInstruction { class HloTransposeInstruction : public HloInstruction { public: - explicit HloTransposeInstruction( - const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> dimensions); + explicit HloTransposeInstruction(const Shape& shape, HloInstruction* operand, + absl::Span<const int64> dimensions); // Returns the dimension sizes or numbers associated with this instruction. const std::vector<int64>& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -479,8 +463,7 @@ class HloTransposeInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -488,9 +471,8 @@ class HloTransposeInstruction : public HloInstruction { class HloBroadcastInstruction : public HloInstruction { public: - explicit HloBroadcastInstruction( - const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> broadcast_dimension); + explicit HloBroadcastInstruction(const Shape& shape, HloInstruction* operand, + absl::Span<const int64> broadcast_dimension); // Returns the dimension sizes or numbers associated with this instruction. const std::vector<int64>& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -506,8 +488,7 @@ class HloBroadcastInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. 
std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -515,9 +496,9 @@ class HloBroadcastInstruction : public HloInstruction { class HloMapInstruction : public HloInstruction { public: - explicit HloMapInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, - HloComputation* map_computation); + explicit HloMapInstruction(const Shape& shape, + absl::Span<HloInstruction* const> operands, + HloComputation* map_computation); // Returns the dimension sizes or numbers associated with this instruction. const std::vector<int64>& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } @@ -535,8 +516,7 @@ class HloMapInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::vector<int64> dimensions_; @@ -545,9 +525,9 @@ class HloMapInstruction : public HloInstruction { class HloSliceInstruction : public HloInstruction { public: explicit HloSliceInstruction(const Shape& shape, HloInstruction* operand, - tensorflow::gtl::ArraySlice<int64> start_indices, - tensorflow::gtl::ArraySlice<int64> limit_indices, - tensorflow::gtl::ArraySlice<int64> strides); + absl::Span<const int64> start_indices, + absl::Span<const int64> limit_indices, + absl::Span<const int64> strides); HloInstructionProto ToProto() const override; @@ -586,8 +566,7 @@ class HloSliceInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // Describes the [begin, end) index range for a slice. @@ -629,8 +608,7 @@ class HloConstantInstruction : public HloInstruction { CanonicalNameMap* canonical_name_map) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // TODO(b/36360764): Remove unique_ptr wrapping. std::unique_ptr<Literal> literal_; @@ -651,8 +629,7 @@ class HloTraceInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // TODO(b/36360764): Remove unique_ptr wrapping. 
std::unique_ptr<Literal> literal_; @@ -663,10 +640,9 @@ class HloFusionInstruction : public HloInstruction { explicit HloFusionInstruction(const Shape& shape, FusionKind fusion_kind, HloInstruction* fused_root); - explicit HloFusionInstruction( - const Shape& shape, FusionKind fusion_kind, - tensorflow::gtl::ArraySlice<HloInstruction*> operands, - HloComputation* fusion_computation); + explicit HloFusionInstruction(const Shape& shape, FusionKind fusion_kind, + absl::Span<HloInstruction* const> operands, + HloComputation* fusion_computation); string ToCategory() const override; // Returns a serialized representation of this instruction. @@ -779,8 +755,7 @@ class HloFusionInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // The type of the fusion. Used by kFusion only. @@ -789,9 +764,9 @@ class HloFusionInstruction : public HloInstruction { class HloRngInstruction : public HloInstruction { public: - explicit HloRngInstruction( - const Shape& shape, RandomDistribution distribution, - tensorflow::gtl::ArraySlice<HloInstruction*> parameters); + explicit HloRngInstruction(const Shape& shape, + RandomDistribution distribution, + absl::Span<HloInstruction* const> parameters); // Returns the random distribution for this rng node. RandomDistribution random_distribution() const { return distribution_; } // Returns a serialized representation of this instruction. @@ -808,8 +783,7 @@ class HloRngInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // The distribution requested for random number generation. @@ -834,8 +808,7 @@ class HloParameterInstruction : public HloInstruction { CanonicalNameMap* canonical_name_map) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; int64 parameter_number_ = 0; @@ -859,8 +832,7 @@ class HloGetTupleElementInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; int64 tuple_index_ = -1; @@ -888,8 +860,7 @@ class HloReducePrecisionInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. 
std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // The bit sizes for a reduce-precision operation. @@ -926,8 +897,7 @@ class HloInfeedInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // The string representation of the infeed configuration. @@ -959,8 +929,7 @@ class HloOutfeedInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // Shape of outfeed request. @@ -1001,8 +970,7 @@ class HloConvolutionInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; Window window_; // Describes the dimension numbers used for a convolution. @@ -1033,8 +1001,7 @@ class HloReduceWindowInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; Window window_; }; @@ -1082,17 +1049,16 @@ class HloSelectAndScatterInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; Window window_; }; class HloCustomCallInstruction : public HloInstruction { public: - explicit HloCustomCallInstruction( - const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands, - absl::string_view custom_call_target); + explicit HloCustomCallInstruction(const Shape& shape, + absl::Span<HloInstruction* const> operands, + absl::string_view custom_call_target); const Window& window() const override { CHECK(window_ != nullptr); return *window_; @@ -1125,8 +1091,7 @@ class HloCustomCallInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. 
std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // Name of a global symbol to call, only present for kCustomCall. string custom_call_target_; @@ -1155,8 +1120,7 @@ class HloPadInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // The padding configuration that describes the edge padding and interior @@ -1166,10 +1130,10 @@ class HloPadInstruction : public HloInstruction { class HloDynamicSliceInstruction : public HloInstruction { public: - explicit HloDynamicSliceInstruction( - const Shape& shape, HloInstruction* operand, - HloInstruction* start_indices, - tensorflow::gtl::ArraySlice<int64> slice_sizes); + explicit HloDynamicSliceInstruction(const Shape& shape, + HloInstruction* operand, + HloInstruction* start_indices, + absl::Span<const int64> slice_sizes); // Old methods kept for smooth subclassing transition END. // Returns the size of the slice in the given dimension for a dynamic // slice node. @@ -1191,8 +1155,7 @@ class HloDynamicSliceInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; // Describes the [start, start + size) range size for a dynamic slice @@ -1206,12 +1169,12 @@ class HloGatherInstruction : public HloInstruction { const Shape& shape, HloInstruction* operand, HloInstruction* start_indices, const GatherDimensionNumbers& gather_dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes); + absl::Span<const int64> slice_sizes); const GatherDimensionNumbers& gather_dimension_numbers() const { CHECK(gather_dimension_numbers_ != nullptr); return *gather_dimension_numbers_; } - tensorflow::gtl::ArraySlice<int64> gather_slice_sizes() const { + absl::Span<const int64> gather_slice_sizes() const { return gather_slice_sizes_; } // Returns the dump string of the gather dimension numbers. @@ -1221,10 +1184,9 @@ class HloGatherInstruction : public HloInstruction { // Creates an instance of GatherDimensionNumbers. 
static GatherDimensionNumbers MakeGatherDimNumbers( - tensorflow::gtl::ArraySlice<int64> offset_dims, - tensorflow::gtl::ArraySlice<int64> collapsed_slice_dims, - tensorflow::gtl::ArraySlice<int64> start_index_map, - int64 index_vector_dim); + absl::Span<const int64> offset_dims, + absl::Span<const int64> collapsed_slice_dims, + absl::Span<const int64> start_index_map, int64 index_vector_dim); private: std::vector<string> ExtraAttributesToStringImpl( @@ -1234,8 +1196,7 @@ class HloGatherInstruction : public HloInstruction { const std::function<bool(const HloComputation*, const HloComputation*)>& eq_computations) const override; std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::unique_ptr<GatherDimensionNumbers> gather_dimension_numbers_; @@ -1260,9 +1221,9 @@ class HloScatterInstruction : public HloInstruction { // Creates an instance of ScatterDimensionNumbers. static ScatterDimensionNumbers MakeScatterDimNumbers( - tensorflow::gtl::ArraySlice<int64> update_window_dims, - tensorflow::gtl::ArraySlice<int64> inserted_window_dims, - tensorflow::gtl::ArraySlice<int64> scatter_dims_to_operand_dims, + absl::Span<const int64> update_window_dims, + absl::Span<const int64> inserted_window_dims, + absl::Span<const int64> scatter_dims_to_operand_dims, int64 index_vector_dim); private: @@ -1274,8 +1235,7 @@ class HloScatterInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; std::unique_ptr<ScatterDimensionNumbers> scatter_dimension_numbers_; @@ -1298,8 +1258,7 @@ class HloIotaInstruction : public HloInstruction { eq_computations) const override; // Implementation for non-common logic of CloneWithNewOperands. std::unique_ptr<HloInstruction> CloneWithNewOperandsImpl( - const Shape& shape, - tensorflow::gtl::ArraySlice<HloInstruction*> new_operands, + const Shape& shape, absl::Span<HloInstruction* const> new_operands, HloCloneContext* context) const override; const int64 iota_dimension_; diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 78167335c8..3a1bc4e328 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -353,7 +353,7 @@ bool IsUsedOutsideSubcomputation( } // anonymous namespace HloInstruction* HloModule::OutlineExpressionFromComputation( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions_to_outline, + absl::Span<HloInstruction* const> instructions_to_outline, const string& outlined_computation_name, HloComputation* computation) { auto builder = HloComputation::Builder(outlined_computation_name); diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index cf129b835d..ee5601beec 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -192,7 +192,7 @@ class HloModule { // order (root of outlined instructions last). TODO(jingyue): takes a set of // instructions and topologically sorts them. 
HloInstruction* OutlineExpressionFromComputation( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions_to_outline, + absl::Span<HloInstruction* const> instructions_to_outline, const string& outlined_computation_name, HloComputation* computation); // Returns a randomly generated uint64. diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.cc b/tensorflow/compiler/xla/service/hlo_module_group_util.cc index d70328c8a3..d83ee71490 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_util.cc +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.cc @@ -193,7 +193,7 @@ std::vector<HloInstruction*> HloModuleGroupUtil::GlobalSuccessors( } std::vector<HloInstruction*> HloModuleGroupUtil::RootInstructions( - tensorflow::gtl::ArraySlice<HloComputation*> computations) { + absl::Span<HloComputation* const> computations) { std::vector<HloInstruction*> roots; for (HloComputation* computation : computations) { for (HloInstruction* instruction : computation->instructions()) { @@ -293,7 +293,7 @@ Status HloModuleGroupUtil::VisitTopologicalOrder( } Status HloModuleGroupUtil::VerifyComputations( - tensorflow::gtl::ArraySlice<HloComputation*> computations) { + absl::Span<HloComputation* const> computations) { auto visit_function = [&](HloInstruction* instruction, const std::vector<HloInstruction*>& instruction_group) { @@ -324,7 +324,7 @@ Status HloModuleGroupUtil::VerifyComputations( StatusOr<std::unique_ptr<HloReachabilityMap>> HloModuleGroupUtil::ComputeReachability( - tensorflow::gtl::ArraySlice<HloComputation*> computations) { + absl::Span<HloComputation* const> computations) { std::vector<HloInstruction*> post_order; auto visit_function = [&](HloInstruction* instruction, diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h index c25ca1aff5..fe11fe1818 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_util.h +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h @@ -56,7 +56,7 @@ class HloModuleGroupUtil { // Returns the root instructions of the computations. std::vector<HloInstruction*> RootInstructions( - tensorflow::gtl::ArraySlice<HloComputation*> computations); + absl::Span<HloComputation* const> computations); // Visit state of each instruction during DFS traversal. enum VisitState { @@ -93,15 +93,14 @@ class HloModuleGroupUtil { HloInstruction* root); // Verifies that the computations are well-formed (e.g., no cycles). - Status VerifyComputations( - tensorflow::gtl::ArraySlice<HloComputation*> computations); + Status VerifyComputations(absl::Span<HloComputation* const> computations); // Below Reachability utils resemble those in HloComputation, except that // they can handle instructions across multiple computations. // // Creates the reachability map for the instructions in the computations. StatusOr<std::unique_ptr<HloReachabilityMap>> ComputeReachability( - tensorflow::gtl::ArraySlice<HloComputation*> computations); + absl::Span<HloComputation* const> computations); // Updates the reachability of the given instruction, taking the global // predecessors and successors into account.
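The rename is mechanical, but the const placement it produces differs by element type: for value elements the const qualifies the elements themselves (absl::Span<const int64>), while for pointer elements it binds to the pointers (absl::Span<HloInstruction* const>), leaving the pointees mutable just as ArraySlice<HloInstruction*> did. Below is a minimal standalone sketch of both patterns, plus the subspan splitting used by the reduce hunks in this patch; Node, SumOf, FirstOf, and SplitOperands are hypothetical names for illustration, not part of this change:

#include <cstdint>
#include <vector>
#include "absl/types/span.h"

struct Node {};  // hypothetical stand-in for HloInstruction

// Value elements: the span is a read-only view of int64s.
int64_t SumOf(absl::Span<const int64_t> dims) {
  int64_t sum = 0;
  for (int64_t d : dims) sum += d;
  return sum;
}

// Pointer elements: the pointers are const, the pointed-to Nodes are not.
Node* FirstOf(absl::Span<Node* const> ops) {
  return ops.empty() ? nullptr : ops[0];
}

// Splitting one operand vector into two views, as the HloReduceInstruction
// accessors and the parser's CreateReduce call do. absl::Span::subspan(pos,
// len) clamps len to the remaining size, so passing the full operand count
// for the tail view is safe.
void SplitOperands(std::vector<Node*>* operands) {
  absl::Span<Node* const> inputs =
      absl::MakeSpan(*operands).subspan(0, operands->size() / 2);
  absl::Span<Node* const> init_values =
      absl::MakeSpan(*operands).subspan(operands->size() / 2, operands->size());
  (void)inputs;
  (void)init_values;
}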
diff --git a/tensorflow/compiler/xla/service/hlo_module_test.cc b/tensorflow/compiler/xla/service/hlo_module_test.cc index 209ad5e58c..80009c7f7e 100644 --- a/tensorflow/compiler/xla/service/hlo_module_test.cc +++ b/tensorflow/compiler/xla/service/hlo_module_test.cc @@ -44,7 +44,7 @@ class HloModuleTest : public HloTestBase { // Creates a computation which calls the given zero-parameter computations. std::unique_ptr<HloComputation> CreateCallComputation( - tensorflow::gtl::ArraySlice<HloComputation*> computations) { + absl::Span<HloComputation* const> computations) { auto builder = HloComputation::Builder("Call"); for (auto computation : computations) { builder.AddInstruction( diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index b93e4f24f6..02201d4542 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -306,7 +306,7 @@ bool SplitToInt64s(absl::string_view s, char delim, std::vector<int64>* out) { // Creates replica groups from the provided nested array. groups[i] represents // the replica ids for group 'i'. std::vector<ReplicaGroup> CreateReplicaGroups( - tensorflow::gtl::ArraySlice<std::vector<int64>> groups) { + absl::Span<const std::vector<int64>> groups) { std::vector<ReplicaGroup> replica_groups; absl::c_transform(groups, std::back_inserter(replica_groups), [](const std::vector<int64>& ids) { @@ -997,10 +997,10 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } instruction = builder->AddInstruction(HloInstruction::CreateReduce( shape, /*operands=*/ - tensorflow::gtl::ArraySlice<HloInstruction*>(operands).subspan( + absl::Span<HloInstruction* const>(operands).subspan( 0, operands.size() / 2), /*init_values=*/ - tensorflow::gtl::ArraySlice<HloInstruction*>(operands).subspan( + absl::Span<HloInstruction* const>(operands).subspan( operands.size() / 2, operands.size()), *dimensions_to_reduce, *reduce_computation)); break; diff --git a/tensorflow/compiler/xla/service/hlo_reachability.cc b/tensorflow/compiler/xla/service/hlo_reachability.cc index 01b088a957..961930f0a8 100644 --- a/tensorflow/compiler/xla/service/hlo_reachability.cc +++ b/tensorflow/compiler/xla/service/hlo_reachability.cc @@ -18,7 +18,7 @@ limitations under the License. 
namespace xla { HloReachabilityMap::HloReachabilityMap( - tensorflow::gtl::ArraySlice<const HloInstruction*> instructions) + absl::Span<const HloInstruction* const> instructions) : size_(instructions.size()) { bit_vectors_.reserve(size_); for (const HloInstruction* hlo : instructions) { @@ -29,7 +29,7 @@ HloReachabilityMap::HloReachabilityMap( } bool HloReachabilityMap::SetReachabilityToUnion( - tensorflow::gtl::ArraySlice<const HloInstruction*> inputs, + absl::Span<const HloInstruction* const> inputs, const HloInstruction* instruction) { BitVector& bit_vector = GetBitVector(instruction); tmp_bit_vector_ = bit_vector; @@ -38,13 +38,13 @@ bool HloReachabilityMap::SetReachabilityToUnion( } void HloReachabilityMap::FastSetReachabilityToUnion( - tensorflow::gtl::ArraySlice<const HloInstruction*> inputs, + absl::Span<const HloInstruction* const> inputs, const HloInstruction* instruction) { SetReachabilityToUnionHelper(inputs, instruction, &GetBitVector(instruction)); } void HloReachabilityMap::SetReachabilityToUnionHelper( - tensorflow::gtl::ArraySlice<const HloInstruction*> inputs, + absl::Span<const HloInstruction* const> inputs, const HloInstruction* instruction, BitVector* bit_vector) { // If instruction is part of inputs, don't reset the bit_vector. if (std::find(inputs.begin(), inputs.end(), instruction) == inputs.end()) { diff --git a/tensorflow/compiler/xla/service/hlo_reachability.h b/tensorflow/compiler/xla/service/hlo_reachability.h index 48215d32a8..2c8ebc8e6c 100644 --- a/tensorflow/compiler/xla/service/hlo_reachability.h +++ b/tensorflow/compiler/xla/service/hlo_reachability.h @@ -42,7 +42,7 @@ class HloReachabilityMap { // Sets up a graph with no edges and where the nodes correspond to the given // instructions. explicit HloReachabilityMap( - tensorflow::gtl::ArraySlice<const HloInstruction*> instructions); + absl::Span<const HloInstruction* const> instructions); // Set the reachability set of 'instruction' to the union of the reachability // sets of 'inputs'. Upon return, IsReachable(x, instruction) where @@ -54,13 +54,12 @@ class HloReachabilityMap { // vector in the internal graph of this HloReachabilityMap for the given // instruction and does not transitively update any other part of the // adjacency matrix. - bool SetReachabilityToUnion( - tensorflow::gtl::ArraySlice<const HloInstruction*> inputs, - const HloInstruction* instruction); + bool SetReachabilityToUnion(absl::Span<const HloInstruction* const> inputs, + const HloInstruction* instruction); // As above, but faster because it does not check if the reachability changed. void FastSetReachabilityToUnion( - tensorflow::gtl::ArraySlice<const HloInstruction*> inputs, + absl::Span<const HloInstruction* const> inputs, const HloInstruction* instruction); // Sets entry so that IsReachable(a, b) will return true @@ -141,7 +140,7 @@ class HloReachabilityMap { // Helper for SetReachabilityToUnion/FastSetReachabilityToUnion. void SetReachabilityToUnionHelper( - tensorflow::gtl::ArraySlice<const HloInstruction*> inputs, + absl::Span<const HloInstruction* const> inputs, const HloInstruction* instruction, BitVector* bit_vector); // Return the index of the given instruction. 
The value is used to index into diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 569d2e5d2d..c9629926ea 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -202,8 +202,8 @@ class InstructionList { // On object construction this ordinal is precisely the instruction's index // in the list. Later, instructions inserted via InsertBefore receive // duplicate values. However, monotonicity is preserved. - void InsertBeforeInstructions( - Item* to_insert, tensorflow::gtl::ArraySlice<Item*> before_instructions) { + void InsertBeforeInstructions(Item* to_insert, + absl::Span<Item* const> before_instructions) { VLOG(3) << "InsertBeforeInstructions: " << to_insert->instruction->name() << " before {" << absl::StrJoin(before_instructions, ", ", diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 7bd8a4a544..66ac1f66fd 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -106,7 +106,7 @@ StatusOr<ScopedShapedBuffer> HloRunner::TransferLiteralToDevice( } StatusOr<std::vector<ScopedShapedBuffer>> HloRunner::TransferLiteralsToDevice( - const tensorflow::gtl::ArraySlice<const Literal*> literals) { + const absl::Span<const Literal* const> literals) { std::vector<ScopedShapedBuffer> buffers; for (const Literal* literal : literals) { CHECK(literal != nullptr); @@ -118,7 +118,7 @@ StatusOr<std::vector<ScopedShapedBuffer>> HloRunner::TransferLiteralsToDevice( } StatusOr<std::vector<ScopedShapedBuffer>> HloRunner::TransferLiteralsToDevice( - const tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> literals) { + const absl::Span<const std::unique_ptr<Literal>> literals) { std::vector<const Literal*> literal_pointers; literal_pointers.reserve(literals.size()); for (const auto& literal : literals) { @@ -137,8 +137,8 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::TransferLiteralFromDevice( StatusOr<std::unique_ptr<Literal>> HloRunner::Execute( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<const Literal*> arguments, - bool run_hlo_passes, ExecutionProfile* profile) { + const absl::Span<const Literal* const> arguments, bool run_hlo_passes, + ExecutionProfile* profile) { TF_ASSIGN_OR_RETURN(std::vector<ScopedShapedBuffer> argument_buffers, TransferLiteralsToDevice(arguments)); TF_ASSIGN_OR_RETURN(ScopedShapedBuffer result, @@ -152,7 +152,7 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::Execute( StatusOr<std::unique_ptr<Literal>> HloRunner::Execute( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arguments, + const absl::Span<const std::unique_ptr<Literal>> arguments, bool run_hlo_passes, ExecutionProfile* profile) { // Construct a vector of plain pointers for the arguments. std::vector<const Literal*> argument_pointers; @@ -169,8 +169,8 @@ StatusOr<std::unique_ptr<Literal>> HloRunner::Execute( StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, - bool run_hlo_passes, ExecutionProfile* profile) { + const absl::Span<const ShapedBuffer* const> arguments, bool run_hlo_passes, + ExecutionProfile* profile) { // Get service run options. 
se::Stream stream(backend().default_stream_executor()); stream.Init(); @@ -190,8 +190,8 @@ StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers( StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<ScopedShapedBuffer> arguments, - bool run_hlo_passes, ExecutionProfile* profile) { + const absl::Span<const ScopedShapedBuffer> arguments, bool run_hlo_passes, + ExecutionProfile* profile) { std::vector<const ShapedBuffer*> argument_pointers; argument_pointers.reserve(arguments.size()); for (const auto& argument : arguments) { @@ -226,8 +226,7 @@ StatusOr<std::vector<std::unique_ptr<Literal>>> HloRunner::ExecuteReplicated( // no arguments. std::vector<const ShapedBuffer*> argument_buffer_ptrs( options.num_replicas * options.arguments.size() + 1); - std::vector<tensorflow::gtl::ArraySlice<const ShapedBuffer*>> - argument_buffer_slices; + std::vector<absl::Span<const ShapedBuffer* const>> argument_buffer_slices; int64 index = 0; for (int64 i = 0; i < options.num_replicas; ++i) { int64 device = device_assignment(i, 0); diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index cfc519063e..547b5fc1bb 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -104,9 +104,9 @@ class HloRunner { // Transfers data between the host and device. StatusOr<ScopedShapedBuffer> TransferLiteralToDevice(const Literal& literal); StatusOr<std::vector<ScopedShapedBuffer>> TransferLiteralsToDevice( - const tensorflow::gtl::ArraySlice<const Literal*> literals); + const absl::Span<const Literal* const> literals); StatusOr<std::vector<ScopedShapedBuffer>> TransferLiteralsToDevice( - const tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> literals); + const absl::Span<const std::unique_ptr<Literal>> literals); StatusOr<std::unique_ptr<Literal>> TransferLiteralFromDevice( const ShapedBuffer& buffer); @@ -117,24 +117,24 @@ class HloRunner { // optimization. StatusOr<std::unique_ptr<Literal>> Execute( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<const Literal*> arguments, + const absl::Span<const Literal* const> arguments, bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); StatusOr<std::unique_ptr<Literal>> Execute( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<std::unique_ptr<Literal>> arguments, + const absl::Span<const std::unique_ptr<Literal>> arguments, bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); // As Execute(), but accepts and returns device buffers instead of host // buffers. 
StatusOr<ScopedShapedBuffer> ExecuteWithDeviceBuffers( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + const absl::Span<const ShapedBuffer* const> arguments, bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); StatusOr<ScopedShapedBuffer> ExecuteWithDeviceBuffers( std::unique_ptr<HloModule> module, - const tensorflow::gtl::ArraySlice<ScopedShapedBuffer> arguments, + const absl::Span<const ScopedShapedBuffer> arguments, bool run_hlo_passes = true, ExecutionProfile* profile = nullptr); // Executes a given HLO module into a set of replicas, and returns a map diff --git a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc index 930801288a..d49d09d459 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling_test.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling_test.cc @@ -269,7 +269,7 @@ TEST_F(HloSchedulingTest, TuplesAreAccountedCorrectly) { auto abs_abs1 = builder.AddInstruction( HloInstruction::CreateUnary(r1f32, HloOpcode::kAbs, abs_const)); auto tuple = builder.AddInstruction(HloInstruction::CreateTuple( - tensorflow::gtl::ArraySlice<HloInstruction*>({abs_abs1}))); + absl::Span<HloInstruction* const>({abs_abs1}))); auto tuple_elm = builder.AddInstruction( HloInstruction::CreateGetTupleElement(r1f32, tuple, 0)); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 1235259764..de7e6b53d4 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -54,9 +54,8 @@ HloSharding HloSharding::Tuple(const ShapeTree<HloSharding>& sub_shardings) { return HloSharding(flattened_list); } -HloSharding HloSharding::Tuple( - const Shape& tuple_shape, - tensorflow::gtl::ArraySlice<HloSharding> shardings) { +HloSharding HloSharding::Tuple(const Shape& tuple_shape, + absl::Span<const HloSharding> shardings) { CHECK(ShapeUtil::IsTuple(tuple_shape)) << ShapeUtil::HumanString(tuple_shape); for (auto& sharding : shardings) { CHECK(!sharding.IsTuple()) << sharding.ToString(); @@ -142,7 +141,7 @@ std::vector<int64> HloSharding::TileIndexForDevice(int64 device) const { CHECK(!maximal_); CHECK(!IsTuple()); std::vector<int64> ret_index; - tile_assignment_.Each([&](tensorflow::gtl::ArraySlice<int64> index, int64 d) { + tile_assignment_.Each([&](absl::Span<const int64> index, int64 d) { if (d == device) { ret_index = {index.begin(), index.end()}; } @@ -151,8 +150,7 @@ std::vector<int64> HloSharding::TileIndexForDevice(int64 device) const { return ret_index; } -int64 HloSharding::DeviceForTileIndex( - tensorflow::gtl::ArraySlice<int64> index) const { +int64 HloSharding::DeviceForTileIndex(absl::Span<const int64> index) const { CHECK(!replicated_); CHECK(!IsTuple()); if (maximal_) { @@ -319,7 +317,7 @@ Status HloSharding::ValidateNonTuple(const Shape& shape, Status status = Status::OK(); std::set<int64> seen_cores; tile_assignment_.Each( - [&](tensorflow::gtl::ArraySlice<int64> indices, int32 core) { + [&](absl::Span<const int64> indices, int32 core) { // Don't overwrite a bad status, so we report the first error. 
if (status.ok()) { if (core >= num_devices) { diff --git a/tensorflow/compiler/xla/service/hlo_sharding.h b/tensorflow/compiler/xla/service/hlo_sharding.h index be51c3f55b..01fd9f215d 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.h +++ b/tensorflow/compiler/xla/service/hlo_sharding.h @@ -66,7 +66,7 @@ class HloSharding { // shardings must match the number of leaf nodes in tuple_shape. For // empty tuples, the shardings array must have one element. static HloSharding Tuple(const Shape& tuple_shape, - tensorflow::gtl::ArraySlice<HloSharding> shardings); + absl::Span<const HloSharding> shardings); // Creates a new sharding for a tuple type, with a single input sharding // repeated on each leaf. @@ -132,7 +132,7 @@ class HloSharding { // Returns the device that should execute the given tile. // It is an error to call this if is_replicated() is true. // REQUIRES: !IsTuple() - int64 DeviceForTileIndex(tensorflow::gtl::ArraySlice<int64> index) const; + int64 DeviceForTileIndex(absl::Span<const int64> index) const; // Given a device ID, returns the offset within the specified shape of the // tile that should be executed on the given core. This returns the lower diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc index 6e9b96488c..34cba6136f 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc @@ -372,7 +372,7 @@ Status ApplyDomainSharding(const DomainMetadata::Domain& domain, } StatusOr<std::shared_ptr<const HloSharding>> ExtractOriginalCommonSharding( - tensorflow::gtl::ArraySlice<HloInstruction*> instructions) { + absl::Span<HloInstruction* const> instructions) { // If we are here, all the instructions being passed had the same sharding // (or no sharding), by the means of the ShardingMatches() API. // As such, no kDomain was inserted, and here we are asked to extract the diff --git a/tensorflow/compiler/xla/service/hlo_sharding_test.cc b/tensorflow/compiler/xla/service/hlo_sharding_test.cc index 2341f8ada0..80634677e7 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding_test.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding_test.cc @@ -29,8 +29,8 @@ limitations under the License. 
namespace xla { namespace { -Array<int64> MakeArray(tensorflow::gtl::ArraySlice<int64> dimensions, - tensorflow::gtl::ArraySlice<int64> contents) { +Array<int64> MakeArray(absl::Span<const int64> dimensions, + absl::Span<const int64> contents) { Array<int64> a(dimensions); std::copy(contents.begin(), contents.end(), a.begin()); return a; diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index e0c1326177..773fc7d225 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -149,7 +149,7 @@ bool MayUseOperandValue(int64 operand_number, const ShapeIndex& index, } // namespace void HloValue::SetPositionsAndComputeUses( - tensorflow::gtl::ArraySlice<HloPosition> positions) { + absl::Span<const HloPosition> positions) { CHECK_EQ(positions_.size(), 1) << "SetPositions should only be called once."; // The positions must be unique and should not contain the defining position @@ -222,8 +222,7 @@ string HloValueSet::ToString() const { })); } -bool HloValueSet::AssignUnionOf( - tensorflow::gtl::ArraySlice<const HloValueSet*> inputs) { +bool HloValueSet::AssignUnionOf(absl::Span<const HloValueSet* const> inputs) { HloValueSet union_set; for (const HloValueSet* input : inputs) { for (const HloValue* value : input->values()) { @@ -254,7 +253,7 @@ std::ostream& operator<<(std::ostream& out, const HloValueSet& value_set) { } bool InstructionValueSet::AssignUnionOf( - tensorflow::gtl::ArraySlice<const InstructionValueSet*> inputs) { + absl::Span<const InstructionValueSet* const> inputs) { CHECK_GT(inputs.size(), 0); for (int i = 1; i < inputs.size(); ++i) { DCHECK(ShapeUtil::Compatible(inputs[0]->shape(), inputs[i]->shape())); diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index a1151f65e0..6f2ad214f6 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -108,8 +108,7 @@ class HloValue : public BufferValue { // Sets the positions in the module at which the HloValue appears. Updates // uses. Should be called once and only once. The defining position should not // be included in 'positions' as this is set at construction time. - void SetPositionsAndComputeUses( - tensorflow::gtl::ArraySlice<HloPosition> positions); + void SetPositionsAndComputeUses(absl::Span<const HloPosition> positions); // Returns whether this value is a phi value. bool is_phi() const { return is_phi_; } @@ -186,14 +185,14 @@ class HloValueSet { public: HloValueSet() = default; - explicit HloValueSet(tensorflow::gtl::ArraySlice<const HloValue*> values) + explicit HloValueSet(absl::Span<const HloValue* const> values) : values_(values.begin(), values.end()) { SortAndUniquifyValues(); } // Sets this value set to the union of the given value sets. Returns whether // this value set changed. - bool AssignUnionOf(tensorflow::gtl::ArraySlice<const HloValueSet*> inputs); + bool AssignUnionOf(absl::Span<const HloValueSet* const> inputs); // Return the vector of HloValues in the set. Values in the vector are unique // and stably sorted by value id. @@ -247,8 +246,7 @@ class InstructionValueSet : public ShapeTree<HloValueSet> { // Sets this value set to the union of the given value sets. Returns whether // this value set changed. 
- bool AssignUnionOf( - tensorflow::gtl::ArraySlice<const InstructionValueSet*> inputs); + bool AssignUnionOf(absl::Span<const InstructionValueSet* const> inputs); string ToString() const; }; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 744cd64bc5..95516dec74 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -699,8 +699,7 @@ Status ShapeVerifier::CheckVariadicShape(const HloInstruction* instruction) { instruction->opcode(), instruction->operands())); } -string ComputationsToString( - tensorflow::gtl::ArraySlice<HloComputation*> computations) { +string ComputationsToString(absl::Span<HloComputation* const> computations) { return absl::StrJoin(computations, ",", [](string* s, const HloComputation* computation) { s->append(computation->name()); diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.cc b/tensorflow/compiler/xla/service/indexed_array_analysis.cc index 4d4f681c8a..a4de02a890 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.cc +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.cc @@ -35,7 +35,6 @@ using ConstantArray = Analysis::ConstantArray; using ReshapedArray = Analysis::ReshapedArray; using ScalarIndexedArray = Analysis::ScalarIndexedArray; using absl::StrJoin; -using tensorflow::gtl::ArraySlice; } // namespace string IndexedArrayAnalysis::ToString(Array* root, bool print_constants) { @@ -186,7 +185,7 @@ StatusOr<Analysis::Array*> IndexedArrayAnalysis::ComputeArrayForConstant( StatusOr<ScalarIndexedArray*> IndexedArrayAnalysis::FoldGatherOfGather( ScalarIndexedArray* source, Array* indices, int64 source_dim, - tensorflow::gtl::ArraySlice<int64> output_dims, Shape shape) { + absl::Span<const int64> output_dims, Shape shape) { // We want to transform Gather(Gather(A, X), Y) => Gather(A, Gather(X, Y)). // `source` is the inner Gather(A, X). @@ -252,8 +251,7 @@ StatusOr<ScalarIndexedArray*> IndexedArrayAnalysis::FoldGatherOfGather( StatusOr<Analysis::Array*> IndexedArrayAnalysis::ComputeArrayForGather( const Shape& shape, const GatherDimensionNumbers& dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes, Array* source, - Array* indices) { + absl::Span<const int64> slice_sizes, Array* source, Array* indices) { if (dim_numbers.index_vector_dim() != indices->shape().dimensions_size()) { VLOG(3) << "ComputeArrayForGather: indices are not scalar"; return nullptr; @@ -314,7 +312,7 @@ namespace { // Returns an index into `values` such that the product of the range // [values.begin()+index, values.end()) is equal to `product`. If there is no // such index, return -1. All integers in `values` must be positive. -int64 FindSuffixWithProduct(ArraySlice<int64> values, int64 product) { +int64 FindSuffixWithProduct(absl::Span<const int64> values, int64 product) { DCHECK(absl::c_all_of(values, [](int64 value) { return value > 0; })); int64 current_product = 1; @@ -343,7 +341,8 @@ struct ReshapePassthroughDimPair { // The returned vector of pairs is sorted in both the result_dim and the // operand_dim components. 
std::vector<ReshapePassthroughDimPair> ComputeReshapePassthroughDimPairs( - ArraySlice<int64> operand_shape, ArraySlice<int64> result_shape) { + absl::Span<const int64> operand_shape, + absl::Span<const int64> result_shape) { // A reshape can be seen as an index mapping from output index to input index: // // (i_0, ..., i_n) = f(o_0, ..., o_m) @@ -420,7 +419,7 @@ std::vector<ReshapePassthroughDimPair> ComputeReshapePassthroughDimPairs( // Return true if `dim` is stated as a passthrough operand dim in // `passthrough_dims`. bool IsReshapePassthroughOperandDim( - ArraySlice<ReshapePassthroughDimPair> passthrough_dims, int64 dim) { + absl::Span<const ReshapePassthroughDimPair> passthrough_dims, int64 dim) { return absl::c_any_of(passthrough_dims, [&](ReshapePassthroughDimPair passthrough_dim_pair) { return passthrough_dim_pair.operand_dim == dim; @@ -430,7 +429,8 @@ bool IsReshapePassthroughOperandDim( // Maps `operand_dim` which must be a passthrough operand dimension to its // corresponding passthrough result dimension based on `passthrough_dims`. int64 MapPassthroughOperandDimToResultDim( - ArraySlice<ReshapePassthroughDimPair> passthrough_dims, int64 operand_dim) { + absl::Span<const ReshapePassthroughDimPair> passthrough_dims, + int64 operand_dim) { auto it = absl::c_find_if( passthrough_dims, [&](ReshapePassthroughDimPair passthrough_dim_pair) { return passthrough_dim_pair.operand_dim == operand_dim; @@ -439,9 +439,9 @@ int64 MapPassthroughOperandDimToResultDim( return it->result_dim; } -int64 FindSourcePositionForPassthroughResultDim(ArraySlice<int64> operand_shape, - ArraySlice<int64> result_shape, - int64 source_passthrough_dim) { +int64 FindSourcePositionForPassthroughResultDim( + absl::Span<const int64> operand_shape, absl::Span<const int64> result_shape, + int64 source_passthrough_dim) { VLOG(3) << "FindSourcePositionForPassthroughResultDim([" << StrJoin(operand_shape, ",") << "], [" << StrJoin(result_shape, ",") << "], " << source_passthrough_dim << ")"; @@ -519,8 +519,7 @@ IndexedArrayAnalysis::ReshapeToRemoveDegenerateDims( } StatusOr<ScalarIndexedArray*> IndexedArrayAnalysis::ReshapeToAddDegenerateDims( - ScalarIndexedArray* operand, - tensorflow::gtl::ArraySlice<int64> degenerate_dims) { + ScalarIndexedArray* operand, absl::Span<const int64> degenerate_dims) { if (degenerate_dims.empty()) { return operand; } @@ -873,7 +872,7 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp(HloOpcode opcode, return nullptr; } - ArraySlice<int64> broadcast_dims = broadcast_instr->dimensions(); + absl::Span<const int64> broadcast_dims = broadcast_instr->dimensions(); auto is_broadcasted_dim = [&](int64 output_dim) { return absl::c_find(broadcast_dims, output_dim) == broadcast_dims.end(); }; @@ -896,7 +895,7 @@ IndexedArrayAnalysis::ComputeArrayForElementwiseBinaryOp(HloOpcode opcode, // The scalar-indexed node "removes" the source dim and "inserts" the output // dims. We do the opposite here to undo the scalar-indexed operation. - ArraySlice<int64> output_dims = scalar_indexed_const->output_dims(); + absl::Span<const int64> output_dims = scalar_indexed_const->output_dims(); for (int64 i = output_dims.size() - 1; i >= 0; --i) { CHECK(simulated_index[output_dims[i]] == IndexComponent::Broadcasted); EraseAt(&simulated_index, output_dims[i]); @@ -973,8 +972,8 @@ namespace { // Returns the non-contracting non-batch dimension (as per `contracting_dims` // and `batch_dims`) if there is exactly one, otherwise returns nullopt.
absl::optional<int64> GetOnlyNonContractingNonBatchDim( - int64 rank, ArraySlice<int64> contracting_dims, - ArraySlice<int64> batch_dims) { + int64 rank, absl::Span<const int64> contracting_dims, + absl::Span<const int64> batch_dims) { absl::optional<int64> result; for (int64 dim = 0; dim < rank; dim++) { if (!absl::c_linear_search(contracting_dims, dim) && @@ -998,7 +997,8 @@ absl::optional<int64> GetOnlyNonContractingNonBatchDim( // of whatever operand `indexed_array` is to the dot (LHS or RHS). bool CanFoldDotIntoIndexedArray( absl::string_view tag, Analysis::ScalarIndexedConstantArray* indexed_array, - ArraySlice<int64> contracting_dims, ArraySlice<int64> batch_dims) { + absl::Span<const int64> contracting_dims, + absl::Span<const int64> batch_dims) { absl::optional<int64> non_contracting_non_batch_dim = GetOnlyNonContractingNonBatchDim(ShapeUtil::Rank(indexed_array->shape()), contracting_dims, batch_dims); diff --git a/tensorflow/compiler/xla/service/indexed_array_analysis.h b/tensorflow/compiler/xla/service/indexed_array_analysis.h index 3fa7d749e1..dcfb725535 100644 --- a/tensorflow/compiler/xla/service/indexed_array_analysis.h +++ b/tensorflow/compiler/xla/service/indexed_array_analysis.h @@ -188,9 +188,7 @@ class IndexedArrayAnalysis { // `output_dims` are the dimensions in the output array that are being used // to compute an index into the `indices` array. See the class // documentation and the overview for more details. - tensorflow::gtl::ArraySlice<int64> output_dims() const { - return output_dims_; - } + absl::Span<const int64> output_dims() const { return output_dims_; } private: explicit ScalarIndexedArray(Array* source, Array* indices, int64 source_dim, @@ -265,8 +263,7 @@ class IndexedArrayAnalysis { StatusOr<Array*> ComputeArrayForGather( const Shape& shape, const GatherDimensionNumbers& dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes, Array* source, - Array* indices); + absl::Span<const int64> slice_sizes, Array* source, Array* indices); StatusOr<Array*> ComputeArrayForDotWithIndexedLhs( const Shape& shape, const DotDimensionNumbers& dim_numbers, @@ -303,7 +300,7 @@ class IndexedArrayAnalysis { // G1 = [Arr[i] for i in I2] StatusOr<ScalarIndexedArray*> FoldGatherOfGather( ScalarIndexedArray* source, Array* indices, int64 source_dim, - tensorflow::gtl::ArraySlice<int64> output_dims, Shape shape); + absl::Span<const int64> output_dims, Shape shape); // Reshapes a scalar-indexed node to remove the degenerate dimensions in its // output. The result is always a scalar-indexed node. @@ -313,8 +310,7 @@ class IndexedArrayAnalysis { // Reshapes a scalar-indexed node such that the result has the degenerate // dimensions `degenerate_dims`. The result is always a scalar-indexed node. 
StatusOr<ScalarIndexedArray*> ReshapeToAddDegenerateDims( - ScalarIndexedArray* operand, - tensorflow::gtl::ArraySlice<int64> degenerate_dims); + ScalarIndexedArray* operand, absl::Span<const int64> degenerate_dims); StatusOr<ScalarIndexedArray*> FoldReshapeOfGather( const Shape& shape, ScalarIndexedConstantArray* operand); diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 4b5285031b..8c907eae0c 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -219,7 +219,7 @@ bool InstructionFusion::CanFuseOnAllPaths( InstructionFusion::HloInstructionSet InstructionFusion::ComputeGloballyUnfusible( - tensorflow::gtl::ArraySlice<HloInstruction*> post_order) { + absl::Span<HloInstruction* const> post_order) { // Forbid fusion of producers that: // a) Need to be duplicated, unless they can be fused into all consumers // via all paths. diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h index 9802d4cfc1..00b658959a 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/instruction_fusion.h @@ -123,7 +123,7 @@ class InstructionFusion : public HloPassInterface { // Computes the set of nodes that we do not want to fuse into any of their // consumers based on a global analysis of the HLO graph. HloInstructionSet ComputeGloballyUnfusible( - tensorflow::gtl::ArraySlice<HloInstruction*> post_order); + absl::Span<HloInstruction* const> post_order); // Used to determine if an HLO is expensive. Expensive operations will not be // duplicated. diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 2259dc1083..5dea124768 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -47,7 +47,7 @@ InterpreterExecutable::~InterpreterExecutable() {} StatusOr<ScopedShapedBuffer> InterpreterExecutable::ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) { se::Stream* stream = run_options->stream(); se::StreamExecutor* executor = stream->parent(); @@ -111,7 +111,7 @@ StatusOr<ScopedShapedBuffer> InterpreterExecutable::ExecuteOnStream( StatusOr<ScopedShapedBuffer> InterpreterExecutable::ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) { + absl::Span<const ShapedBuffer* const> arguments) { return tensorflow::errors::Unimplemented( "ExecuteAsyncOnStream is not yet supported on Interpreter."); } diff --git a/tensorflow/compiler/xla/service/interpreter/executable.h b/tensorflow/compiler/xla/service/interpreter/executable.h index 91d8148d26..588787d445 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.h +++ b/tensorflow/compiler/xla/service/interpreter/executable.h @@ -48,13 +48,13 @@ class InterpreterExecutable : public Executable { StatusOr<ScopedShapedBuffer> ExecuteOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, HloExecutionProfile* hlo_execution_profile) override LOCKS_EXCLUDED(evaluator_lock_); 
StatusOr<ScopedShapedBuffer> ExecuteAsyncOnStream( const ServiceExecutableRunOptions* run_options, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments) override; + absl::Span<const ShapedBuffer* const> arguments) override; static int64 ShapeSizeBytes(const Shape& shape); diff --git a/tensorflow/compiler/xla/service/interpreter/executor.h b/tensorflow/compiler/xla/service/interpreter/executor.h index db6b910b32..f600b14c6c 100644 --- a/tensorflow/compiler/xla/service/interpreter/executor.h +++ b/tensorflow/compiler/xla/service/interpreter/executor.h @@ -47,7 +47,7 @@ limitations under the License. namespace stream_executor { namespace interpreter { -using Args = tensorflow::gtl::ArraySlice<DeviceMemoryBase>; +using Args = absl::Span<const DeviceMemoryBase>; class XlaInterpreterExecutor : public internal::StreamExecutorInterface { public: diff --git a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc index ad350613dd..cc2e862f2e 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.cc @@ -99,9 +99,10 @@ static Status EmitDynamicUpdateSliceInPlaceImpl( return LoopEmitter(loop_body_emitter, update_shape, b).EmitLoop(name); } -Status EmitDynamicUpdateSliceInPlace( - tensorflow::gtl::ArraySlice<IrArray> operand_arrays, - const IrArray& output_array, absl::string_view name, llvm::IRBuilder<>* b) { +Status EmitDynamicUpdateSliceInPlace(absl::Span<const IrArray> operand_arrays, + const IrArray& output_array, + absl::string_view name, + llvm::IRBuilder<>* b) { VLOG(2) << "EmitDynamicUpdateSliceInPlace for " << name; // No need to use operand_arrays[0], the input array of the @@ -129,8 +130,7 @@ Status EmitDynamicUpdateSliceInPlace( // // Emits a sequential loop if launch_dimensions is null. 
static Status EmitFusedDynamicUpdateSliceInPlaceImpl( - HloInstruction* fusion, - tensorflow::gtl::ArraySlice<IrArray> fusion_operand_arrays, + HloInstruction* fusion, absl::Span<const IrArray> fusion_operand_arrays, const IrArray& fusion_output_array, ElementalIrEmitter* elemental_emitter, const gpu::LaunchDimensions* launch_dimensions, llvm::IRBuilder<>* b) { CHECK_EQ(fusion->opcode(), HloOpcode::kFusion); @@ -173,8 +173,7 @@ static Status EmitFusedDynamicUpdateSliceInPlaceImpl( } Status EmitFusedDynamicUpdateSliceInPlace( - HloInstruction* fusion, - tensorflow::gtl::ArraySlice<IrArray> fusion_operand_arrays, + HloInstruction* fusion, absl::Span<const IrArray> fusion_operand_arrays, const IrArray& fusion_output_array, ElementalIrEmitter* elemental_emitter, llvm::IRBuilder<>* b) { return EmitFusedDynamicUpdateSliceInPlaceImpl( @@ -183,8 +182,7 @@ Status EmitFusedDynamicUpdateSliceInPlace( } Status EmitParallelFusedDynamicUpdateSliceInPlace( - HloInstruction* fusion, - tensorflow::gtl::ArraySlice<IrArray> fusion_operand_arrays, + HloInstruction* fusion, absl::Span<const IrArray> fusion_operand_arrays, const IrArray& fusion_output_array, ElementalIrEmitter* elemental_emitter, const gpu::LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* b) { return EmitFusedDynamicUpdateSliceInPlaceImpl( diff --git a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h index e1631a62ae..fb3e4eb97c 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/dynamic_update_slice_util.h @@ -63,25 +63,24 @@ inline bool CanEmitFusedDynamicUpdateSliceInPlace( // Emits IR for running the given dynamic-update-slice op in-place -- that is, // where the input and output buffers share the same slice, so we can simply // modify the input/output buffer without touching any of the other elements. -Status EmitDynamicUpdateSliceInPlace( - tensorflow::gtl::ArraySlice<IrArray> operand_arrays, - const IrArray& output_array, absl::string_view name, llvm::IRBuilder<>* b); +Status EmitDynamicUpdateSliceInPlace(absl::Span<const IrArray> operand_arrays, + const IrArray& output_array, + absl::string_view name, + llvm::IRBuilder<>* b); // Given a loop-fusion node whose root is a dynamic-update-slice op whose // array-to-be-updated and output share the same buffer slice, emits // (sequential) code for a fusion node that does the dynamic-update-slice in // place. Status EmitFusedDynamicUpdateSliceInPlace( - HloInstruction* fusion, - tensorflow::gtl::ArraySlice<IrArray> fusion_operand_arrays, + HloInstruction* fusion, absl::Span<const IrArray> fusion_operand_arrays, const IrArray& fusion_output_array, ElementalIrEmitter* elemental_emitter, llvm::IRBuilder<>* b); // Same as EmitFusedDynamicUpdateSliceInPlace, except emits a parallel loop with // the given launch dimensions. 
Status EmitParallelFusedDynamicUpdateSliceInPlace( - HloInstruction* fusion, - tensorflow::gtl::ArraySlice<IrArray> fusion_operand_arrays, + HloInstruction* fusion, absl::Span<const IrArray> fusion_operand_arrays, const IrArray& fusion_output_array, ElementalIrEmitter* elemental_emitter, const gpu::LaunchDimensions& launch_dimensions, llvm::IRBuilder<>* b); diff --git a/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc index 6d637cad6d..b606c993a2 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.cc @@ -147,7 +147,7 @@ Status FusedIrEmitter::HandleParameter(HloInstruction* parameter) { } Status FusedIrEmitter::HandleTuple(HloInstruction* tuple) { - tensorflow::gtl::ArraySlice<HloInstruction*> operands(tuple->operands()); + absl::Span<HloInstruction* const> operands(tuple->operands()); std::vector<llvm::Type*> operand_elemental_ir_types; for (HloInstruction* operand : operands) { operand_elemental_ir_types.push_back(llvm_ir::PrimitiveTypeToIrType( diff --git a/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h b/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h index 30471480c4..25ec458160 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h +++ b/tensorflow/compiler/xla/service/llvm_ir/fused_ir_emitter.h @@ -54,7 +54,7 @@ class FusedIrEmitter : public DfsHloVisitorWithDefault { public: using Generator = llvm_ir::ElementGenerator; - FusedIrEmitter(tensorflow::gtl::ArraySlice<llvm_ir::IrArray> parameter_arrays, + FusedIrEmitter(absl::Span<const llvm_ir::IrArray> parameter_arrays, ElementalIrEmitter* elemental_emitter) : parameter_arrays_(parameter_arrays), tiled_parameter_info_(nullptr), @@ -94,7 +94,7 @@ class FusedIrEmitter : public DfsHloVisitorWithDefault { private: // Arrays of parameters of fusion instruction - tensorflow::gtl::ArraySlice<llvm_ir::IrArray> parameter_arrays_; + absl::Span<const llvm_ir::IrArray> parameter_arrays_; const llvm_ir::TiledParameterInfo* tiled_parameter_info_; ElementalIrEmitter* elemental_emitter_; diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 36e713d1ac..67f7423121 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -73,7 +73,7 @@ IrArray::Index::Index(llvm::Value* linear, const Shape& shape, Delinearize(&multidim_, linear, shape, b); } -IrArray::Index::Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim, +IrArray::Index::Index(absl::Span<llvm::Value* const> multidim, llvm::Value* linear, const Shape& shape) : multidim_(multidim.begin(), multidim.end()), linear_(linear), @@ -92,7 +92,7 @@ IrArray::Index::Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim, << " should have a layout."; } -IrArray::Index::Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim, +IrArray::Index::Index(absl::Span<llvm::Value* const> multidim, const Shape& shape, llvm::IRBuilder<>* b) : multidim_(multidim.begin(), multidim.end()), layout_(shape.layout()), @@ -147,7 +147,7 @@ IrArray::Index IrArray::Index::SourceIndexOfReshape( // indices in the same common factor. 
for (ssize_t k = common_factors.size() - 2; k >= 0; --k) { llvm::Value* logical_linear_index = - Index(tensorflow::gtl::ArraySlice<llvm::Value*>(multidim_).subspan( + Index(absl::Span<llvm::Value* const>(multidim_).subspan( common_factors[k].second, common_factors[k + 1].second - common_factors[k].second), index_type_) @@ -184,9 +184,8 @@ IrArray::Index IrArray::Index::SourceIndexOfReshape( } IrArray::Index IrArray::Index::SourceIndexOfSlice( - const Shape& shape, tensorflow::gtl::ArraySlice<int64> starts, - tensorflow::gtl::ArraySlice<int64> strides, - llvm::IRBuilder<>* builder) const { + const Shape& shape, absl::Span<const int64> starts, + absl::Span<const int64> strides, llvm::IRBuilder<>* builder) const { Index source_index(index_type_, multidim_.size()); for (int i = 0; i < multidim_.size(); ++i) { int64 stride = strides[i]; @@ -207,7 +206,7 @@ IrArray::Index IrArray::Index::SourceIndexOfSlice( IrArray::Index IrArray::Index::SourceIndexOfTranspose( const Shape& shape, const Shape& operand_shape, - tensorflow::gtl::ArraySlice<int64> dimension_mapping, + absl::Span<const int64> dimension_mapping, llvm::IRBuilder<>* builder) const { std::vector<llvm::Value*> operand_multidim_index = Permute(dimension_mapping, multidim()); @@ -256,7 +255,7 @@ IrArray::Index IrArray::Index::SourceIndexOfBitcast( IrArray::Index IrArray::Index::SourceIndexOfBroadcast( const Shape& shape, const Shape& operand_shape, - tensorflow::gtl::ArraySlice<int64> dimension_mapping, + absl::Span<const int64> dimension_mapping, llvm::IRBuilder<>* builder) const { int64 rank = ShapeUtil::Rank(operand_shape); std::vector<llvm::Value*> source_index(rank); @@ -321,9 +320,8 @@ IrArray::Index IrArray::Index::SourceIndexOfBroadcast( return Index(source_index, linear, operand_shape); } -llvm::Value* IrArray::Index::Linearize( - tensorflow::gtl::ArraySlice<int64> dimensions, - llvm::IRBuilder<>* builder) const { +llvm::Value* IrArray::Index::Linearize(absl::Span<const int64> dimensions, + llvm::IRBuilder<>* builder) const { // Each dimension is multiplied by the product of the sizes of all // earlier dimensions and added to the accumulator logical_linear_index. CHECK_EQ(size(), dimensions.size()); diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h index e913c109b3..7629806a36 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h @@ -70,7 +70,7 @@ class IrArray { // Constructs an index from multi-dimensional index "multidim". The linear // index is set to nullptr. - explicit Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim, + explicit Index(absl::Span<llvm::Value* const> multidim, llvm::Type* index_ty = nullptr) : multidim_(multidim.begin(), multidim.end()) { if (size() == 0) { @@ -99,14 +99,14 @@ class IrArray { // that it indexes into. // // Precondition: "shape" has a layout. - Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim, - const Shape& shape, llvm::IRBuilder<>* b); + Index(absl::Span<llvm::Value* const> multidim, const Shape& shape, + llvm::IRBuilder<>* b); // Constructs an index from both a multi-dimensional index and a linear // index. "shape" has the same meaning as that in the constructor that takes // only a linear index. 
- Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim, - llvm::Value* linear, const Shape& shape); + Index(absl::Span<llvm::Value* const> multidim, llvm::Value* linear, + const Shape& shape); const std::vector<llvm::Value*>& multidim() const { return multidim_; } llvm::Value* linear() const { return linear_; } @@ -145,17 +145,15 @@ class IrArray { // by starting indices `starts` and stride values `strides`. // // Precondition: "this" is an index into a slice whose shape is `shape`. - Index SourceIndexOfSlice(const Shape& shape, - tensorflow::gtl::ArraySlice<int64> starts, - tensorflow::gtl::ArraySlice<int64> strides, + Index SourceIndexOfSlice(const Shape& shape, absl::Span<const int64> starts, + absl::Span<const int64> strides, llvm::IRBuilder<>* builder) const; // Given that "this" is the target index of a transpose from `operand_shape` // to `shape` with the given dimension mapping, returns the source index. - Index SourceIndexOfTranspose( - const Shape& shape, const Shape& operand_shape, - tensorflow::gtl::ArraySlice<int64> dimension_mapping, - llvm::IRBuilder<>* builder) const; + Index SourceIndexOfTranspose(const Shape& shape, const Shape& operand_shape, + absl::Span<const int64> dimension_mapping, + llvm::IRBuilder<>* builder) const; // Given that "this" is the target index of a bitcast from `operand_shape` // to `shape`, returns the source index. @@ -164,14 +162,13 @@ class IrArray { // Given that "this" is the target index of a broadcast from `operand_shape` // to `shape` with the given dimension mapping, returns the source index. - Index SourceIndexOfBroadcast( - const Shape& shape, const Shape& operand_shape, - tensorflow::gtl::ArraySlice<int64> dimension_mapping, - llvm::IRBuilder<>* builder) const; + Index SourceIndexOfBroadcast(const Shape& shape, const Shape& operand_shape, + absl::Span<const int64> dimension_mapping, + llvm::IRBuilder<>* builder) const; // Linearizes the index into the given shape, i.e. reshapes it to rank-1 and // returns the index into the sole dimension 0 of the new shape. - llvm::Value* Linearize(tensorflow::gtl::ArraySlice<int64> dimensions, + llvm::Value* Linearize(absl::Span<const int64> dimensions, llvm::IRBuilder<>* builder) const; llvm::Type* GetType() const { return index_type_; } diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h index b152cf9275..43fec311f1 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_support_library.h @@ -235,7 +235,7 @@ class KernelSupportLibrary { })); } - using ArgumentVector = tensorflow::gtl::ArraySlice<llvm::Value*>; + using ArgumentVector = absl::Span<llvm::Value* const>; // Generates the following control flow structure: // diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc index cb4d1db997..e5fbdbd51b 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.cc @@ -28,7 +28,7 @@ namespace { // Returns the indices of the first elements of all consecutive subarrays of the // given array. 
For example: // ConsecutiveSegments({m, m+1, m+2, n, k, k+1}) = {0, 3, 4} -std::vector<size_t> ConsecutiveSegments(tensorflow::gtl::ArraySlice<int64> xs) { +std::vector<size_t> ConsecutiveSegments(absl::Span<const int64> xs) { std::vector<size_t> is = {0}; for (size_t i = 1; i < xs.size(); ++i) { if (1 != xs[i] - xs[i - 1]) { @@ -40,8 +40,7 @@ std::vector<size_t> ConsecutiveSegments(tensorflow::gtl::ArraySlice<int64> xs) { // Merges the sequences of dimensions of the given shape which start at the // given indices `segs`. -Shape MergeDimensions(tensorflow::gtl::ArraySlice<size_t> segs, - const Shape& shape) { +Shape MergeDimensions(absl::Span<const size_t> segs, const Shape& shape) { std::vector<int64> dimensions; for (size_t i = 1; i <= segs.size(); ++i) { dimensions.push_back(std::accumulate( diff --git a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h index 8bd06c42c3..5ea05b3188 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h +++ b/tensorflow/compiler/xla/service/llvm_ir/kernel_tiling.h @@ -50,7 +50,7 @@ IrArray::Index GetUnreducedOutputIndex( // for 021 transpose. class TiledParameterInfo { public: - TiledParameterInfo(tensorflow::gtl::ArraySlice<llvm::Value*> param_buffers, + TiledParameterInfo(absl::Span<llvm::Value* const> param_buffers, llvm::Value* y, llvm::Value* x) : param_buffers_(param_buffers), y_(y), x_(x) {} @@ -67,7 +67,7 @@ class TiledParameterInfo { private: // Param_buffers_[i] stores the tile buffer for the ith parameter or nullptr // if the parameter is not tiled. - tensorflow::gtl::ArraySlice<llvm::Value*> param_buffers_; + absl::Span<llvm::Value* const> param_buffers_; // The y coordinate within a tile. llvm::Value* y_; // The x coordinate within a tile. diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc index 9f3329e7f0..219a9f221f 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.cc @@ -241,7 +241,7 @@ IrArray::Index ForLoopNest::AddLoopsForShape(const Shape& shape, } IrArray::Index ForLoopNest::AddLoopsForShapeOnDimensions( - const Shape& shape, tensorflow::gtl::ArraySlice<int64> dimensions, + const Shape& shape, absl::Span<const int64> dimensions, absl::string_view suffix) { llvm_ir::IrArray::Index index(index_type_, shape.dimensions_size()); for (int64 dimension : dimensions) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h index 0a406bd90b..2be7bbd0de 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_loop.h @@ -242,7 +242,7 @@ class ForLoopNest { // size equals the rank of shape and there is a null for each // dimension that is not in "dimensions". IrArray::Index AddLoopsForShapeOnDimensions( - const Shape& shape, tensorflow::gtl::ArraySlice<int64> dimensions, + const Shape& shape, absl::Span<const int64> dimensions, absl::string_view suffix); // Emits a series of nested loops for iterating over an operand array. 
Loops diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index f0db2a3761..1a53c026be 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -83,11 +83,10 @@ string DumpModuleToString(const llvm::Module& module) { return AsString(buffer_string); } -llvm::Value* EmitCallToIntrinsic( - llvm::Intrinsic::ID intrinsic_id, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<llvm::Type*> overloaded_types, - llvm::IRBuilder<>* b) { +llvm::Value* EmitCallToIntrinsic(llvm::Intrinsic::ID intrinsic_id, + absl::Span<llvm::Value* const> operands, + absl::Span<llvm::Type* const> overloaded_types, + llvm::IRBuilder<>* b) { llvm::Module* module = ModuleFromIRBuilder(b); llvm::Function* intrinsic = llvm::Intrinsic::getDeclaration( module, intrinsic_id, AsArrayRef(overloaded_types)); diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h index dde50e19d1..61b029eb08 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.h @@ -59,7 +59,7 @@ llvm::ArrayRef<T> AsArrayRef(const std::vector<T>& vec) { } template <typename T> -llvm::ArrayRef<T> AsArrayRef(const tensorflow::gtl::ArraySlice<T>& slice) { +llvm::ArrayRef<T> AsArrayRef(const absl::Span<const T>& slice) { return llvm::ArrayRef<T>(slice.data(), slice.size()); } @@ -101,11 +101,10 @@ string SanitizeFunctionName(string function_name); // intrinsics (for example, "minnum") must include a type in overloaded_types // for each overloaded type. Typically, overloaded intrinsics have only a single // overloaded type. -llvm::Value* EmitCallToIntrinsic( - llvm::Intrinsic::ID intrinsic_id, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, - tensorflow::gtl::ArraySlice<llvm::Type*> overloaded_types, - llvm::IRBuilder<>* b); +llvm::Value* EmitCallToIntrinsic(llvm::Intrinsic::ID intrinsic_id, + absl::Span<llvm::Value* const> operands, + absl::Span<llvm::Type* const> overloaded_types, + llvm::IRBuilder<>* b); // Emit float max. Emit maxnum intrinsic is fast math is disabled, or // fcmp+select otherwise diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc index 1553b4fc91..0dc120e0b0 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.cc @@ -69,7 +69,7 @@ static LoopEmitter::BodyEmitter MakeBodyEmitterForMultiOutputFusion( } LoopEmitter::LoopEmitter(const ElementGenerator& target_element_generator, - tensorflow::gtl::ArraySlice<IrArray> target_arrays, + absl::Span<const IrArray> target_arrays, llvm::IRBuilder<>* b) : body_emitter_(MakeBodyEmitterForMultiOutputFusion( target_element_generator, diff --git a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h index 57d9d8bbc6..a537c00066 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h +++ b/tensorflow/compiler/xla/service/llvm_ir/loop_emitter.h @@ -53,8 +53,7 @@ class LoopEmitter { // This is used for multi-output fusion. target_element_generator must // produce an LLVM struct with N elements. 
LoopEmitter(const ElementGenerator& target_element_generator, - tensorflow::gtl::ArraySlice<IrArray> target_arrays, - llvm::IRBuilder<>* b); + absl::Span<const IrArray> target_arrays, llvm::IRBuilder<>* b); LoopEmitter(const LoopEmitter&) = delete; LoopEmitter& operator=(const LoopEmitter&) = delete; diff --git a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc index 11ed6ee59f..7d49b8d6c2 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.cc @@ -64,8 +64,7 @@ void EmitTupleSelect(const IrArray& select, const IrArray& pred, } } -void EmitTuple(const IrArray& tuple, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, +void EmitTuple(const IrArray& tuple, absl::Span<llvm::Value* const> operands, llvm::IRBuilder<>* b, llvm::Module* module) { for (size_t i = 0; i < operands.size(); ++i) { auto* store = b->CreateStore( diff --git a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h index cf6bf5d0b1..cee211d66f 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h +++ b/tensorflow/compiler/xla/service/llvm_ir/tuple_ops.h @@ -65,8 +65,7 @@ void EmitTupleSelect(const IrArray& select, const IrArray& pred, // A tuple is an array of pointers, one for each operand. Each pointer points to // the output buffer of its corresponding operand. -void EmitTuple(const IrArray& tuple, - tensorflow::gtl::ArraySlice<llvm::Value*> operands, +void EmitTuple(const IrArray& tuple, absl::Span<llvm::Value* const> operands, llvm::IRBuilder<>* b, llvm::Module* module); // A tuple is an array of pointers, one for each operand. Each pointer points to diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 768105d9e1..0d0fb7946a 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -141,7 +141,7 @@ ExecutionOptions CreateExecutionOptions( StatusOr<std::unique_ptr<Executable>> LocalService::CompileExecutable( const XlaComputation& computation, - const tensorflow::gtl::ArraySlice<const Shape*> argument_layouts, + const absl::Span<const Shape* const> argument_layouts, const ExecutableBuildOptions& build_options) { const HloModuleProto& proto = computation.proto(); TF_RET_CHECK(proto.has_program_shape()); diff --git a/tensorflow/compiler/xla/service/local_service.h b/tensorflow/compiler/xla/service/local_service.h index 8f707ea904..acc8c6d2e0 100644 --- a/tensorflow/compiler/xla/service/local_service.h +++ b/tensorflow/compiler/xla/service/local_service.h @@ -48,7 +48,7 @@ class LocalService : public Service { // compiler is responsible for freeing any memory it allocates this way. StatusOr<std::unique_ptr<Executable>> CompileExecutable( const XlaComputation& computation, - const tensorflow::gtl::ArraySlice<const Shape*> argument_layouts, + const absl::Span<const Shape* const> argument_layouts, const ExecutableBuildOptions& build_options); // Returns the device ordinal that corresponds to the given replica number. 
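A note on the const placement used throughout this change: tensorflow::gtl::ArraySlice<T> was always a read-only view, so its mechanical replacement is absl::Span<const T>. When the element type is itself a pointer, the const must qualify the pointer, giving absl::Span<T* const> (an immutable sequence of pointers to mutable objects); absl::Span<const T*> would instead be a view over pointers-to-const and would not bind to the existing std::vector<T*> call sites. The following standalone sketch, which is not part of the commit and uses int64_t plus a made-up HloLikeInstruction in place of XLA's int64 and HloInstruction, illustrates why the call sites need no changes:

#include <cstdint>
#include <vector>
#include "absl/types/span.h"

struct HloLikeInstruction { int64_t id; };  // stand-in, not XLA's HloInstruction

// Read-only view of the pointer sequence: the callee cannot reseat or
// resize it, but may still mutate the pointed-to objects.
int64_t SumIds(absl::Span<HloLikeInstruction* const> instrs) {
  int64_t total = 0;
  for (HloLikeInstruction* instr : instrs) total += instr->id;
  return total;
}

int main() {
  HloLikeInstruction a{1}, b{2};
  std::vector<HloLikeInstruction*> operands = {&a, &b};
  // std::vector<T*> converts implicitly to absl::Span<T* const> because
  // data() yields T**, which converts to T* const* (qualification conversion).
  return SumIds(operands) == 3 ? 0 : 1;
}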
diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.cc b/tensorflow/compiler/xla/service/multi_output_fusion.cc index 4166ef5baf..b9ec31c497 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.cc +++ b/tensorflow/compiler/xla/service/multi_output_fusion.cc @@ -262,7 +262,7 @@ void MultiOutputFusion::RecomputeReachability() { void MultiOutputFusion::UpdateReachability( HloInstruction* instr1, HloInstruction* instr2, - tensorflow::gtl::ArraySlice<HloInstruction*> instrs_to_update, + absl::Span<HloInstruction* const> instrs_to_update, const std::function<bool(HloInstruction*)>& skip) { for (auto instr : instrs_to_update) { if (skip != nullptr && skip(instr)) { diff --git a/tensorflow/compiler/xla/service/multi_output_fusion.h b/tensorflow/compiler/xla/service/multi_output_fusion.h index 4c8cb7d379..d2c52651c4 100644 --- a/tensorflow/compiler/xla/service/multi_output_fusion.h +++ b/tensorflow/compiler/xla/service/multi_output_fusion.h @@ -92,7 +92,7 @@ class MultiOutputFusion : public HloPassInterface { // Update the reachability map after fusing instr1 and instr2. void UpdateReachability( HloInstruction* instr1, HloInstruction* instr2, - tensorflow::gtl::ArraySlice<HloInstruction*> instrs_to_update, + absl::Span<HloInstruction* const> instrs_to_update, const std::function<bool(HloInstruction*)>& skip = nullptr); // Hook for multi-output fusion along producer-consumer edges. diff --git a/tensorflow/compiler/xla/service/scatter_expander.cc b/tensorflow/compiler/xla/service/scatter_expander.cc index 2077b57c05..2f4b2667c4 100644 --- a/tensorflow/compiler/xla/service/scatter_expander.cc +++ b/tensorflow/compiler/xla/service/scatter_expander.cc @@ -26,7 +26,6 @@ limitations under the License. namespace xla { -using tensorflow::gtl::ArraySlice; // Transposes the given scatter_indices such that the index_vector_dim becomes // the most-minor dimension. @@ -87,7 +86,7 @@ static StatusOr<HloInstruction*> CanonicalizeScatterIndices( // major dimensions and all the window dimensions appear in the minor // dimensions. static StatusOr<HloInstruction*> PermuteScatterAndWindowDims( - HloInstruction* updates, ArraySlice<int64> update_window_dims) { + HloInstruction* updates, absl::Span<const int64> update_window_dims) { std::vector<int64> permutation; const int64 updates_rank = ShapeUtil::Rank(updates->shape()); permutation.reserve(updates_rank); diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index e10c1d9927..f0e2566a3f 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -62,10 +62,9 @@ using absl::StrCat; using absl::StrFormat; // Records the arguments used to invoke a computation in an HloSnapshot proto. 
-Status RecordArguments( - const tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, - se::Stream* stream, TransferManager* transfer_manager, - HloSnapshot* module) { +Status RecordArguments(const absl::Span<const ShapedBuffer* const> arguments, + se::Stream* stream, TransferManager* transfer_manager, + HloSnapshot* module) { module->clear_arguments(); for (const ShapedBuffer* argument : arguments) { TF_ASSIGN_OR_RETURN( @@ -207,8 +206,8 @@ Status Service::ValidateResultShape(const Shape& client_shape, StatusOr<std::vector<std::vector<const ShapedBuffer*>>> Service::ResolveAndValidateArguments( - tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments, - tensorflow::gtl::ArraySlice<se::StreamExecutor*> stream_executors) { + absl::Span<const GlobalDataHandle* const> arguments, + absl::Span<se::StreamExecutor* const> stream_executors) { CHECK_EQ(options_.number_of_replicas(), stream_executors.size()); std::vector<std::vector<const ShapedBuffer*>> replicated_arguments; replicated_arguments.resize(options_.number_of_replicas()); @@ -242,7 +241,7 @@ Service::ResolveAndValidateArguments( StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig( const ProgramShape& program_shape, - tensorflow::gtl::ArraySlice<const Shape*> argument_shapes, + absl::Span<const Shape* const> argument_shapes, const ExecutionOptions* execution_options) { auto config = absl::make_unique<HloModuleConfig>(program_shape); ComputationLayout* computation_layout = @@ -299,7 +298,7 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig( StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig( const ProgramShape& program_shape, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, const ExecutionOptions& execution_options) { std::vector<const Shape*> argument_shapes; for (const auto* arg : arguments) { @@ -367,12 +366,10 @@ StatusOr<std::vector<std::unique_ptr<Executable>>> Service::BuildExecutables( StatusOr<std::vector<GlobalDataHandle>> Service::ExecuteParallelAndRegisterResult( - tensorflow::gtl::ArraySlice<Executable*> executables, - tensorflow::gtl::ArraySlice<std::vector<std::vector<const ShapedBuffer*>>> - arguments, - Backend* backend, tensorflow::gtl::ArraySlice<DeviceHandle> device_handles, - tensorflow::gtl::ArraySlice<string> result_tags, - ExecutionProfile* profile) { + absl::Span<Executable* const> executables, + absl::Span<const std::vector<std::vector<const ShapedBuffer*>>> arguments, + Backend* backend, absl::Span<const DeviceHandle> device_handles, + absl::Span<const string> result_tags, ExecutionProfile* profile) { // Streams where the computation are launched, so we can wait on the streams // to complete. std::vector<StreamPool::Ptr> streams; @@ -511,8 +508,7 @@ Service::ExecuteParallelAndRegisterResult( StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult( Executable* executable, - const tensorflow::gtl::ArraySlice<std::vector<const ShapedBuffer*>> - arguments, + const absl::Span<const std::vector<const ShapedBuffer*>> arguments, Backend* backend, const string& result_tag, ExecutionProfile* profile) { // Set up streams. std::vector<StreamPool::Ptr> streams; @@ -555,8 +551,7 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult( // TODO(b/69985541): Support profiling also on this path. 
- std::vector<tensorflow::gtl::ArraySlice<const ShapedBuffer*>> - replicated_arguments; + std::vector<absl::Span<const ShapedBuffer* const>> replicated_arguments; for (const auto& arg : arguments) { replicated_arguments.push_back(arg); } @@ -595,7 +590,7 @@ StatusOr<std::vector<se::StreamExecutor*>> Service::GetExecutors( StatusOr<std::vector<std::vector<const ShapedBuffer*>>> Service::GetArguments( const ExecutionOptions& execution_options, - tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments) { + absl::Span<const GlobalDataHandle* const> arguments) { // Resolve the allocations for the arguments of the computation, and create // a vector of device memory offsets for the arguments from the allocations. // In the case of partitioned computations, assume all arguments go on the diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 47d196fb2a..173300d8b6 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -176,7 +176,7 @@ class Service : public ServiceInterface { // class. StatusOr<std::unique_ptr<HloModuleConfig>> CreateModuleConfig( const ProgramShape& program_shape, - tensorflow::gtl::ArraySlice<const ShapedBuffer*> arguments, + absl::Span<const ShapedBuffer* const> arguments, const ExecutionOptions& execution_options); // Picks a parallel response and fills the result. @@ -191,7 +191,7 @@ class Service : public ServiceInterface { // Prepare the arguments for executing parallel. StatusOr<std::vector<std::vector<const ShapedBuffer*>>> GetArguments( const ExecutionOptions& execution_options, - tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments); + absl::Span<const GlobalDataHandle* const> arguments); protected: friend class LocalExecutable; @@ -207,14 +207,14 @@ class Service : public ServiceInterface { // the corresponding replica. StatusOr<std::vector<std::vector<const ShapedBuffer*>>> ResolveAndValidateArguments( - tensorflow::gtl::ArraySlice<const GlobalDataHandle*> arguments, - tensorflow::gtl::ArraySlice<se::StreamExecutor*> stream_executors); + absl::Span<const GlobalDataHandle* const> arguments, + absl::Span<se::StreamExecutor* const> stream_executors); // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. StatusOr<std::unique_ptr<HloModuleConfig>> CreateModuleConfig( const ProgramShape& program_shape, - tensorflow::gtl::ArraySlice<const Shape*> argument_shapes, + absl::Span<const Shape* const> argument_shapes, const ExecutionOptions* execution_options); // Builds an Executable for the given parameters. @@ -242,21 +242,17 @@ class Service : public ServiceInterface { // ExecutionProfile object which will be filled in with profile data. StatusOr<GlobalDataHandle> ExecuteAndRegisterResult( Executable* executable, - const tensorflow::gtl::ArraySlice<std::vector<const ShapedBuffer*>> - arguments, + const absl::Span<const std::vector<const ShapedBuffer*>> arguments, Backend* backend, const string& result_tag, ExecutionProfile* profile); // Runs the given executables with the given arguments and register the result // from each executable in the allocation tracker. The handles of the result // from the tracker are returned. 
StatusOr<std::vector<GlobalDataHandle>> ExecuteParallelAndRegisterResult( - tensorflow::gtl::ArraySlice<Executable*> executables, - tensorflow::gtl::ArraySlice<std::vector<std::vector<const ShapedBuffer*>>> - arguments, - Backend* backend, - tensorflow::gtl::ArraySlice<DeviceHandle> device_handles, - tensorflow::gtl::ArraySlice<string> result_tags, - ExecutionProfile* profile); + absl::Span<Executable* const> executables, + absl::Span<const std::vector<std::vector<const ShapedBuffer*>>> arguments, + Backend* backend, absl::Span<const DeviceHandle> device_handles, + absl::Span<const string> result_tags, ExecutionProfile* profile); // Executes a single computation which has more than one target device. // The N devices are expected to all return an empty tuple, but one, which diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 45427bba25..2611749862 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -45,7 +45,7 @@ using absl::StrFormat; using absl::StrJoin; // Returns true if no element is present in slice more than once. -bool AllUnique(tensorflow::gtl::ArraySlice<int64> slice) { +bool AllUnique(absl::Span<const int64> slice) { return std::set<int64>(slice.begin(), slice.end()).size() == slice.size(); } @@ -57,11 +57,10 @@ Status ExpectArray(const Shape& shape, absl::string_view op_type) { return Status::OK(); } -Status VerifyReducerShape( - const ProgramShape& reducer_shape, - tensorflow::gtl::ArraySlice<const Shape*> init_value_shapes, - tensorflow::gtl::ArraySlice<PrimitiveType> input_element_types, - int64 inputs) { +Status VerifyReducerShape(const ProgramShape& reducer_shape, + absl::Span<const Shape* const> init_value_shapes, + absl::Span<const PrimitiveType> input_element_types, + int64 inputs) { if (reducer_shape.parameters_size() != inputs * 2) { return InvalidArgument( "Reduction function must take %d parameters, but " @@ -335,8 +334,7 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape, } /* static */ StatusOr<Shape> ShapeInference::InferConcatOpShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes, - const int64 dimension) { + absl::Span<const Shape* const> arg_shapes, const int64 dimension) { if (arg_shapes.empty()) { return InvalidArgument("Concatenate expects at least one argument."); } @@ -394,7 +392,7 @@ StatusOr<Shape> InferWindowOutputShape(const Shape& base_shape, } /* static */ StatusOr<Shape> ShapeInference::InferAfterAllShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes) { + absl::Span<const Shape* const> arg_shapes) { for (const Shape* arg_shape : arg_shapes) { if (arg_shape->element_type() != TOKEN) { return InvalidArgument( @@ -550,22 +548,22 @@ Status ValidateDotDimensionNumbers( const Shape& lhs, const Shape& rhs, const DotDimensionNumbers& dimension_numbers) { // Check that dimension numbers are in range. 
- auto dims_in_range = - [](const int64 rank, tensorflow::gtl::ArraySlice<int64> contracting_dims, - tensorflow::gtl::ArraySlice<int64> batch_dims) -> bool { + auto dims_in_range = [](const int64 rank, + absl::Span<const int64> contracting_dims, + absl::Span<const int64> batch_dims) -> bool { auto in_range = [&rank](int64 i) -> bool { return 0 <= i && i < rank; }; return std::all_of(contracting_dims.begin(), contracting_dims.end(), in_range) && std::all_of(batch_dims.begin(), batch_dims.end(), in_range); }; - tensorflow::gtl::ArraySlice<int64> lhs_contracting_dimensions = + absl::Span<const int64> lhs_contracting_dimensions = AsInt64Slice(dimension_numbers.lhs_contracting_dimensions()); - tensorflow::gtl::ArraySlice<int64> rhs_contracting_dimensions = + absl::Span<const int64> rhs_contracting_dimensions = AsInt64Slice(dimension_numbers.rhs_contracting_dimensions()); - tensorflow::gtl::ArraySlice<int64> lhs_batch_dimensions = + absl::Span<const int64> lhs_batch_dimensions = AsInt64Slice(dimension_numbers.lhs_batch_dimensions()); - tensorflow::gtl::ArraySlice<int64> rhs_batch_dimensions = + absl::Span<const int64> rhs_batch_dimensions = AsInt64Slice(dimension_numbers.rhs_batch_dimensions()); if (!dims_in_range(ShapeUtil::Rank(lhs), lhs_contracting_dimensions, @@ -577,8 +575,8 @@ Status ValidateDotDimensionNumbers( } // Check that dimension numbers are unique. - auto dims_unique = [](tensorflow::gtl::ArraySlice<int64> contracting_dims, - tensorflow::gtl::ArraySlice<int64> batch_dims) -> bool { + auto dims_unique = [](absl::Span<const int64> contracting_dims, + absl::Span<const int64> batch_dims) -> bool { tensorflow::gtl::FlatSet<int64> dim_set; auto is_unique = [&dim_set](int64 i) -> bool { return dim_set.insert(i).second; @@ -748,7 +746,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, /* static */ StatusOr<Shape> ShapeInference::InferInDimBroadcastShape( const Shape& smaller_shape, const Shape& larger_shape, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions) { + absl::Span<const int64> broadcast_dimensions) { if (broadcast_dimensions.empty() && !ShapeUtil::IsScalar(smaller_shape)) { // Reject "magic" inference for binops on different shapes, requiring // the user to provide an explicit broadcast dimension in this case. 
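The dims_in_range and dims_unique lambdas above show the calling convention this rename encourages: a span is just a pointer plus a length, so it is passed by value, and absl::Span<const int64> additionally accepts a braced initializer list directly at the call site. A minimal standalone sketch in the same spirit (int64_t standing in for XLA's int64; DimsInRange and DimsUnique are illustrative names, not functions from this patch):

#include <cstdint>
#include <set>
#include "absl/algorithm/container.h"
#include "absl/types/span.h"

// Spans are two words (pointer + size), so they are taken by value.
bool DimsInRange(int64_t rank, absl::Span<const int64_t> dims) {
  return absl::c_all_of(dims,
                        [rank](int64_t d) { return 0 <= d && d < rank; });
}

bool DimsUnique(absl::Span<const int64_t> dims) {
  std::set<int64_t> seen(dims.begin(), dims.end());
  return seen.size() == dims.size();
}

int main() {
  // absl::Span<const T> binds directly to a braced initializer list.
  bool ok = DimsInRange(4, {0, 3}) && DimsUnique({0, 3}) && !DimsUnique({1, 1});
  return ok ? 0 : 1;
}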
@@ -849,7 +847,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, /* static */ StatusOr<Shape> ShapeInference::InferElementwiseBinaryOpShape( HloOpcode operation, const Shape& lhs, const Shape& rhs, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions) { + absl::Span<const int64> broadcast_dimensions) { TF_RETURN_IF_ERROR(ExpectArray(lhs, "lhs of elementwise binary operation")); TF_RETURN_IF_ERROR(ExpectArray(rhs, "rhs of elementwise binary operation")); @@ -906,7 +904,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, /* static */ StatusOr<Shape> ShapeInference::InferBinaryOpShape( HloOpcode opcode, const Shape& lhs, const Shape& rhs, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions) { + absl::Span<const int64> broadcast_dimensions) { VLOG(2) << StrFormat( "inferring shape for <%s>(%s, %s) with broadcast_dimensions={%s}", HloOpcodeString(opcode), ShapeUtil::HumanString(lhs), @@ -1005,8 +1003,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferVariadicOpShape( - HloOpcode opcode, - tensorflow::gtl::ArraySlice<const HloInstruction*> operands) { + HloOpcode opcode, absl::Span<const HloInstruction* const> operands) { std::vector<const Shape*> operand_shapes; operand_shapes.reserve(operands.size()); for (const HloInstruction* operand : operands) { @@ -1016,8 +1013,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferVariadicOpShape( - HloOpcode opcode, - tensorflow::gtl::ArraySlice<const Shape*> operand_shapes) { + HloOpcode opcode, absl::Span<const Shape* const> operand_shapes) { for (const Shape* shape : operand_shapes) { TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(*shape)); } @@ -1053,9 +1049,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferMapShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes, - const ProgramShape& to_apply, - tensorflow::gtl::ArraySlice<int64> dimensions) { + absl::Span<const Shape* const> arg_shapes, const ProgramShape& to_apply, + absl::Span<const int64> dimensions) { if (arg_shapes.empty()) { return InvalidArgument("Map expects at least one argument."); } @@ -1711,7 +1706,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, /* static */ StatusOr<Shape> ShapeInference::InferFftShape( const Shape& in, const FftType fft_type, - const tensorflow::gtl::ArraySlice<int64> fft_length) { + const absl::Span<const int64> fft_length) { const int64 fft_rank = fft_length.size(); if (fft_rank < 1 || fft_rank > 3) { return InvalidArgument("FFT only supports ranks 1-3; got %d.", fft_rank); @@ -1792,7 +1787,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferCrossReplicaSumShape( - tensorflow::gtl::ArraySlice<const Shape*> operand_shapes) { + absl::Span<const Shape* const> operand_shapes) { for (const Shape* operand_shape : operand_shapes) { TF_RETURN_IF_ERROR( ExpectArray(*operand_shape, "operand of cross replica sum")); @@ -1835,7 +1830,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferAllToAllTupleShape( - tensorflow::gtl::ArraySlice<const Shape*> operand_shapes) { + absl::Span<const Shape* const> operand_shapes) { // An Alltoall HLO instruction receives N 
operands (with the same shape) and // returns a tuple that contains N array shapes. TF_RET_CHECK(!operand_shapes.empty()); @@ -1859,8 +1854,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferReduceShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + absl::Span<const Shape* const> arg_shapes, + absl::Span<const int64> dimensions_to_reduce, const ProgramShape& to_apply) { if (arg_shapes.empty()) { return InvalidArgument("Reduce must have at least 2 arguments, has 0"); @@ -1998,9 +1993,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferSliceShape( - const Shape& arg, tensorflow::gtl::ArraySlice<int64> starts, - tensorflow::gtl::ArraySlice<int64> limits, - tensorflow::gtl::ArraySlice<int64> strides) { + const Shape& arg, absl::Span<const int64> starts, + absl::Span<const int64> limits, absl::Span<const int64> strides) { auto error = [&](const string& message) { return InvalidArgument( "%s in slice operation; argument shape: %s; starts: {%s}; limits: " @@ -2062,7 +2056,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, /* static */ StatusOr<Shape> ShapeInference::InferDynamicSliceShape( const Shape& operand_shape, const Shape& start_indices_shape, - tensorflow::gtl::ArraySlice<int64> slice_sizes) { + absl::Span<const int64> slice_sizes) { TF_RETURN_IF_ERROR(ExpectArray(operand_shape, "operand of dynamic slice")); TF_RETURN_IF_ERROR( ExpectArray(start_indices_shape, "start indices of dynamic slice")); @@ -2189,7 +2183,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /*static */ StatusOr<Shape> ShapeInference::InferReverseShape( - const Shape& operand_shape, tensorflow::gtl::ArraySlice<int64> dimensions) { + const Shape& operand_shape, absl::Span<const int64> dimensions) { TF_RETURN_IF_ERROR(ExpectArray(operand_shape, "operand of reverse")); if (!AllUnique(dimensions)) { return InvalidArgument("a dimension number is duplicated in reverse"); @@ -2315,7 +2309,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferBroadcastShape( - const Shape& operand, tensorflow::gtl::ArraySlice<int64> broadcast_sizes) { + const Shape& operand, absl::Span<const int64> broadcast_sizes) { TF_RETURN_IF_ERROR(ExpectArray(operand, "operand of broadcast")); for (int64 size : broadcast_sizes) { if (size < 0) { @@ -2333,8 +2327,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferReshapeShape( - const Shape& operand, tensorflow::gtl::ArraySlice<int64> dimensions, - tensorflow::gtl::ArraySlice<int64> new_sizes) { + const Shape& operand, absl::Span<const int64> dimensions, + absl::Span<const int64> new_sizes) { TF_RETURN_IF_ERROR(ExpectArray(operand, "reshape")); Shape inferred_shape = @@ -2366,7 +2360,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } /* static */ StatusOr<Shape> ShapeInference::InferTransposeShape( - const Shape& operand, tensorflow::gtl::ArraySlice<int64> dimensions) { + const Shape& operand, absl::Span<const int64> dimensions) { TF_RETURN_IF_ERROR(ExpectArray(operand, "transpose")); std::vector<int64> indices(ShapeUtil::Rank(operand)); @@ -2471,8 +2465,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode 
operation, } /* static */ StatusOr<Shape> ShapeInference::InferCallShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes, - const ProgramShape& to_apply) { + absl::Span<const Shape* const> arg_shapes, const ProgramShape& to_apply) { // The applied function's arity equals the number of arguments. if (arg_shapes.size() != to_apply.parameters_size()) { string computation_signature = ShapeUtil::HumanString(to_apply); @@ -2505,8 +2498,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, } static Status ValidateGatherDimensionNumbers( - const Shape& input_shape, - tensorflow::gtl::ArraySlice<int64> start_indices_shape, + const Shape& input_shape, absl::Span<const int64> start_indices_shape, const GatherDimensionNumbers& dim_numbers) { if (!absl::c_is_sorted(dim_numbers.offset_dims())) { return InvalidArgument( @@ -2599,7 +2591,7 @@ static Status ValidateGatherDimensionNumbers( /*static*/ StatusOr<Shape> ShapeInference::InferGatherShape( const Shape& input_shape, const Shape& start_indices_shape, const GatherDimensionNumbers& gather_dim_numbers, - tensorflow::gtl::ArraySlice<int64> slice_sizes) { + absl::Span<const int64> slice_sizes) { TF_RETURN_IF_ERROR( ExpectArray(input_shape, "input tensor operand gather op")); TF_RETURN_IF_ERROR( @@ -2709,8 +2701,7 @@ static Status ValidateGatherDimensionNumbers( namespace { Status ValidateScatterDimensionNumbers( - const Shape& operand_shape, - tensorflow::gtl::ArraySlice<int64> scatter_indices_shape, + const Shape& operand_shape, absl::Span<const int64> scatter_indices_shape, const Shape& updates_shape, const ScatterDimensionNumbers& dim_numbers) { // Validate update_window_dims in ScatterDimensionNumbers. if (!absl::c_is_sorted(dim_numbers.update_window_dims())) { diff --git a/tensorflow/compiler/xla/service/shape_inference.h b/tensorflow/compiler/xla/service/shape_inference.h index 235b1a4cf3..072ada2d8f 100644 --- a/tensorflow/compiler/xla/service/shape_inference.h +++ b/tensorflow/compiler/xla/service/shape_inference.h @@ -55,7 +55,7 @@ class ShapeInference { // given input shapes. static StatusOr<Shape> InferBinaryOpShape( HloOpcode opcode, const Shape& lhs, const Shape& rhs, - tensorflow::gtl::ArraySlice<int64> broadcast_dimensions); + absl::Span<const int64> broadcast_dimensions); static StatusOr<Shape> InferBinaryOpShape(HloOpcode opcode, const HloInstruction* lhs, const HloInstruction* rhs); @@ -73,18 +73,15 @@ class ShapeInference { // Infers the shape produced by applying the given variadic operation to the // given input operand shapes. static StatusOr<Shape> InferVariadicOpShape( - HloOpcode opcode, - tensorflow::gtl::ArraySlice<const Shape*> operand_shapes); + HloOpcode opcode, absl::Span<const Shape* const> operand_shapes); static StatusOr<Shape> InferVariadicOpShape( - HloOpcode opcode, - tensorflow::gtl::ArraySlice<const HloInstruction*> operands); + HloOpcode opcode, absl::Span<const HloInstruction* const> operands); // Infers the shape produced by applying the given mapping computation shape // to the given operand shapes. static StatusOr<Shape> InferMapShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes, - const ProgramShape& to_apply, - tensorflow::gtl::ArraySlice<int64> dimensions); + absl::Span<const Shape* const> arg_shapes, const ProgramShape& to_apply, + absl::Span<const int64> dimensions); // Infers the shape produced by InferBatchNormTraining with the given // operands. 
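Signatures such as InferMapShape and InferConcatOpShape above take absl::Span<const Shape* const>, which keeps call sites like InferReduceShape({&arg, &f32_}, ...) in the tests further below working unchanged: a braced list of addresses materializes a temporary backing array that survives the call. A hypothetical sketch, with RankSum and a simplified Shape standing in for the real inference functions and xla::Shape:

#include <cstdint>
#include "absl/types/span.h"

struct Shape { int64_t rank; };  // stand-in for xla::Shape

// Stand-in for the InferXxxShape signatures above: a read-only view over
// pointers to shapes owned by the caller.
int64_t RankSum(absl::Span<const Shape* const> arg_shapes) {
  int64_t total = 0;
  for (const Shape* shape : arg_shapes) total += shape->rank;
  return total;
}

int main() {
  Shape lhs{2}, rhs{3};
  // The braced list of addresses materializes a temporary backing array
  // that lives until the end of the full-expression, so passing it as a
  // function argument is safe.
  return RankSum({&lhs, &rhs}) == 5 ? 0 : 1;
}

Passing a braced list this way is safe for function arguments, but it would dangle if the resulting span were stored beyond the call.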
@@ -116,14 +113,13 @@ class ShapeInference { int64 feature_group_count = 1); // Infers the shape produced by the given FFT type on the given operand. - static StatusOr<Shape> InferFftShape( - const Shape& in, FftType fft_type, - tensorflow::gtl::ArraySlice<int64> fft_length); + static StatusOr<Shape> InferFftShape(const Shape& in, FftType fft_type, + absl::Span<const int64> fft_length); // Infers the shape produced by a cross replica sum with the given operand // shapes. static StatusOr<Shape> InferCrossReplicaSumShape( - tensorflow::gtl::ArraySlice<const Shape*> operand_shapes); + absl::Span<const Shape* const> operand_shapes); // Infers final shape of an Alltoall operation that is created by the xla // builder. @@ -134,7 +130,7 @@ class ShapeInference { // Infers the shape of an HLO all-to-all instruction. static StatusOr<Shape> InferAllToAllTupleShape( - tensorflow::gtl::ArraySlice<const Shape*> operand_shapes); + absl::Span<const Shape* const> operand_shapes); // Infers the shape of a collective permute operation. static StatusOr<Shape> InferCollectivePermuteShape(const Shape& shape); @@ -146,8 +142,8 @@ class ShapeInference { // index as the leading parameter, and the program shape should match // accordingly (or an error will result). static StatusOr<Shape> InferReduceShape( - tensorflow::gtl::ArraySlice<const Shape*> arg_shapes, - tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce, + absl::Span<const Shape* const> arg_shapes, + absl::Span<const int64> dimensions_to_reduce, const ProgramShape& to_apply); // Infers the shape produced by applying the given computation to the operand @@ -165,24 +161,23 @@ class ShapeInference { // Infers the shape produced by a reverse operation that reverses the order // of the elements in the given dimensions. - static StatusOr<Shape> InferReverseShape( - const Shape& operand_shape, - tensorflow::gtl::ArraySlice<int64> dimensions); + static StatusOr<Shape> InferReverseShape(const Shape& operand_shape, + absl::Span<const int64> dimensions); // Infers the shape produced by a slice operation spanning from the starts to // the limits in the original shape's dimensions. // // e.g. slice f32[32x32] 0:16 0:16 -> f32[16x16] - static StatusOr<Shape> InferSliceShape( - const Shape& arg, tensorflow::gtl::ArraySlice<int64> starts, - tensorflow::gtl::ArraySlice<int64> limits, - tensorflow::gtl::ArraySlice<int64> strides); + static StatusOr<Shape> InferSliceShape(const Shape& arg, + absl::Span<const int64> starts, + absl::Span<const int64> limits, + absl::Span<const int64> strides); // Infers the shape produced by a dynamic slice operation of size specified // in 'slice_sizes', with dynamic start indices shape 'start_indices_shape'. static StatusOr<Shape> InferDynamicSliceShape( const Shape& operand_shape, const Shape& start_indices_shape, - tensorflow::gtl::ArraySlice<int64> slice_sizes); + absl::Span<const int64> slice_sizes); // Infers the shape produced by a dynamic update slice operation based // on the shape of operand and update. @@ -213,30 +208,30 @@ class ShapeInference { // Infers the shape produced by a broadcast operation. static StatusOr<Shape> InferBroadcastShape( - const Shape& operand, tensorflow::gtl::ArraySlice<int64> broadcast_sizes); + const Shape& operand, absl::Span<const int64> broadcast_sizes); // Infers the shape produced by a reshape operation from the element type of // its operand and the new dimension sizes specified. 
diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc
index 4ed8fc6b86..5dbe5a1611 100644
--- a/tensorflow/compiler/xla/service/shape_inference_test.cc
+++ b/tensorflow/compiler/xla/service/shape_inference_test.cc
@@ -28,7 +28,6 @@ limitations under the License.
 
 namespace xla {
 namespace {
 
-using ::tensorflow::gtl::ArraySlice;
 using ::testing::ContainsRegex;
 using ::testing::HasSubstr;
 
@@ -58,9 +57,9 @@ class ReduceShapeInferenceTest : public ShapeInferenceTest {
   // Helper that runs reduce shape inference with the input 'arg' and given
   // dimensions to reduce, and checks the inferred shape is as expected. The
   // element type here is hard-coded to F32.
-  void ExpectInferredReduceShape(
-      const Shape& expected_inferred_shape, const Shape& arg,
-      tensorflow::gtl::ArraySlice<int64> dimensions_to_reduce) {
+  void ExpectInferredReduceShape(const Shape& expected_inferred_shape,
+                                 const Shape& arg,
+                                 absl::Span<const int64> dimensions_to_reduce) {
     ProgramShape to_apply = ShapeUtil::MakeProgramShape({f32_, f32_}, f32_);
     auto inferred_status = ShapeInference::InferReduceShape(
         {&arg, &f32_}, dimensions_to_reduce, to_apply);
@@ -252,7 +251,7 @@ TEST_F(ShapeInferenceTest, ClampBadShapes) {
 
 TEST_F(ShapeInferenceTest, Complex) {
   auto complex_shape = [&](const Shape& lhs, const Shape& rhs,
-                           const tensorflow::gtl::ArraySlice<int64>& bcast) {
+                           const absl::Span<const int64>& bcast) {
     return ShapeInference::InferBinaryOpShape(HloOpcode::kComplex, lhs, rhs,
                                               bcast);
   };
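The test lambda above keeps a const absl::Span<const int64>& parameter; a span also passes fine by value, since it is only a {pointer, length} view that never owns or copies the elements. A small sketch of the view semantics; First is a hypothetical helper, and int64_t again stands in for int64:

#include <cstdint>
#include <vector>

#include "absl/types/span.h"

// Hypothetical helper: copying the span copies the view, not the elements.
int64_t First(absl::Span<const int64_t> s) { return s.empty() ? -1 : s[0]; }

int main() {
  std::vector<int64_t> dims = {7, 8};
  absl::Span<const int64_t> view = dims;  // a view: no element copy
  dims[0] = 9;                            // mutation is visible through the view
  // Caution: the view must not outlive `dims`; a Span never owns its storage.
  return First(view) == 9 ? 0 : 1;
}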
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index f77690a462..0c393c53a1 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -130,7 +130,7 @@ class TransferManager {
 
   // Resets the devices associated with this transfer manager.
   virtual Status ResetDevices(
-      tensorflow::gtl::ArraySlice<se::StreamExecutor*> executor) = 0;
+      absl::Span<se::StreamExecutor* const> executor) = 0;
 
   // Given an allocated ShapedBuffer, constructs the tuple index table(s) in
   // each buffer of the given ShapedBuffer corresponding to tuple shapes. If the
@@ -211,8 +211,7 @@ class TransferManager {
   // to construct a tuple index table in the platform-specific tuple
   // representation.
   virtual Status WriteSingleTupleIndexTable(
-      se::Stream* stream,
-      tensorflow::gtl::ArraySlice<se::DeviceMemoryBase> elements,
+      se::Stream* stream, absl::Span<const se::DeviceMemoryBase> elements,
      const Shape& shape, se::DeviceMemoryBase* region) = 0;
 
  private:
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
index cf00ca102b..6fed7c76d0 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc
@@ -360,7 +360,7 @@ Status TuplePointsToAnalysis::HandleSend(HloInstruction* send) {
 }
 
 Status TuplePointsToAnalysis::HandleTuple(HloInstruction* tuple) {
-  tensorflow::gtl::ArraySlice<HloInstruction*> operands(tuple->operands());
+  absl::Span<HloInstruction* const> operands(tuple->operands());
   PointsToSet& points_to_set = CreateEmptyPointsToSet(tuple);
   points_to_set.AddPointedToBuffer(
       logical_buffer_analysis_->GetBuffer(tuple, /*index=*/{}),
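HandleTuple above builds a Span<HloInstruction* const> view over tuple->operands(): the pointer elements are read-only through the view, while the instructions they point to stay mutable. A toy sketch of the same pattern; Instr is a hypothetical stand-in for HloInstruction:

#include <vector>

#include "absl/types/span.h"

// Hypothetical stand-in for HloInstruction.
struct Instr {
  int id = 0;
};

int main() {
  Instr a{1}, b{2};
  std::vector<Instr*> operands = {&a, &b};

  // Span<Instr* const>: the pointers cannot be reseated through the view,
  // but the pointees remain mutable -- the same shape as
  // absl::Span<HloInstruction* const> operands(tuple->operands()) above.
  absl::Span<Instr* const> view(operands);
  view[0]->id = 42;      // ok: mutates the pointee
  // view[0] = nullptr;  // would not compile: the element is const

  return a.id == 42 ? 0 : 1;
}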
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
index 10d382e8ab..a32d1f9026 100644
--- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
+++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc
@@ -72,9 +72,8 @@ class TuplePointsToAnalysisTest : public HloTestBase {
   // Checks that the given points-to set contains exactly (unordered) the given
   // LogicalBuffers.
-  void ExpectHasBuffers(
-      const PointsToSet::BufferList& points_to_set,
-      tensorflow::gtl::ArraySlice<const LogicalBuffer*> buffers) {
+  void ExpectHasBuffers(const PointsToSet::BufferList& points_to_set,
+                        absl::Span<const LogicalBuffer* const> buffers) {
     std::vector<const LogicalBuffer*> vec(buffers.begin(), buffers.end());
     EXPECT_THAT(points_to_set, UnorderedElementsAreArray(vec));
   }
@@ -83,7 +82,7 @@ class TuplePointsToAnalysisTest : public HloTestBase {
   // top-level buffers of the given instructions.
   void ExpectHasTopLevelBuffers(
       const PointsToSet::BufferList& points_to_set,
-      tensorflow::gtl::ArraySlice<HloInstruction*> instructions) {
+      absl::Span<HloInstruction* const> instructions) {
     PointsToSet::BufferList buffers;
     for (auto instruction : instructions) {
       buffers.push_back(GetBuffer(instruction, /*index=*/{}));
@@ -94,7 +93,7 @@ class TuplePointsToAnalysisTest : public HloTestBase {
   // Overload which takes a set instead of a vector.
   void ExpectHasTopLevelBuffers(
       const PointsToSet::BufferSet& points_to_set,
-      tensorflow::gtl::ArraySlice<HloInstruction*> instructions) {
+      absl::Span<HloInstruction* const> instructions) {
     ExpectHasTopLevelBuffers(
         PointsToSet::BufferList(points_to_set.begin(), points_to_set.end()),
         instructions);
@@ -104,8 +103,7 @@ class TuplePointsToAnalysisTest : public HloTestBase {
   // aliases which are exactly (unordered) the given instruction/index pairs.
   void ExpectHasBufferAliases(
       const HloInstruction* instruction, const ShapeIndex& index,
-      tensorflow::gtl::ArraySlice<std::pair<HloInstruction*, ShapeIndex>>
-          expected) {
+      absl::Span<const std::pair<HloInstruction*, ShapeIndex>> expected) {
     const LogicalBuffer* buffer =
         points_to_analysis_->GetBufferDefinedAt(instruction, index)
             .ValueOrDie();
diff --git a/tensorflow/compiler/xla/service/tuple_util.cc b/tensorflow/compiler/xla/service/tuple_util.cc
index 4a530bb0b2..9ba01ef7a6 100644
--- a/tensorflow/compiler/xla/service/tuple_util.cc
+++ b/tensorflow/compiler/xla/service/tuple_util.cc
@@ -40,7 +40,7 @@ namespace xla {
 
 /*static*/ HloInstruction* TupleUtil::AppendSuffix(
     HloInstruction* input_tuple,
-    tensorflow::gtl::ArraySlice<HloInstruction*> trailing_values) {
+    absl::Span<HloInstruction* const> trailing_values) {
   CHECK(ShapeUtil::IsTuple(input_tuple->shape()));
 
   HloComputation* computation = input_tuple->parent();
diff --git a/tensorflow/compiler/xla/service/tuple_util.h b/tensorflow/compiler/xla/service/tuple_util.h
index e5ff9aaa83..bc5aac09f2 100644
--- a/tensorflow/compiler/xla/service/tuple_util.h
+++ b/tensorflow/compiler/xla/service/tuple_util.h
@@ -38,7 +38,7 @@ class TupleUtil {
   // `input_tuple`.
   static HloInstruction* AppendSuffix(
       HloInstruction* input_tuple,
-      tensorflow::gtl::ArraySlice<HloInstruction*> trailing_values);
+      absl::Span<HloInstruction* const> trailing_values);
 };
 
 }  // namespace xla
diff --git a/tensorflow/compiler/xla/service/while_loop_analysis.cc b/tensorflow/compiler/xla/service/while_loop_analysis.cc
index 7e4ac92a7c..c3c2603c7e 100644
--- a/tensorflow/compiler/xla/service/while_loop_analysis.cc
+++ b/tensorflow/compiler/xla/service/while_loop_analysis.cc
@@ -211,8 +211,7 @@ optional<int64> ComputeWhileLoopTripCount(HloInstruction* while_op,
     VLOG(2) << "Couldn't evaluate while cond: " << result.status();
     return nullopt;
   }
-  if (result.ValueOrDie()->data<bool>() ==
-      tensorflow::gtl::ArraySlice<bool>{false}) {
+  if (result.ValueOrDie()->data<bool>() == absl::Span<const bool>{false}) {
     VLOG(2) << "Loop has static trip count of " << trip_count;
     return trip_count;
   }
diff --git a/tensorflow/compiler/xla/service/while_util.cc b/tensorflow/compiler/xla/service/while_util.cc
index e8f76ff745..f90ac91f9d 100644
--- a/tensorflow/compiler/xla/service/while_util.cc
+++ b/tensorflow/compiler/xla/service/while_util.cc
@@ -94,7 +94,7 @@ WidenWhileBody(HloComputation* narrow_body, const Shape& wide_shape) {
 /*static*/ StatusOr<WhileUtil::MakeInstructionsLiveInResult>
 WhileUtil::MakeInstructionsLiveIn(
     HloInstruction* while_instr,
-    tensorflow::gtl::ArraySlice<HloInstruction*> instructions) {
+    absl::Span<HloInstruction* const> instructions) {
   CHECK(ShapeUtil::IsTuple(while_instr->shape()));
 
   int64 elements_in_old_while_shape = while_instr->shape().tuple_shapes_size();
diff --git a/tensorflow/compiler/xla/service/while_util.h b/tensorflow/compiler/xla/service/while_util.h
index e67636d80f..b1c4486887 100644
--- a/tensorflow/compiler/xla/service/while_util.h
+++ b/tensorflow/compiler/xla/service/while_util.h
@@ -55,7 +55,7 @@ class WhileUtil {
   // that contains `while_instr`.
   static StatusOr<MakeInstructionsLiveInResult> MakeInstructionsLiveIn(
       HloInstruction* while_instr,
-      tensorflow::gtl::ArraySlice<HloInstruction*> instructions);
+      absl::Span<HloInstruction* const> instructions);
 
   using LoopStateTy = std::vector<HloInstruction*>;
   using LoopBodyGeneratorTy = std::function<StatusOr<LoopStateTy>(
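The while_loop_analysis.cc hunk above relies on absl::Span's element-wise operator==, which ArraySlice also provided: the literal's data<bool>() is compared directly against a one-element braced span. A standalone sketch of that comparison; only absl::Span itself is taken from the change above:

#include "absl/types/span.h"

int main() {
  // operator== on absl::Span compares lengths and elements, not pointers,
  // so a span over this array equals a span built from a braced list.
  const bool data[] = {false};
  absl::Span<const bool> value(data);
  return value == absl::Span<const bool>{false} ? 0 : 1;
}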