aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar avijit-nervana <avijit.chakraborty@intel.com>2018-08-22 11:37:18 -0700
committerGravatar avijit-nervana <avijit.chakraborty@intel.com>2018-08-22 11:37:18 -0700
commit792aa3d576242b83ad38c36f7e50fc4f5714b561 (patch)
tree44f7a3338e92703e3501fea16ba42ec11f4e67ab
parent6528b69885fa00c21db648c004be93b823d36d0d (diff)
parent1f6f5e3b9fe092c5218b872648a5fb65334a2af8 (diff)
Merge remote-tracking branch 'upstream/master'
-rw-r--r--tensorflow/compiler/jit/xla_device_context.cc6
-rw-r--r--tensorflow/compiler/xla/client/xla_builder.cc22
-rw-r--r--tensorflow/compiler/xla/client/xla_builder.h29
-rw-r--r--tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc2
-rw-r--r--tensorflow/compiler/xla/service/bfloat16_normalization_test.cc2
-rw-r--r--tensorflow/compiler/xla/service/hlo.proto5
-rw-r--r--tensorflow/compiler/xla/service/hlo_domain_isolator.cc21
-rw-r--r--tensorflow/compiler/xla/service/hlo_domain_isolator.h6
-rw-r--r--tensorflow/compiler/xla/service/hlo_domain_test.cc64
-rw-r--r--tensorflow/compiler/xla/service/hlo_instruction.cc17
-rw-r--r--tensorflow/compiler/xla/service/hlo_instruction.h12
-rw-r--r--tensorflow/compiler/xla/service/hlo_instructions.cc27
-rw-r--r--tensorflow/compiler/xla/service/hlo_instructions.h11
-rw-r--r--tensorflow/compiler/xla/service/hlo_parser.cc44
-rw-r--r--tensorflow/compiler/xla/service/hlo_parser_test.cc4
-rw-r--r--tensorflow/compiler/xla/service/hlo_sharding_metadata.cc20
-rw-r--r--tensorflow/compiler/xla/service/hlo_sharding_metadata.h4
-rw-r--r--tensorflow/contrib/autograph/converters/control_flow.py16
-rw-r--r--tensorflow/contrib/autograph/converters/control_flow_test.py20
-rw-r--r--tensorflow/contrib/lite/schema/schema.fbs4
-rw-r--r--tensorflow/core/kernels/cwise_op_div.cc1
-rw-r--r--tensorflow/core/kernels/cwise_op_gpu_div.cu.cc1
-rw-r--r--tensorflow/core/ops/array_grad.cc21
-rw-r--r--tensorflow/python/eager/function_test.py12
-rw-r--r--tensorflow/python/estimator/keras_test.py54
-rw-r--r--tensorflow/python/framework/sparse_tensor.py2
-rw-r--r--tensorflow/python/ops/math_ops_test.py2
-rw-r--r--tensorflow/tools/docs/generate_lib.py17
28 files changed, 296 insertions, 150 deletions
diff --git a/tensorflow/compiler/jit/xla_device_context.cc b/tensorflow/compiler/jit/xla_device_context.cc
index 175a571ddb..2027ec7737 100644
--- a/tensorflow/compiler/jit/xla_device_context.cc
+++ b/tensorflow/compiler/jit/xla_device_context.cc
@@ -124,11 +124,11 @@ void XlaTransferManager::TransferLiteralFromDevice(
TensorReference ref(device_tensor);
transfer_manager_->TransferLiteralFromDevice(
device_to_host_stream_.get(), shaped_buffer, literal,
- [=, &shaped_buffer, &literal](xla::Status status) {
+ [=, &shaped_buffer](xla::Status status) {
ref.Unref();
done([&]() -> Status {
- VLOG(1) << "Transfer from device as literal: " << literal.ToString()
- << " " << shaped_buffer.ToString();
+ VLOG(1) << "Transfer from device as literal: "
+ << shaped_buffer.ToString();
return status;
}());
});
diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc
index 7bc6e8d860..4e7ef66dc5 100644
--- a/tensorflow/compiler/xla/client/xla_builder.cc
+++ b/tensorflow/compiler/xla/client/xla_builder.cc
@@ -1881,7 +1881,7 @@ XlaOp XlaBuilder::BatchNormGrad(const XlaOp& operand, const XlaOp& scale,
XlaOp XlaBuilder::CrossReplicaSum(
const XlaOp& operand,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids) {
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups) {
return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
TF_ASSIGN_OR_RETURN(const Shape& shape, GetShape(operand));
const Shape& scalar_shape = ShapeUtil::MakeShape(shape.element_type(), {});
@@ -1889,14 +1889,14 @@ XlaOp XlaBuilder::CrossReplicaSum(
b->Add(b->Parameter(/*parameter_number=*/0, scalar_shape, "x"),
b->Parameter(/*parameter_number=*/1, scalar_shape, "y"));
TF_ASSIGN_OR_RETURN(auto computation, b->Build());
- return CrossReplicaSum(operand, computation, replica_group_ids,
+ return CrossReplicaSum(operand, computation, replica_groups,
/*channel_id=*/absl::nullopt);
});
}
XlaOp XlaBuilder::CrossReplicaSum(
const XlaOp& operand, const XlaComputation& computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups,
const absl::optional<ChannelHandle>& channel_id) {
return ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
HloInstructionProto instr;
@@ -1904,8 +1904,9 @@ XlaOp XlaBuilder::CrossReplicaSum(
TF_ASSIGN_OR_RETURN(
*instr.mutable_shape(),
ShapeInference::InferCrossReplicaSumShape({&operand_shape}));
- for (int64 replica_group_id : replica_group_ids) {
- instr.add_replica_group_ids(replica_group_id);
+
+ for (const ReplicaGroup& group : replica_groups) {
+ *instr.add_replica_groups() = group;
}
if (channel_id.has_value()) {
@@ -2767,16 +2768,17 @@ XlaOp ReduceWindowWithGeneralPadding(
padding);
}
-XlaOp CrossReplicaSum(const XlaOp& operand,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids) {
- return operand.builder()->CrossReplicaSum(operand, replica_group_ids);
+XlaOp CrossReplicaSum(
+ const XlaOp& operand,
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups) {
+ return operand.builder()->CrossReplicaSum(operand, replica_groups);
}
XlaOp CrossReplicaSum(const XlaOp& operand, const XlaComputation& computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups,
const absl::optional<ChannelHandle>& channel_id) {
return operand.builder()->CrossReplicaSum(operand, computation,
- replica_group_ids, channel_id);
+ replica_groups, channel_id);
}
XlaOp AllToAll(const XlaOp& operand, int64 split_dimension,
diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h
index 8d9ec9a18a..adb62f5f02 100644
--- a/tensorflow/compiler/xla/client/xla_builder.h
+++ b/tensorflow/compiler/xla/client/xla_builder.h
@@ -685,7 +685,7 @@ class XlaBuilder {
// sum for each subgroup.
XlaOp CrossReplicaSum(
const XlaOp& operand,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids = {});
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups = {});
// Enqueues an operation that do an AllReduce of the operand cross cores. Here
// AllReduce means doing a reduction on the input operand cross cores and then
@@ -694,10 +694,11 @@ class XlaBuilder {
// scalars, e.g., add, min, or max. The way that AllReduce is applied is
// configured by:
//
- // - `replica_group_ids`: maps replica ids to subgroup ids. If empty, all
- // replicas belong to one group. Allreduce will be applied within subgroups.
- // For example, we have 4 replicas, then replica_group_ids={0,1,0,1} means,
- // replica 0 and 2 are in subgroup 0, replica 1 and 3 are in subgroup 1.
+ // - `replica_groups`: each ReplicaGroup contains a list of replica id. If
+ // empty, all replicas belong to one group. Allreduce will be applied within
+ // subgroups. For example, we have 4 replicas, then
+ // replica_groups={{0,2},{1,3}} means, replica 0 and 2 are in subgroup 0,
+ // replica 1 and 3 are in subgroup 1.
//
// - `channel_id`: for Allreduce nodes from different modules, if they have
// the same channel_id, they will be 'Allreduce'd. If empty, Allreduce will
@@ -706,7 +707,7 @@ class XlaBuilder {
// TODO(b/79737069): Rename this to AllReduce when it's ready to use.
XlaOp CrossReplicaSum(
const XlaOp& operand, const XlaComputation& computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids = {},
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups = {},
const absl::optional<ChannelHandle>& channel_id = absl::nullopt);
// Enqueues an operation that do an Alltoall of the operand cross cores.
@@ -1253,10 +1254,10 @@ class XlaBuilder {
tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding);
friend XlaOp CrossReplicaSum(
const XlaOp& operand,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids);
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups);
friend XlaOp CrossReplicaSum(
const XlaOp& operand, const XlaComputation& computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups,
const absl::optional<ChannelHandle>& channel_id);
friend XlaOp AllToAll(const XlaOp& operand, int64 split_dimension,
int64 concat_dimension, int64 split_count,
@@ -1833,7 +1834,7 @@ XlaOp ReduceWindowWithGeneralPadding(
// sum for each subgroup.
XlaOp CrossReplicaSum(
const XlaOp& operand,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids = {});
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups = {});
// Enqueues an operation that do an AllReduce of the operand cross cores. Here
// AllReduce means doing a reduction on the input operand cross cores and then
@@ -1842,10 +1843,10 @@ XlaOp CrossReplicaSum(
// scalars, e.g., add, min, or max. The way that AllReduce is applied is
// configured by:
//
-// - `replica_group_ids`: maps replica ids to subgroup ids. If empty, all
-// replicas belong to one group. Allreduce will be applied within subgroups.
-// For example, we have 4 replicas, then replica_group_ids={0,1,0,1} means,
-// replica 0 and 2 are in subgroup 0, replica 1 and 3 are in subgroup 1.
+// - `replica_groups`: each ReplicaGroup contains a list of replica id. If
+// empty, all replicas belong to one group. Allreduce will be applied within
+// subgroups. For example, we have 4 replicas, then replica_groups={{0,2},{1,3}}
+// means, replica 0 and 2 are in subgroup 0, replica 1 and 3 are in subgroup 1.
//
// - `channel_id`: for Allreduce nodes from different modules, if they have the
// same channel_id, they will be 'Allreduce'd. If empty, Allreduce will not be
@@ -1854,7 +1855,7 @@ XlaOp CrossReplicaSum(
// TODO(b/79737069): Rename this to AllReduce when it's ready to use.
XlaOp CrossReplicaSum(
const XlaOp& operand, const XlaComputation& computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids = {},
+ tensorflow::gtl::ArraySlice<ReplicaGroup> replica_groups = {},
const absl::optional<ChannelHandle>& channel_id = absl::nullopt);
// Enqueues an operation that do an Alltoall of the operand cross cores.
diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc
index a44756e136..6363a21c3b 100644
--- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding_test.cc
@@ -235,7 +235,7 @@ TEST_F(BFloat16ConversionFoldingTest, FoldCrossReplicaSumTupleOutput) {
HloInstruction* crs =
builder.AddInstruction(HloInstruction::CreateCrossReplicaSum(
ShapeUtil::MakeTupleShape({f32_shape, f32_shape}), {convert_a, b},
- sum, /*replica_group_ids=*/{}, /*barrier=*/"",
+ sum, /*replica_groups=*/{}, /*barrier=*/"",
/*all_reduce_id=*/absl::nullopt));
HloInstruction* gte_a = builder.AddInstruction(
HloInstruction::CreateGetTupleElement(f32_shape, crs, 0));
diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
index 303ceac2e0..49ae5320b0 100644
--- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
+++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc
@@ -251,7 +251,7 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleCrossReplicaSum) {
HloInstruction* crs =
builder.AddInstruction(HloInstruction::CreateCrossReplicaSum(
ShapeUtil::MakeTupleShape({f32_shape, bf16_shape}), {a, b}, reduction,
- /*replica_group_ids=*/{}, /*barrier=*/"",
+ /*replica_groups=*/{}, /*barrier=*/"",
/*all_reduce_id=*/absl::nullopt));
HloInstruction* gte = builder.AddInstruction(
HloInstruction::CreateGetTupleElement(bf16_shape, crs, 1));
diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto
index 12b609a60f..821c599863 100644
--- a/tensorflow/compiler/xla/service/hlo.proto
+++ b/tensorflow/compiler/xla/service/hlo.proto
@@ -46,6 +46,8 @@ message HloInstructionProto {
reserved "control_predecessor_names";
reserved 6;
reserved "called_computation_names";
+ reserved 44;
+ reserved "replica_group_ids";
string name = 1;
string opcode = 2;
@@ -158,9 +160,6 @@ message HloInstructionProto {
string backend_config = 43;
// Cross replica op fields.
- // TODO(b/112107579): remove replica_group_ids field and always use
- // replica_groups.
- repeated int64 replica_group_ids = 44;
repeated ReplicaGroup replica_groups = 49;
int64 all_reduce_id = 45;
string cross_replica_sum_barrier = 46;
diff --git a/tensorflow/compiler/xla/service/hlo_domain_isolator.cc b/tensorflow/compiler/xla/service/hlo_domain_isolator.cc
index 78955db0da..af904647f8 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_isolator.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_isolator.cc
@@ -31,12 +31,12 @@ class HloDomainIsolator::RunContext {
StatusOr<bool> Run();
private:
- // Inserts a kDomain instruction between parent and operand, in case
- // the attribute (ie, sharding) values change between instruction and operand.
+ // Inserts a kDomain instruction between operand and instruction in case
+ // the attribute (ie, sharding) values change between root and instruction.
// Returns the newly inserted kDomain instruction, or nullptr if no kDomain
// instruction was necessary.
StatusOr<HloInstruction*> CreateDomain(HloInstruction* instruction,
- HloInstruction* parent,
+ HloInstruction* root,
HloInstruction* operand);
HloModule* module_;
@@ -44,14 +44,14 @@ class HloDomainIsolator::RunContext {
};
StatusOr<HloInstruction*> HloDomainIsolator::RunContext::CreateDomain(
- HloInstruction* instruction, HloInstruction* parent,
+ HloInstruction* instruction, HloInstruction* root,
HloInstruction* operand) {
HloInstruction* domain = nullptr;
std::unique_ptr<HloInstruction> domain_instruction =
- isolator_->creator_(instruction, operand);
+ isolator_->creator_(instruction, root, operand);
if (domain_instruction != nullptr) {
domain = operand->parent()->AddInstruction(std::move(domain_instruction));
- TF_RETURN_IF_ERROR(operand->ReplaceUseWith(parent, domain));
+ TF_RETURN_IF_ERROR(operand->ReplaceUseWith(instruction, domain));
}
return domain;
}
@@ -71,14 +71,13 @@ StatusOr<bool> HloDomainIsolator::RunContext::Run() {
// When applying multiple domains, we could end up stacking more than
// one in one edge, so here we want to build the effective
// (kDomain-less) instruction->operand edge.
- HloInstruction* parent = instruction;
- while (operand->opcode() == HloOpcode::kDomain) {
- parent = operand;
- operand = operand->mutable_operand(0);
+ HloInstruction* root = operand;
+ while (root->opcode() == HloOpcode::kDomain) {
+ root = root->mutable_operand(0);
}
// Check whether a kDomain is necessary between instruction and operand.
TF_ASSIGN_OR_RETURN(HloInstruction * domain,
- CreateDomain(instruction, parent, operand));
+ CreateDomain(instruction, root, operand));
if (domain != nullptr) {
VLOG(4) << "New domain: " << domain->ToString();
++added_domains;
diff --git a/tensorflow/compiler/xla/service/hlo_domain_isolator.h b/tensorflow/compiler/xla/service/hlo_domain_isolator.h
index eded3e78ee..bb1537766c 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_isolator.h
+++ b/tensorflow/compiler/xla/service/hlo_domain_isolator.h
@@ -34,10 +34,12 @@ class HloDomainIsolator : public HloPassInterface {
public:
// Creates a new kDomain instruction for the edge between the use instruction
// (the first HloInstruction argument), and the operand instruction (the
- // second HloInstruction argument).
+ // third HloInstruction argument) if the interesting attribute of the
+ // instruction differes from the attribute of the root (the second
+ // HloInstruction argument).
// Returns nullptr in case no domain separation is necessary.
using DomainCreator = std::function<std::unique_ptr<HloInstruction>(
- HloInstruction*, HloInstruction*)>;
+ HloInstruction*, HloInstruction*, HloInstruction*)>;
explicit HloDomainIsolator(DomainCreator creator);
diff --git a/tensorflow/compiler/xla/service/hlo_domain_test.cc b/tensorflow/compiler/xla/service/hlo_domain_test.cc
index 7d48be15cf..2654929bf0 100644
--- a/tensorflow/compiler/xla/service/hlo_domain_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_domain_test.cc
@@ -106,12 +106,13 @@ class OpNameMetadata : public DomainMetadata {
// Creator function for OpNameMetadata domains.
std::unique_ptr<HloInstruction> OpNameDomainCreator(HloInstruction* instruction,
+ HloInstruction* root,
HloInstruction* operand) {
- if (instruction->metadata().op_name() == operand->metadata().op_name()) {
+ if (instruction->metadata().op_name() == root->metadata().op_name()) {
return nullptr;
}
std::unique_ptr<DomainMetadata> operand_side_metadata =
- absl::make_unique<OpNameMetadata>(operand->metadata().op_name());
+ absl::make_unique<OpNameMetadata>(root->metadata().op_name());
std::unique_ptr<DomainMetadata> user_side_metadata =
absl::make_unique<OpNameMetadata>(instruction->metadata().op_name());
return HloInstruction::CreateDomain(operand->shape(), operand,
@@ -524,5 +525,64 @@ ENTRY entry {
tpl->sharding());
}
+TEST_F(HloDomainTest, MultiDomainMultiUser) {
+ const char* const hlo_string = R"(
+ HloModule Module
+
+ENTRY %entry (p0: (f32[4], f32[4])) -> (f32[4], f32[4], f32[4]) {
+ %p0 = (f32[4], f32[4]) parameter(0)
+ %a = f32[4]{0} get-tuple-element(%p0), index=0
+ %domain = f32[4] domain(%a),
+ domain={kind="sharding", entry={maximal device=1}, exit={maximal device=0}}
+ %b = f32[4] get-tuple-element(%p0), index=1
+ %domain.1 = f32[4] domain(%b),
+ domain={kind="sharding", entry={maximal device=1}, exit={maximal device=0}}
+ %c = f32[4] add(%domain, %domain.1), sharding={maximal device=1}
+ %domain.2 = f32[4] domain(%c),
+ domain={kind="sharding", entry={maximal device=0}, exit={maximal device=1}}
+ %d = f32[4] subtract(%domain, %c),
+ sharding={maximal device=1}, metadata={op_name="D"}
+ %domain.3 = f32[4] domain(%d),
+ domain={kind="sharding", entry={maximal device=0}, exit={maximal device=1}}
+ %e = f32[4] multiply(%c, %d),
+ sharding={maximal device=1}, metadata={op_name="D"}
+ %f = f32[4] add(f32[4]{0} %e, f32[4]{0} %c), sharding={maximal device=1}
+ %domain.4 = f32[4]{0} domain(%f),
+ domain={kind="sharding", entry={maximal device=0}, exit={maximal device=1}}
+ ROOT %g = (f32[4], f32[4], f32[4]) tuple(%domain.2, %domain.3, %domain.4)
+})";
+
+ TF_ASSERT_OK_AND_ASSIGN(HloModule * module, ParseModule(hlo_string));
+ LOG(INFO) << "Original module:\n" << module->ToString();
+
+ HloDomainIsolator opname_isolator(OpNameDomainCreator);
+ TF_ASSERT_OK_AND_ASSIGN(bool opname_isolator_changed,
+ opname_isolator.Run(module));
+ EXPECT_TRUE(opname_isolator_changed);
+
+ EXPECT_TRUE(HasDomainEdge(module, "c", "a"));
+ EXPECT_TRUE(HasDomainEdge(module, "c", "b"));
+ EXPECT_TRUE(HasDomainEdge(module, "d", "a"));
+ EXPECT_TRUE(HasDomainEdge(module, "d", "c"));
+ EXPECT_FALSE(HasDomainEdge(module, "e", "d"));
+
+ HloDomainRemover sharding_remover(ShardingMetadata::KindName(),
+ ShardingMetadata::NormalizeShardingDomain);
+ TF_ASSERT_OK_AND_ASSIGN(bool sharding_remover_changed,
+ sharding_remover.Run(module));
+ EXPECT_TRUE(sharding_remover_changed);
+
+ HloDomainRemover opname_remover(OpNameMetadata::KindName(),
+ OpNameDomainNormalizer);
+ TF_ASSERT_OK_AND_ASSIGN(bool opname_remover_changed,
+ opname_remover.Run(module));
+ EXPECT_TRUE(opname_remover_changed);
+
+ EXPECT_FALSE(HasDomainEdge(module, "c", "a"));
+ EXPECT_FALSE(HasDomainEdge(module, "c", "b"));
+ EXPECT_FALSE(HasDomainEdge(module, "d", "a"));
+ EXPECT_FALSE(HasDomainEdge(module, "d", "c"));
+}
+
} // namespace
} // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc
index 9d795da100..a211167519 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.cc
+++ b/tensorflow/compiler/xla/service/hlo_instruction.cc
@@ -302,9 +302,9 @@ StatusOr<std::unique_ptr<HloInstruction>> HloInstruction::CreateFromProto(
}
instruction = CreateCrossReplicaSum(
proto.shape(), all_operands(), computations(0),
- /*replica_group_ids=*/
- std::vector<int64>(proto.replica_group_ids().begin(),
- proto.replica_group_ids().end()),
+ /*replica_groups=*/
+ std::vector<ReplicaGroup>(proto.replica_groups().begin(),
+ proto.replica_groups().end()),
/*barrier=*/proto.cross_replica_sum_barrier(),
/*all_reduce_id=*/all_reduce_id);
break;
@@ -665,11 +665,11 @@ HloInstruction::CreateReducePrecision(const Shape& shape,
HloInstruction::CreateCrossReplicaSum(
const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands,
HloComputation* reduce_computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ const std::vector<ReplicaGroup>& replica_groups,
tensorflow::StringPiece barrier,
const absl::optional<int64>& all_reduce_id) {
return absl::make_unique<HloAllReduceInstruction>(
- shape, operands, reduce_computation, replica_group_ids, barrier,
+ shape, operands, reduce_computation, replica_groups, barrier,
all_reduce_id);
}
@@ -3184,11 +3184,10 @@ const string& HloInstruction::outfeed_config() const {
return Cast<HloOutfeedInstruction>(this)->outfeed_config();
}
-const std::vector<int64>& HloInstruction::replica_group_ids() const {
- return Cast<HloAllReduceInstruction>(this)->replica_group_ids();
-}
-
const std::vector<ReplicaGroup>& HloInstruction::replica_groups() const {
+ if (opcode() == HloOpcode::kCrossReplicaSum) {
+ return Cast<HloAllReduceInstruction>(this)->replica_groups();
+ }
return Cast<HloAllToAllInstruction>(this)->replica_groups();
}
diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h
index 21710bd31d..fdd34544eb 100644
--- a/tensorflow/compiler/xla/service/hlo_instruction.h
+++ b/tensorflow/compiler/xla/service/hlo_instruction.h
@@ -435,9 +435,10 @@ class HloInstruction {
//
// `reduction_computation`: the reduction function.
//
- // `replica_group_ids`: maps replica ids to subgroup ids. If empty, all
- // replicas belong to one group. Allreduce will be applied within subgroups.
- // For example, we have 4 replicas, then replica_group_ids={0,1,0,1} means,
+ // `replica_groups`: each ReplicaGroup contains a list of replica id. If
+ // empty, all replicas belong to one group in the order of 0 - (n-1).
+ // Allreduce will be applied within subgroups.
+ // For example, we have 4 replicas, then replica_groups={{0,2},{1,3}} means,
// replica 0 and 2 are in subgroup 0, replica 1 and 3 are in subgroup 1.
//
// `all_reduce_id`: for Allreduce nodes from different modules, if they have
@@ -448,7 +449,7 @@ class HloInstruction {
static std::unique_ptr<HloInstruction> CreateCrossReplicaSum(
const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands,
HloComputation* reduce_computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ const std::vector<ReplicaGroup>& replica_groups,
tensorflow::StringPiece barrier,
const absl::optional<int64>& all_reduce_id);
@@ -1439,9 +1440,6 @@ class HloInstruction {
// Returns the shape for the Outfeed instruction.
const Shape& outfeed_shape() const;
- // Delegates to HloAllReduceInstruction::replica_group_ids.
- const std::vector<int64>& replica_group_ids() const;
-
// Delegates to HloAllToAllInstruction::replica_groups.
const std::vector<ReplicaGroup>& replica_groups() const;
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc
index dbafa35b2a..36fac4a266 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.cc
+++ b/tensorflow/compiler/xla/service/hlo_instructions.cc
@@ -300,10 +300,10 @@ HloRecvDoneInstruction::CloneWithNewOperandsImpl(
HloAllReduceInstruction::HloAllReduceInstruction(
const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands,
HloComputation* reduce_computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ const std::vector<ReplicaGroup>& replica_groups,
tensorflow::StringPiece barrier, const absl::optional<int64>& all_reduce_id)
: HloInstruction(HloOpcode::kCrossReplicaSum, shape),
- replica_group_ids_(replica_group_ids.begin(), replica_group_ids.end()),
+ replica_groups_(replica_groups),
cross_replica_sum_barrier_(barrier.begin(), barrier.end()),
all_reduce_id_(all_reduce_id) {
for (auto operand : operands) {
@@ -314,9 +314,8 @@ HloAllReduceInstruction::HloAllReduceInstruction(
HloInstructionProto HloAllReduceInstruction::ToProto() const {
HloInstructionProto proto = HloInstruction::ToProto();
- for (int64 i : replica_group_ids_) {
- proto.add_replica_group_ids(i);
- }
+ *proto.mutable_replica_groups() = {replica_groups_.begin(),
+ replica_groups_.end()};
// Proto3 is so sad.
if (all_reduce_id_) {
proto.set_all_reduce_id(*all_reduce_id_);
@@ -327,8 +326,14 @@ HloInstructionProto HloAllReduceInstruction::ToProto() const {
std::vector<string> HloAllReduceInstruction::ExtraAttributesToStringImpl(
const HloPrintOptions& /*options*/) const {
- std::vector<string> result = {
- StrCat("replica_group_ids={", Join(replica_group_ids(), ","), "}")};
+ std::vector<string> result;
+ std::vector<string> replica_group_str;
+ for (const ReplicaGroup& group : replica_groups()) {
+ replica_group_str.push_back(
+ StrCat("{", Join(group.replica_ids(), ","), "}"));
+ }
+ result.push_back(
+ StrCat("replica_groups={", Join(replica_group_str, ","), "}"));
if (!cross_replica_sum_barrier().empty()) {
result.push_back(StrCat("barrier=\"", cross_replica_sum_barrier(), "\""));
}
@@ -343,7 +348,11 @@ bool HloAllReduceInstruction::IdenticalSlowPath(
const std::function<bool(const HloComputation*, const HloComputation*)>&
eq_computations) const {
const auto& casted_other = static_cast<const HloAllReduceInstruction&>(other);
- return replica_group_ids() == casted_other.replica_group_ids() &&
+ return ContainersEqual(replica_groups(), casted_other.replica_groups(),
+ [](const ReplicaGroup& a, const ReplicaGroup& b) {
+ return ContainersEqual(a.replica_ids(),
+ b.replica_ids());
+ }) &&
eq_computations(to_apply(), casted_other.to_apply()) &&
cross_replica_sum_barrier() ==
casted_other.cross_replica_sum_barrier() &&
@@ -356,7 +365,7 @@ HloAllReduceInstruction::CloneWithNewOperandsImpl(
tensorflow::gtl::ArraySlice<HloInstruction*> new_operands,
HloCloneContext* /*context*/) const {
return absl::make_unique<HloAllReduceInstruction>(
- shape, new_operands, to_apply(), replica_group_ids(),
+ shape, new_operands, to_apply(), replica_groups(),
cross_replica_sum_barrier(), all_reduce_id());
}
diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h
index 93e4c21b2f..0a6a0c6233 100644
--- a/tensorflow/compiler/xla/service/hlo_instructions.h
+++ b/tensorflow/compiler/xla/service/hlo_instructions.h
@@ -223,13 +223,12 @@ class HloAllReduceInstruction : public HloInstruction {
explicit HloAllReduceInstruction(
const Shape& shape, tensorflow::gtl::ArraySlice<HloInstruction*> operands,
HloComputation* reduce_computation,
- tensorflow::gtl::ArraySlice<int64> replica_group_ids,
+ const std::vector<ReplicaGroup>& replica_groups,
tensorflow::StringPiece barrier,
const absl::optional<int64>& all_reduce_id);
- // Returns the group ids of each replica for CrossReplicaSum op.
- const std::vector<int64>& replica_group_ids() const {
- return replica_group_ids_;
+ const std::vector<ReplicaGroup>& replica_groups() const {
+ return replica_groups_;
}
// Returns the barrier config used for the CrossReplicaSum implementation of
@@ -260,8 +259,8 @@ class HloAllReduceInstruction : public HloInstruction {
tensorflow::gtl::ArraySlice<HloInstruction*> new_operands,
HloCloneContext* context) const override;
- // The group id of each replica for CrossReplicaSum.
- std::vector<int64> replica_group_ids_;
+ // The replica ids of each subgroup for CrossReplicaSum op.
+ std::vector<ReplicaGroup> replica_groups_;
// The string representation of the barrier config used for CrossReplicaSum.
string cross_replica_sum_barrier_;
diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc
index ede55510d3..beef96476c 100644
--- a/tensorflow/compiler/xla/service/hlo_parser.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser.cc
@@ -293,6 +293,20 @@ class HloParser {
missing_instruction_hook_;
};
+// Creates replica groups from the provided nested array. groups[i] represents
+// the replica ids for group 'i'.
+std::vector<ReplicaGroup> CreateReplicaGroups(
+ tensorflow::gtl::ArraySlice<std::vector<int64>> groups) {
+ std::vector<ReplicaGroup> replica_groups;
+ absl::c_transform(groups, std::back_inserter(replica_groups),
+ [](const std::vector<int64>& ids) {
+ ReplicaGroup group;
+ *group.mutable_replica_ids() = {ids.begin(), ids.end()};
+ return group;
+ });
+ return replica_groups;
+}
+
bool HloParser::Error(LocTy loc, StringPiece msg) {
auto line_col = lexer_.GetLineAndColumn(loc);
const unsigned line = line_col.first;
@@ -637,31 +651,29 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
break;
}
case HloOpcode::kCrossReplicaSum: {
+ optional<std::vector<std::vector<int64>>> tmp_groups;
optional<HloComputation*> to_apply;
optional<std::vector<int64>> replica_group_ids;
optional<string> barrier;
optional<int64> all_reduce_id;
attrs["to_apply"] = {/*required=*/true, AttrTy::kHloComputation,
&to_apply};
- attrs["replica_group_ids"] = {
- /*required=*/false, AttrTy::kBracedInt64List, &replica_group_ids};
+ attrs["replica_groups"] = {/*required=*/false,
+ AttrTy::kBracedInt64ListList, &tmp_groups};
attrs["barrier"] = {/*required=*/false, AttrTy::kString, &barrier};
attrs["all_reduce_id"] = {/*required=*/false, AttrTy::kInt64,
&all_reduce_id};
if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
return false;
}
- if (replica_group_ids) {
- instruction =
- builder->AddInstruction(HloInstruction::CreateCrossReplicaSum(
- shape, operands, *to_apply, *replica_group_ids,
- barrier ? *barrier : "", all_reduce_id));
- } else {
- instruction =
- builder->AddInstruction(HloInstruction::CreateCrossReplicaSum(
- shape, operands, *to_apply, {}, barrier ? *barrier : "",
- all_reduce_id));
+ std::vector<ReplicaGroup> replica_groups;
+ if (tmp_groups) {
+ replica_groups = CreateReplicaGroups(*tmp_groups);
}
+ instruction =
+ builder->AddInstruction(HloInstruction::CreateCrossReplicaSum(
+ shape, operands, *to_apply, replica_groups,
+ barrier ? *barrier : "", all_reduce_id));
break;
}
case HloOpcode::kAllToAll: {
@@ -675,13 +687,7 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
}
std::vector<ReplicaGroup> replica_groups;
if (tmp_groups) {
- absl::c_transform(
- *tmp_groups, std::back_inserter(replica_groups),
- [](const std::vector<int64>& ids) {
- ReplicaGroup group;
- *group.mutable_replica_ids() = {ids.begin(), ids.end()};
- return group;
- });
+ replica_groups = CreateReplicaGroups(*tmp_groups);
}
instruction = builder->AddInstruction(HloInstruction::CreateAllToAll(
shape, operands, replica_groups, barrier ? *barrier : ""));
diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc
index 5a1993a3bb..f52cfadb81 100644
--- a/tensorflow/compiler/xla/service/hlo_parser_test.cc
+++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc
@@ -1052,7 +1052,7 @@ add {
ENTRY CRS {
input = f32[8]{0} parameter(0)
- ROOT crs = f32[8]{0} cross-replica-sum(input), replica_group_ids={}, to_apply=add
+ ROOT crs = f32[8]{0} cross-replica-sum(input), replica_groups={}, to_apply=add
}
)"
@@ -1070,7 +1070,7 @@ add {
ENTRY CrossReplicaSumWithSubgroups {
input = f32[128,32]{0,1} parameter(0)
- ROOT cross-replica-sum = f32[128,32]{0,1} cross-replica-sum(input), replica_group_ids={0,0,1,1}, barrier="abc", to_apply=add
+ ROOT cross-replica-sum = f32[128,32]{0,1} cross-replica-sum(input), replica_groups={{0,1},{2,3}}, barrier="abc", to_apply=add
}
)"
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc
index 4e19557f82..6f0353ee5f 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc
+++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.cc
@@ -284,18 +284,19 @@ Status ApplyDomainSharding(const DomainMetadata::Domain& domain,
// The kDomain instruction will be created only if the sharding differ between
// the instruction and the operand.
std::unique_ptr<HloInstruction> CreateDomain(HloInstruction* instruction,
+ HloInstruction* root,
HloInstruction* operand) {
const HloSharding* instruction_sharding =
instruction->has_sharding() ? &instruction->sharding() : nullptr;
- const HloSharding* operand_sharding =
- operand->has_sharding() ? &operand->sharding() : nullptr;
+ const HloSharding* root_sharding =
+ root->has_sharding() ? &root->sharding() : nullptr;
// No need for domain if they both have no sharding.
- if (instruction_sharding == nullptr && operand_sharding == nullptr) {
+ if (instruction_sharding == nullptr && root_sharding == nullptr) {
return nullptr;
}
// No need for domain if they match.
- if (instruction_sharding != nullptr && operand_sharding != nullptr &&
- ShardingMatches(*instruction_sharding, *operand_sharding)) {
+ if (instruction_sharding != nullptr && root_sharding != nullptr &&
+ ShardingMatches(*instruction_sharding, *root_sharding)) {
return nullptr;
}
std::unique_ptr<HloSharding> real_instruction_sharding;
@@ -303,8 +304,8 @@ std::unique_ptr<HloInstruction> CreateDomain(HloInstruction* instruction,
if (instruction_sharding != nullptr) {
real_instruction_sharding = CloneShardingForDomain(*instruction_sharding);
}
- if (operand_sharding != nullptr) {
- real_operand_sharding = CloneShardingForDomain(*operand_sharding);
+ if (root_sharding != nullptr) {
+ real_operand_sharding = CloneShardingForDomain(*root_sharding);
}
VLOG(3) << "Creating domain:";
VLOG(3) << " Instruction: " << instruction->name();
@@ -417,8 +418,9 @@ Status ShardingMetadata::NormalizeShardingDomain(
}
std::unique_ptr<HloInstruction> CreateShardingDomain(
- HloInstruction* instruction, HloInstruction* operand) {
- return CreateDomain(instruction, operand);
+ HloInstruction* instruction, HloInstruction* root,
+ HloInstruction* operand) {
+ return CreateDomain(instruction, root, operand);
}
} // namespace xla
diff --git a/tensorflow/compiler/xla/service/hlo_sharding_metadata.h b/tensorflow/compiler/xla/service/hlo_sharding_metadata.h
index 5e01fc0e22..dc258e4094 100644
--- a/tensorflow/compiler/xla/service/hlo_sharding_metadata.h
+++ b/tensorflow/compiler/xla/service/hlo_sharding_metadata.h
@@ -60,10 +60,10 @@ class ShardingMetadata : public DomainMetadata {
// Given an HLO graph edge between instruction and one of its operands, creates
// a ShardingMetadata based kDomain instruction if the sharding between
-// instruction and operand changes. Returns nullptr if there is no need for a
+// instruction and parent changes. Returns nullptr if there is no need for a
// domain separation.
std::unique_ptr<HloInstruction> CreateShardingDomain(
- HloInstruction* instruction, HloInstruction* operand);
+ HloInstruction* instruction, HloInstruction* root, HloInstruction* operand);
} // namespace xla
diff --git a/tensorflow/contrib/autograph/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py
index 5a5a6ad63a..f7dd3183b0 100644
--- a/tensorflow/contrib/autograph/converters/control_flow.py
+++ b/tensorflow/contrib/autograph/converters/control_flow.py
@@ -95,6 +95,18 @@ class ControlFlowTransformer(converter.Base):
return 'no variables'
return ', '.join(map(str, symbol_set))
+ def _validate_no_live_vars_created(self, node):
+ body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
+ live_vars_out = anno.getanno(node, anno.Static.LIVE_VARS_OUT)
+ live_vars_created_in_body = live_vars_out & body_scope.created
+ if live_vars_created_in_body:
+ raise ValueError(
+ 'The following variables are created inside the loop and used later:'
+ '\n%s\n'
+ 'Variables must be declared outside loops because loops may not'
+ ' necessarily execute.' % self._fmt_symbol_list(
+ live_vars_created_in_body))
+
def visit_If(self, node):
node = self.generic_visit(node)
@@ -197,6 +209,8 @@ class ControlFlowTransformer(converter.Base):
def visit_While(self, node):
self.generic_visit(node)
+ self._validate_no_live_vars_created(node)
+
body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
body_closure = body_scope.modified - body_scope.created
all_referenced = body_scope.referenced
@@ -262,6 +276,8 @@ class ControlFlowTransformer(converter.Base):
def visit_For(self, node):
self.generic_visit(node)
+ self._validate_no_live_vars_created(node)
+
body_scope = anno.getanno(node, annos.NodeAnno.BODY_SCOPE)
body_closure = body_scope.modified - body_scope.created
all_referenced = body_scope.referenced
diff --git a/tensorflow/contrib/autograph/converters/control_flow_test.py b/tensorflow/contrib/autograph/converters/control_flow_test.py
index 6cb907f69a..02bc00dbc8 100644
--- a/tensorflow/contrib/autograph/converters/control_flow_test.py
+++ b/tensorflow/contrib/autograph/converters/control_flow_test.py
@@ -57,6 +57,17 @@ class ControlFlowTest(converter_testing.TestCase):
self.assertTransformedResult(test_fn, constant_op.constant(5), 0)
+ def test_while_variable_defined_in_body(self):
+ def bad_while_loop(n):
+ while n > 0:
+ n -= 1
+ s = n
+ return s
+
+ node, ctx = self.prepare(bad_while_loop, {})
+ with self.assertRaises(transformer.AutographParseError):
+ control_flow.transform(node, ctx)
+
def test_if_basic(self):
def test_fn(n):
@@ -196,6 +207,15 @@ class ControlFlowTest(converter_testing.TestCase):
self.assertEqual(result.test_fn(5), 10)
self.assertEqual(eval_count[0], 1)
+ def test_for_variable_defined_in_body(self):
+ def bad_for_loop(n):
+ for i in range(n):
+ s = i
+ return s
+
+ node, ctx = self.prepare(bad_for_loop, {})
+ with self.assertRaises(transformer.AutographParseError):
+ control_flow.transform(node, ctx)
if __name__ == '__main__':
test.main()
diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs
index 58a94ff4a5..dcad77ccbb 100644
--- a/tensorflow/contrib/lite/schema/schema.fbs
+++ b/tensorflow/contrib/lite/schema/schema.fbs
@@ -639,9 +639,9 @@ table SubGraph {
}
// Table of raw data buffers (used for constant tensors). Referenced by tensors
-// by index.
+// by index. The generous alignment accommodates mmap-friendly data structures.
table Buffer {
- data:[ubyte];
+ data:[ubyte] (force_align: 16);
}
table Model {
diff --git a/tensorflow/core/kernels/cwise_op_div.cc b/tensorflow/core/kernels/cwise_op_div.cc
index 35662e278f..313d976e2c 100644
--- a/tensorflow/core/kernels/cwise_op_div.cc
+++ b/tensorflow/core/kernels/cwise_op_div.cc
@@ -33,6 +33,7 @@ REGISTER4(BinaryOp, GPU, "TruncateDiv", functor::div, uint8, uint16, int16,
int64);
REGISTER5(BinaryOp, GPU, "RealDiv", functor::div, float, Eigen::half, double,
complex64, complex128);
+REGISTER2(BinaryOp, GPU, "DivNoNan", functor::div_no_nan, float, double);
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
diff --git a/tensorflow/core/kernels/cwise_op_gpu_div.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_div.cu.cc
index 0b05416274..25ccdcfb00 100644
--- a/tensorflow/core/kernels/cwise_op_gpu_div.cu.cc
+++ b/tensorflow/core/kernels/cwise_op_gpu_div.cu.cc
@@ -21,6 +21,7 @@ namespace tensorflow {
namespace functor {
DEFINE_BINARY10(div, Eigen::half, float, double, uint8, uint16, int16, int32,
int64, complex64, complex128);
+DEFINE_BINARY2(div_no_nan, float, double);
} // namespace functor
} // namespace tensorflow
diff --git a/tensorflow/core/ops/array_grad.cc b/tensorflow/core/ops/array_grad.cc
index 1f2e57e9a9..3d03bc1d5f 100644
--- a/tensorflow/core/ops/array_grad.cc
+++ b/tensorflow/core/ops/array_grad.cc
@@ -354,6 +354,27 @@ Status TransposeGrad(const AttrSlice& attrs, FunctionDef* g) {
}
REGISTER_OP_GRADIENT("Transpose", TransposeGrad);
+Status GatherNdGrad(const AttrSlice& attrs, FunctionDef* g) {
+ // clang-format off
+ *g = FDH::Define(
+ // Arg defs
+ {"params: Tparams", "indices: Tindices", "doutput: Tparams"},
+ // Ret val defs
+ {"dparams: Tparams", "dindices: Tindices"},
+ // Attr defs
+ {"Tparams: type", "Tindices: type"},
+ // Nodes
+ {
+ {{"x_shape"}, "Shape", {"params"}, {{"T", "$Tparams"}}},
+ {{"dparams"}, "ScatterNd", {"indices", "doutput", "x_shape"},
+ {{"T", "$Tparams"}, {"Tindices", "$Tindices"}}},
+ {{"dindices"}, "ZerosLike", {"indices"}, {{"T", "$Tindices"}}},
+ });
+ // clang-format on
+ return Status::OK();
+}
+REGISTER_OP_GRADIENT("GatherNd", GatherNdGrad);
+
Status ConjugateTransposeGrad(const AttrSlice& attrs, FunctionDef* g) {
*g = FDH::Define(
// Arg defs
diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py
index 380bcf763f..ca6aafd715 100644
--- a/tensorflow/python/eager/function_test.py
+++ b/tensorflow/python/eager/function_test.py
@@ -211,6 +211,18 @@ class FunctionTest(test.TestCase):
random_seed.set_random_seed(1)
self.assertAllEqual(f(), x)
+ def testSymGradGatherNd(self):
+ with ops.Graph().as_default(), self.test_session() as sess:
+
+ @function.defun
+ def f(x):
+ return array_ops.gather_nd(x, [[0]])
+
+ c = constant_op.constant([[2.]])
+ f_c = f(c)
+ g, = gradients_impl.gradients(f_c, c)
+ self.assertAllEqual(sess.run(g), [[1.0]])
+
def testNestedInputsDefunOpGraphMode(self):
matmul = function.defun(math_ops.matmul)
diff --git a/tensorflow/python/estimator/keras_test.py b/tensorflow/python/estimator/keras_test.py
index dbcad323b9..290c4604ce 100644
--- a/tensorflow/python/estimator/keras_test.py
+++ b/tensorflow/python/estimator/keras_test.py
@@ -203,7 +203,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer='rmsprop',
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model, config=self._config)
before_eval_results = est_keras.evaluate(
@@ -228,7 +228,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
metrics=['mse', keras.metrics.categorical_accuracy])
my_hook = MyHook()
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model, config=self._config)
before_eval_results = est_keras.evaluate(
@@ -252,7 +252,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer=rmsprop.RMSPropOptimizer(1e-3),
metrics=['mse', keras.metrics.categorical_accuracy])
my_hook = MyHook()
- with self.cached_session():
+ with self.test_session():
keras_model.fit(x_train, y_train, epochs=1)
keras_est = keras_lib.model_to_estimator(
@@ -274,7 +274,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer=rmsprop.RMSPropOptimizer(1e-3),
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model,
config=self._config)
@@ -297,7 +297,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer=rmsprop.RMSPropOptimizer(1e-3),
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model, config=self._config)
est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16)
@@ -316,7 +316,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer=rmsprop.RMSPropOptimizer(1e-3),
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
# Create state
keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE),
np.random.random((10, _NUM_CLASS)))
@@ -343,7 +343,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
x_test, y_test), _, eval_input_fn = get_resource_for_simple_model(
model_type='functional', is_evaluate=True)
- with self.cached_session():
+ with self.test_session():
metrics = [
'binary_accuracy', 'binary_crossentropy', 'categorical_accuracy',
'categorical_crossentropy', 'cosine_proximity', 'hinge',
@@ -357,7 +357,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
keras_model.fit(x_train, y_train, epochs=1)
keras_eval = keras_model.evaluate(x_test, y_test, batch_size=32)
- with self.cached_session():
+ with self.test_session():
keras_est = keras_lib.model_to_estimator(
keras_model=keras_model, config=self._config)
est_eval = keras_est.evaluate(input_fn=eval_input_fn)
@@ -385,7 +385,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
x_test, _), _, pred_input_fn = get_resource_for_simple_model(
model_type='sequential', is_evaluate=False)
- with self.cached_session():
+ with self.test_session():
keras_model.compile(
loss='categorical_crossentropy',
optimizer='adam',
@@ -393,7 +393,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
keras_model.fit(x_train, y_train, epochs=1)
keras_pred = [np.argmax(y) for y in keras_model.predict(x_test)]
- with self.cached_session():
+ with self.test_session():
keras_est = keras_lib.model_to_estimator(
keras_model=keras_model, config=self._config)
est_pred = [
@@ -439,7 +439,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
output_dict = {'dense_2': c_test, 'dense_3': d_test}
return input_dict, output_dict
- with self.cached_session():
+ with self.test_session():
model = multi_inputs_multi_outputs_model()
est_keras = keras_lib.model_to_estimator(
keras_model=model, config=self._config)
@@ -456,7 +456,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
x_test, _), _, pred_input_fn = get_resource_for_simple_model(
model_type='functional', is_evaluate=False)
- with self.cached_session():
+ with self.test_session():
keras_model.compile(
loss='categorical_crossentropy',
optimizer='rmsprop',
@@ -466,7 +466,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
fname = os.path.join(self._base_dir, 'keras_model.h5')
keras.models.save_model(keras_model, fname)
- with self.cached_session():
+ with self.test_session():
keras_est = keras_lib.model_to_estimator(
keras_model_path=fname, config=self._config)
est_pred = [
@@ -479,19 +479,19 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
with self.assertRaisesRegexp(ValueError, 'Either'):
keras_lib.model_to_estimator()
- with self.cached_session():
+ with self.test_session():
keras_model = simple_sequential_model()
with self.assertRaisesRegexp(ValueError, 'not both'):
keras_lib.model_to_estimator(
keras_model=keras_model,
keras_model_path=tempfile.mkdtemp(dir=self._base_dir))
- with self.cached_session():
+ with self.test_session():
keras_model = simple_sequential_model()
with self.assertRaisesRegexp(ValueError, 'compiled'):
keras_lib.model_to_estimator(keras_model=keras_model)
- with self.cached_session():
+ with self.test_session():
keras_model = simple_sequential_model()
with self.assertRaisesRegexp(ValueError, 'not a local path'):
keras_lib.model_to_estimator(
@@ -516,10 +516,10 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
model = simple_functional_model()
model.compile(
loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=model, config=self._config)
- with self.cached_session():
+ with self.test_session():
with self.assertRaisesRegexp(KeyError,
'Difference: .*invalid_input_name'):
est_keras.train(input_fn=invald_input_name_input_fn, steps=100)
@@ -554,13 +554,13 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
num_epochs=None,
batch_size=16)
with self.assertRaisesRegexp(ValueError, 'relu6'):
- with self.cached_session():
+ with self.test_session():
est = keras_lib.model_to_estimator(
keras_model=keras_model,
model_dir=tempfile.mkdtemp(dir=self._base_dir))
est.train(input_fn=train_input_fn, steps=1)
- with self.cached_session():
+ with self.test_session():
est = keras_lib.model_to_estimator(
keras_model=keras_model,
model_dir=tempfile.mkdtemp(dir=self._base_dir),
@@ -586,7 +586,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
}
})
with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}):
- with self.cached_session():
+ with self.test_session():
keras_lib.model_to_estimator(
keras_model=keras_model,
model_dir=tempfile.mkdtemp(dir=self._base_dir))
@@ -602,7 +602,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3)
sess_config = config_pb2.ConfigProto(gpu_options=gpu_options)
self._config._session_config = sess_config
- with self.cached_session():
+ with self.test_session():
keras_lib.model_to_estimator(
keras_model=keras_model, config=self._config)
self.assertEqual(
@@ -618,7 +618,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer='rmsprop',
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model, model_dir=self._base_dir,
config=run_config_lib.RunConfig())
@@ -629,7 +629,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
self.assertEqual(self._base_dir, est_keras._config.model_dir)
self.assertEqual(self._base_dir, est_keras._model_dir)
- with self.cached_session():
+ with self.test_session():
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model, model_dir=self._base_dir,
config=None)
@@ -648,7 +648,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer='rmsprop',
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
with test.mock.patch.object(tempfile, 'mkdtemp', return_value=_TMP_DIR):
est_keras = keras_lib.model_to_estimator(
keras_model=keras_model,
@@ -663,7 +663,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
optimizer='rmsprop',
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
with self.assertRaisesRegexp(ValueError, '`model_dir` are set both in '
'constructor and `RunConfig`'):
keras_lib.model_to_estimator(
@@ -676,7 +676,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase):
loss='categorical_crossentropy',
optimizer=rmsprop.RMSPropOptimizer(1e-3),
metrics=['mse', keras.metrics.categorical_accuracy])
- with self.cached_session():
+ with self.test_session():
keras_model.train_on_batch(
np.random.random((10,) + _INPUT_SIZE),
np.random.random((10, _NUM_CLASS)))
diff --git a/tensorflow/python/framework/sparse_tensor.py b/tensorflow/python/framework/sparse_tensor.py
index 4823ba541d..d1bdd9b80a 100644
--- a/tensorflow/python/framework/sparse_tensor.py
+++ b/tensorflow/python/framework/sparse_tensor.py
@@ -112,8 +112,6 @@ class SparseTensor(_TensorLike):
values: A 1-D tensor of any type and shape `[N]`.
dense_shape: A 1-D int64 tensor of shape `[ndims]`.
- Returns:
- A `SparseTensor`.
"""
with ops.name_scope(None, "SparseTensor",
[indices, values, dense_shape]):
diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py
index 6bd41020c5..1b01d1d37f 100644
--- a/tensorflow/python/ops/math_ops_test.py
+++ b/tensorflow/python/ops/math_ops_test.py
@@ -483,7 +483,7 @@ class DivNoNanTest(test_util.TensorFlowTestCase):
np_result = np.true_divide(nums, divs)
np_result[:, divs[0] == 0] = 0
- with self.cached_session():
+ with self.cached_session(use_gpu=True):
tf_result = math_ops.div_no_nan(nums, divs).eval()
self.assertAllEqual(tf_result, np_result)
diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py
index 653e46fc41..090cf48a07 100644
--- a/tensorflow/tools/docs/generate_lib.py
+++ b/tensorflow/tools/docs/generate_lib.py
@@ -171,14 +171,15 @@ def write_docs(output_dir,
os.path.join('/', from_path),
os.path.join('/', to_path)))
- redirects = sorted(redirects)
- template = ('- from: {}\n'
- ' to: {}\n')
- redirects = [template.format(f, t) for f, t in redirects]
- api_redirects_path = os.path.join(output_dir, '_redirects.yaml')
- with open(api_redirects_path, 'w') as redirect_file:
- redirect_file.write('redirects:\n')
- redirect_file.write(''.join(redirects))
+ if redirects:
+ redirects = sorted(redirects)
+ template = ('- from: {}\n'
+ ' to: {}\n')
+ redirects = [template.format(f, t) for f, t in redirects]
+ api_redirects_path = os.path.join(output_dir, '_redirects.yaml')
+ with open(api_redirects_path, 'w') as redirect_file:
+ redirect_file.write('redirects:\n')
+ redirect_file.write(''.join(redirects))
if yaml_toc:
# Generate table of contents