1 files changed, 266 insertions, 7 deletions
diff --git a/tensorflow/core/ops/candidate_sampling_ops.cc b/tensorflow/core/ops/candidate_sampling_ops.cc
index 6e4d100b04..18700be67a 100644
--- a/tensorflow/core/ops/candidate_sampling_ops.cc
+++ b/tensorflow/core/ops/candidate_sampling_ops.cc
@@ -55,7 +55,42 @@ REGISTER_OP("UniformCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a uniform distribution.
+
+See explanations of candidate sampling and the data formats at
+go/candidate-sampling.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
+
+true_classes: A batch_size * num_true matrix, in which each row contains the
+  IDs of the num_true target_classes in the corresponding original label.
+sampled_candidates: A vector of length num_sampled, in which each element is
+  the ID of a sampled candidate.
+true_expected_count: A batch_size * num_true matrix, representing
+  the number of times each candidate is expected to occur in a batch
+  of sampled candidates. If unique=true, then this is a probability.
+sampled_expected_count: A vector of length num_sampled, for each sampled
+  candidate representing the number of times the candidate is expected
+  to occur in a batch of sampled candidates.  If unique=true, then this is a
+  probability.
+num_true: Number of true labels per context.
+num_sampled: Number of candidates to randomly sample.
+unique: If unique is true, we sample with rejection, so that all sampled
+  candidates in a batch are unique. This requires some approximation to
+  estimate the post-rejection sampling probabilities.
+range_max: The sampler will sample integers from the interval [0, range_max).
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 REGISTER_OP("LogUniformCandidateSampler")
     .Input("true_classes: int64")
@@ -69,7 +104,43 @@ REGISTER_OP("LogUniformCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a log-uniform distribution.
+
+See explanations of candidate sampling and the data formats at
+go/candidate-sampling.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
+
+
+true_classes: A batch_size * num_true matrix, in which each row contains the
+  IDs of the num_true target_classes in the corresponding original label.
+sampled_candidates: A vector of length num_sampled, in which each element is
+  the ID of a sampled candidate.
+true_expected_count: A batch_size * num_true matrix, representing
+  the number of times each candidate is expected to occur in a batch
+  of sampled candidates. If unique=true, then this is a probability.
+sampled_expected_count: A vector of length num_sampled, for each sampled
+  candidate representing the number of times the candidate is expected
+  to occur in a batch of sampled candidates.  If unique=true, then this is a
+  probability.
+num_true: Number of true labels per context.
+num_sampled: Number of candidates to randomly sample.
+unique: If unique is true, we sample with rejection, so that all sampled
+  candidates in a batch are unique. This requires some approximation to
+  estimate the post-rejection sampling probabilities.
+range_max: The sampler will sample integers from the interval [0, range_max).
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 REGISTER_OP("LearnedUnigramCandidateSampler")
     .Input("true_classes: int64")
@@ -83,7 +154,42 @@ REGISTER_OP("LearnedUnigramCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a learned unigram distribution.
+
+See explanations of candidate sampling and the data formats at
+go/candidate-sampling.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
+
+true_classes: A batch_size * num_true matrix, in which each row contains the
+  IDs of the num_true target_classes in the corresponding original label.
+sampled_candidates: A vector of length num_sampled, in which each element is
+  the ID of a sampled candidate.
+true_expected_count: A batch_size * num_true matrix, representing
+  the number of times each candidate is expected to occur in a batch
+  of sampled candidates. If unique=true, then this is a probability.
+sampled_expected_count: A vector of length num_sampled, for each sampled
+  candidate representing the number of times the candidate is expected
+  to occur in a batch of sampled candidates.  If unique=true, then this is a
+  probability.
+num_true: Number of true labels per context.
+num_sampled: Number of candidates to randomly sample.
+unique: If unique is true, we sample with rejection, so that all sampled
+  candidates in a batch are unique. This requires some approximation to
+  estimate the post-rejection sampling probabilities.
+range_max: The sampler will sample integers from the interval [0, range_max).
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 REGISTER_OP("ThreadUnsafeUnigramCandidateSampler")
     .Input("true_classes: int64")
@@ -97,7 +203,42 @@ REGISTER_OP("ThreadUnsafeUnigramCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a learned unigram distribution.
+
+See explanations of candidate sampling and the data formats at
+go/candidate-sampling.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
+
+true_classes: A batch_size * num_true matrix, in which each row contains the
+  IDs of the num_true target_classes in the corresponding original label.
+sampled_candidates: A vector of length num_sampled, in which each element is
+  the ID of a sampled candidate.
+true_expected_count: A batch_size * num_true matrix, representing
+  the number of times each candidate is expected to occur in a batch
+  of sampled candidates. If unique=true, then this is a probability.
+sampled_expected_count: A vector of length num_sampled, for each sampled
+  candidate representing the number of times the candidate is expected
+  to occur in a batch of sampled candidates.  If unique=true, then this is a
+  probability.
+num_true: Number of true labels per context.
+num_sampled: Number of candidates to randomly sample.
+unique: If unique is true, we sample with rejection, so that all sampled
+  candidates in a batch are unique. This requires some approximation to
+  estimate the post-rejection sampling probabilities.
+range_max: The sampler will sample integers from the interval [0, range_max).
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 REGISTER_OP("FixedUnigramCandidateSampler")
     .Input("true_classes: int64")
@@ -117,7 +258,70 @@ REGISTER_OP("FixedUnigramCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a fixed unigram distribution.
+
+A unigram sampler could use a fixed unigram distribution read from a
+file or passed in as an in-memory array instead of building up the distribution
+from data on the fly. There is also an option to skew the distribution by
+applying a distortion power to the weights.
+
+The vocabulary file should be in CSV-like format, with the last field
+being the weight associated with the word.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
+
+true_classes: A batch_size * num_true matrix, in which each row contains the
+  IDs of the num_true target_classes in the corresponding original label.
+sampled_candidates: A vector of length num_sampled, in which each element is
+  the ID of a sampled candidate.
+true_expected_count: A batch_size * num_true matrix, representing
+  the number of times each candidate is expected to occur in a batch
+  of sampled candidates. If unique=true, then this is a probability.
+sampled_expected_count: A vector of length num_sampled, for each sampled
+  candidate representing the number of times the candidate is expected
+  to occur in a batch of sampled candidates.  If unique=true, then this is a
+  probability.
+num_true: Number of true labels per context.
+num_sampled: Number of candidates to randomly sample.
+unique: If unique is true, we sample with rejection, so that all sampled
+  candidates in a batch are unique. This requires some approximation to
+  estimate the post-rejection sampling probabilities.
+range_max: The sampler will sample integers from the interval [0, range_max).
+vocab_file: Each valid line in this file (which should have a CSV-like format)
+  corresponds to a valid word ID. IDs are in sequential order, starting from
+  num_reserved_ids. The last entry in each line is expected to be a value
+  corresponding to the count or relative probability. Exactly one of vocab_file
+  and unigrams needs to be passed to this op.
+distortion: The distortion is used to skew the unigram probability distribution.
+  Each weight is first raised to the distortion's power before adding to the
+  internal unigram distribution. As a result, distortion = 1.0 gives regular
+  unigram sampling (as defined by the vocab file), and distortion = 0.0 gives
+  a uniform distribution.
+num_reserved_ids: Optionally, users can reserve the IDs in the range
+  [0, num_reserved_ids). One use case is reserving ID 0 for a special unknown
+  word token. These IDs will have a sampling probability of 0.
+num_shards: A sampler can be used to sample from a subset of the original range
+  in order to speed up the whole computation through parallelism. This parameter
+  (together with 'shard') indicates the number of partitions that are being
+  used in the overall computation.
+shard: A sampler can be used to sample from a subset of the original range
+  in order to speed up the whole computation through parallelism. This parameter
+  (together with 'num_shards') indicates the particular partition number of a
+  sampler op, when partitioning is being used.
+unigrams: A list of unigram counts or probabilities, one per ID in sequential
+  order. Exactly one of vocab_file and unigrams should be passed to this op.
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 REGISTER_OP("AllCandidateSampler")
     .Input("true_classes: int64")
@@ -130,7 +334,41 @@ REGISTER_OP("AllCandidateSampler")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
     .SetShapeFn(CandidateSamplerShapeFn)
-    .SetIsStateful();
+    .SetIsStateful()
+    .Doc(R"doc(
+Generates labels for candidate sampling with a learned unigram distribution.
+
+See explanations of candidate sampling and the data formats at
+go/candidate-sampling.
+
+For each batch, this op picks a single set of sampled candidate labels.
+
+The advantages of sampling candidates per-batch are simplicity and the
+possibility of efficient dense matrix multiplication. The disadvantage is that
+the sampled candidates must be chosen independently of the context and of the
+true labels.
+
+true_classes: A batch_size * num_true matrix, in which each row contains the
+  IDs of the num_true target_classes in the corresponding original label.
+sampled_candidates: A vector of length num_sampled, in which each element is
+  the ID of a sampled candidate.
+true_expected_count: A batch_size * num_true matrix, representing
+  the number of times each candidate is expected to occur in a batch
+  of sampled candidates. If unique=true, then this is a probability.
+sampled_expected_count: A vector of length num_sampled, for each sampled
+  candidate representing the number of times the candidate is expected
+  to occur in a batch of sampled candidates.  If unique=true, then this is a
+  probability.
+num_true: Number of true labels per context.
+num_sampled: Number of candidates to produce.
+unique: If unique is true, we sample with rejection, so that all sampled
+  candidates in a batch are unique. This requires some approximation to
+  estimate the post-rejection sampling probabilities.
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 REGISTER_OP("ComputeAccidentalHits")
     .Input("true_classes: int64")
@@ -158,6 +396,27 @@ REGISTER_OP("ComputeAccidentalHits")
       c->set_output(1, v);
       c->set_output(2, v);
       return Status::OK();
-    });
+    })
+    .Doc(R"doc(
+Computes the ids of the positions in sampled_candidates that match true_classes.
+
+When doing log-odds NCE, the result of this op should be passed through a
+SparseToDense op, then added to the logits of the sampled candidates. This has
+the effect of 'removing' the sampled labels that match the true labels by
+making the classifier sure that they are sampled labels.
+
+true_classes: The true_classes output of UnpackSparseLabels.
+sampled_candidates: The sampled_candidates output of CandidateSampler.
+indices: A vector of indices corresponding to rows of true_classes.
+ids: A vector of IDs of positions in sampled_candidates that match a true class
+  for the row with the corresponding index in indices.
+weights: A vector of the same length as indices and ids, in which each element
+  is -FLOAT_MAX.
+num_true: Number of true labels per context.
+seed: If either seed or seed2 is set to be non-zero, the random number
+  generator is seeded by the given seed. Otherwise, it is seeded by a
+  random seed.
+seed2: A second seed to avoid seed collision.
+)doc");
 
 }  // namespace tensorflow