-rw-r--r--  tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py  2
-rw-r--r--  tensorflow/contrib/boosted_trees/python/utils/losses_test.py  2
-rw-r--r--  tensorflow/contrib/cloud/python/ops/bigquery_reader_ops.py  2
-rw-r--r--  tensorflow/contrib/data/python/util/nest.py  2
-rw-r--r--  tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py  2
-rw-r--r--  tensorflow/contrib/distributions/python/ops/shape.py  4
-rw-r--r--  tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py  2
-rw-r--r--  tensorflow/contrib/graph_editor/select.py  6
-rw-r--r--  tensorflow/contrib/graph_editor/subgraph.py  2
-rw-r--r--  tensorflow/contrib/layers/python/layers/feature_column.py  14
-rw-r--r--  tensorflow/contrib/layers/python/layers/feature_column_ops_test.py  2
-rw-r--r--  tensorflow/contrib/layers/python/layers/regularizers.py  2
-rw-r--r--  tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py  12
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/estimator.py  2
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/kmeans.py  2
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py  2
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/run_config.py  2
-rw-r--r--  tensorflow/contrib/learn/python/learn/estimators/svm.py  6
-rw-r--r--  tensorflow/contrib/learn/python/learn/learn_runner.py  2
-rw-r--r--  tensorflow/contrib/learn/python/learn/metric_spec.py  2
-rw-r--r--  tensorflow/contrib/linear_optimizer/python/sdca_estimator.py  8
-rw-r--r--  tensorflow/contrib/metrics/python/ops/metric_ops.py  14
-rw-r--r--  tensorflow/contrib/resampler/python/ops/resampler_ops.py  2
-rw-r--r--  tensorflow/contrib/resampler/python/ops/resampler_ops_test.py  2
-rw-r--r--  tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py  4
-rw-r--r--  tensorflow/contrib/rnn/python/ops/rnn_cell.py  2
-rw-r--r--  tensorflow/contrib/session_bundle/bundle_shim.py  4
-rw-r--r--  tensorflow/contrib/slim/python/slim/data/parallel_reader.py  2
-rw-r--r--  tensorflow/contrib/solvers/python/ops/lanczos.py  2
-rw-r--r--  tensorflow/contrib/tensor_forest/hybrid/python/models/stochastic_soft_decisions_to_data_then_nn.py  2
-rw-r--r--  tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py  2
-rw-r--r--  tensorflow/core/kernels/BUILD  2
-rw-r--r--  tensorflow/core/kernels/pooling_ops_3d.cc  28
-rw-r--r--  tensorflow/core/kernels/pooling_ops_3d.h  14
-rw-r--r--  tensorflow/core/kernels/pooling_ops_3d_sycl.h  759
-rw-r--r--  tensorflow/core/ops/image_ops.cc  6
-rw-r--r--  tensorflow/core/platform/default/gpu_tracer.cc  2
-rw-r--r--  tensorflow/core/profiler/internal/tfprof_node.cc  13
-rw-r--r--  tensorflow/docs_src/extend/adding_an_op.md  10
-rw-r--r--  tensorflow/examples/image_retraining/label_image.py  2
-rw-r--r--  tensorflow/examples/image_retraining/retrain.py  8
-rw-r--r--  tensorflow/examples/learn/text_classification.py  4
-rw-r--r--  tensorflow/python/client/session_test.py  4
-rw-r--r--  tensorflow/python/debug/lib/debug_data.py  2
-rw-r--r--  tensorflow/python/debug/lib/debug_gradients.py  2
-rw-r--r--  tensorflow/python/debug/lib/stepper_test.py  2
-rw-r--r--  tensorflow/python/estimator/canned/head.py  2
-rw-r--r--  tensorflow/python/estimator/inputs/queues/feeding_functions.py  2
-rw-r--r--  tensorflow/python/estimator/model_fn.py  8
-rw-r--r--  tensorflow/python/framework/function.py  2
-rw-r--r--  tensorflow/python/framework/ops.py  2
-rw-r--r--  tensorflow/python/ops/distributions/bernoulli.py  2
-rw-r--r--  tensorflow/python/ops/metrics_impl.py  16
-rw-r--r--  tensorflow/python/ops/nn_ops.py  8
-rw-r--r--  tensorflow/python/ops/rnn.py  2
-rw-r--r--  tensorflow/python/ops/sparse_ops.py  6
-rw-r--r--  tensorflow/python/ops/variables.py  2
-rw-r--r--  tensorflow/python/summary/text_summary.py  2
-rw-r--r--  tensorflow/python/training/monitored_session.py  2
-rw-r--r--  tensorflow/python/training/optimizer.py  8
-rw-r--r--  tensorflow/python/util/nest.py  4
-rwxr-xr-x  tensorflow/tools/ci_build/install/install_pip_packages.sh  2
-rwxr-xr-x  tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh  2
-rwxr-xr-x  tensorflow/tools/docker/parameterized_docker_build.sh  2
-rw-r--r--  third_party/gpus/cuda_configure.bzl  2
65 files changed, 912 insertions, 138 deletions
diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py
index 2b64235bb2..51e084b79c 100644
--- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py
+++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py
@@ -662,7 +662,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase):
apply_averaging=False,
center_bias=False,
reduce_dim=False))
- # The first example will get bias class 1 -0.2 and -2 for class 2 from
+ # The first example will get bias class 1 -0.2 and -2 for class 2 from
# first tree and leaf 2 payload (sparse feature missing) of 0.5 hence
# 0.5, -0.2], the second example will get the same bias and leaf 3 payload
# of class 1 1.2 and class 2-0.7 hence [0.0, 1.0, -2.7].
diff --git a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
index 1b8d8ac43e..886e5ce2c8 100644
--- a/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
+++ b/tensorflow/contrib/boosted_trees/python/utils/losses_test.py
@@ -59,7 +59,7 @@ class LossesTest(test_util.TensorFlowTestCase):
pos_loss = loss_for_positives.eval()
neg_loss = loss_for_negatives.eval()
# For positive labels, points <= 0.3 get max loss of e.
- # For negative labels, these points have minimum loss of 1/e.
+ # For negative labels, these points have minimum loss of 1/e.
for i in range(2):
self.assertEqual(math.exp(1), pos_loss[i])
self.assertEqual(math.exp(-1), neg_loss[i])
diff --git a/tensorflow/contrib/cloud/python/ops/bigquery_reader_ops.py b/tensorflow/contrib/cloud/python/ops/bigquery_reader_ops.py
index cc8644bfd5..76c6bc05ff 100644
--- a/tensorflow/contrib/cloud/python/ops/bigquery_reader_ops.py
+++ b/tensorflow/contrib/cloud/python/ops/bigquery_reader_ops.py
@@ -48,7 +48,7 @@ class BigQueryReader(io_ops.ReaderBase):
features=features)
# Populate a queue with the BigQuery Table partitions.
- queue = tf.training.string_input_producer(reader.partitions())
+ queue = tf.train.string_input_producer(reader.partitions())
# Read and parse examples.
row_id, examples_serialized = reader.read(queue)
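A quick check of the corrected symbol: in the TF 1.x API the queue-based
input producers live under `tf.train`, not `tf.training`. A minimal sketch
(with placeholder partition strings, since a real `BigQueryReader` needs GCP
credentials):

    import tensorflow as tf

    # Stand-in for reader.partitions(); real values come from BigQuery.
    partitions = tf.constant(["partition-0", "partition-1"])
    queue = tf.train.string_input_producer(partitions)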
diff --git a/tensorflow/contrib/data/python/util/nest.py b/tensorflow/contrib/data/python/util/nest.py
index a29c3c562b..6e9d588d79 100644
--- a/tensorflow/contrib/data/python/util/nest.py
+++ b/tensorflow/contrib/data/python/util/nest.py
@@ -242,7 +242,7 @@ def map_structure(func, *structure, **check_types_dict):
*structure: scalar, or tuple or list of constructed scalars and/or other
tuples/lists, or scalars. Note: numpy arrays are considered scalars.
**check_types_dict: only valid keyword argument is `check_types`. If set to
- `True` (default) the types of iterables within the structures have to be
+ `True` (default) the types of iterables within the structures have to be
same (e.g. `map_structure(func, [1], (1,))` raises a `TypeError`
exception). To allow this set this argument to `False`.
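A minimal sketch of the `check_types` behavior described above, using the
equivalent core `tensorflow.python.util.nest` module:

    from tensorflow.python.util import nest

    nest.map_structure(lambda x: x * 2, (1, [2, 3]))
    # -> (2, [4, 6])

    # Mixing list and tuple raises TypeError unless check_types=False.
    nest.map_structure(lambda x, y: x + y, [1], (1,), check_types=False)
    # -> [2]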
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
index 6a5c37da16..d8698788c1 100644
--- a/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
+++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_impl.py
@@ -349,7 +349,7 @@ class Affine(bijector.Bijector):
y *= self._scale
if self.shift is not None:
return y + self.shift
- return y
+ return y
y, sample_shape = self._shaper.make_batch_of_event_sample_matrices(
y, expand_batch_dim=False)
with ops.control_dependencies(self._maybe_check_scale() if
diff --git a/tensorflow/contrib/distributions/python/ops/shape.py b/tensorflow/contrib/distributions/python/ops/shape.py
index 516d7b60fe..5fb6f0c7ea 100644
--- a/tensorflow/contrib/distributions/python/ops/shape.py
+++ b/tensorflow/contrib/distributions/python/ops/shape.py
@@ -364,7 +364,7 @@ class _DistributionShape(object):
"""Reshapes/transposes `Distribution` `Tensor` from S+B+E to B_+E_+S_.
Where:
- - `B_ = B if B or not expand_batch_dim else [1]`,
+ - `B_ = B if B or not expand_batch_dim else [1]`,
- `E_ = E if E else [1]`,
- `S_ = [tf.reduce_prod(S)]`.
@@ -402,7 +402,7 @@ class _DistributionShape(object):
"""Reshapes/transposes `Distribution` `Tensor` from B_+E_+S_ to S+B+E.
Where:
- - `B_ = B if B or not expand_batch_dim else [1]`,
+ - `B_ = B if B or not expand_batch_dim else [1]`,
- `E_ = E if E else [1]`,
- `S_ = [tf.reduce_prod(S)]`.
diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
index 41fd114f0f..41f986dd07 100644
--- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
+++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op.py
@@ -8,7 +8,7 @@
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
diff --git a/tensorflow/contrib/graph_editor/select.py b/tensorflow/contrib/graph_editor/select.py
index 706c409118..3ea6ff4d61 100644
--- a/tensorflow/contrib/graph_editor/select.py
+++ b/tensorflow/contrib/graph_editor/select.py
@@ -620,7 +620,7 @@ def select_ops(*args, **kwargs):
"""Helper to select operations.
Args:
- *args: list of 1) regular expressions (compiled or not) or 2) (array of)
+ *args: list of 1) regular expressions (compiled or not) or 2) (array of)
`tf.Operation`. `tf.Tensor` instances are silently ignored.
**kwargs: 'graph': `tf.Graph` in which to perform the regex query.This is
required when using regex.
@@ -686,7 +686,7 @@ def select_ts(*args, **kwargs):
"""Helper to select tensors.
Args:
- *args: list of 1) regular expressions (compiled or not) or 2) (array of)
+ *args: list of 1) regular expressions (compiled or not) or 2) (array of)
`tf.Tensor`. `tf.Operation` instances are silently ignored.
**kwargs: 'graph': `tf.Graph` in which to perform the regex query.This is
required when using regex.
@@ -752,7 +752,7 @@ def select_ops_and_ts(*args, **kwargs):
"""Helper to select operations and tensors.
Args:
- *args: list of 1) regular expressions (compiled or not) or 2) (array of)
+ *args: list of 1) regular expressions (compiled or not) or 2) (array of)
`tf.Operation` 3) (array of) tf.Tensor. Regular expressions matching
tensors must start with the comment `"(?#ts)"`, for instance:
`"(?#ts)^foo/.*"`.
diff --git a/tensorflow/contrib/graph_editor/subgraph.py b/tensorflow/contrib/graph_editor/subgraph.py
index 18485f2039..6650e996d7 100644
--- a/tensorflow/contrib/graph_editor/subgraph.py
+++ b/tensorflow/contrib/graph_editor/subgraph.py
@@ -630,7 +630,7 @@ def make_view(*args, **kwargs):
"""Create a SubGraphView from selected operations and passthrough tensors.
Args:
- *args: list of 1) regular expressions (compiled or not) or 2) (array of)
+ *args: list of 1) regular expressions (compiled or not) or 2) (array of)
`tf.Operation` 3) (array of) `tf.Tensor`. Those objects will be converted
into a list of operations and a list of candidate for passthrough tensors.
**kwargs: keyword graph is used 1) to check that the ops and ts are from
diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py
index 0ba9873f3a..409f7d27bd 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column.py
@@ -15,13 +15,13 @@
"""This API defines FeatureColumn abstraction.
FeatureColumns provide a high level abstraction for ingesting and representing
-features in tf.learn Estimator models.
+features in `Estimator` models.
FeatureColumns are the primary way of encoding features for pre-canned
-tf.learn Estimators.
+`Estimator` models.
-When using FeatureColumns with tf.learn models, the type of feature column you
-should choose depends on (1) the feature type and (2) the model type.
+When using FeatureColumns with `Estimator` models, the type of feature column
+you should choose depends on (1) the feature type and (2) the model type.
(1) Feature type:
@@ -74,7 +74,7 @@ should choose depends on (1) the feature type and (2) the model type.
columns=[department_column, bucketized_age_column],
hash_bucket_size=1000)
-Example of building tf.learn model using FeatureColumns:
+Example of building an `Estimator` model using FeatureColumns:
# Define features and transformations
deep_feature_columns = [age_column, embedded_dept_column]
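A minimal sketch (tf.contrib.layers API, hypothetical boundaries and
dimensions) of the column definitions the example above assumes:

    from tensorflow.contrib import layers

    age_column = layers.real_valued_column("age")
    bucketized_age_column = layers.bucketized_column(
        age_column, boundaries=[18, 25, 35, 50])
    department_column = layers.sparse_column_with_hash_bucket(
        "department", hash_bucket_size=1000)
    embedded_dept_column = layers.embedding_column(
        department_column, dimension=16)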
@@ -104,7 +104,7 @@ FeatureColumns can also be transformed into a generic input layer for
custom models using `input_from_feature_columns` within
`feature_column_ops.py`.
-Example of building non-tf.learn model using FeatureColumns:
+Example of building a non-`Estimator` model using FeatureColumns:
# Building model via layers
@@ -1184,7 +1184,7 @@ def _embeddings_from_arguments(column,
raise ValueError(
"The embedding variable with name {} already "
"exists, but its shape does not match required "
- "embedding shape here. Please make sure to use "
+ "embedding shape here. Please make sure to use "
"different shared_embedding_name for different "
"shared embeddings.".format(args.shared_embedding_name))
else:
diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
index 797a7c11db..fbfa0e32de 100644
--- a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
+++ b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py
@@ -1173,7 +1173,7 @@ class CreateInputLayersForDNNsTest(test.TestCase):
features, [real_valued, bucket, embeded_sparse],
weight_collections=["my_collection"],
trainable=True)
- # There should one trainable variable for embeded sparse
+ # There should one trainable variable for embeded sparse
self.assertEqual(1, len(variables_lib.trainable_variables()))
def testInputLayerWithNonTrainableEmbeddingForDNN(self):
diff --git a/tensorflow/contrib/layers/python/layers/regularizers.py b/tensorflow/contrib/layers/python/layers/regularizers.py
index 2c3774c560..95d57e6186 100644
--- a/tensorflow/contrib/layers/python/layers/regularizers.py
+++ b/tensorflow/contrib/layers/python/layers/regularizers.py
@@ -119,7 +119,7 @@ def l1_l2_regularizer(scale_l1=1.0, scale_l2=1.0, scope=None):
Returns:
A function with signature `l1_l2(weights)` that applies a weighted sum of
- L1 L2 regularization.
+ L1 L2 regularization.
Raises:
ValueError: If scale is negative or if scale is not a float.
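A minimal usage sketch (the L2 term follows `tf.nn.l2_loss`, i.e.
`sum(w**2) / 2`):

    import tensorflow as tf
    from tensorflow.contrib import layers

    regularizer = layers.l1_l2_regularizer(scale_l1=0.5, scale_l2=1.0)
    weights = tf.get_variable("weights", shape=[3, 3])
    penalty = regularizer(weights)  # 0.5*sum(|w|) + 1.0*sum(w**2)/2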
diff --git a/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py b/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py
index f316c5c980..a19426a248 100644
--- a/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py
+++ b/tensorflow/contrib/learn/python/learn/dataframe/tensorflow_dataframe.py
@@ -600,7 +600,7 @@ class TensorFlowDataFrame(df.DataFrame):
shuffle=True,
seed=None,
data_name="pandas_data"):
- """Create a `tf.learn.DataFrame` from a `pandas.DataFrame`.
+ """Create a `DataFrame` from a `pandas.DataFrame`.
Args:
pandas_dataframe: `pandas.DataFrame` that serves as a data source.
@@ -615,7 +615,7 @@ class TensorFlowDataFrame(df.DataFrame):
data_name: a scope name identifying the data.
Returns:
- A `tf.learn.DataFrame` that contains batches drawn from the given
+ A `DataFrame` that contains batches drawn from the given
`pandas_dataframe`.
"""
pandas_source = in_memory_source.PandasSource(
@@ -643,7 +643,7 @@ class TensorFlowDataFrame(df.DataFrame):
shuffle=True,
seed=None,
data_name="numpy_data"):
- """Creates a `tf.learn.DataFrame` from a `numpy.ndarray`.
+ """Creates a `DataFrame` from a `numpy.ndarray`.
The returned `DataFrame` contains two columns: 'index' and 'value'. The
'value' column contains a row from the array. The 'index' column contains
@@ -662,7 +662,7 @@ class TensorFlowDataFrame(df.DataFrame):
data_name: a scope name identifying the data.
Returns:
- A `tf.learn.DataFrame` that contains batches drawn from the given
+ A `DataFrame` that contains batches drawn from the given
array.
"""
numpy_source = in_memory_source.NumpySource(
@@ -690,7 +690,7 @@ class TensorFlowDataFrame(df.DataFrame):
shuffle=True,
seed=None,
data_name="numpy_data"):
- """Creates a `tf.learn.DataFrame` from an `OrderedDict` of `numpy.ndarray`.
+ """Creates a `DataFrame` from an `OrderedDict` of `numpy.ndarray`.
The returned `DataFrame` contains a column for each key of the dict plus an
extra 'index' column. The 'index' column contains the row number. Each of
@@ -710,7 +710,7 @@ class TensorFlowDataFrame(df.DataFrame):
data_name: a scope name identifying the data.
Returns:
- A `tf.learn.DataFrame` that contains batches drawn from the given arrays.
+ A `DataFrame` that contains batches drawn from the given arrays.
Raises:
ValueError: `ordered_dict_of_arrays` contains the reserved name 'index'.
diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
index c184b14654..7c72e516c9 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py
@@ -363,7 +363,7 @@ class BaseEstimator(
sklearn.BaseEstimator, evaluable.Evaluable, trainable.Trainable):
"""Abstract BaseEstimator class to train and evaluate TensorFlow models.
- Users should not instantiate or subclass this class. Instead, use `Estimator`.
+ Users should not instantiate or subclass this class. Instead, use an `Estimator`.
"""
__metaclass__ = abc.ABCMeta
diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
index a473cf46d5..a92302420f 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
-"""Implementation of k-means clustering on top of tf.learn API."""
+"""Implementation of k-means clustering on top of `Estimator` API."""
from __future__ import absolute_import
from __future__ import division
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py
index d03057087c..fb339160d5 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py
@@ -111,7 +111,7 @@ def LogisticRegressor( # pylint: disable=invalid-name
into the model.
Returns:
- A `tf.contrib.learn.Estimator` instance.
+ An `Estimator` instance.
"""
return estimator.Estimator(
model_fn=_get_model_fn_with_logistic_metrics(model_fn),
diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py
index 0642c5900a..060c7a37be 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py
@@ -64,7 +64,7 @@ class TaskType(object):
class ClusterConfig(object):
"""This class specifies the configurations for a distributed run.
- If you're using `tf.learn` `Estimators`, you should probably use the subclass
+ If you're using an `Estimator`, you should probably use the subclass
RunConfig instead.
"""
diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py
index 5a991da891..72920d73c0 100644
--- a/tensorflow/contrib/learn/python/learn/estimators/svm.py
+++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py
@@ -43,9 +43,9 @@ class SVM(estimator.Estimator):
num_loss_partitions is larger or equal to this value, convergence is
guaranteed but becomes slower as num_loss_partitions increases. If it is set
to a smaller value, the optimizer is more aggressive in reducing the global
- loss but convergence is not guaranteed. The recommended value in tf.learn
- (where there is one process per worker) is the number of workers running the
- train steps. It defaults to 1 (single machine).
+ loss but convergence is not guaranteed. The recommended value in an
+ `Estimator` (where there is one process per worker) is the number of workers
+ running the train steps. It defaults to 1 (single machine).
Example:
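(The docstring's own example falls outside this hunk; a minimal hedged
sketch of the recommendation above, with hypothetical feature columns and
worker count, might look like:)

    estimator = svm.SVM(example_id_column="example_id",
                        feature_columns=[real_feature_column],
                        num_loss_partitions=num_workers)  # one per worker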
diff --git a/tensorflow/contrib/learn/python/learn/learn_runner.py b/tensorflow/contrib/learn/python/learn/learn_runner.py
index bd9fcde8ce..8d84d230da 100644
--- a/tensorflow/contrib/learn/python/learn/learn_runner.py
+++ b/tensorflow/contrib/learn/python/learn/learn_runner.py
@@ -159,7 +159,7 @@ def run(experiment_fn, output_dir=None, schedule=None, run_config=None,
constructor; `hparams` used as the hyper-paremeters of the model).
It must return an `Experiment`. For this case, `output_dir` must be None.
output_dir: Base output directory [Deprecated].
- schedule: The name of the method in the `Experiment` to run.
+ schedule: The name of the method in the `Experiment` to run.
run_config: `RunConfig` instance. The `run_config.model_dir` must be
non-empty. If `run_config` is set, `output_dir` must be None.
hparams: `HParams` instance. The default hyper-parameters, which will be
diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py
index 2f2c931786..ed6683abed 100644
--- a/tensorflow/contrib/learn/python/learn/metric_spec.py
+++ b/tensorflow/contrib/learn/python/learn/metric_spec.py
@@ -233,7 +233,7 @@ class MetricSpec(object):
`Estimator` then knows which predictions, labels, and weight to use to call a
given metric function.
- When building the ops to run in evaluation, `Estimator` will call
+ When building the ops to run in evaluation, an `Estimator` will call
`create_metric_ops`, which will connect the given `metric_fn` to the model
as detailed in the docstring for `create_metric_ops`, and return the metric.
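A minimal sketch of wiring a metric into evaluation through `MetricSpec`
(assuming a model whose predictions dict has a "classes" key):

    from tensorflow.contrib import metrics as metrics_lib
    from tensorflow.contrib.learn import MetricSpec

    eval_metrics = {
        "accuracy": MetricSpec(metric_fn=metrics_lib.streaming_accuracy,
                               prediction_key="classes")
    }
    # estimator.evaluate(input_fn=eval_input_fn, metrics=eval_metrics)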
diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
index f4961ab9db..701fc1c059 100644
--- a/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
+++ b/tensorflow/contrib/linear_optimizer/python/sdca_estimator.py
@@ -227,7 +227,7 @@ class _SDCAEstimator(estimator.Estimator):
will be multiplied by the loss of the example.
model_dir: Directory to save model parameters, graph etc. This can also be
used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
+ continue training a previously saved model.
head: type of head. Currently, _BinaryLogisticHead and _BinarySvmHead are
supported for classification and _RegressionHead for regression. It
should be a subclass of _SingleHead.
@@ -312,7 +312,7 @@ class SDCALogisticClassifier(_SDCAEstimator):
```
The input_fn provided to `fit`, `evaluate` and predict_* methods should return
- the following features, otherwise there will be a `KeyError`:
+ the following features, otherwise there will be a `KeyError`:
* A feature with `key=example_id_column` whose value is a `Tensor` of dtype
string.
* If `weight_column_name` is not `None`, a feature with
@@ -438,7 +438,7 @@ class SDCALinearRegressor(_SDCAEstimator):
```
The input_fn provided to `fit`, `evaluate` and predict_* methods should return
- the following features, otherwise there will be a `KeyError`:
+ the following features, otherwise there will be a `KeyError`:
* A feature with `key=example_id_column` whose value is a `Tensor` of dtype
string.
* If `weight_column_name` is not `None`, a feature with
@@ -478,7 +478,7 @@ class SDCALinearRegressor(_SDCAEstimator):
will be multiplied by the loss of the example.
model_dir: Directory to save model parameters, graph etc. This can also be
used to load checkpoints from the directory into an estimator to
- continue training a previously saved model.
+ continue training a previously saved model.
l1_regularization: L1-regularization parameter. Refers to global L1
regularization (across all examples).
l2_regularization: L2-regularization parameter. Refers to global L2
diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py
index 6d66a257bc..b5d8c95678 100644
--- a/tensorflow/contrib/metrics/python/ops/metric_ops.py
+++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py
@@ -334,7 +334,7 @@ def streaming_mean(values, weights=None, metrics_collections=None,
returned as `mean` which is an idempotent operation that simply divides
`total` by `count`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the `mean`.
`update_op` increments `total` with the reduced sum of the product of `values`
and `weights`, and it increments `count` with the reduced sum of `weights`.
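A minimal sketch of the total/count bookkeeping described above:

    import tensorflow as tf

    values = tf.placeholder(tf.float32)
    mean, update_op = tf.contrib.metrics.streaming_mean(values)

    with tf.Session() as sess:
      sess.run(tf.local_variables_initializer())
      sess.run(update_op, feed_dict={values: [1.0, 2.0]})
      sess.run(update_op, feed_dict={values: [3.0, 4.0]})
      print(sess.run(mean))  # 2.5, since total=10.0 and count=4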
@@ -381,7 +381,7 @@ def streaming_mean_tensor(values, weights=None, metrics_collections=None,
`values`. This average is ultimately returned as `mean` which is an idempotent
operation that simply divides `total` by `count`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the `mean`.
`update_op` increments `total` with the reduced sum of the product of `values`
and `weights`, and it increments `count` with the reduced sum of `weights`.
@@ -425,7 +425,7 @@ def streaming_accuracy(predictions, labels, weights=None,
matches `labels`. This frequency is ultimately returned as `accuracy`: an
idempotent operation that simply divides `total` by `count`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the `accuracy`.
Internally, an `is_correct` operation computes a `Tensor` with elements 1.0
where the corresponding elements of `predictions` and `labels` match and 0.0
@@ -477,7 +477,7 @@ def streaming_precision(predictions, labels, weights=None,
operation that simply divides `true_positives` by the sum of `true_positives`
and `false_positives`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the
`precision`. `update_op` weights each prediction by the corresponding value in
`weights`.
@@ -526,7 +526,7 @@ def streaming_recall(predictions, labels, weights=None,
ultimately returned as `recall`, an idempotent operation that simply divides
`true_positives` by the sum of `true_positives` and `false_negatives`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` that updates these variables and returns the `recall`. `update_op`
weights each prediction by the corresponding value in `weights`.
@@ -1571,7 +1571,7 @@ def streaming_sparse_average_precision_at_k(predictions,
Returns:
mean_average_precision: Scalar `float64` `Tensor` with the mean average
precision values.
- update: `Operation` that increments variables appropriately, and whose
+ update: `Operation` that increments variables appropriately, and whose
value matches `metric`.
"""
return metrics.sparse_average_precision_at_k(
@@ -1628,7 +1628,7 @@ def streaming_sparse_average_precision_at_top_k(top_k_predictions,
Returns:
mean_average_precision: Scalar `float64` `Tensor` with the mean average
precision values.
- update: `Operation` that increments variables appropriately, and whose
+ update: `Operation` that increments variables appropriately, and whose
value matches `metric`.
Raises:
diff --git a/tensorflow/contrib/resampler/python/ops/resampler_ops.py b/tensorflow/contrib/resampler/python/ops/resampler_ops.py
index 355d15f0c7..8b632527f6 100644
--- a/tensorflow/contrib/resampler/python/ops/resampler_ops.py
+++ b/tensorflow/contrib/resampler/python/ops/resampler_ops.py
@@ -9,7 +9,7 @@
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
diff --git a/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py b/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
index 6a4360150c..9aa1e05628 100644
--- a/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
+++ b/tensorflow/contrib/resampler/python/ops/resampler_ops_test.py
@@ -9,7 +9,7 @@
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py
index e0d063a1b6..eda8cb3c12 100644
--- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py
+++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py
@@ -156,7 +156,7 @@ class StackBidirectionalRNNTest(test.TestCase):
# - Reset states, and iterate for 5 steps. Last state is state_5.
# - Reset the sets to state_3 and iterate for 2 more steps,
# last state will be state_5'.
- # - Check that the state_5 and state_5' (forward and backward) are the
+ # - Check that the state_5 and state_5' (forward and backward) are the
# same for the first layer (it does not apply for the second layer since
# it has forward-backward dependencies).
with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
@@ -340,7 +340,7 @@ class StackBidirectionalRNNTest(test.TestCase):
# - Reset states, and iterate for 5 steps. Last state is state_5.
# - Reset the sets to state_3 and iterate for 2 more steps,
# last state will be state_5'.
- # - Check that the state_5 and state_5' (forward and backward) are the
+ # - Check that the state_5 and state_5' (forward and backward) are the
# same for the first layer (it does not apply for the second layer since
# it has forward-backward dependencies).
with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess:
diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
index ecce1d22f0..090d28a078 100644
--- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py
+++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py
@@ -2026,7 +2026,7 @@ class GLSTMCell(rnn_cell_impl.RNNCell):
Here output_dim is:
num_proj if num_proj was set,
num_units otherwise.
- - LSTMStateTuple representing the new state of G-LSTM cell
+ - LSTMStateTuple representing the new state of G-LSTM cell
after reading `inputs` when the previous state was `state`.
Raises:
diff --git a/tensorflow/contrib/session_bundle/bundle_shim.py b/tensorflow/contrib/session_bundle/bundle_shim.py
index 0aeda0f0de..062c9cc680 100644
--- a/tensorflow/contrib/session_bundle/bundle_shim.py
+++ b/tensorflow/contrib/session_bundle/bundle_shim.py
@@ -39,7 +39,7 @@ def _add_input_to_signature_def(tensor_name, map_key, signature_def):
Args:
tensor_name: string name of tensor to add to signature_def inputs
map_key: string key to key into signature_def inputs map
- signature_def: object of type meta_graph_pb2.SignatureDef()
+ signature_def: object of type meta_graph_pb2.SignatureDef()
Sideffect:
adds a TensorInfo with tensor_name to signature_def inputs map keyed with
@@ -55,7 +55,7 @@ def _add_output_to_signature_def(tensor_name, map_key, signature_def):
Args:
tensor_name: string name of tensor to add to signature_def outputs
map_key: string key to key into signature_def outputs map
- signature_def: object of type meta_graph_pb2.SignatureDef()
+ signature_def: object of type meta_graph_pb2.SignatureDef()
Sideffect:
adds a TensorInfo with tensor_name to signature_def outputs map keyed with
diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py
index 6082af008a..e97f500572 100644
--- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py
+++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py
@@ -175,7 +175,7 @@ def parallel_read(data_sources,
scope=None):
"""Reads multiple records in parallel from data_sources using n readers.
- It uses a ParallelReader to read from multiple files in parallel using
+ It uses a ParallelReader to read from multiple files in parallel using
multiple readers created using `reader_class` with `reader_kwargs'.
If shuffle is True the common_queue would be a RandomShuffleQueue otherwise
diff --git a/tensorflow/contrib/solvers/python/ops/lanczos.py b/tensorflow/contrib/solvers/python/ops/lanczos.py
index 8631002a53..af1b293743 100644
--- a/tensorflow/contrib/solvers/python/ops/lanczos.py
+++ b/tensorflow/contrib/solvers/python/ops/lanczos.py
@@ -206,7 +206,7 @@ def bidiag_matmul(matrix, alpha, beta, adjoint_b=False, name="bidiag_matmul"):
A * B = A[:, :-1] * diag(alpha) + A[:, 1:] * diag(beta)
- If adjoint_b is True, computes A * B[:-1, :]' as follows
+ If adjoint_b is True, computes A * B[:-1, :]' as follows
A * B[:-1, :]' =
A * diag(alpha) + [zeros(m,1), A[:, :-1] * diag(beta[:-1])]
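A quick NumPy check of the adjoint_b=False identity above, assuming B is the
(k+1) x k matrix with alpha on the main diagonal and beta on the first
subdiagonal (which is what the identity implies):

    import numpy as np

    m, k = 5, 4
    A = np.random.randn(m, k + 1)
    alpha, beta = np.random.randn(k), np.random.randn(k)

    B = np.zeros((k + 1, k))
    B[:k, :] += np.diag(alpha)   # main diagonal
    B[1:, :] += np.diag(beta)    # first subdiagonal

    lhs = A.dot(B)
    rhs = A[:, :-1].dot(np.diag(alpha)) + A[:, 1:].dot(np.diag(beta))
    assert np.allclose(lhs, rhs)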
diff --git a/tensorflow/contrib/tensor_forest/hybrid/python/models/stochastic_soft_decisions_to_data_then_nn.py b/tensorflow/contrib/tensor_forest/hybrid/python/models/stochastic_soft_decisions_to_data_then_nn.py
index 2db4249f5d..c2f3f60341 100644
--- a/tensorflow/contrib/tensor_forest/hybrid/python/models/stochastic_soft_decisions_to_data_then_nn.py
+++ b/tensorflow/contrib/tensor_forest/hybrid/python/models/stochastic_soft_decisions_to_data_then_nn.py
@@ -20,7 +20,7 @@ from __future__ import print_function
from tensorflow.contrib.tensor_forest.hybrid.python.layers import decisions_to_data
from tensorflow.contrib.tensor_forest.hybrid.python.layers import fully_connected
from tensorflow.contrib.tensor_forest.hybrid.python.models import hard_decisions_to_data_then_nn
-from tensorflow.python.training import adagrad
+from tensorflow.python.training import adagrad
class StochasticSoftDecisionsToDataThenNN(
diff --git a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py
index 9312070e52..c21900a0c8 100644
--- a/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py
+++ b/tensorflow/contrib/training/python/training/sequence_queueing_state_saver.py
@@ -1814,7 +1814,7 @@ def _reconstruct_sparse_tensor_seq(sequence,
Counter-part of `_flatten_tensor` which is called on the input of
`_restore_sparse` while this method is called on the output of it.
- Together they work around the limitation of `_restore_sparse` to only
+ Together they work around the limitation of `_restore_sparse` to only
accept 1D handles.
The `indices` in `sp_tensor` is a 2D `Tensor` of `shape [N, ndims]`, where
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 6887d8cfb6..fdf45beed3 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -3035,7 +3035,7 @@ tf_kernel_library(
"maxpooling_op.h",
"pooling_ops_3d.h",
"pooling_ops_common.h",
- ],
+ ] + if_sycl(["pooling_ops_3d_sycl.h"]),
gpu_srcs = [
"avgpooling_op.h",
"avgpooling_op_gpu.cu.cc",
diff --git a/tensorflow/core/kernels/pooling_ops_3d.cc b/tensorflow/core/kernels/pooling_ops_3d.cc
index 538dca24ae..a406317213 100644
--- a/tensorflow/core/kernels/pooling_ops_3d.cc
+++ b/tensorflow/core/kernels/pooling_ops_3d.cc
@@ -37,10 +37,18 @@ limitations under the License.
#include "tensorflow/core/kernels/cudnn_pooling_gpu.h"
#include "tensorflow/core/kernels/pooling_ops_3d_gpu.h"
#endif
+
+#ifdef TENSORFLOW_USE_SYCL
+#include "tensorflow/core/kernels/pooling_ops_3d_sycl.h"
+#endif // TENSORFLOW_USE_SYCL
+
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
+#ifdef TENSORFLOW_USE_SYCL
+typedef Eigen::SyclDevice SYCLDevice;
+#endif // TENSORFLOW_USE_SYCL
Pool3dParameters::Pool3dParameters(OpKernelContext* context,
const std::vector<int32>& ksize,
@@ -89,11 +97,6 @@ TensorShape Pool3dParameters::forward_output_shape() {
{{out_plane, out_height, out_width}}, depth);
}
-enum PoolingType { MAX, AVG };
-
-template <typename Device, typename T, PoolingType Type>
-struct LaunchPoolingOp;
-
template <typename T>
struct LaunchPoolingOp<CPUDevice, T, AVG> {
static void launch(OpKernelContext* context, const Tensor& tensor_in,
@@ -200,9 +203,6 @@ class Pooling3DOp : public UnaryOp<T> {
TensorFormat data_format_;
};
-template <typename Device, typename T>
-struct LaunchMaxPooling3dGradOp;
-
template <typename T>
struct LaunchMaxPooling3dGradOp<CPUDevice, T> {
static void launch(OpKernelContext* context, const Tensor& tensor_in,
@@ -377,9 +377,6 @@ class MaxPooling3dGradOp : public OpKernel {
TensorFormat data_format_;
};
-template <typename Device, typename T>
-struct LaunchAvgPooling3dGradOp;
-
template <typename T>
struct LaunchAvgPooling3dGradOp<CPUDevice, T> {
static void launch(OpKernelContext* context,
@@ -541,9 +538,6 @@ class AvgPooling3dGradOp : public OpKernel {
TensorFormat data_format_;
};
-template <typename Device, typename T>
-struct LaunchMaxPooling3dGradGradOp;
-
template <typename T>
struct LaunchMaxPooling3dGradGradOp<CPUDevice, T> {
static void launch(OpKernelContext* context, const Pool3dParameters& params,
@@ -837,6 +831,12 @@ TF_CALL_float(REGISTER_GPU_KERNELS) TF_CALL_half(REGISTER_GPU_KERNELS)
#endif // GOOGLE_CUDA
+#ifdef TENSORFLOW_USE_SYCL
+#define REGISTER_SYCL_KERNELS(T) REGISTER_KERNELS(SYCL, T)
+TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS)
+#undef REGISTER_SYCL_KERNELS
+#endif // TENSORFLOW_USE_SYCL
+
#undef REGISTER_KERNELS
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/pooling_ops_3d.h b/tensorflow/core/kernels/pooling_ops_3d.h
index 7954e2cf83..d1be3ba407 100644
--- a/tensorflow/core/kernels/pooling_ops_3d.h
+++ b/tensorflow/core/kernels/pooling_ops_3d.h
@@ -22,6 +22,20 @@ limitations under the License.
namespace tensorflow {
+enum PoolingType { MAX, AVG };
+
+template <typename Device, typename T, PoolingType Type>
+struct LaunchPoolingOp;
+
+template <typename Device, typename T>
+struct LaunchAvgPooling3dGradOp;
+
+template <typename Device, typename T>
+struct LaunchMaxPooling3dGradOp;
+
+template <typename Device, typename T>
+struct LaunchMaxPooling3dGradGradOp;
+
// A helper class to manage sizes and shapes for 3d pooling operations.
struct Pool3dParameters {
// Updates context->status if there is an invalid input.
diff --git a/tensorflow/core/kernels/pooling_ops_3d_sycl.h b/tensorflow/core/kernels/pooling_ops_3d_sycl.h
new file mode 100644
index 0000000000..d8cbc589a1
--- /dev/null
+++ b/tensorflow/core/kernels/pooling_ops_3d_sycl.h
@@ -0,0 +1,759 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if !TENSORFLOW_USE_SYCL
+#error This file must only be included when building with SYCL support
+#endif
+
+#ifndef TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_
+#define TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_
+
+#include "tensorflow/core/kernels/pooling_ops_3d.h"
+
+namespace tensorflow {
+
+typedef Eigen::SyclDevice SYCLDevice;
+
+// Helper struct to contain the various pool parameters used in the SYCL
+// pooling kernels. Similar to the Pool3dParameters, but with a number of
+// convenient constructors.
+struct SYCL3DPoolParams {
+ SYCL3DPoolParams(const int depth, const int batch, const int in_planes,
+ const int in_rows, const int in_cols, const int out_planes,
+ const int out_rows, const int out_cols,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding)
+ : depth_(depth),
+ batch_(batch),
+ in_planes_(in_planes),
+ in_rows_(in_rows),
+ in_cols_(in_cols),
+ window_planes_(window[2]),
+ window_rows_(window[1]),
+ window_cols_(window[0]),
+ stride_planes_(stride[2]),
+ stride_rows_(stride[1]),
+ stride_cols_(stride[0]),
+ out_planes_(out_planes),
+ out_rows_(out_rows),
+ out_cols_(out_cols),
+ pad_planes_(padding[2]),
+ pad_rows_(padding[1]),
+ pad_cols_(padding[0]) {}
+
+ SYCL3DPoolParams(const int depth, const int batch, const int in_planes,
+ const int in_rows, const int in_cols,
+ const std::array<int64, 3>& out_shape,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding)
+ : SYCL3DPoolParams(depth, batch, in_planes, in_rows, in_cols,
+ out_shape[2], out_shape[1], out_shape[0], window,
+ stride, padding) {}
+
+ SYCL3DPoolParams(const Pool3dParameters& params)
+ : depth_(params.depth),
+ batch_(params.tensor_in_batch),
+ in_planes_(params.tensor_in_planes),
+ in_rows_(params.tensor_in_rows),
+ in_cols_(params.tensor_in_cols),
+ window_planes_(params.window_planes),
+ window_rows_(params.window_rows),
+ window_cols_(params.window_cols),
+ stride_planes_(params.plane_stride),
+ stride_rows_(params.row_stride),
+ stride_cols_(params.col_stride),
+ out_planes_(params.out_plane),
+ out_rows_(params.out_height),
+ out_cols_(params.out_width),
+ pad_planes_(params.pad_planes),
+ pad_rows_(params.pad_rows),
+ pad_cols_(params.pad_cols) {}
+
+ const int depth_;
+ const int batch_;
+ const int in_planes_;
+ const int in_rows_;
+ const int in_cols_;
+
+ const int window_planes_;
+ const int window_rows_;
+ const int window_cols_;
+
+ const int stride_planes_;
+ const int stride_rows_;
+ const int stride_cols_;
+
+ const int out_planes_;
+ const int out_rows_;
+ const int out_cols_;
+
+ const int pad_planes_;
+ const int pad_rows_;
+ const int pad_cols_;
+};
+// MaxPool3d SYCL kernel. Expects the number of threads to be equal to the
+// number of elements in the output tensor.
+//
+// For each output element, find the corresponding input window and run over
+// all values in the window to find the maximum value. This value is then
+// copied into that output element.
+template <typename T>
+class MaxPool3DSYCL {
+ using write_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
+ cl::sycl::access::target::global_buffer>;
+ using read_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
+ cl::sycl::access::target::global_buffer>;
+
+ public:
+ MaxPool3DSYCL(const int depth, const int batch, const int in_planes,
+ const int in_rows, const int in_cols, const int out_planes,
+ const int out_rows, const int out_cols,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding,
+ const read_accessor input_accessor,
+ write_accessor output_accessor)
+ : p_(depth, batch, in_planes, in_rows, in_cols, out_planes, out_rows,
+ out_cols, window, stride, padding),
+ input_accessor_(input_accessor),
+ output_accessor_(output_accessor) {}
+ void operator()(cl::sycl::item<1> item) {
+ T* input_data = ConvertToActualTypeSycl(T, input_accessor_);
+ T* output_data = ConvertToActualTypeSycl(T, output_accessor_);
+
+ int index = item.get_linear_id();
+ int n = index;
+ int d = n % p_.depth_;
+ n /= p_.depth_;
+ int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_;
+ int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
+ cstart = std::max(cstart, 0);
+ n /= p_.out_cols_;
+ int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_;
+ int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
+ rstart = std::max(rstart, 0);
+ n /= p_.out_rows_;
+ int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_;
+ int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
+ pstart = std::max(pstart, 0);
+ n /= p_.out_planes_;
+ T maxval = Eigen::NumTraits<T>::lowest();
+ const T* input_data_n =
+ input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
+ for (int p = pstart; p < pend; ++p) {
+ for (int r = rstart; r < rend; ++r) {
+ for (int c = cstart; c < cend; ++c) {
+ int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d;
+ if (input_data_n[idx] > maxval) {
+ maxval = input_data_n[idx];
+ }
+ }
+ }
+ }
+ output_data[index] = maxval;
+ }
+
+ private:
+ const SYCL3DPoolParams p_;
+ const read_accessor input_accessor_;
+ write_accessor output_accessor_;
+};
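(A Python reference sketch, with NumPy and toy NDHWC shapes, of the index
arithmetic this kernel performs: the linear output index is peeled into
(d, c, r, p, n) by repeated modulo/divide, and the maximum is taken over the
clipped input window.)

    import numpy as np

    def max_pool_3d_ref(x, window, stride, pad):
      # x: [batch, planes, rows, cols, depth]; window/stride/pad: (p, r, c).
      n_b, p_i, r_i, c_i, d_i = x.shape
      p_o = (p_i + 2 * pad[0] - window[0]) // stride[0] + 1
      r_o = (r_i + 2 * pad[1] - window[1]) // stride[1] + 1
      c_o = (c_i + 2 * pad[2] - window[2]) // stride[2] + 1
      out = np.empty((n_b, p_o, r_o, c_o, d_i), x.dtype)
      for index in range(out.size):
        n = index
        d = n % d_i; n //= d_i
        c = n % c_o; n //= c_o
        r = n % r_o; n //= r_o
        p = n % p_o; n //= p_o
        # Window end is clipped to the input extent, start is clamped to 0.
        ps = p * stride[0] - pad[0]; pe = min(ps + window[0], p_i); ps = max(ps, 0)
        rs = r * stride[1] - pad[1]; re = min(rs + window[1], r_i); rs = max(rs, 0)
        cs = c * stride[2] - pad[2]; ce = min(cs + window[2], c_i); cs = max(cs, 0)
        out[n, p, r, c, d] = x[n, ps:pe, rs:re, cs:ce, d].max()
      return out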
+template <typename T>
+struct LaunchPoolingOp<SYCLDevice, T, MAX> {
+ static void launch(OpKernelContext* context, const Tensor& tensor_in,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding,
+ TensorFormat data_format, Padding padding_type,
+ Tensor* output) {
+ const SYCLDevice& device = context->eigen_device<SYCLDevice>();
+ const int out_planes = GetTensorDim(*output, data_format, '0');
+ const int out_rows = GetTensorDim(*output, data_format, '1');
+ const int out_cols = GetTensorDim(*output, data_format, '2');
+ const int batch = GetTensorDim(tensor_in, data_format, 'N');
+ const int in_planes = GetTensorDim(tensor_in, data_format, '0');
+ const int in_rows = GetTensorDim(tensor_in, data_format, '1');
+ const int in_cols = GetTensorDim(tensor_in, data_format, '2');
+ const int depth = GetTensorDim(tensor_in, data_format, 'C');
+
+ const int num_threads = output->NumElements();
+
+ auto input_buffer =
+ device.get_sycl_buffer(tensor_in.template flat<T>().data());
+ auto output_buffer =
+ device.get_sycl_buffer(output->template flat<T>().data());
+
+ device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
+ auto input_access =
+ input_buffer.template get_access<cl::sycl::access::mode::read>(cgh);
+ auto output_access =
+ output_buffer.template get_access<cl::sycl::access::mode::write>(cgh);
+ MaxPool3DSYCL<T> max_pool(depth, batch, in_planes, in_rows, in_cols,
+ out_planes, out_rows, out_cols, window, stride,
+ padding, input_access, output_access);
+
+ cgh.parallel_for(cl::sycl::range<1>(num_threads), max_pool);
+ });
+ }
+};
+// MaxPool3DGrad SYCL kernel. Expects the number of threads to be equal to the
+// number of elements in the output backprop tenor (i.e. the number of elements
+// in the input data tensor).
+//
+// For each output backprop element we compute the possible window of values in
+// the input backprop tensor which might contribute to this element. Then for
+// each error in this window, compute the corresponding input window which was
+// pooled into that element in the output. Walk through this input window to
+// determine whether the input value is the first maximum value, and so the
+// error should be propagated back to the corresponding backprop element.
+template <typename T>
+class MaxPool3DGradSYCL {
+ using write_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
+ cl::sycl::access::target::global_buffer>;
+ using read_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
+ cl::sycl::access::target::global_buffer>;
+
+ public:
+ MaxPool3DGradSYCL(const int depth, const int batch, const int in_planes,
+ const int in_rows, const int in_cols,
+ const std::array<int64, 3>& output_shape,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding,
+ const read_accessor input_data_accessor,
+ const read_accessor output_data_accessor,
+ const read_accessor input_backprop_accessor,
+ write_accessor output_backprop_accessor)
+ : p_(depth, batch, in_planes, in_rows, in_cols, output_shape, window,
+ stride, padding),
+ input_data_accessor_(input_data_accessor),
+ output_data_accessor_(output_data_accessor),
+ input_backprop_accessor_(input_backprop_accessor),
+ output_backprop_accessor_(output_backprop_accessor) {}
+ void operator()(cl::sycl::item<1> item) {
+ T* input_data = ConvertToActualTypeSycl(T, input_data_accessor_);
+ T* output_data = ConvertToActualTypeSycl(T, output_data_accessor_);
+ T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_);
+ T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_);
+
+ const int index = item.get_linear_id();
+ T output_value = 0;
+ int n = index;
+ const int d = n % p_.depth_;
+ n /= p_.depth_;
+ const int c = (n % p_.in_cols_) + p_.pad_cols_;
+ const int poolcstart =
+ (c < p_.window_cols_) ? 0 : (c - p_.window_cols_) / p_.stride_cols_ + 1;
+ const int poolcend = std::min(c / p_.stride_cols_ + 1, p_.out_cols_);
+ n /= p_.in_cols_;
+ const int r = (n % p_.in_rows_) + p_.pad_rows_;
+ const int poolrstart =
+ (r < p_.window_rows_) ? 0 : (r - p_.window_rows_) / p_.stride_rows_ + 1;
+ const int poolrend = std::min(r / p_.stride_rows_ + 1, p_.out_rows_);
+ n /= p_.in_rows_;
+ const int p = (n % p_.in_planes_) + p_.pad_planes_;
+ const int poolpstart =
+ (p < p_.window_planes_)
+ ? 0
+ : (p - p_.window_planes_) / p_.stride_planes_ + 1;
+ const int poolpend = std::min(p / p_.stride_planes_ + 1, p_.out_planes_);
+ n /= p_.in_planes_;
+ const int index_no_n =
+ index - n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
+
+ const T* input_data_n =
+ input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
+ const T* output_data_n =
+ output_data +
+ n * p_.out_planes_ * p_.out_cols_ * p_.out_rows_ * p_.depth_;
+ const T* input_backprop_n =
+ input_backprop +
+ n * p_.out_planes_ * p_.out_cols_ * p_.out_rows_ * p_.depth_;
+ for (int poolp = poolpstart; poolp < poolpend; ++poolp) {
+ int pstart = poolp * p_.stride_planes_ - p_.pad_planes_;
+ const int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
+ pstart = std::max(pstart, 0);
+
+ for (int poolr = poolrstart; poolr < poolrend; ++poolr) {
+ int rstart = poolr * p_.stride_rows_ - p_.pad_rows_;
+ const int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
+ rstart = std::max(rstart, 0);
+
+ for (int poolc = poolcstart; poolc < poolcend; ++poolc) {
+ int cstart = poolc * p_.stride_cols_ - p_.pad_cols_;
+ const int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
+ cstart = std::max(cstart, 0);
+
+ const int output_data_idx =
+ ((poolp * p_.out_rows_ + poolr) * p_.out_cols_ + poolc) *
+ p_.depth_ +
+ d;
+ bool should_continue = true;
+ bool is_max = (input_data[index] == output_data_n[output_data_idx]);
+ for (int win_p = pstart; win_p < pend && should_continue; ++win_p) {
+ for (int win_r = rstart; win_r < rend && should_continue; ++win_r) {
+ for (int win_c = cstart; win_c < cend && should_continue;
+ ++win_c) {
+ const int input_data_idx =
+ ((win_p * p_.in_rows_ + win_r) * p_.in_cols_ + win_c) *
+ p_.depth_ +
+ d;
+ if (input_data_idx == index_no_n) {
+ should_continue = false;
+ } else if (input_data_n[input_data_idx] ==
+ output_data_n[output_data_idx]) {
+ should_continue = false;
+ is_max = false;
+ }
+ }
+ }
+ }
+ if (is_max) {
+ output_value += input_backprop_n[output_data_idx];
+ }
+ }
+ }
+ }
+ output_backprop[index] = output_value;
+ }
+
+ private:
+ const SYCL3DPoolParams p_;
+
+ const read_accessor input_data_accessor_;
+ const read_accessor output_data_accessor_;
+ const read_accessor input_backprop_accessor_;
+ write_accessor output_backprop_accessor_;
+};
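(A 1-D Python sketch, with NumPy and toy sizes, of the tie-breaking rule this
kernel implements: the gradient for each pooled output flows only to the
first input element in the window that attains the maximum.)

    import numpy as np

    def max_pool_1d_grad(x, grad_out, window, stride):
      grad_in = np.zeros_like(x)
      n_out = (len(x) - window) // stride + 1
      for o in range(n_out):
        start = o * stride
        win = x[start:start + window]
        first_max = start + int(np.argmax(win))  # argmax picks the first max
        grad_in[first_max] += grad_out[o]
      return grad_in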
+template <typename T>
+struct LaunchMaxPooling3dGradOp<SYCLDevice, T> {
+ static void launch(OpKernelContext* context, const Tensor& tensor_in,
+ const Tensor& tensor_out, const Tensor& out_backprop,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& out,
+ const std::array<int64, 3>& padding,
+ TensorFormat data_format, Tensor* output) {
+ const SYCLDevice& device = context->eigen_device<SYCLDevice>();
+ const int batch = GetTensorDim(tensor_in, data_format, 'N');
+ const int in_planes = GetTensorDim(tensor_in, data_format, '0');
+ const int in_rows = GetTensorDim(tensor_in, data_format, '1');
+ const int in_cols = GetTensorDim(tensor_in, data_format, '2');
+ const int depth = GetTensorDim(tensor_in, data_format, 'C');
+
+ const int output_size = output->NumElements();
+
+ auto input_data_buffer =
+ device.get_sycl_buffer(tensor_in.template flat<T>().data());
+ auto output_data_buffer =
+ device.get_sycl_buffer(tensor_out.template flat<T>().data());
+ auto input_backprop_buffer =
+ device.get_sycl_buffer(out_backprop.template flat<T>().data());
+ auto output_backprop_buffer =
+ device.get_sycl_buffer(output->template flat<T>().data());
+
+ device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
+ auto input_data_access =
+ input_data_buffer.template get_access<cl::sycl::access::mode::read>(
+ cgh);
+ auto output_data_access =
+ output_data_buffer.template get_access<cl::sycl::access::mode::read>(
+ cgh);
+ auto input_backprop_access =
+ input_backprop_buffer
+ .template get_access<cl::sycl::access::mode::read>(cgh);
+ auto output_backprop_access =
+ output_backprop_buffer
+ .template get_access<cl::sycl::access::mode::write>(cgh);
+ MaxPool3DGradSYCL<T> max_pool(
+ depth, batch, in_planes, in_rows, in_cols, out, window, stride,
+ padding, input_data_access, output_data_access, input_backprop_access,
+ output_backprop_access);
+
+ cgh.parallel_for(cl::sycl::range<1>(output_size), max_pool);
+ });
+ }
+};
+// MaxPool3DGradGrad SYCL kernel. Expects the number of threads to be equal to
+// the number of elements in the output backprop tensor, i.e. the number of
+// elements in the output tensor.
+//
+// For each element in the output backprop tensor, find the corresponding input
+// window, and compare the input and output data to find the index of the
+// maximum value in the input tensor. This is then the index of the gradient to
+// pass through to the output backprop tensor.
+template <typename T>
+class MaxPool3DGradGradSYCL {
+ using write_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
+ cl::sycl::access::target::global_buffer>;
+ using read_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
+ cl::sycl::access::target::global_buffer>;
+
+ public:
+ MaxPool3DGradGradSYCL(const Pool3dParameters& params,
+ const read_accessor input_data_accessor,
+ const read_accessor output_data_accessor,
+ const read_accessor input_backprop_accessor,
+ write_accessor output_backprop_accessor)
+ : p_(params),
+ input_data_accessor_(input_data_accessor),
+ output_data_accessor_(output_data_accessor),
+ input_backprop_accessor_(input_backprop_accessor),
+ output_backprop_accessor_(output_backprop_accessor) {}
+ void operator()(cl::sycl::item<1> item) {
+ T* input_data = ConvertToActualTypeSycl(T, input_data_accessor_);
+ T* output_data = ConvertToActualTypeSycl(T, output_data_accessor_);
+ T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_);
+ T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_);
+
+ int index = item.get_linear_id();
+ int n = index;
+ int d = n % p_.depth_;
+ n /= p_.depth_;
+ int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_;
+ int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
+ cstart = std::max(cstart, 0);
+ n /= p_.out_cols_;
+ int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_;
+ int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
+ rstart = std::max(rstart, 0);
+ n /= p_.out_rows_;
+ int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_;
+ int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
+ pstart = std::max(pstart, 0);
+ n /= p_.out_planes_;
+ int maxidx = -1;
+ bool should_stop = false;
+ const T* input_data_n =
+ input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
+ for (int p = pstart; p < pend && !should_stop; ++p) {
+ for (int r = rstart; r < rend && !should_stop; ++r) {
+ for (int c = cstart; c < cend && !should_stop; ++c) {
+ int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d;
+ if (output_data[index] == input_data_n[idx]) {
+ maxidx = idx;
+ should_stop = true;
+ }
+ }
+ }
+ }
+ if (maxidx != -1) {
+ output_backprop[index] = input_backprop[n * p_.in_planes_ * p_.in_rows_ *
+ p_.in_cols_ * p_.depth_ +
+ maxidx];
+ }
+ }
+
+ private:
+ const SYCL3DPoolParams p_;
+
+ const read_accessor input_data_accessor_;
+ const read_accessor output_data_accessor_;
+ const read_accessor input_backprop_accessor_;
+ write_accessor output_backprop_accessor_;
+};
+template <typename T>
+struct LaunchMaxPooling3dGradGradOp<SYCLDevice, T> {
+ static void launch(OpKernelContext* context, const Pool3dParameters& params,
+ const Tensor& tensor_in, const Tensor& tensor_out,
+ const Tensor& out_backprop, Tensor* output) {
+ const SYCLDevice& device = context->eigen_device<SYCLDevice>();
+
+ const int num_threads = output->NumElements();
+
+ auto input_data_buffer =
+ device.get_sycl_buffer(tensor_in.template flat<T>().data());
+ auto output_data_buffer =
+ device.get_sycl_buffer(tensor_out.template flat<T>().data());
+ auto input_backprop_buffer =
+ device.get_sycl_buffer(out_backprop.template flat<T>().data());
+ auto output_backprop_buffer =
+ device.get_sycl_buffer(output->template flat<T>().data());
+
+ device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
+ auto input_data_access =
+ input_data_buffer.template get_access<cl::sycl::access::mode::read>(
+ cgh);
+ auto output_data_access =
+ output_data_buffer.template get_access<cl::sycl::access::mode::read>(
+ cgh);
+ auto input_backprop_access =
+ input_backprop_buffer
+ .template get_access<cl::sycl::access::mode::read>(cgh);
+ auto output_backprop_access =
+ output_backprop_buffer
+ .template get_access<cl::sycl::access::mode::write>(cgh);
+ MaxPool3DGradGradSYCL<T> functor(
+ params, input_data_access, output_data_access, input_backprop_access,
+ output_backprop_access);
+
+ cgh.parallel_for(cl::sycl::range<1>(num_threads), functor);
+ });
+ }
+};
+// AvgPool3D SYCL kernel. Expects the number of threads to be equal to the
+// number of elements in the output tensor.
+//
+// For each output value find the corresponding input window, and run through
+// the window accumulating the values to form an average. We divide each value
+// before accumulating to prevent the accumulator from becoming significantly
+// bigger than the values we are adding, which reduces rounding error.
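+//
+// For example, with a half-precision accumulator and an 8-element window of
+// values near 1.0, summing first would grow the accumulator to roughly 8.0,
+// where the spacing between representable half values is 8x coarser than it
+// is near 1.0; dividing each term by the count first keeps the accumulator
+// near 1.0 throughout.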
+template <typename T>
+class AvgPool3DSYCL {
+ using write_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
+ cl::sycl::access::target::global_buffer>;
+ using read_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
+ cl::sycl::access::target::global_buffer>;
+
+ public:
+ AvgPool3DSYCL(const int depth, const int batch, const int in_planes,
+ const int in_rows, const int in_cols, const int out_planes,
+ const int out_rows, const int out_cols,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding,
+ const read_accessor input_accessor,
+ write_accessor output_accessor)
+ : p_(depth, batch, in_planes, in_rows, in_cols, out_planes, out_rows,
+ out_cols, window, stride, padding),
+ input_accessor_(input_accessor),
+ output_accessor_(output_accessor) {}
+ void operator()(cl::sycl::item<1> item) {
+ T* input_data = ConvertToActualTypeSycl(T, input_accessor_);
+ T* output_data = ConvertToActualTypeSycl(T, output_accessor_);
+
+ int index = item.get_linear_id();
+ int n = index;
+ int d = n % p_.depth_;
+ n /= p_.depth_;
+ int cstart = (n % p_.out_cols_) * p_.stride_cols_ - p_.pad_cols_;
+ int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
+ cstart = std::max(cstart, 0);
+ n /= p_.out_cols_;
+ int rstart = (n % p_.out_rows_) * p_.stride_rows_ - p_.pad_rows_;
+ int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
+ rstart = std::max(rstart, 0);
+ n /= p_.out_rows_;
+ int pstart = (n % p_.out_planes_) * p_.stride_planes_ - p_.pad_planes_;
+ int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
+ pstart = std::max(pstart, 0);
+ n /= p_.out_planes_;
+ T accum = T(0);
+ T count =
+ static_cast<T>((pend - pstart) * (rend - rstart) * (cend - cstart));
+ const T* input_data_n =
+ input_data + n * p_.in_planes_ * p_.in_cols_ * p_.in_rows_ * p_.depth_;
+ for (int p = pstart; p < pend; ++p) {
+ for (int r = rstart; r < rend; ++r) {
+ for (int c = cstart; c < cend; ++c) {
+ int idx = ((p * p_.in_rows_ + r) * p_.in_cols_ + c) * p_.depth_ + d;
+ accum += input_data_n[idx] / count;
+ }
+ }
+ }
+ output_data[index] = accum;
+ }
+
+ private:
+ const SYCL3DPoolParams p_;
+ const read_accessor input_accessor_;
+ write_accessor output_accessor_;
+};
+template <typename T>
+struct LaunchPoolingOp<SYCLDevice, T, AVG> {
+ static void launch(OpKernelContext* context, const Tensor& tensor_in,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding,
+ TensorFormat data_format, Padding padding_type,
+ Tensor* output) {
+ const SYCLDevice& device = context->eigen_device<SYCLDevice>();
+ const int out_planes = GetTensorDim(*output, data_format, '0');
+ const int out_rows = GetTensorDim(*output, data_format, '1');
+ const int out_cols = GetTensorDim(*output, data_format, '2');
+ const int batch = GetTensorDim(tensor_in, data_format, 'N');
+ const int in_planes = GetTensorDim(tensor_in, data_format, '0');
+ const int in_rows = GetTensorDim(tensor_in, data_format, '1');
+ const int in_cols = GetTensorDim(tensor_in, data_format, '2');
+ const int depth = GetTensorDim(tensor_in, data_format, 'C');
+
+ const int num_threads = output->NumElements();
+
+ auto input_buffer =
+ device.get_sycl_buffer(tensor_in.template flat<T>().data());
+ auto output_buffer =
+ device.get_sycl_buffer(output->template flat<T>().data());
+
+ device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
+ auto input_access =
+ input_buffer.template get_access<cl::sycl::access::mode::read>(cgh);
+ auto output_access =
+ output_buffer.template get_access<cl::sycl::access::mode::write>(cgh);
+ AvgPool3DSYCL<T> avg_pool(depth, batch, in_planes, in_rows, in_cols,
+ out_planes, out_rows, out_cols, window, stride,
+ padding, input_access, output_access);
+
+ cgh.parallel_for(cl::sycl::range<1>(num_threads), avg_pool);
+ });
+ }
+};
+// AvgPool3DGrad SYCL kernel. Expects the number of threads to be equal to the
+// number of elements in the output backprop tensor, i.e. the number of
+// elements in the input tensor.
+//
+// For each output backprop index, find the window in the input backprop
+// tensor corresponding to all the output values that were affected by the
+// input value at this index. Then, for each gradient in this window, compute
+// the size of the input window which was averaged to give this output, and use
+// this size to scale the gradient accordingly. Add this scaled gradient to the
+// output backprop value.
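+//
+// For illustration (hypothetical sizes): with window_cols_ = 3,
+// stride_cols_ = 2, no column padding and out_cols_ = 3, input column c = 4
+// is covered by output columns [poolcstart, poolcend) =
+// [(4 - 3) / 2 + 1, min(4 / 2 + 1, 3)) = [1, 3): output column 1 pools input
+// columns 2..4 and output column 2 pools input columns 4..6.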
+template <typename T>
+class AvgPool3DGradSYCL {
+ using write_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::write,
+ cl::sycl::access::target::global_buffer>;
+ using read_accessor =
+ cl::sycl::accessor<uint8_t, 1, cl::sycl::access::mode::read,
+ cl::sycl::access::target::global_buffer>;
+
+ public:
+ AvgPool3DGradSYCL(const int depth, const int batch, const int in_planes,
+ const int in_rows, const int in_cols,
+ const std::array<int64, 3>& out_shape,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& padding,
+ const read_accessor input_backprop_accessor,
+ write_accessor output_backprop_accessor)
+ : p_(depth, batch, in_planes, in_rows, in_cols, out_shape, window, stride,
+ padding),
+ input_backprop_accessor_(input_backprop_accessor),
+ output_backprop_accessor_(output_backprop_accessor) {}
+ void operator()(cl::sycl::item<1> item) {
+ T* input_backprop = ConvertToActualTypeSycl(T, input_backprop_accessor_);
+ T* output_backprop = ConvertToActualTypeSycl(T, output_backprop_accessor_);
+
+ const int index = item.get_linear_id();
+ int n = index;
+ const int d = n % p_.depth_;
+ n /= p_.depth_;
+ const int c = (n % p_.in_cols_) + p_.pad_cols_;
+ const int poolcstart =
+ (c < p_.window_cols_) ? 0 : (c - p_.window_cols_) / p_.stride_cols_ + 1;
+ const int poolcend = std::min(c / p_.stride_cols_ + 1, p_.out_cols_);
+ n /= p_.in_cols_;
+ const int r = (n % p_.in_rows_) + p_.pad_rows_;
+ const int poolrstart =
+ (r < p_.window_rows_) ? 0 : (r - p_.window_rows_) / p_.stride_rows_ + 1;
+ const int poolrend = std::min(r / p_.stride_rows_ + 1, p_.out_rows_);
+ n /= p_.in_rows_;
+ const int p = (n % p_.in_planes_) + p_.pad_planes_;
+ const int poolpstart =
+ (p < p_.window_planes_)
+ ? 0
+ : (p - p_.window_planes_) / p_.stride_planes_ + 1;
+ const int poolpend = std::min(p / p_.stride_planes_ + 1, p_.out_planes_);
+ n /= p_.in_planes_;
+
+ T gradient = T(0);
+ const T* input_backprop_n =
+ input_backprop +
+ n * p_.out_planes_ * p_.out_cols_ * p_.out_rows_ * p_.depth_;
+ for (int poolp = poolpstart; poolp < poolpend; ++poolp) {
+ int pstart = poolp * p_.stride_planes_ - p_.pad_planes_;
+ const int pend = std::min(pstart + p_.window_planes_, p_.in_planes_);
+ pstart = std::max(pstart, 0);
+ const int plane_window_size = pend - pstart;
+ for (int poolr = poolrstart; poolr < poolrend; ++poolr) {
+ int rstart = poolr * p_.stride_rows_ - p_.pad_rows_;
+ const int rend = std::min(rstart + p_.window_rows_, p_.in_rows_);
+ rstart = std::max(rstart, 0);
+ const int row_window_size = rend - rstart;
+ for (int poolc = poolcstart; poolc < poolcend; ++poolc) {
+ const int idx =
+ ((poolp * p_.out_rows_ + poolr) * p_.out_cols_ + poolc) *
+ p_.depth_ +
+ d;
+ int cstart = poolc * p_.stride_cols_ - p_.pad_cols_;
+ const int cend = std::min(cstart + p_.window_cols_, p_.in_cols_);
+ cstart = std::max(cstart, 0);
+ const int col_window_size = cend - cstart;
+ const int window_size =
+ plane_window_size * row_window_size * col_window_size;
+ gradient += input_backprop_n[idx] / static_cast<T>(window_size);
+ }
+ }
+ }
+ output_backprop[index] = gradient;
+ }
+
+ private:
+ const SYCL3DPoolParams p_;
+ const read_accessor input_backprop_accessor_;
+ write_accessor output_backprop_accessor_;
+};
+template <typename T>
+struct LaunchAvgPooling3dGradOp<SYCLDevice, T> {
+ static void launch(OpKernelContext* context,
+ const TensorShape& tensor_in_shape,
+ const Tensor& out_backprop,
+ const std::array<int64, 3>& window,
+ const std::array<int64, 3>& stride,
+ const std::array<int64, 3>& output_shape,
+ const std::array<int64, 3>& padding,
+ TensorFormat data_format, Tensor* output) {
+ const SYCLDevice& device = context->eigen_device<SYCLDevice>();
+ const int batch = GetTensorDim(tensor_in_shape, data_format, 'N');
+ const int in_planes = GetTensorDim(tensor_in_shape, data_format, '0');
+ const int in_rows = GetTensorDim(tensor_in_shape, data_format, '1');
+ const int in_cols = GetTensorDim(tensor_in_shape, data_format, '2');
+ const int depth = GetTensorDim(tensor_in_shape, data_format, 'C');
+
+ const int num_threads = output->NumElements();
+
+ auto input_backprop_buffer =
+ device.get_sycl_buffer(out_backprop.template flat<T>().data());
+ auto output_backprop_buffer =
+ device.get_sycl_buffer(output->template flat<T>().data());
+
+ device.sycl_queue().submit([&](cl::sycl::handler& cgh) {
+ auto input_backprop_access =
+ input_backprop_buffer
+ .template get_access<cl::sycl::access::mode::read>(cgh);
+ auto output_backprop_access =
+ output_backprop_buffer
+ .template get_access<cl::sycl::access::mode::write>(cgh);
+ AvgPool3DGradSYCL<T> functor(
+ depth, batch, in_planes, in_rows, in_cols, output_shape, window,
+ stride, padding, input_backprop_access, output_backprop_access);
+
+ cgh.parallel_for(cl::sycl::range<1>(num_threads), functor);
+ });
+ }
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CORE_KERNELS_POOLING_OP_3D_SYCL_H_
diff --git a/tensorflow/core/ops/image_ops.cc b/tensorflow/core/ops/image_ops.cc
index 5b71f046c3..1bfa37f5a7 100644
--- a/tensorflow/core/ops/image_ops.cc
+++ b/tensorflow/core/ops/image_ops.cc
@@ -710,9 +710,9 @@ bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The
bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and
height of the underlying image.
-For example, if an image is 100 x 200 pixels and the bounding box is
-`[0.1, 0.2, 0.5, 0.9]`, the bottom-left and upper-right coordinates of the
-bounding box will be `(10, 40)` to `(50, 180)`.
+For example, if an image is 100 x 200 pixels (height x width) and the bounding
+box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of
+the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates).
Parts of the bounding box may fall outside the image.
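As an editorial cross-check of the corrected example above (this sketch is not part of the patch), converting the normalized `[y_min, x_min, y_max, x_max]` box to pixel corners:

```c++
#include <cstdio>

int main() {
  // Normalized box from the docstring: [y_min, x_min, y_max, x_max].
  const float box[4] = {0.1f, 0.2f, 0.5f, 0.9f};
  const int height = 100, width = 200;  // 100 x 200 pixels (height x width)
  const int x_min = static_cast<int>(box[1] * width);   // 0.2 * 200 = 40
  const int y_min = static_cast<int>(box[0] * height);  // 0.1 * 100 = 10
  const int x_max = static_cast<int>(box[3] * width);   // 0.9 * 200 = 180
  const int y_max = static_cast<int>(box[2] * height);  // 0.5 * 100 = 50
  // Prints: upper-left (40, 10), bottom-right (180, 50)
  std::printf("upper-left (%d, %d), bottom-right (%d, %d)\n", x_min, y_min,
              x_max, y_max);
  return 0;
}
```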
diff --git a/tensorflow/core/platform/default/gpu_tracer.cc b/tensorflow/core/platform/default/gpu_tracer.cc
index 86ab70afdd..50c27b3cf6 100644
--- a/tensorflow/core/platform/default/gpu_tracer.cc
+++ b/tensorflow/core/platform/default/gpu_tracer.cc
@@ -205,7 +205,7 @@ Status CUPTIManager::DisableTrace() {
CUPTI_CALL(ActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY));
CUPTI_CALL(ActivityDisable(CUPTI_ACTIVITY_KIND_MEMCPY2));
CUPTI_CALL(ActivityDisable(CUPTI_ACTIVITY_KIND_MEMSET));
- CUPTI_CALL(ActivityFlushAll(0));
+ CUPTI_CALL(ActivityFlushAll(CUPTI_ACTIVITY_FLAG_FLUSH_FORCED));
{
// Don't acquire this lock until Flush returns, since Flush
// will potentially cause callbacks into BufferCompleted.
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 69198019cd..70b91c37e4 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) {
}
bool CountAsCPUTime(const string& device) {
- return RE2::FullMatch(device, ".*/(gpu|cpu):\\d+");
+ return RE2::FullMatch(device, ".*/(gpu|cpu|device:sycl):\\d+");
}
bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
@@ -145,7 +145,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
// See run_metadata_test.py
// It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0.
// It can have some ad-hoc suffix, such as /stream:xx or /memcpy:xx.
- if (IsCanonicalDevice(device)) {
+ if (IsCanonicalDevice(dev)) {
if (!canonical_device_.empty()) {
if (canonical_device_ != dev) {
fprintf(stderr, "Unexpected: graph node changed device: %s->%s.\n",
@@ -155,7 +155,11 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
} else {
canonical_device_ = dev;
// TODO(xpan): Support things other than gpu?
- host_device_ = StringReplace(dev, "gpu:\\d+", "cpu:0");
+ if (dev.find("sycl") != dev.npos) {
+ host_device_ = StringReplace(dev, "device:sycl:\\d+", "cpu:0");
+ } else {
+ host_device_ = StringReplace(dev, "gpu:\\d+", "cpu:0");
+ }
AddOpType(canonical_device_);
}
}
@@ -229,7 +233,8 @@ TensorShapeProto VecToShapeProto(const std::vector<int64> shape_vec) {
}
bool IsPlacedOnAccelerator(const string& device) {
- return device.find("gpu") != device.npos;
+ return device.find("gpu") != device.npos ||
+ device.find("sycl") != device.npos;
}
} // namespace tfprof
} // namespace tensorflow
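For reference, a minimal standalone sketch of the host-device mapping the hunks above implement (editorial; it uses std::regex in place of the patch's StringReplace helper):

```c++
#include <iostream>
#include <regex>
#include <string>

// Map an accelerator device string onto its host CPU device, mirroring the
// sycl/gpu branches added above.
std::string HostDevice(const std::string& dev) {
  if (dev.find("sycl") != std::string::npos) {
    return std::regex_replace(dev, std::regex("device:sycl:\\d+"), "cpu:0");
  }
  return std::regex_replace(dev, std::regex("gpu:\\d+"), "cpu:0");
}

int main() {
  std::cout << HostDevice("/job:0/replica:0/device:sycl:0") << "\n";  // /job:0/replica:0/cpu:0
  std::cout << HostDevice("/job:0/replica:0/gpu:1") << "\n";          // /job:0/replica:0/cpu:0
  return 0;
}
```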
diff --git a/tensorflow/docs_src/extend/adding_an_op.md b/tensorflow/docs_src/extend/adding_an_op.md
index 4b1d1b6e34..a51a607031 100644
--- a/tensorflow/docs_src/extend/adding_an_op.md
+++ b/tensorflow/docs_src/extend/adding_an_op.md
@@ -178,9 +178,7 @@ suggested implementation is to:
file, but the specialization for the GPUDevice is defined in a .cu.cc file,
since it will be compiled with the CUDA compiler.
-<!--zippy-->
-
-Expand this to see the example implementation.
+Here is an example implementation.
```c++
// example.h
@@ -307,8 +305,6 @@ template struct ExampleFunctor<GPUDevice, int32>;
#endif // GOOGLE_CUDA
```
-<!--endzippy-->
-
## Build the op library
### Compile the op using your system compiler (TensorFlow binary installation)
@@ -763,7 +759,7 @@ Your op registration now specifies that the input's type must be `float`, or
> """
> ```
-<pre><pre class="prettyprint"><code class="lang-cpp">
+<pre class="prettyprint"><code class="lang-cpp">
\#include "tensorflow/core/framework/op_kernel.h"<br/>
class ZeroOut<b>Int32</b>Op : public OpKernel {
// as before
@@ -803,7 +799,7 @@ REGISTER\_KERNEL\_BUILDER(
.Device(DEVICE\_CPU)
.TypeConstraint&lt;float&gt;("T"),
ZeroOutFloatOp);
-</b></code></pre></pre>
+</b></code></pre>
> To preserve [backwards compatibility](#backwards-compatibility), you should
> specify a [default value](#default-values-constraints) when adding an attr to
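As an editorial aside on the registration pattern shown above (assuming a templated `ZeroOutOp<T>` as in the doc's polymorphism discussion; this sketch is not part of the patch), the per-type registrations can be stamped out with a macro instead of one hand-written class per type:

```c++
// Register the same templated kernel for several types; each expansion is
// equivalent to one hand-written REGISTER_KERNEL_BUILDER call.
#define REGISTER_ZERO_OUT(T)                                      \
  REGISTER_KERNEL_BUILDER(                                        \
      Name("ZeroOut").Device(DEVICE_CPU).TypeConstraint<T>("T"),  \
      ZeroOutOp<T>)

REGISTER_ZERO_OUT(float);
REGISTER_ZERO_OUT(int32);
#undef REGISTER_ZERO_OUT
```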
diff --git a/tensorflow/examples/image_retraining/label_image.py b/tensorflow/examples/image_retraining/label_image.py
index ecfa672462..de2713fc10 100644
--- a/tensorflow/examples/image_retraining/label_image.py
+++ b/tensorflow/examples/image_retraining/label_image.py
@@ -99,7 +99,7 @@ def run_graph(image_data, labels, input_layer_name, output_layer_name,
num_top_predictions):
with tf.Session() as sess:
# Feed the image_data as input to the graph.
- # predictions will contain a two-dimensional array, where one
+ # predictions will contain a two-dimensional array, where one
# dimension represents the input image count, and the other has
# predictions per class
softmax_tensor = sess.graph.get_tensor_by_name(output_layer_name)
diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py
index 2e2e578050..3549891461 100644
--- a/tensorflow/examples/image_retraining/retrain.py
+++ b/tensorflow/examples/image_retraining/retrain.py
@@ -293,7 +293,7 @@ def run_bottleneck_on_image(sess, image_data, image_data_tensor,
sess: Current active TensorFlow Session.
image_data: String of raw JPEG data.
image_data_tensor: Input data layer in the graph.
- decoded_image_tensor: Output of initial image resizing and preprocessing.
+ decoded_image_tensor: Output of initial image resizing and preprocessing.
resized_input_tensor: The input node of the recognition graph.
bottleneck_tensor: Layer before the final softmax.
@@ -391,9 +391,9 @@ def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir,
label_name: Label string we want to get an image for.
index: Integer offset of the image we want. This will be modulo-ed by the
available number of images for the label, so it can be arbitrarily large.
- image_dir: Root folder string of the subfolders containing the training
+ image_dir: Root folder string of the subfolders containing the training
images.
- category: Name string of which set to pull images from - training, testing,
+ category: Name string of which set to pull images from - training, testing,
or validation.
bottleneck_dir: Folder string holding cached files of bottleneck values.
jpeg_data_tensor: The tensor to feed loaded jpeg data into.
@@ -969,7 +969,7 @@ def main(_):
# See https://github.com/tensorflow/tensorflow/issues/3047
tf.logging.set_verbosity(tf.logging.INFO)
- # Prepare necessary directories that can be used during training
+ # Prepare necessary directories that can be used during training
prepare_file_system()
# Gather information about the model architecture we'll be using.
diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py
index 21d98e9ea2..26e6e086b3 100644
--- a/tensorflow/examples/learn/text_classification.py
+++ b/tensorflow/examples/learn/text_classification.py
@@ -110,9 +110,9 @@ def main(unused_argv):
# Prepare training and testing data
dbpedia = tf.contrib.learn.datasets.load_dataset(
'dbpedia', test_with_fake_data=FLAGS.test_with_fake_data)
- x_train = pandas.DataFrame(dbpedia.train.data)[1]
+ x_train = pandas.Series(dbpedia.train.data[:,1])
y_train = pandas.Series(dbpedia.train.target)
- x_test = pandas.DataFrame(dbpedia.test.data)[1]
+ x_test = pandas.Series(dbpedia.test.data[:,1])
y_test = pandas.Series(dbpedia.test.target)
# Process vocabulary
diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py
index 0cec75cf99..15e7ae18bb 100644
--- a/tensorflow/python/client/session_test.py
+++ b/tensorflow/python/client/session_test.py
@@ -341,7 +341,7 @@ class SessionTest(test_util.TensorFlowTestCase):
a = constant_op.constant(a_val)
b = control_flow_ops.no_op() # An op, not a tensor.
c = constant_op.constant(c_val)
- # List of lists, tuples, namedtuple, and dict
+ # List of lists, tuples, namedtuple, and dict
res = sess.run([[a, b, c], (a, b, c), ABC(a=a, b=b, c=c),
{'a': a.name, 'c': c, 'b': b}])
self.assertTrue(isinstance(res, list))
@@ -365,7 +365,7 @@ class SessionTest(test_util.TensorFlowTestCase):
self.assertEqual(a_val, res[3]['a'])
self.assertEqual(b_val, res[3]['b'])
self.assertEqual(c_val, res[3]['c'])
- # Tuple of lists, tuples, namedtuple, and dict
+ # Tuple of lists, tuples, namedtuple, and dict
res = sess.run(([a, b, c], (a.name, b, c), ABC(a=a, b=b, c=c),
{'a': a, 'c': c, 'b': b}))
self.assertTrue(isinstance(res, tuple))
diff --git a/tensorflow/python/debug/lib/debug_data.py b/tensorflow/python/debug/lib/debug_data.py
index a51d8a7774..044a91a7ce 100644
--- a/tensorflow/python/debug/lib/debug_data.py
+++ b/tensorflow/python/debug/lib/debug_data.py
@@ -720,7 +720,7 @@ class DebugDumpDir(object):
"""Load `DebugTensorDatum` instances from the dump root of a given device.
Populates a map {device_name: a list of `DebugTensorDatum`}, where the list
- is sorted by ascending timestamp.
+ is sorted by ascending timestamp.
This sorting order reflects the order in which the TensorFlow executor
processed the nodes of the graph. It is (one of many possible) topological
diff --git a/tensorflow/python/debug/lib/debug_gradients.py b/tensorflow/python/debug/lib/debug_gradients.py
index 8689a68875..5306391613 100644
--- a/tensorflow/python/debug/lib/debug_gradients.py
+++ b/tensorflow/python/debug/lib/debug_gradients.py
@@ -345,7 +345,7 @@ class GradientsDebugger(object):
def _get_tensor_name(self, tensor):
if isinstance(tensor, (ops.Tensor, variables.Variable)):
return tensor.name
- elif isinstance(tensor, six.string_types):
+ elif isinstance(tensor, six.string_types):
return tensor
else:
raise TypeError(
diff --git a/tensorflow/python/debug/lib/stepper_test.py b/tensorflow/python/debug/lib/stepper_test.py
index 686fb45238..4cf37797f9 100644
--- a/tensorflow/python/debug/lib/stepper_test.py
+++ b/tensorflow/python/debug/lib/stepper_test.py
@@ -401,7 +401,7 @@ class StepperTest(test_util.TensorFlowTestCase):
elif i == 5:
fetches = {"e": "e:0", "fz": {"f": "f:0", "z": "z:0"}}
- with NodeStepper(self.sess, fetches) as stepper:
+ with NodeStepper(self.sess, fetches) as stepper:
sorted_nodes = stepper.sorted_nodes()
self.assertEqual(13, len(sorted_nodes))
diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py
index bc868a493f..a9681df56d 100644
--- a/tensorflow/python/estimator/canned/head.py
+++ b/tensorflow/python/estimator/canned/head.py
@@ -301,7 +301,7 @@ def _multi_class_head_with_softmax_cross_entropy_loss(n_classes,
provided and labels are string.
Returns:
- An instance of `_Head` for multi class classification.
+ An instance of `_Head` for multi class classification.
Raises:
ValueError: if `n_classes`, `metric_class_ids` or `label_keys` is invalid.
diff --git a/tensorflow/python/estimator/inputs/queues/feeding_functions.py b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
index 0480325604..847b27b904 100644
--- a/tensorflow/python/estimator/inputs/queues/feeding_functions.py
+++ b/tensorflow/python/estimator/inputs/queues/feeding_functions.py
@@ -286,7 +286,7 @@ def _enqueue_data(data,
Args:
data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator
- yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
+ yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read
into the queue.
capacity: the capacity of the queue.
shuffle: whether or not to shuffle the rows of the array.
diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py
index 1a023c971f..1a4b0c5fc0 100644
--- a/tensorflow/python/estimator/model_fn.py
+++ b/tensorflow/python/estimator/model_fn.py
@@ -58,9 +58,9 @@ class EstimatorSpec(
'export_outputs', 'training_chief_hooks', 'training_hooks',
'scaffold', 'evaluation_hooks'
])):
- """Ops and objects returned from a `model_fn` and passed to `Estimator`.
+ """Ops and objects returned from a `model_fn` and passed to an `Estimator`.
- `EstimatorSpec` fully defines the model to be run by `Estimator`.
+ `EstimatorSpec` fully defines the model to be run by an `Estimator`.
"""
def __new__(cls,
@@ -82,8 +82,8 @@ class EstimatorSpec(
* For `mode == ModeKeys.PREDICT`: required fields are `predictions`.
model_fn can populate all arguments independent of mode. In this case, some
- arguments will be ignored by `Estimator`. E.g. `train_op` will be ignored
- in eval and infer modes. Example:
+ arguments will be ignored by an `Estimator`. E.g. `train_op` will be
+ ignored in eval and infer modes. Example:
```python
def my_model_fn(mode, features, labels):
diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py
index 1f2d376b50..34295d8c20 100644
--- a/tensorflow/python/framework/function.py
+++ b/tensorflow/python/framework/function.py
@@ -190,7 +190,7 @@ class Declare(object):
later during a graph construction.
For example,
- # Declares a function Foo, which takes a tf.int32 named "n" and a
+ # Declares a function Foo, which takes a tf.int32 named "n" and a
# tf.float32 named "x" as inputs and returns a tf.float32 named "z"
# as its output.
foo = Declare("Foo", [("n", tf.int32), ("x", tf.float32)],
diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py
index f9e8158705..5d3ac45020 100644
--- a/tensorflow/python/framework/ops.py
+++ b/tensorflow/python/framework/ops.py
@@ -3474,7 +3474,7 @@ class Graph(object):
additional mechanism to add control dependencies.
Args:
- graph: The graph that this controller is managing.
+ graph: The graph that this controller is managing.
control_inputs: List of ops to use as control inputs in addition
to the current control dependencies. None to indicate that
the dependencies should be cleared.
diff --git a/tensorflow/python/ops/distributions/bernoulli.py b/tensorflow/python/ops/distributions/bernoulli.py
index 2b981e7b19..b6b20d1b4a 100644
--- a/tensorflow/python/ops/distributions/bernoulli.py
+++ b/tensorflow/python/ops/distributions/bernoulli.py
@@ -125,7 +125,7 @@ class Bernoulli(distribution.Distribution):
event, target_dtype=dtypes.bool)
# TODO(jaana): The current sigmoid_cross_entropy_with_logits has
- # inconsistent behavior for logits = inf/-inf.
+ # inconsistent behavior for logits = inf/-inf.
event = math_ops.cast(event, self.logits.dtype)
logits = self.logits
# sigmoid_cross_entropy_with_logits doesn't broadcast shape,
diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py
index fab4c5cb0f..3b0a357b16 100644
--- a/tensorflow/python/ops/metrics_impl.py
+++ b/tensorflow/python/ops/metrics_impl.py
@@ -296,7 +296,7 @@ def mean(values, weights=None, metrics_collections=None,
returned as `mean` which is an idempotent operation that simply divides
`total` by `count`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the `mean`.
`update_op` increments `total` with the reduced sum of the product of `values`
and `weights`, and it increments `count` with the reduced sum of `weights`.
@@ -366,7 +366,7 @@ def accuracy(labels, predictions, weights=None, metrics_collections=None,
matches `labels`. This frequency is ultimately returned as `accuracy`: an
idempotent operation that simply divides `total` by `count`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the `accuracy`.
Internally, an `is_correct` operation computes a `Tensor` with elements 1.0
where the corresponding elements of `predictions` and `labels` match and 0.0
@@ -614,7 +614,7 @@ def auc(labels, predictions, weights=None, num_thresholds=200,
"""
with variable_scope.variable_scope(
name, 'auc', (labels, predictions, weights)):
- if curve != 'ROC' and curve != 'PR':
+ if curve != 'ROC' and curve != 'PR':
raise ValueError('curve must be either ROC or PR, %s unknown' %
(curve))
kepsilon = 1e-7 # to account for floating point imprecisions
@@ -1067,7 +1067,7 @@ def mean_tensor(values, weights=None, metrics_collections=None,
`values`. This average is ultimately returned as `mean` which is an idempotent
operation that simply divides `total` by `count`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the `mean`.
`update_op` increments `total` with the reduced sum of the product of `values`
and `weights`, and it increments `count` with the reduced sum of `weights`.
@@ -1329,7 +1329,7 @@ def precision(labels, predictions, weights=None,
operation that simply divides `true_positives` by the sum of `true_positives`
and `false_positives`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` operation that updates these variables and returns the
`precision`. `update_op` weights each prediction by the corresponding value in
`weights`.
@@ -1522,7 +1522,7 @@ def recall(labels, predictions, weights=None,
ultimately returned as `recall`, an idempotent operation that simply divides
`true_positives` by the sum of `true_positives` and `false_negatives`.
- For estimation of the metric over a stream of data, the function creates an
+ For estimation of the metric over a stream of data, the function creates an
`update_op` that updates these variables and returns the `recall`. `update_op`
weights each prediction by the corresponding value in `weights`.
@@ -2453,7 +2453,7 @@ def _streaming_sparse_average_precision_at_top_k(labels,
Returns:
mean_average_precision: Scalar `float64` `Tensor` with the mean average
precision values.
- update: `Operation` that increments variables appropriately, and whose
+ update: `Operation` that increments variables appropriately, and whose
value matches `metric`.
"""
with ops.name_scope(name, 'average_precision_at_top_k',
@@ -2551,7 +2551,7 @@ def sparse_average_precision_at_k(labels,
Returns:
mean_average_precision: Scalar `float64` `Tensor` with the mean average
precision values.
- update: `Operation` that increments variables appropriately, and whose
+ update: `Operation` that increments variables appropriately, and whose
value matches `metric`.
Raises:
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 0e29ee82cb..245495a620 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -1721,9 +1721,9 @@ def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
Args:
value: A 4-D `Tensor` of shape `[batch, height, width, channels]` and type
`float32`, `float64`, `qint8`, `quint8`, or `qint32`.
- ksize: A list of ints that has length >= 4.
+ ksize: A 1-D int Tensor of 4 elements.
The size of the window for each dimension of the input tensor.
- strides: A list of ints that has length >= 4.
+ strides: A 1-D int Tensor of 4 elements.
The stride of the sliding window for each dimension of the
input tensor.
padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
@@ -1750,9 +1750,9 @@ def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None):
Args:
value: A 4-D `Tensor` with shape `[batch, height, width, channels]` and
type `tf.float32`.
- ksize: A list of ints that has length >= 4. The size of the window for
+ ksize: A 1-D int Tensor of 4 elements. The size of the window for
each dimension of the input tensor.
- strides: A list of ints that has length >= 4. The stride of the sliding
+ strides: A 1-D int Tensor of 4 elements. The stride of the sliding
window for each dimension of the input tensor.
padding: A string, either `'VALID'` or `'SAME'`. The padding algorithm.
See the @{tf.nn.convolution$comment here}
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index 3c3c18b1c9..2860618bb7 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -890,7 +890,7 @@ def raw_rnn(cell, loop_fn,
appropriate type and shape `[batch_size] + cell.state_size`.
If `cell.state_size` is a (possibly nested) tuple of ints or
`TensorShape`, this will be a tuple having the corresponding shapes.
- The `emit_output` value may be either `None` or a (possibly nested)
+ The `emit_output` value may be either `None` or a (possibly nested)
tuple structure of tensors, e.g.,
`(tf.zeros(shape_0, dtype=dtype_0), tf.zeros(shape_1, dtype=dtype_1))`.
If this first `emit_output` return value is `None`,
diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py
index db33541218..5a179048b1 100644
--- a/tensorflow/python/ops/sparse_ops.py
+++ b/tensorflow/python/ops/sparse_ops.py
@@ -268,7 +268,7 @@ def sparse_add(a, b, thresh=0):
Then,
* `thresh == 0` (the default): all 5 index/value pairs will be returned.
- * `thresh == 0.11`: only .1 and 0 will vanish, and the remaining three
+ * `thresh == 0.11`: only .1 and 0 will vanish, and the remaining three
index/value pairs will be returned.
* `thresh == 0.21`: .1, 0, and -.2 will vanish.
@@ -1263,7 +1263,7 @@ def sparse_reset_shape(sp_input, new_shape=None):
Returns:
A `SparseTensor` indices and values unchanged from `input_sp`. Its shape is
- `new_shape` if that is set. Otherwise it is the tight bounding box of
+ `new_shape` if that is set. Otherwise it is the tight bounding box of
`input_sp`
Raises:
@@ -1720,7 +1720,7 @@ def sparse_tensor_dense_matmul(sp_a,
def sparse_softmax(sp_input, name=None):
"""Applies softmax to a batched N-D `SparseTensor`.
- The inputs represent an N-D SparseTensor with logical shape `[..., B, C]`
+ The inputs represent an N-D SparseTensor with logical shape `[..., B, C]`
(where `N >= 2`), and with indices sorted in the canonical lexicographic
order.
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
index e6dd29077a..7c12020263 100644
--- a/tensorflow/python/ops/variables.py
+++ b/tensorflow/python/ops/variables.py
@@ -229,7 +229,7 @@ class Variable(object):
which is the initial value for the Variable. The initial value must have
a shape specified unless `validate_shape` is set to False. Can also be a
callable with no argument that returns the initial value when called.
- (Note that initializer functions from init_ops.py must first be bound
+ (Note that initializer functions from init_ops.py must first be bound
to a shape before being used here.)
trainable: If `True`, the default, also adds the variable to the graph
collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as
diff --git a/tensorflow/python/summary/text_summary.py b/tensorflow/python/summary/text_summary.py
index b97c02666c..0282554a6f 100644
--- a/tensorflow/python/summary/text_summary.py
+++ b/tensorflow/python/summary/text_summary.py
@@ -56,7 +56,7 @@ def text_summary(name, tensor, collections=None):
summary to. Defaults to [_ops.GraphKeys.SUMMARIES]
Returns:
- A TensorSummary op that is configured so that TensorBoard will recognize
+ A TensorSummary op that is configured so that TensorBoard will recognize
that it contains textual data. The TensorSummary is a scalar `Tensor` of
type `string` which contains `Summary` protobufs.
diff --git a/tensorflow/python/training/monitored_session.py b/tensorflow/python/training/monitored_session.py
index 9f71395c96..6263351a53 100644
--- a/tensorflow/python/training/monitored_session.py
+++ b/tensorflow/python/training/monitored_session.py
@@ -375,7 +375,7 @@ class SessionCreator(object):
class ChiefSessionCreator(SessionCreator):
- """Creates a tf.Session for a chief."""
+ """Creates a tf.Session for a chief."""
def __init__(self,
scaffold=None,
diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py
index 03591480d4..4f1237f3a2 100644
--- a/tensorflow/python/training/optimizer.py
+++ b/tensorflow/python/training/optimizer.py
@@ -42,7 +42,7 @@ def _get_variable_for(v):
if (isinstance(var, resource_variable_ops.ResourceVariable)
and var.handle.op is v.op):
return var
- raise ValueError("Got %s but could not locate source variable." % (str(v)))
+ raise ValueError("Got %s but could not locate source variable." % (str(v)))
return v
@@ -738,7 +738,7 @@ class Optimizer(object):
val: A `Tensor`. The initial value of the slot.
slot_name: Name for the slot.
op_name: Name to use when scoping the Variable that
- needs to be created for the slot.
+ needs to be created for the slot.
Returns:
A `Variable` object.
@@ -759,7 +759,7 @@ class Optimizer(object):
dtype: Type of the value of the slot.
slot_name: Name for the slot.
op_name: Name to use when scoping the Variable that
- needs to be created for the slot.
+ needs to be created for the slot.
Returns:
A `Variable` object.
@@ -777,7 +777,7 @@ class Optimizer(object):
var: A `Variable` object.
slot_name: Name for the slot.
op_name: Name to use when scoping the Variable that
- needs to be created for the slot.
+ needs to be created for the slot.
Returns:
A `Variable` object.
diff --git a/tensorflow/python/util/nest.py b/tensorflow/python/util/nest.py
index c9612c857b..c8c0d2a047 100644
--- a/tensorflow/python/util/nest.py
+++ b/tensorflow/python/util/nest.py
@@ -370,9 +370,9 @@ def map_structure(func, *structure, **check_types_dict):
Args:
func: A callable that accepts as many arguments as there are structures.
*structure: scalar, or tuple or list of constructed scalars and/or other
- tuples/lists, or scalars. Note: numpy arrays are considered as scalars.
+ tuples/lists, or scalars. Note: numpy arrays are considered as scalars.
**check_types_dict: only valid keyword argument is `check_types`. If set to
- `True` (default) the types of iterables within the structures have to be
+ `True` (default) the types of iterables within the structures have to be
same (e.g. `map_structure(func, [1], (1,))` raises a `TypeError`
exception). To allow this set this argument to `False`.
diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh
index 44fc21df94..3b3a2da6cd 100755
--- a/tensorflow/tools/ci_build/install/install_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh
@@ -62,7 +62,7 @@ pip3 install scipy==0.18.1
pip2 install scikit-learn==0.18.1
pip3 install scikit-learn==0.18.1
-# pandas required by tf.learn/inflow
+# pandas required by `inflow`
pip2 install pandas==0.19.2
pip3 install pandas==0.19.2
diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
index 706d414746..e452c50221 100755
--- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
+++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh
@@ -80,7 +80,7 @@ pip3.5 install scipy==0.18.1
pip3.5 install scikit-learn==0.18.1
-# pandas required by tf.learn/inflow
+# pandas required by `inflow`
pip3 install pandas==0.19.2
# Install recent-enough version of wheel for Python 3.5 wheel builds
diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh
index ea88d8165f..b320a6222d 100755
--- a/tensorflow/tools/docker/parameterized_docker_build.sh
+++ b/tensorflow/tools/docker/parameterized_docker_build.sh
@@ -154,7 +154,7 @@ fi
# Verify that the original Dockerfile exists
ORIG_DOCKERFILE="${SCRIPT_DIR}/${ORIG_DOCKERFILE}"
if [[ ! -f "${ORIG_DOCKERFILE}" ]]; then
- die "ERROR: Cannot find Dockerilfe at: ${ORIG_DOCKERFILE}"
+ die "ERROR: Cannot find Dockerfile at: ${ORIG_DOCKERFILE}"
fi
echo ""
diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl
index 4dd3169d41..b85e565f36 100644
--- a/third_party/gpus/cuda_configure.bzl
+++ b/third_party/gpus/cuda_configure.bzl
@@ -106,7 +106,7 @@ def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp):
else:
inc_dirs = result.stderr[index1 + 1:index2].strip()
- return [repository_ctx.path(_cxx_inc_convert(p))
+ return [str(repository_ctx.path(_cxx_inc_convert(p)))
for p in inc_dirs.split("\n")]
def get_cxx_inc_directories(repository_ctx, cc):