diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-12-14 13:05:24 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-12-14 13:08:58 -0800 |
commit | ccef6a711dcadfc57b80783216ee025bfcae4b47 (patch) | |
tree | 4128755c60a7277b55bd5a289d225a3ca146e04b /tensorflow/contrib/cudnn_rnn | |
parent | a99b32fb149d028cd31fe638f81c6ca56c6e3b57 (diff) |
Add RNN performance information.
Update cudnn_rnn_ops_benchmark, as its API had rotted.
PiperOrigin-RevId: 179084042
Diffstat (limited to 'tensorflow/contrib/cudnn_rnn')
-rw-r--r-- | tensorflow/contrib/cudnn_rnn/BUILD | 5 | ||||
-rw-r--r-- | tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py | 43 |
2 files changed, 20 insertions, 28 deletions
diff --git a/tensorflow/contrib/cudnn_rnn/BUILD b/tensorflow/contrib/cudnn_rnn/BUILD index fce2c03e69..0751624bc4 100644 --- a/tensorflow/contrib/cudnn_rnn/BUILD +++ b/tensorflow/contrib/cudnn_rnn/BUILD @@ -146,10 +146,10 @@ cuda_py_test( cuda_py_test( name = "cudnn_rnn_ops_benchmark", - size = "large", + size = "small", srcs = ["python/kernel_tests/cudnn_rnn_ops_benchmark.py"], additional_deps = [ - ":cudnn_rnn_ops_py", + ":cudnn_rnn_py", "//tensorflow/contrib/rnn:rnn_py", "//tensorflow/python:array_ops", "//tensorflow/python:client", @@ -164,7 +164,6 @@ cuda_py_test( "//tensorflow/python:variables", ], tags = [ - "manual", "noasan", # http://b/62067814 "nomsan", "notsan", diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py index ff409ac718..4fc5ff1bd1 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py @@ -20,8 +20,8 @@ from __future__ import print_function import time +from tensorflow.contrib import rnn as contrib_rnn from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops -from tensorflow.contrib.rnn.python.ops import core_rnn from tensorflow.contrib.rnn.python.ops import lstm_ops from tensorflow.python.client import session from tensorflow.python.framework import dtypes @@ -29,8 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import rnn_cell +from tensorflow.python.ops import rnn from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -44,19 +43,19 @@ class CudnnRNNBenchmark(test.Benchmark): "large": { "num_layers": 4, "num_units": 1024, - "seq_length": 40, + 
"seq_length": 50, "batch_size": 64, }, "medium": { "num_layers": 4, "num_units": 512, - "seq_length": 30, + "seq_length": 50, "batch_size": 64, }, "small": { "num_layers": 4, "num_units": 128, - "seq_length": 20, + "seq_length": 50, "batch_size": 64, }, } @@ -71,7 +70,7 @@ class CudnnRNNBenchmark(test.Benchmark): def _BenchmarkOp(self, op, desc): burn_in_steps = 10 - benchmark_steps = 40 + benchmark_steps = 20 with session.Session() as sess: sess.run(variables.global_variables_initializer()) for i in xrange(burn_in_steps + benchmark_steps): @@ -126,16 +125,12 @@ class CudnnRNNBenchmark(test.Benchmark): seq_length = config["seq_length"] with ops.Graph().as_default(), ops.device("/device:GPU:0"): - inputs = seq_length * [ - array_ops.zeros([batch_size, num_units], dtypes.float32) - ] - initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127) - - cell = rnn_cell.LSTMCell( - num_units=num_units, initializer=initializer, state_is_tuple=True) - multi_cell = rnn_cell.MultiRNNCell( - [cell() for _ in range(num_layers)]) - outputs, final_state = core_rnn.static_rnn( + inputs = array_ops.zeros([batch_size, seq_length, num_units], + dtypes.float32) + + multi_cell = contrib_rnn.MultiRNNCell( + [contrib_rnn.BasicLSTMCell(num_units) for _ in range(num_layers)]) + outputs, final_state = rnn.dynamic_rnn( multi_cell, inputs, dtype=dtypes.float32) trainable_variables = ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES) @@ -154,14 +149,12 @@ class CudnnRNNBenchmark(test.Benchmark): seq_length = config["seq_length"] with ops.Graph().as_default(), ops.device("/device:GPU:0"): - inputs = seq_length * [ - array_ops.zeros([batch_size, num_units], dtypes.float32) - ] - cell = lambda: lstm_ops.LSTMBlockCell(num_units=num_units) # pylint: disable=cell-var-from-loop - - multi_cell = rnn_cell.MultiRNNCell( - [cell() for _ in range(num_layers)]) - outputs, final_state = core_rnn.static_rnn( + inputs = array_ops.zeros([batch_size, seq_length, num_units], + dtypes.float32) + + 
multi_cell = contrib_rnn.MultiRNNCell( + [lstm_ops.LSTMBlockCell(num_units) for _ in range(num_layers)]) + outputs, final_state = rnn.dynamic_rnn( multi_cell, inputs, dtype=dtypes.float32) trainable_variables = ops.get_collection( ops.GraphKeys.TRAINABLE_VARIABLES) |