# Copyright 2015 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Benchmark for split and grad of split.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session as session_lib from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import variables from tensorflow.python.platform import benchmark from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging as logging def build_graph(device, input_shape, output_sizes, axis): """Build a graph containing a sequence of split operations. Args: device: string, the device to run on. input_shape: shape of the input tensor. output_sizes: size of each output along axis. axis: axis to be split along. Returns: An array of tensors to run() """ with ops.device("/%s:0" % device): inp = array_ops.zeros(input_shape) outputs = [] for _ in range(100): outputs.extend(array_ops.split(inp, output_sizes, axis)) return control_flow_ops.group(*outputs) class SplitBenchmark(test.Benchmark): """Benchmark split!""" def _run_graph(self, device, output_shape, variable, num_outputs, axis): """Run the graph and print its execution time. Args: device: string, the device to run on. output_shape: shape of each output tensors. variable: whether or not the output shape should be fixed num_outputs: the number of outputs to split the input into axis: axis to be split Returns: The duration of the run in seconds. """ graph = ops.Graph() with graph.as_default(): if not variable: if axis == 0: input_shape = [output_shape[0] * num_outputs, output_shape[1]] sizes = [output_shape[0] for _ in range(num_outputs)] else: input_shape = [output_shape[0], output_shape[1] * num_outputs] sizes = [output_shape[1] for _ in range(num_outputs)] else: sizes = np.random.randint( low=max(1, output_shape[axis] - 2), high=output_shape[axis] + 2, size=num_outputs) total_size = np.sum(sizes) if axis == 0: input_shape = [total_size, output_shape[1]] else: input_shape = [output_shape[0], total_size] outputs = build_graph(device, input_shape, sizes, axis) config = config_pb2.ConfigProto(graph_options=config_pb2.GraphOptions( optimizer_options=config_pb2.OptimizerOptions( opt_level=config_pb2.OptimizerOptions.L0))) with session_lib.Session(graph=graph, config=config) as session: logging.set_verbosity("info") variables.global_variables_initializer().run() bench = benchmark.TensorFlowBenchmark() bench.run_op_benchmark( session, outputs, mbs=input_shape[0] * input_shape[1] * 4 * 2 * 100 / 1e6, extras={ "input_shape": input_shape, "variable": variable, "axis": axis }) def benchmark_split(self): print("Forward vs backward concat") shapes = [[2000, 8], [8, 2000], [100, 18], [1000, 18], [10000, 18], [100, 97], [1000, 97], [10000, 1], [1, 10000]] axis_ = [1] # 0 is very fast because it doesn't actually do any copying num_outputs = 100 variable = [False, True] # fixed input size or not for shape in shapes: for axis in axis_: for v in variable: self._run_graph("gpu", shape, v, num_outputs, axis) if __name__ == "__main__": test.main()