syntax = "proto3"; package tensorflow.tfprof; import "tensorflow/core/framework/attr_value.proto"; import "tensorflow/core/framework/step_stats.proto"; // It specifies the Python callstack that creates an op. message CodeDef { repeated Trace traces = 1; message Trace { string file = 1 [deprecated = true]; // deprecated by file_id. int64 file_id = 6; int32 lineno = 2; string function = 3 [deprecated = true]; // deprecated by function_id. int64 function_id = 7; string line = 4 [deprecated = true]; // deprecated line_id. int64 line_id = 8; int32 func_start_line = 5; } } message OpLogEntry { // op name. string name = 1; // float_ops is filled by tfprof Python API when called. It requires the // op has RegisterStatistics defined. Currently, Conv2D, MatMul, etc, are // implemented. int64 float_ops = 2; // User can define extra op type information for an op. This allows the user // to select a group of ops precisely using op_type as a key. repeated string types = 3; // Used to support tfprof "code" view. CodeDef code_def = 4; } message OpLogProto { repeated OpLogEntry log_entries = 1; // Maps from id of CodeDef file,function,line to its string // In the future can also map other id of other fields to string. map id_to_string = 2; } // A proto representation of the profiler's profile. // It allows serialization, shipping around and deserialization of the profiles. // // Please don't depend on the internals of the profile proto. message ProfileProto { map nodes = 1; // Whether or not has code traces. bool has_trace = 2; // Whether or not the TF device tracer fails to return accelerator // information (which could lead to 0 accelerator execution time). bool miss_accelerator_stream = 5; // Traced steps. repeated int64 steps = 3; // Maps from id of CodeDef file,function,line to its string // In the future can also map other id of other fields to string. map id_to_string = 4; } message ProfileNode { // graph node name. string name = 1; // graph operation type. string op = 9; // A unique id for the node. int64 id = 13; map inputs = 2; map input_shapes = 16; map outputs = 3; map output_shapes = 15; // A map from source node id to its output index to current node. map src_output_index = 14; repeated int64 shape = 4; repeated string op_types = 5; string canonical_device = 6; string host_device = 7; int64 float_ops = 8; CodeDef trace = 10; map attrs = 11; map execs = 12; } message ExecProfile { // Can be larger than 1 if run multiple times in loop. int64 run_count = 1; // The earliest/latest time including scheduling and execution. int64 all_start_micros = 2; int64 latest_end_micros = 3; // device -> vector of {op_start_micros, op_exec_micros} pairs. // accelerator_execs: gpu:id/stream:all -> {op_start_micros, op_exec_micros} // For accelerator, vector size can be larger than 1, multiple kernel fires // or in tf.while_loop. map accelerator_execs = 4; // cpu_execs: cpu/gpu:id -> {op_start_micros, op_exec_micros} // For cpu, vector size can be larger than 1 if in tf.while_loop. map cpu_execs = 5; // Each entry to memory information of a scheduling of the node. // Normally, there will be multiple entries in while_loop. repeated ExecMemory memory_execs = 7; // The allocation and deallocation times and sizes throughout execution. repeated AllocationRecord allocations = 11; // The devices related to this execution. repeated string devices = 6; } message ExecTime { repeated Tuple times = 1; } message ExecMemory { // This is the timestamp when the memory information was tracked. 
message ExecMemory {
  // The timestamp when the memory information was tracked.
  int64 memory_micros = 1;

  // NOTE: Please don't depend on the following 4 fields yet. Due to
  // TensorFlow internal tracing issues, the numbers can be quite wrong.
  // TODO(xpan): Fix the TensorFlow internal tracing.
  int64 host_temp_bytes = 2;
  int64 host_persistent_bytes = 3;
  int64 accelerator_temp_bytes = 4;
  int64 accelerator_persistent_bytes = 5;

  // Total bytes requested by the op.
  int64 requested_bytes = 6;
  // Total bytes requested by the op and released before op end.
  int64 peak_bytes = 7;
  // Total bytes requested by the op and not released after op end.
  int64 residual_bytes = 8;
  // Total bytes output by the op (not necessarily requested by the op).
  int64 output_bytes = 9;
  // The total number of bytes currently allocated by the allocator, if > 0.
  int64 allocator_bytes_in_use = 10;

  // The memory of each output of the operation.
  map<int32, Memory> output_memory = 11;
}

message Tuple {
  repeated int64 int64_values = 1;
}

message Memory {
  int64 bytes = 1;
  uint64 ptr = 2;
}
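// For illustration only: a text-format sketch of an ExecMemory entry. The
// byte counts and pointer value are assumed; output_memory maps each output
// index of the op to the Memory record backing that output.
//
//   memory_micros: 1000
//   requested_bytes: 4096
//   output_bytes: 4096
//   output_memory {
//     key: 0
//     value { bytes: 4096 ptr: 140234 }
//   }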
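// For illustration only: a text-format sketch of an OpLogProto showing how a
// CodeDef trace refers to strings through id_to_string. The op name, ids, and
// strings are assumed, not taken from a real log.
//
//   log_entries {
//     name: "model/dense/MatMul"
//     float_ops: 8192
//     code_def {
//       traces {
//         file_id: 1
//         lineno: 42
//         function_id: 2
//         line_id: 3
//         func_start_line: 40
//       }
//     }
//   }
//   id_to_string { key: 1 value: "model.py" }
//   id_to_string { key: 2 value: "build" }
//   id_to_string { key: 3 value: "y = tf.matmul(x, w)" }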