From 6bcb0391f144322c0eda536a02f568817302793d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Jun 2017 17:11:32 -0700 Subject: Support multiple inputs in hexagon_control_wrapper PiperOrigin-RevId: 159175598 --- .../src_impl/graph_functions_wrapper.c | 81 ++++-- .../src_impl/hexagon_controller.c | 323 +++++++++++++++------ .../src_impl/include/hexagon_controller.h | 33 ++- .../hexagon_controller/src_log/include/tfm_log.h | 3 + .../src_soc_interface/include/node_data_float.h | 5 +- .../src_soc_interface/include/soc_interface.h | 21 +- .../src_soc_interface/soc_interface.c | 110 ++++--- 7 files changed, 401 insertions(+), 175 deletions(-) (limited to 'tensorflow/contrib/hvx') diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c index 7c82158522..d83b58dc6b 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c @@ -93,13 +93,16 @@ static uint32_t FindMaxIdx(const float* data, uint32_t entries) { return FindMaxIdxWithExcludeList(data, entries, 0, NULL); } -void hexagon_controller_PrintMaxNIdx(const float *data, const uint32_t entries, +void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries, const int n, int* out_ranking) { if (DUMP_OUTPUT) { for (int i = 0; i < entries; ++i) { TFMLOGD("%d: val = %f", i, data[i]); } } + if (n >= entries) { + TFMLOGD("Too many N %d >= %d", n, entries); + } for (int i = 0; i < n; ++i) { out_ranking[i] = INT_MAX; } @@ -182,6 +185,32 @@ uint32_t hexagon_controller_SetupGraph(int version) { return nn_id; } +bool hexagon_controller_ExecuteGraphWithMultipleInOut( + const uint32_t nn_id, const int input_count, hexagon_nn_tensordef* inputs, + const int output_count, hexagon_nn_tensordef* outputs) { + if (DBG_EXECUTION) { + TFMLOGD("Preparing to execute... in = %d, out = %d", input_count, + output_count); + LogDHexagon("Execute graph!"); + } + + const int err = + hexagon_nn_execute_new(nn_id, inputs, input_count, outputs, output_count); + if (err != 0) { + if (DBG_EXECUTION) { + LogDHexagon("Execution failed!"); + TFMLOGE("execute got err: %d\n", err); + hexagon_controller_PrintLog(nn_id); + } + return false; + } else { + if (DBG_EXECUTION) { + LogDHexagon("Execution succeeded!"); + } + return true; + } +} + bool hexagon_controller_ExecuteGraph( const uint32_t nn_id, const uint32_t batches, @@ -197,7 +226,6 @@ bool hexagon_controller_ExecuteGraph( uint8_t* out_vals, const uint32_t output_val_byte_size, uint32_t* out_data_byte_size) { - int err; if (DBG_EXECUTION) { TFMLOGD("Preparing to execute..."); TFMLOGD("Input: %d, %d, %d, %d, %d, %d", @@ -205,35 +233,34 @@ bool hexagon_controller_ExecuteGraph( TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals); LogDHexagon("Execute graph!"); } - - if ((err = hexagon_nn_execute(nn_id, - batches, - height, - width, - depth, - int_data, - int_data_size, - out_batches, - out_height, - out_width, - out_depth, - out_vals, - output_val_byte_size, - out_data_byte_size)) != 0) { - if (DBG_EXECUTION) { - LogDHexagon("Execution failed!"); - TFMLOGE("execute got err: %d\n",err); - } + + hexagon_nn_tensordef input; + hexagon_nn_tensordef output; + + input.batches = batches; + input.height = height; + input.width = width; + input.depth = depth; + input.data = int_data; + input.dataLen = int_data_size; + + output.data = out_vals; + output.dataLen = output_val_byte_size; + + if (!hexagon_controller_ExecuteGraphWithMultipleInOut(nn_id, 1, &input, 1, + &output)) { return false; } else { + *out_batches = output.batches; + *out_height = output.height; + *out_width = output.width; + *out_depth = output.depth; + *out_data_byte_size = output.dataLen; + if (DBG_EXECUTION) { LogDHexagon("Execution succeeded!"); - TFMLOGD("%d x %d x %d x %d, byte size = %d\n", - *out_batches, - *out_height, - *out_width, - *out_depth, - *out_data_byte_size); + TFMLOGD("%d x %d x %d x %d, byte size = %d\n", *out_batches, *out_height, + *out_width, *out_depth, *out_data_byte_size); } return true; } diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c index 31caebf872..ba50da6abd 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -28,7 +28,8 @@ limitations under the License. #include "soc_interface.h" #include "tfm_log.h" -// if false, use int data as input. This is only for acceleration purpose +// if false, use int data as input. This is only for acceleration purpose. +// Also you may need to change android.min. static const bool USE_FLOAT_DATA = true; // if true, show id for each node @@ -43,27 +44,95 @@ extern uint8_t inception_dummy_int_data_224x224[]; extern uint8_t inception_dummy_int_data_299x299[]; extern float inception_dummy_float_data_299x299[]; -#define HEXAGON_CONTROLLER_VERSION 92 +#define HEXAGON_CONTROLLER_VERSION 101 // allocate print bufsize in advance @MB #define PRINT_BUFSIZE (2 * 1024 * 1024) static unsigned char s_print_buf[PRINT_BUFSIZE]; -// input node data buffer size -// x2 1024 * 1024 * 2 > 299 * 299 * 3 * 4 > 1024 * 1024 -static const int INPUT_NODE_DATA_BUFFER_SIZE = 1024 * 1024 * 2; -// output node data buffer size -// (1008 is enough for inception) -static const int OUTPUT_NODE_DATA_BUFFER_SIZE = 300 * 300 * 3 * 4; - -static struct NodeDataFloat s_input_node_data_float_buffer; -static float* s_output_node_data_float_buffer; -static int s_output_node_data_float_buffer_byte_size; -static int s_output_node_data_float_array_size; +#define MAX_INPUTS 10 +#define MAX_OUTPUTS 10 + +static struct NodeDataFloat s_input_node_data_buffer[MAX_INPUTS]; +static uint8_t* s_output_node_data_buffer[MAX_OUTPUTS]; +static int s_output_node_data_buffer_max_byte_size[MAX_OUTPUTS]; +static int s_output_node_data_array_byte_size[MAX_OUTPUTS]; static uint32_t s_target_graph_id; static bool s_dbg_use_inception_dummy_data = false; +static int s_dbg_inception_version = 3; + +static int GetInputNodeCount() { + for (int i = 0; i < MAX_INPUTS; ++i) { + if (s_input_node_data_buffer[i].max_buf_byte_size == 0) { + return i; + } + } + return 0; +} + +static int GetOutputNodeCount() { + for (int i = 0; i < MAX_OUTPUTS; ++i) { + if (s_output_node_data_buffer_max_byte_size[i] == 0) { + return i; + } + } + return 0; +} + +static bool SetInputTensorDef(int port, hexagon_nn_tensordef* tensordef) { + if (port >= GetInputNodeCount()) { + TFMLOGE("Error exceeds input count."); + return false; + } + NodeDataFloat* input_node_data_buffer = &s_input_node_data_buffer[port]; + tensordef->batches = input_node_data_buffer->x; + tensordef->height = input_node_data_buffer->y; + tensordef->width = input_node_data_buffer->z; + tensordef->depth = input_node_data_buffer->d; + tensordef->data = input_node_data_buffer->byte_array_data; + tensordef->dataLen = input_node_data_buffer->array_byte_size; + + return true; +} + +bool hexagon_controller_SetAllInputTensorDef(int node_count, + hexagon_nn_tensordef* tensordef) { + bool success = true; + if (node_count != GetInputNodeCount()) { + TFMLOGE("Error invalid input node count."); + return false; + } + for (int i = 0; i < node_count; ++i) { + SetInputTensorDef(i, &tensordef[i]); + } + return success; +} + +static bool SetOutputTensorDef(int port, hexagon_nn_tensordef* tensordef) { + if (port >= GetOutputNodeCount()) { + TFMLOGE("Error exceeds output count."); + return false; + } + tensordef->data = s_output_node_data_buffer[port]; + tensordef->dataLen = s_output_node_data_buffer_max_byte_size[port]; + return true; +} + +bool hexagon_controller_SetAllOutputTensorDef(int node_count, + hexagon_nn_tensordef* tensordef) { + bool success = true; + if (node_count != GetOutputNodeCount()) { + TFMLOGE("Error invalid output node count. %d != %d", node_count, + GetOutputNodeCount()); + return false; + } + for (int i = 0; i < node_count; ++i) { + SetOutputTensorDef(i, &tensordef[i]); + } + return success; +} void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) { if (version == 1) { @@ -72,44 +141,54 @@ void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) { return; } hexagon_controller_CopyByteNodeData( - INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1, + 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1, INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, 1, inception_dummy_int_data_224x224); } else if (version == 3) { if (USE_FLOAT_DATA) { hexagon_controller_CopyByteNodeData( - INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, sizeof(float), (uint8_t*)inception_dummy_float_data_299x299); } else { hexagon_controller_CopyByteNodeData( - INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, 1, inception_dummy_int_data_299x299); } } } -bool hexagon_controller_ExecuteGraphWithBuffer( - uint32_t nn_id, bool show_ranking) { - uint32_t out_batches, out_height, out_width, out_depth; - uint32_t out_data_size; - int x = s_input_node_data_float_buffer.x; - int y = s_input_node_data_float_buffer.y; - int z = s_input_node_data_float_buffer.z; - int d = s_input_node_data_float_buffer.d; - uint8_t *byte_data = s_input_node_data_float_buffer.byte_array_data; - int array_size = s_input_node_data_float_buffer.array_size; - const bool success = hexagon_controller_ExecuteGraph( - nn_id, x, y, z, d, byte_data, array_size, - &out_batches, &out_height, &out_width, &out_depth, - (uint8_t *)s_output_node_data_float_buffer, - s_output_node_data_float_buffer_byte_size, - &out_data_size); - s_output_node_data_float_array_size = - out_batches * out_height * out_width * out_depth; +bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id, + bool show_ranking) { + const int input_node_count = GetInputNodeCount(); + hexagon_nn_tensordef inputs[input_node_count]; + const int output_node_count = GetOutputNodeCount(); + if (output_node_count <= 0) { + TFMLOGI("Error output node count is 0."); + return false; + } + hexagon_nn_tensordef outputs[output_node_count]; + hexagon_controller_SetAllInputTensorDef(input_node_count, inputs); + hexagon_controller_SetAllOutputTensorDef(output_node_count, outputs); + const bool success = hexagon_controller_ExecuteGraphWithMultipleInOut( + nn_id, input_node_count, inputs, output_node_count, outputs); + for (int i = 0; i < output_node_count; ++i) { + s_output_node_data_array_byte_size[i] = outputs[i].data_valid_len; + } + + const hexagon_nn_tensordef* output0 = &outputs[0]; + + const uint32_t out_batches = output0->batches; + const uint32_t out_height = output0->height; + const uint32_t out_width = output0->width; + const uint32_t out_depth = output0->depth; + const uint32_t out_data_size = output0->data_valid_len; + const uint32_t out_buf_byte_size = output0->dataLen; + if (!success) { TFMLOGE("Execution failed"); + hexagon_controller_PrintLog(nn_id); return false; } else if (!show_ranking) { return true; @@ -118,15 +197,11 @@ bool hexagon_controller_ExecuteGraphWithBuffer( static const int OUT_RANKING_SIZE = 5; int out_ranking[OUT_RANKING_SIZE]; hexagon_controller_PrintMaxNIdx( - s_output_node_data_float_buffer, - out_batches * out_height * out_width * out_depth, - OUT_RANKING_SIZE, out_ranking); - TFMLOGD("%d x %d x %d x %d, byte size = %d\n", - out_batches, - out_height, - out_width, - out_depth, - out_data_size); + (float*)s_output_node_data_buffer[0], + out_batches * out_height * out_width * out_depth, OUT_RANKING_SIZE, + out_ranking); + TFMLOGD("%d x %d x %d x %d, byte size = %d, buf size = %d\n", out_batches, + out_height, out_width, out_depth, out_data_size, out_buf_byte_size); if (s_dbg_use_inception_dummy_data) { // Check the result of inception with a dummy data. This step shouldn't // be passed when show_ranking != true to avoid adding unnecessary @@ -168,64 +243,123 @@ int hexagon_controller_GetHexagonBinaryVersion() { return retval; } -bool hexagon_controller_AllocateNodeDataBuffers( - int input_size, int output_size) { - TFMLOGD("Allocate memory for input / output node data float"); - if (s_input_node_data_float_buffer.buf_size != 0) { +bool hexagon_controller_AllocateInputNodeDataBuffers(int port, + int input_buf_byte_size) { + TFMLOGD("Allocate memory for input node data. port = %d, size = %d", port, + input_buf_byte_size); + if (s_input_node_data_buffer[port].max_buf_byte_size != 0) { TFMLOGE("ERROR! input buffer is already allocated!!"); return false; } else { - int byte_array_data_size = USE_FLOAT_DATA ? - input_size * sizeof(float) : input_size; /* sizeof(uint8_t) ? */ - s_input_node_data_float_buffer.buf_size = input_size; - // unused? remove? - s_input_node_data_float_buffer.array_data = - malloc(input_size * sizeof(float)); - s_input_node_data_float_buffer.byte_array_data = - malloc(byte_array_data_size); - - s_output_node_data_float_buffer = malloc(output_size * sizeof(float)); - s_output_node_data_float_buffer_byte_size = output_size * sizeof(float); - s_output_node_data_float_array_size = 0; - TFMLOGD("allocate node data buffers"); + s_input_node_data_buffer[port].max_buf_byte_size = input_buf_byte_size; + posix_memalign((void**)&s_input_node_data_buffer[port].byte_array_data, 128, + input_buf_byte_size); + TFMLOGD("allocate input node data buffers done"); } return true; } -bool hexagon_controller_ReleaseNodeDataBuffers() { - if (s_input_node_data_float_buffer.buf_size == 0) { +bool hexagon_controller_AllocateOutputNodeDataBuffers( + int port, int output_buf_byte_size) { + TFMLOGD("Allocate memory for output node data. port = %d, size = %d", port, + output_buf_byte_size); + if (s_output_node_data_buffer_max_byte_size[port] != 0) { + TFMLOGE("ERROR! input buffer is already allocated!!"); + return false; + } else { + // s_output_node_data_buffer = malloc(output_size * sizeof(float)); + posix_memalign((void**)&s_output_node_data_buffer[port], 128, + output_buf_byte_size); + s_output_node_data_buffer_max_byte_size[port] = output_buf_byte_size; + s_output_node_data_array_byte_size[port] = 0; + TFMLOGD("allocate output node data buffers"); + } + return true; +} + +bool hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count, + int* input_sizes, + int output_count, + int* output_sizes) { + bool success = true; + for (int i = 0; i < input_count; ++i) { + success &= + hexagon_controller_AllocateInputNodeDataBuffers(i, input_sizes[i]); + } + for (int i = 0; i < output_count; ++i) { + success &= + hexagon_controller_AllocateOutputNodeDataBuffers(i, output_sizes[i]); + } + + if (s_dbg_use_inception_dummy_data) { + hexagon_controller_InitInputNodeDataToInceptionDummyData( + s_dbg_inception_version); + } + return success; +} + +bool hexagon_controller_AllocateNodeDataBuffers(int input_size, + int output_size) { + return hexagon_controller_AllocateMultipleNodeDataBuffers(1, &input_size, 1, + &output_size); +} + +bool hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port) { + NodeDataFloat* input_node_data_buffer = &s_input_node_data_buffer[port]; + if (input_node_data_buffer->max_buf_byte_size == 0) { TFMLOGE("ERROR! input buffer has not been allocated yet!!"); return false; } else { - s_input_node_data_float_buffer.buf_size = 0; - free(s_input_node_data_float_buffer.array_data); + input_node_data_buffer->max_buf_byte_size = 0; + input_node_data_buffer->array_byte_size = 0; + free(input_node_data_buffer->byte_array_data); } - if (s_output_node_data_float_buffer_byte_size == 0) { + return true; +} + +bool hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port) { + if (s_output_node_data_buffer_max_byte_size[port] == 0) { TFMLOGE("ERROR! output buffer has not been allocated yet!!"); return false; } else { - s_output_node_data_float_buffer_byte_size = 0; - free(s_input_node_data_float_buffer.byte_array_data); + s_output_node_data_buffer_max_byte_size[port] = 0; + s_output_node_data_array_byte_size[port] = 0; + free(s_output_node_data_buffer[port]); } return true; } -bool hexagon_controller_CopyByteNodeData( - int x, int y, int z, int d, int type_byte_size, uint8_t* array_data) { +bool hexagon_controller_ReleaseNodeDataBuffers() { + bool success = true; + for (int i = 0; i < GetInputNodeCount(); ++i) { + success &= hexagon_controller_ReleaseInputNodeDataBuffersWithPort(i); + } + for (int i = 0; i < GetOutputNodeCount(); ++i) { + success &= hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(i); + } + return success; +} + +bool hexagon_controller_CopyByteNodeData(int port, int x, int y, int z, int d, + int type_byte_size, + uint8_t* array_data) { int array_byte_size = x * y * z * d * type_byte_size; - TFMLOGD("--- %d, %d, %d, %d, %d, %d",x,y,z,d,type_byte_size,array_byte_size); - if (s_input_node_data_float_buffer.buf_size < array_byte_size) { + TFMLOGD("--- %d, %d, %d, %d, %d, %d", x, y, z, d, type_byte_size, + array_byte_size); + NodeDataFloat* input_node_data_buffer = s_input_node_data_buffer[0]; + + if (input_node_data_buffer->max_buf_byte_size < array_byte_size) { TFMLOGE("ERROR! input buffer size is too small! %d < %d", - s_input_node_data_float_buffer.buf_size, array_byte_size); + input_node_data_buffer->max_buf_byte_size, array_byte_size); return false; } - memcpy(s_input_node_data_float_buffer.byte_array_data, - array_data, array_byte_size); - s_input_node_data_float_buffer.array_size = array_byte_size; - s_input_node_data_float_buffer.x = x; - s_input_node_data_float_buffer.y = y; - s_input_node_data_float_buffer.z = z; - s_input_node_data_float_buffer.d = d; + memcpy(input_node_data_buffer->byte_array_data, array_data, + array_byte_size); + input_node_data_buffer->array_byte_size = array_byte_size; + input_node_data_buffer->x = x; + input_node_data_buffer->y = y; + input_node_data_buffer->z = z; + input_node_data_buffer->d = d; return true; } @@ -263,13 +397,8 @@ int hexagon_controller_InitHexagonWithMaxAttributes( dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0])); TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval); - hexagon_controller_AllocateNodeDataBuffers( - INPUT_NODE_DATA_BUFFER_SIZE, OUTPUT_NODE_DATA_BUFFER_SIZE); - - if (s_dbg_use_inception_dummy_data) { - hexagon_controller_InitInputNodeDataToInceptionDummyData(version); - } s_target_graph_id = 0; + s_dbg_inception_version = version; return retval; } @@ -289,14 +418,20 @@ void hexagon_controller_GrowMemorySize() { hexagon_nn_config(); } -struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer() { - return &s_input_node_data_float_buffer; +struct NodeDataFloat* hexagon_controller_GetInputNodeDataBuffer(int port) { + if (port >= GetInputNodeCount()) { + TFMLOGE("port should be less than 1"); + } + return &s_input_node_data_buffer[port]; } -float* hexagon_controller_GetOutputNodeDataFloatBuffer( - const char *const node_name, int* out_array_size) { - *out_array_size = s_output_node_data_float_array_size; - return s_output_node_data_float_buffer; +uint8_t* hexagon_controller_GetOutputNodeDataBuffer(int port, + int* out_array_byte_size) { + if (port >= GetOutputNodeCount()) { + TFMLOGE("port should be less than 1"); + } + *out_array_byte_size = s_output_node_data_array_byte_size[port]; + return s_output_node_data_buffer[port]; } // Append const node to the graph @@ -377,10 +512,8 @@ bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() { } void hexagon_controller_PrintLog(uint32_t nn_id) { - unsigned char *buf; - if ((buf = malloc(PRINT_BUFSIZE)) == NULL) { - return; - } + unsigned char* buf = NULL; + posix_memalign((void**)&buf, 128, PRINT_BUFSIZE); hexagon_nn_getlog(nn_id, buf, PRINT_BUFSIZE); TFMLOGE("DUMP HEXAGON LOG: %s", buf); free(buf); diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h index ab8c80c0f3..d23048c116 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h @@ -40,16 +40,37 @@ int hexagon_controller_GetWrapperVersion(); int hexagon_controller_GetHexagonBinaryVersion(); +// Buffer operations +bool hexagon_controller_SetAllInputTensorDef(int node_count, + hexagon_nn_tensordef* tensordef); + +bool hexagon_controller_SetAllInputTensorDef(int node_count, + hexagon_nn_tensordef* tensordef); + // Hexagon perf functions int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs, int bus_usage, int version); +bool hexagon_controller_AllocateInputNodeDataBuffersWithPort(int port, + int input_size); + +bool hexagon_controller_AllocateOutNodeDataBuffersWithPort(int port, + int output_size); + bool hexagon_controller_AllocateNodeDataBuffers(int input_size, int output_size); +bool hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count, + int* input_sizes, + int output_count, + int* output_sizes); + +bool hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port); +bool hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port); + bool hexagon_controller_ReleaseNodeDataBuffers(); -bool hexagon_controller_CopyByteNodeData(int x, int y, int z, int d, +bool hexagon_controller_CopyByteNodeData(int port, int x, int y, int z, int d, int type_byte_size, uint8_t* array_data); @@ -63,10 +84,10 @@ void hexagon_controller_SetTargetGraphId(uint32_t graph_id); void hexagon_controller_GrowMemorySize(); // Graph data transfer functions -struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer(); +struct NodeDataFloat* hexagon_controller_GetInputNodeDataBuffer(int port); -float* hexagon_controller_GetOutputNodeDataFloatBuffer( - const char* const node_name, int* out_array_size); +uint8_t* hexagon_controller_GetOutputNodeDataBuffer(int port, + int* out_array_byte_size); // Graph functions uint32_t hexagon_controller_InstantiateGraph(); @@ -79,6 +100,10 @@ uint32_t hexagon_controller_SetupGraph(int version); bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id); +bool hexagon_controller_ExecuteGraphWithMultipleInOut( + const uint32_t nn_id, const int input_count, hexagon_nn_tensordef* inputs, + const int output_count, hexagon_nn_tensordef* outputs); + bool hexagon_controller_ExecuteGraph( const uint32_t nn_id, const uint32_t batches, const uint32_t height, const uint32_t width, const uint32_t depth, uint8_t* int_data, diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h index e8615fd4ec..91bd15d383 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h +++ b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h @@ -33,6 +33,9 @@ static inline bool IsLogOn(int log_level) { return log_level >= s_log_level; } static inline void SetLogLevel(int log_level) { s_log_level = log_level; } +// Do nothing +static inline void SetExperimentalDebug() {} + #define TFMLOGV(fmt, ...) \ do { \ if (!IsLogOn(TFM_LOG_LEVEL_VERBOSE)) break; \ diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h index a9c3296e9f..c7034cc3a0 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h +++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h @@ -28,9 +28,8 @@ struct NodeDataFloat { int y; int z; int d; - int buf_size; - int array_size; - float* array_data; + int max_buf_byte_size; + int array_byte_size; uint8_t* byte_array_data; char node_name[NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE]; }; diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/soc_interface.h b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/soc_interface.h index 6d85e6ce48..30fad13fb5 100644 --- a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/soc_interface.h +++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/soc_interface.h @@ -43,13 +43,30 @@ bool soc_interface_Finalize(); bool soc_interface_ExecuteGraph(); // Teardown graph setup bool soc_interface_TeardownGraph(); + +// Allocate buffers for input node and output node +bool soc_interface_AllocateInOutNodeBuffers(int input_count, int* input_sizes, + int output_count, + int* output_sizes); + +// Send input data to SOC with port +bool soc_interface_FillInputNodeWithPort(int port, int x, int y, int z, int d, + const uint8_t* const buf, + uint64_t buf_byte_size); + // Send input data to SOC bool soc_interface_FillInputNodeFloat(int x, int y, int z, int d, const uint8_t* const buf, - uint64_t buf_size); + uint64_t buf_byte_size); + +// Load output data from SOC with port +bool soc_interface_ReadOutputNodeWithPort(int port, uint8_t** buf, + uint64_t* buf_byte_size); + // Load output data from SOC bool soc_interface_ReadOutputNodeFloat(const char* const node_name, - uint8_t** buf, uint64_t* buf_size); + uint8_t** buf, uint64_t* buf_byte_size); + // Setup graph // TODO(satok): Remove and use runtime version bool soc_interface_setupDummyGraph(int version); diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c index 7db8d4870c..a1387ee573 100755 --- a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c +++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c @@ -22,7 +22,12 @@ limitations under the License. #include "node_data_float.h" #include "tfm_log.h" +// to demonstrate the performance difference between ION and HLOS memory +// for sharing with ADSP. +#define USE_ION_MEMORY + const int64_t FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT = 0x01; +const int64_t FLAG_ENABLE_EXPERIMENTAL_DEBUG = 0x02; static const int INCEPTION_VERSION = 3; @@ -84,48 +89,62 @@ bool soc_interface_TeardownGraph() { return true; } -bool soc_interface_FillInputNodeFloat( - int x, int y, int z, int d, const uint8_t* const buf, - uint64_t buf_size) { - TFMLOGD("FillInputNodeFloat"); - struct NodeDataFloat* node_data_float = - hexagon_controller_GetInputNodeDataFloatBuffer(); - const int array_size = x * y * z * d; - if (array_size > node_data_float->buf_size) { - TFMLOGE("Array size exceeds buf size %d > %d", - array_size, node_data_float->buf_size); - return false; - } - if (buf_size != array_size * sizeof(float)) { - TFMLOGE("Invalid buf size!"); +bool soc_interface_AllocateInOutNodeBuffers(int input_count, int* input_sizes, + int output_count, + int* output_sizes) { + TFMLOGD("AllocateInOutNodeBuffers"); + return hexagon_controller_AllocateMultipleNodeDataBuffers( + input_count, input_sizes, output_count, output_sizes); +} + +bool soc_interface_FillInputNodeWithPort(int port, int x, int y, int z, int d, + const uint8_t* const buf, + uint64_t buf_byte_size) { + TFMLOGD("FillInputNodeWithPort %d", port); + struct NodeDataFloat* node_data = + hexagon_controller_GetInputNodeDataBuffer(port); + if (buf_byte_size > node_data->max_buf_byte_size) { + TFMLOGE("buf size exceeds max buf size"); return false; } - memcpy(node_data_float->byte_array_data, buf, buf_size); - node_data_float->x = x; - node_data_float->y = y; - node_data_float->z = z; - node_data_float->d = d; - node_data_float->array_size = buf_size; + memcpy(node_data->byte_array_data, buf, buf_byte_size); + node_data->x = x; + node_data->y = y; + node_data->z = z; + node_data->d = d; + node_data->array_byte_size = buf_byte_size; return true; } +bool soc_interface_FillInputNodeFloat(int x, int y, int z, int d, + const uint8_t* const buf, + uint64_t buf_byte_size) { + return soc_interface_FillInputNodeWithPort( + /*port=*/0, x, y, z, d, buf, buf_byte_size); +} + // TODO(satok): Remove and use runtime version -bool soc_interface_ReadOutputNodeFloat( - const char* const node_name, uint8_t** buf, uint64_t *buf_size) { - TFMLOGD("ReadOutputNodeFloat"); - int array_size = -1; - float* output_node_data_float = - hexagon_controller_GetOutputNodeDataFloatBuffer(node_name, &array_size); - if (array_size < 0) { +bool soc_interface_ReadOutputNodeWithPort(int port, uint8_t** buf, + uint64_t* buf_byte_size) { + TFMLOGD("ReadOutputNodeWithPort"); + int array_byte_size = -1; + uint8_t* output_node_data_buffer = + hexagon_controller_GetOutputNodeDataBuffer(port, &array_byte_size); + if (array_byte_size < 0) { TFMLOGE("Failed to read data."); return false; } - *buf = (uint8_t*)output_node_data_float; - *buf_size = array_size * sizeof(float); + *buf = output_node_data_buffer; + *buf_byte_size = array_byte_size; return true; } -bool soc_interface_SetupGraphDummy(int version) { +bool soc_interface_ReadOutputNodeFloat(const char* const node_name, + uint8_t** buf, uint64_t* buf_byte_size) { + return soc_interface_ReadOutputNodeWithPort(/*port=*/0, buf, buf_byte_size); +} + +bool soc_interface_setupDummyGraph(int version) { TFMLOGD("SetupGraphDummy"); const uint32_t graph_id = hexagon_controller_SetupGraph(version); if (graph_id == 0) { @@ -136,12 +155,14 @@ bool soc_interface_SetupGraphDummy(int version) { return true; } -bool soc_interface_AllocateNodeInputAndNodeOutputArray( - int total_input_count, int total_output_count) { +bool soc_interface_AllocateNodeInputAndNodeOutputArray(int total_input_count, + int total_output_count) { TFMLOGD("Allocate node inputs and node outputs array %d, %d", total_input_count, total_output_count); - s_node_inputs_array = malloc(total_input_count * sizeof(hexagon_nn_input)); - s_node_outputs_array = malloc(total_output_count * sizeof(hexagon_nn_output)); + posix_memalign((void**)&s_node_inputs_array, 128, + total_input_count * sizeof(hexagon_nn_input)); + posix_memalign((void**)&s_node_outputs_array, 128, + total_output_count * sizeof(hexagon_nn_output)); s_node_inputs_array_index = 0; s_node_outputs_array_index = 0; s_node_inputs_array_max_count = total_input_count; @@ -188,9 +209,9 @@ void* soc_interface_SetOneNodeOutputs(int output_count, int* max_size) { } // Append const node to the graph -bool soc_interface_AppendConstNode( - const char* const name, int node_id, int batch, int height, int width, int depth, - const uint8_t* const data, int data_length) { +bool soc_interface_AppendConstNode(const char* const name, int node_id, + int batch, int height, int width, int depth, + const uint8_t* const data, int data_length) { const uint32_t graph_id = hexagon_controller_GetTargetGraphId(); const int retval = hexagon_controller_AppendConstNode( name, graph_id, node_id, batch, height, width, depth, data, data_length); @@ -202,14 +223,14 @@ bool soc_interface_AppendConstNode( } // Append node to the graph -bool soc_interface_AppendNode( - const char* const name, int node_id, int ops_id, int padding_id, const void* const inputs, - int inputs_count, const void* const outputs, int outputs_count) { +bool soc_interface_AppendNode(const char* const name, int node_id, int ops_id, + int padding_id, const void* const inputs, + int inputs_count, const void* const outputs, + int outputs_count) { const uint32_t graph_id = hexagon_controller_GetTargetGraphId(); const int retval = hexagon_controller_AppendNode( - name, graph_id, node_id, ops_id, padding_id, - (hexagon_nn_input*) inputs, inputs_count, - (hexagon_nn_output*) outputs, outputs_count); + name, graph_id, node_id, ops_id, padding_id, (hexagon_nn_input*)inputs, + inputs_count, (hexagon_nn_output*)outputs, outputs_count); if (retval != 0) { TFMLOGE("Failed to append const node %d", node_id); return false; @@ -217,7 +238,6 @@ bool soc_interface_AppendNode( return true; } - // Instantiate graph bool soc_interface_InstantiateGraph() { const uint32_t nn_id = hexagon_controller_InstantiateGraph(); @@ -240,5 +260,7 @@ void soc_interface_SetDebugFlag(uint64_t flag) { if ((flag & FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT) != 0) { TFMLOGI("Enable always use panda data"); hexagon_controller_EnableDbgUseInceptionDummyData(true); + } else if ((flag & FLAG_ENABLE_EXPERIMENTAL_DEBUG) != 0) { + SetExperimentalDebug(); } } -- cgit v1.2.3