diff options
author | 2017-01-17 13:13:39 -0800 | |
---|---|---|
committer | 2017-01-17 13:22:42 -0800 | |
commit | 6ee65f3a30889a8b6660eb19413dd0214a32c600 (patch) | |
tree | 9983211d8a3fc6bc6f306a8231ac5c4c49186622 /tensorflow/contrib/hvx | |
parent | b398f90411ddb7caee1be4b73de271beb73a07fe (diff) |
Add experimental implementation for hexagon controller
Change: 144746814
Diffstat (limited to 'tensorflow/contrib/hvx')
6 files changed, 1124 insertions, 42 deletions
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c new file mode 100644 index 0000000000..f91a4b57d9 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/graph_functions_wrapper.c @@ -0,0 +1,355 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// to demonstrate the performance difference between ION and HLOS memory +// for sharing with ADSP. +#define USE_ION_MEMORY + +#include <limits.h> +#include <stdio.h> + +#include "hexagon_controller.h" +#include "hexagon_nn.h" +#include "tfm_log.h" + +static const uint32_t MAX_NODES = 2048; +static const uint32_t MAX_EVENT_COUNT = 256; + +static const bool DUMP_OUTPUT = false; +static const bool DBG_EXECUTION = true; + +static const int OUT_RANKING_SIZE = 5; + +// static only for this file. 
+// TODO(satok): allocate dynamically +static float s_output_values[300 * 300 * 3 * 4]; + +extern void init_graph(uint32_t id); +extern void init_graph_v1(uint32_t id); +extern uint8_t inception_sample_int_data_299x299[]; +extern uint8_t inception_sample_int_data_224x224[]; +extern float inception_dummy_float_data[]; + +enum InceptionVersion { + INCEPTION_V1, + INCEPTION_V3, +}; + +static enum InceptionVersion s_inception_version = INCEPTION_V3; + +///////////////////////////////////////////////// +// file local functions + +static const char *ConvertGraphInfoIdToName(unsigned int id) { + // TODO(satok): implement + return "?"; +} + +static const char *ConvertGraphInfoIdToOpName(unsigned int id) { + // TODO(satok): implement + return "?"; +} + +///////////////////////////////////////////////// +// file local utilities +static uint32_t FindMaxIdxWithExcludeList( + const float *data, uint32_t entries, const int exclude_size, + const int* exclude_idx) { + int i; + float maxval = data[0]; + int maxidx = 0; + for (i = 0; i < entries; i++) { + bool exclude = false; + for (int j = 0; j < exclude_size; ++j) { + if (exclude_idx[j] == i) { + exclude = true; + break; + } + } + if (exclude) { + continue; + } + if (maxval < data[i]) { + maxval = data[i]; + maxidx = i; + } + } + return maxidx; +} + +static uint32_t FindMaxIdx(const float* data, uint32_t entries) { + return FindMaxIdxWithExcludeList(data, entries, 0, NULL); +} + +void hexagon_controller_PrintMaxNIdx(const float *data, const uint32_t entries, + const int n, int* out_ranking) { + if (DUMP_OUTPUT) { + for (int i = 0; i < entries; ++i) { + TFMLOGD("%d: val = %f", i, data[i]); + } + } + for (int i = 0; i < n; ++i) { + out_ranking[i] = INT_MAX; + } + for (int i = 0; i < n; ++i) { + out_ranking[i] = FindMaxIdxWithExcludeList(data, entries, n, out_ranking); + } + TFMLOGD("=== RANKING ==="); + for (int i = 0; i < n; ++i) { + TFMLOGD("%d: id = %d, val = %f", i, out_ranking[i], data[out_ranking[i]]); + } +} + +static inline 
unsigned long long int GetCounter(hexagon_nn_perfinfo s) { + unsigned long long int ret; + ret = s.counter_hi; + ret <<= 32; + ret |= s.counter_lo; + return ret; +} + +static int CompareCycle(const void *va, const void *vb) { + const hexagon_nn_perfinfo *a = va; + const hexagon_nn_perfinfo *b = vb; + unsigned long long int acount = GetCounter(*a); + unsigned long long int bcount = GetCounter(*b); + if (acount < bcount) { + return -1; + } else if (acount > bcount) { + return 1; + } else { + return 0; + } +} + +///////////////////////////////////////////////// +// Graph functions + +uint32_t hexagon_controller_InstantiateGraph() { + const uint32_t nn_id = hexagon_nn_init(); + // set debug level to 99 for now + //hexagon_nn_set_debug_level(nn_id, 99); + // TODO(satok): make this as argument + hexagon_nn_set_debug_level(nn_id, 0); + return nn_id; +} + +void hexagon_controller_InitGraph(int version, uint32_t nn_id) { + if (version == 1) { + s_inception_version = INCEPTION_V1; + } else if (version == 3) { + s_inception_version = INCEPTION_V3; + } else { + TFMLOGE("Unsupported inception version %d", version); + return; + } + if (s_inception_version == INCEPTION_V3) { + init_graph(nn_id); + } else if (s_inception_version == INCEPTION_V1) { + init_graph_v1(nn_id); + } + TFMLOGD("Init graph (inception version = %d) done.", version); +} + +bool hexagon_controller_ConstructGraph(uint32_t nn_id) { + int err; + if ((err = hexagon_nn_prepare(nn_id)) != 0) { + TFMLOGE("Prepare failed! 
returned 0x%x\n", err); + return false; + } else { + TFMLOGD("Prepare success!\n"); + return true; + } +} + +uint32_t hexagon_controller_SetupGraph(int version) { + const uint32_t nn_id = hexagon_controller_InstantiateGraph(); + hexagon_controller_InitGraph(version, nn_id); + hexagon_controller_ConstructGraph(nn_id); + return nn_id; +} + +bool hexagon_controller_ExecuteGraph( + const uint32_t nn_id, + const uint32_t batches, + const uint32_t height, + const uint32_t width, + const uint32_t depth, + uint8_t* int_data, + const uint32_t int_data_size, + uint32_t* out_batches, + uint32_t* out_height, + uint32_t* out_width, + uint32_t* out_depth, + uint8_t* out_vals, + const uint32_t output_val_byte_size, + uint32_t* out_data_byte_size) { + int err; + if (DBG_EXECUTION) { + TFMLOGD("Preparing to execute..."); + TFMLOGD("Input: %d, %d, %d, %d, %d, %d", + batches, height, width, depth, int_data[0], int_data_size); + TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals); + LogDHexagon("Execute graph!"); + } + + if ((err = hexagon_nn_execute(nn_id, + batches, + height, + width, + depth, + int_data, + int_data_size, + out_batches, + out_height, + out_width, + out_depth, + out_vals, + output_val_byte_size, + out_data_byte_size)) != 0) { + if (DBG_EXECUTION) { + LogDHexagon("Execution failed!"); + TFMLOGE("execute got err: %d\n",err); + } + return false; + } else { + if (DBG_EXECUTION) { + LogDHexagon("Execution succeeded!"); + TFMLOGD("%d x %d x %d x %d, byte size = %d\n", + *out_batches, + *out_height, + *out_width, + *out_depth, + *out_data_byte_size); + } + return true; + } +} + +bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id) { + uint32_t out_batches, out_height, out_width, out_depth; + uint32_t out_data_size; + // s_output_values = 300 * 300 * 3 * 4 * 4 + const bool success = hexagon_controller_ExecuteGraph( + nn_id, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, + (uint8_t 
*)inception_sample_int_data_299x299, + INCEPTION_PARAM_HEIGHT_V3 * INCEPTION_PARAM_WIDTH_V3 * + INCEPTION_PARAM_DEPTH, + &out_batches, &out_height, &out_width, &out_depth, + (uint8_t *)s_output_values, sizeof(s_output_values), + &out_data_size); + if (success) { + int out_ranking[OUT_RANKING_SIZE]; + hexagon_controller_PrintMaxNIdx( + s_output_values, + out_batches * out_height * out_width * out_depth, + OUT_RANKING_SIZE, out_ranking); + TFMLOGD("%d x %d x %d x %d, size = %d\n", + out_batches, + out_height, + out_width, + out_depth, + out_data_size); + TFMLOGD("max idx: %d\n", FindMaxIdx( + s_output_values, + out_batches * out_height * out_width * out_depth)); + if (out_ranking[0] == 169 && out_ranking[1] == 7) { + return true; + } else { + TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]); + return false; + } + } else { + return false; + } +} + +void hexagon_controller_DumpPerf(uint32_t nn_id) { + hexagon_nn_perfinfo info[MAX_NODES]; + unsigned long long int total_cycles = 0; + unsigned long long int cum_cycles = 0; + unsigned long long int counter = 0; + int n_nodes; + int i; + TFMLOGD("Perf dump follows:"); + if (hexagon_nn_get_perfinfo(nn_id, info, MAX_NODES,&n_nodes) != 0) { + TFMLOGE("perf info failure"); + return; + } + TFMLOGD("Total %d nodes.",n_nodes); + qsort(info,n_nodes,sizeof(info[0]), CompareCycle); + for (i = 0; i < n_nodes; i++) { + total_cycles += GetCounter(info[i]); + } + TFMLOGD("Total %lld cycles.",total_cycles); + for (i = 0; i < n_nodes; i++) { + counter = GetCounter(info[i]); + cum_cycles += counter; + TFMLOGD("node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%," + "cum_cycles,%lld,%f %%\n", + info[i].node_id, + ConvertGraphInfoIdToName(info[i].node_id), + ConvertGraphInfoIdToOpName(info[i].node_id), + info[i].executions, + counter, + 100*((double)counter)/total_cycles, + cum_cycles, + 100*((double)cum_cycles)/total_cycles); + } +#ifdef ENABLE_HVX_FULL_DEBUG + DumpAllPerf(nn_id); +#endif +} + +void 
hexagon_controller_DumpNodeName(uint32_t nn_id) { + TFMLOGD("Show node name"); + const uint32_t id = nn_id; + hexagon_nn_perfinfo info[MAX_NODES]; + unsigned long long int total_cycles = 0; + unsigned long long int cum_cycles = 0; + unsigned long long int counter = 0; + int node_count; + int i; + TFMLOGD("Perf dump follows:"); + if (hexagon_nn_get_perfinfo(id, info, MAX_NODES, &node_count) != 0) { + TFMLOGD("perf info failure"); + return; + } + TFMLOGD("Total %d nodes.",node_count); + qsort(info, node_count, sizeof(info[0]), CompareCycle); + for (i = 0; i < node_count; i++) { + total_cycles += GetCounter(info[i]); + } + TFMLOGD("Total %lld cycles.", total_cycles); + for (i = 0; i < node_count; i++) { + counter = GetCounter(info[i]); + cum_cycles += counter; + TFMLOGD("node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%," + "cum_cycles,%lld,%f %%", + info[i].node_id, + ConvertGraphInfoIdToName(info[i].node_id), + ConvertGraphInfoIdToOpName(info[i].node_id), + info[i].executions, + counter, + 100*((double)counter)/total_cycles, + cum_cycles, + 100*((double)cum_cycles)/total_cycles); + } +} + +void hexagon_controller_Teardown(uint32_t nn_id) { + hexagon_nn_teardown(nn_id); +} diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c new file mode 100644 index 0000000000..33a37bf77a --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/hexagon_controller.c @@ -0,0 +1,374 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// to demonstrate the performance difference between ION and HLOS memory +// for sharing with ADSP. +#define USE_ION_MEMORY + +#include "hexagon_controller.h" + +#include <malloc.h> +#include <stdio.h> + +#include "adspmsgd.h" +#include "dspCV.h" +#include "rpcmem.h" // helper API's for shared buffer allocation +#include "soc_interface.h" +#include "tfm_log.h" + +// if false, use int data as input. This is only for acceleration purpose +static const bool USE_FLOAT_DATA = true; + +// if true, show id for each node +static const bool DBG_SHOW_ID = false; + +static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000; + +// extern pre-generated inception dummy data +extern uint8_t inception_dummy_int_data_224x224[]; +extern uint8_t inception_dummy_int_data_299x299[]; +extern float inception_dummy_float_data[]; + +#define GEMM_WRAPPER_VERSION 1 + +// allocate print bufsize in advance @MB +#define PRINT_BUFSIZE (2 * 1024 * 1024) + +static unsigned char s_print_buf[PRINT_BUFSIZE]; + +// input node data buffer size +// x2 1024 * 1024 * 2 > 299 * 299 * 3 * 4 > 1024 * 1024 +static const int INPUT_NODE_DATA_BUFFER_SIZE = 1024 * 1024 * 2; +// output node data buffer size +// (1008 is enough for inception) +static const int OUTPUT_NODE_DATA_BUFFER_SIZE = 300 * 300 * 3 * 4; + +static struct NodeDataFloat s_input_node_data_float_buffer; +static float* s_output_node_data_float_buffer; +static int s_output_node_data_float_buffer_byte_size; +static int s_output_node_data_float_array_size; +static uint32_t 
s_target_graph_id; + +static bool s_dbg_use_inception_dummy_data = false; + +void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) { + if (version == 1) { + if (USE_FLOAT_DATA) { + TFMLOGE("ERROR!!!! Do not use float data for v1"); + return; + } + hexagon_controller_CopyByteNodeData( + INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1, + INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, + 1, inception_dummy_int_data_224x224); + } else if (version == 3) { + if (USE_FLOAT_DATA) { + hexagon_controller_CopyByteNodeData( + INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, + sizeof(float), (uint8_t*)inception_dummy_float_data); + } else { + hexagon_controller_CopyByteNodeData( + INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, + INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, + 1, inception_dummy_int_data_299x299); + } + } +} + +bool hexagon_controller_ExecuteGraphWithBuffer( + uint32_t nn_id, bool show_ranking) { + uint32_t out_batches, out_height, out_width, out_depth; + uint32_t out_data_size; + int x = s_input_node_data_float_buffer.x; + int y = s_input_node_data_float_buffer.y; + int z = s_input_node_data_float_buffer.z; + int d = s_input_node_data_float_buffer.d; + uint8_t *byte_data = s_input_node_data_float_buffer.byte_array_data; + int array_size = s_input_node_data_float_buffer.array_size; + const bool success = hexagon_controller_ExecuteGraph( + nn_id, x, y, z, d, byte_data, array_size, + &out_batches, &out_height, &out_width, &out_depth, + (uint8_t *)s_output_node_data_float_buffer, + s_output_node_data_float_buffer_byte_size, + &out_data_size); + s_output_node_data_float_array_size = + out_batches * out_height * out_width * out_depth; + if (!success) { + TFMLOGE("Execution failed"); + return false; + } else if (!show_ranking) { + return true; + } + + static const int OUT_RANKING_SIZE = 5; + int out_ranking[OUT_RANKING_SIZE]; + hexagon_controller_PrintMaxNIdx( + 
s_output_node_data_float_buffer, + out_batches * out_height * out_width * out_depth, + OUT_RANKING_SIZE, out_ranking); + TFMLOGD("%d x %d x %d x %d, byte size = %d\n", + out_batches, + out_height, + out_width, + out_depth, + out_data_size); + if (s_dbg_use_inception_dummy_data) { + // Check the result of inception with a dummy data. This step shouldn't + // be passed when show_ranking != true to avoid adding unnecessary + // additional computation cost. + if (out_ranking[0] == 169 && out_ranking[1] == 7) { + TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]); + return true; + } else { + TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]); + return false; + } + } + return true; +} + +uint32_t hexagon_controller_GetTargetGraphId() { + return s_target_graph_id; +} + +void hexagon_controller_SetTargetGraphId(uint32_t graph_id) { + s_target_graph_id = graph_id; +} + +void hexagon_controller_PrintGraph(uint32_t id) { + int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE); + TFMLOGD("PrintGraph %s\n", s_print_buf); + if (retval) { + TFMLOGE("Error on print graph\n"); + } +} + +int hexagon_controller_GetWrapperVersion() { + return GEMM_WRAPPER_VERSION; +} + +int hexagon_controller_GetHexagonBinaryVersion() { + int retval = 0; + hexagon_nn_GetHexagonBinaryVersion(&retval); + return retval; +} + +bool hexagon_controller_AllocateNodeDataBuffers( + int input_size, int output_size) { + TFMLOGD("Allocate memory for input / output node data float"); + if (s_input_node_data_float_buffer.buf_size != 0) { + TFMLOGE("ERROR! input buffer is already allocated!!"); + return false; + } else { + int byte_array_data_size = USE_FLOAT_DATA ? + input_size * sizeof(float) : input_size; /* sizeof(uint8_t) ? */ + s_input_node_data_float_buffer.buf_size = input_size; + // unused? remove? 
+ s_input_node_data_float_buffer.array_data = + malloc(input_size * sizeof(float)); + s_input_node_data_float_buffer.byte_array_data = + malloc(byte_array_data_size); + + s_output_node_data_float_buffer = malloc(output_size * sizeof(float)); + s_output_node_data_float_buffer_byte_size = output_size * sizeof(float); + s_output_node_data_float_array_size = 0; + TFMLOGD("allocate node data buffers"); + } + return true; +} + +bool hexagon_controller_ReleaseNodeDataBuffers() { + if (s_input_node_data_float_buffer.buf_size == 0) { + TFMLOGE("ERROR! input buffer has not been allocated yet!!"); + return false; + } else { + s_input_node_data_float_buffer.buf_size = 0; + free(s_input_node_data_float_buffer.array_data); + } + if (s_output_node_data_float_buffer_byte_size == 0) { + TFMLOGE("ERROR! output buffer has not been allocated yet!!"); + return false; + } else { + s_output_node_data_float_buffer_byte_size = 0; + free(s_input_node_data_float_buffer.byte_array_data); + } + return true; +} + +bool hexagon_controller_CopyByteNodeData( + int x, int y, int z, int d, int type_byte_size, uint8_t* array_data) { + int array_byte_size = x * y * z * d * type_byte_size; + TFMLOGD("--- %d, %d, %d, %d, %d, %d",x,y,z,d,type_byte_size,array_byte_size); + if (s_input_node_data_float_buffer.buf_size < array_byte_size) { + TFMLOGE("ERROR! input buffer size is too small! 
%d < %d", + s_input_node_data_float_buffer.buf_size, array_byte_size); + return false; + } + memcpy(s_input_node_data_float_buffer.byte_array_data, + array_data, array_byte_size); + s_input_node_data_float_buffer.array_size = array_byte_size; + s_input_node_data_float_buffer.x = x; + s_input_node_data_float_buffer.y = y; + s_input_node_data_float_buffer.z = z; + s_input_node_data_float_buffer.d = d; + return true; +} + +int hexagon_controller_InitHexagonWithMaxAttributes( + int enable_dcvs, int bus_usage, int version) { + TFMLOGI("Init hexagon with max attributes"); + const int MCPS = 1000; + const int MBPS = 12000; + + adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096); + + dspCV_Attribute attrib[] = { + // The below values will result in the maximum aDSP performance, + // at Turbo voltage. + // Slightly more MCPS than are available on current targets + {DSP_TOTAL_MCPS, MCPS}, + // drive the clock to MAX on known targets + {DSP_MCPS_PER_THREAD, MCPS / 2}, + // 12 GB/sec is slightly higher than the max realistic + // max BW on existing targets. 
+ {PEAK_BUS_BANDWIDTH_MBPS, MBPS}, + // This app is non-real time, and constantly reading/writing memory + {BUS_USAGE_PERCENT, bus_usage}, + }; + int retval = 0; + if (!enable_dcvs) { + retval = hexagon_nn_disableDcvs(); + if (retval) { + TFMLOGE("Failed to disable DSP DCVS: %x\n", retval); + } + } + + retval = + dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0])); + TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval); + + hexagon_controller_AllocateNodeDataBuffers( + INPUT_NODE_DATA_BUFFER_SIZE, OUTPUT_NODE_DATA_BUFFER_SIZE); + + if (s_dbg_use_inception_dummy_data) { + hexagon_controller_InitInputNodeDataToInceptionDummyData(version); + } + s_target_graph_id = 0; + + return retval; +} + +int hexagon_controller_DeInitHexagon() { + adspmsgd_stop(); + TFMLOGI("Finalize hexagon"); + const int retval = dspCV_deinitQ6(); + TFMLOGD("return value from dspCV_deinitQ6(): %d \n", retval); + + hexagon_controller_ReleaseNodeDataBuffers(); + + return retval; +} + +void hexagon_controller_GrowMemorySize() { + hexagon_nn_config(); +} + +struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer() { + return &s_input_node_data_float_buffer; +} + +float* hexagon_controller_GetOutputNodeDataFloatBuffer( + const char *const node_name, int* out_array_size) { + *out_array_size = s_output_node_data_float_array_size; + return s_output_node_data_float_buffer; +} + +// Append const node to the graph +int hexagon_controller_AppendConstNode( + const char* const name, int graph_id, int node_id, + int batch, int height, int width, int depth, + const uint8_t* const data, int data_length) { + if (DBG_SHOW_ID) { + TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d", + name, node_id, batch, height, width, depth, data_length); + } else { + TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d", + name, batch, height, width, depth, data_length); + } + const int retval = hexagon_nn_append_const_node( + graph_id, node_id, batch, height, width, depth, data, data_length); + if 
(retval != 0) { + TFMLOGE("Failed to append const node %d", node_id); + return retval; + } + return retval; +} + +// Append node to the graph +int hexagon_controller_AppendNode( + const char* const name, int graph_id, int node_id, int ops_id, + int padding_id, const hexagon_nn_input* const inputs, + int inputs_count, const hexagon_nn_output* const outputs, + int outputs_count) { + char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE]; + memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE); + int pos = 0; + pos += snprintf(&input_param_buf[pos], 500, "in: "); + for (int i = 0; i < inputs_count; ++i) { + if (DBG_SHOW_ID) { + pos += snprintf(&input_param_buf[pos], 500, "(%d, %d), ", + inputs[i].src_id, inputs[i].output_idx); + } else { + pos += snprintf(&input_param_buf[pos], 500, "(%d), ", + inputs[i].output_idx); + } + } + + char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE]; + memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE); + pos = 0; + pos += snprintf(&output_param_buf[pos], 500, "out: "); + for (int i = 0; i < outputs_count; ++i) { + pos += snprintf(&output_param_buf[pos], 500, "(%d), ", outputs[i].max_size); + } + + if (DBG_SHOW_ID) { + TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id, + ops_id, padding_id, inputs_count, outputs_count, input_param_buf, + output_param_buf); + } else { + TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name, + ops_id, padding_id, inputs_count, outputs_count, input_param_buf, + output_param_buf); + } + const int retval = hexagon_nn_append_node( + graph_id, node_id, ops_id, padding_id, + inputs, inputs_count, + outputs, outputs_count); + if (retval != 0) { + TFMLOGE("Failed to append const node %d", node_id); + return retval; + } + return retval; +} + +void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) { + s_dbg_use_inception_dummy_data = enable; +} + +bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() { + return s_dbg_use_inception_dummy_data; +} diff --git 
a/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h new file mode 100644 index 0000000000..eaf4a58751 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_impl/include/hexagon_controller.h @@ -0,0 +1,124 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef GEMM_WRAPPER_H +#define GEMM_WRAPPER_H + +#include <stdbool.h> +#include <stdlib.h> + +#include "hexagon_nn.h" +#include "node_data_float.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define INCEPTION_PARAM_BATCHES 1 +#define INCEPTION_PARAM_HEIGHT_V1 224 +#define INCEPTION_PARAM_WIDTH_V1 224 +#define INCEPTION_PARAM_HEIGHT_V3 299 +#define INCEPTION_PARAM_WIDTH_V3 299 +#define INCEPTION_PARAM_DEPTH 3 + +// General functions +void hexagon_controller_PrintGraph(uint32_t nn_id); + +int hexagon_controller_GetWrapperVersion(); + +int hexagon_controller_GetHexagonBinaryVersion(); + +// Hexagon perf functions +int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs, + int bus_usage, int version); + +bool hexagon_controller_AllocateNodeDataBuffers(int input_size, + int output_size); + +bool hexagon_controller_ReleaseNodeDataBuffers(); + +bool hexagon_controller_CopyByteNodeData(int x, int y, int z, int d, + int type_byte_size, + uint8_t* 
array_data); + +int hexagon_controller_DeInitHexagon(); + +uint32_t hexagon_controller_GetTargetGraphId(); + +void hexagon_controller_SetTargetGraphId(uint32_t graph_id); + +// Hexagon config functions +void hexagon_controller_GrowMemorySize(); + +// Graph data transfer functions +struct NodeDataFloat* hexagon_controller_GetInputNodeDataFloatBuffer(); + +float* hexagon_controller_GetOutputNodeDataFloatBuffer( + const char* const node_name, int* out_array_size); + +// Graph functions +uint32_t hexagon_controller_InstantiateGraph(); + +void hexagon_controller_InitGraph(int version, uint32_t nn_id); + +bool hexagon_controller_ConstructGraph(uint32_t nn_id); + +uint32_t hexagon_controller_SetupGraph(int version); + +bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id); + +bool hexagon_controller_ExecuteGraph( + const uint32_t nn_id, const uint32_t batches, const uint32_t height, + const uint32_t width, const uint32_t depth, uint8_t* int_data, + const uint32_t int_data_size, uint32_t* out_batches, uint32_t* out_height, + uint32_t* out_width, uint32_t* out_depth, uint8_t* out_vals, + const uint32_t output_val_byte_size, uint32_t* out_data_byte_size); + +bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id, + bool show_ranking); + +void hexagon_controller_DumpPerf(uint32_t nn_id); + +void hexagon_controller_DumpNodeName(uint32_t nn_id); + +void hexagon_controller_Teardown(uint32_t nn_id); + +void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries, + const int n, int* out_ranking); + +void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version); + +int hexagon_controller_AppendNode(const char* const name, int graph_id, + int node_id, int op_id, int padding_id, + const hexagon_nn_input* const inputs, + int inputs_count, + const hexagon_nn_output* const outputs, + int outputs_count); + +int hexagon_controller_AppendConstNode(const char* const name, int graph_id, + int node_id, int batch, int height, + int width, int 
depth, + const uint8_t* const data, + int data_length); + +void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable); + +bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled(); + +#ifdef __cplusplus +} +#endif // __cplusplus + +#endif // GEMM_WRAPPER_H diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h new file mode 100644 index 0000000000..55fc000ff4 --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_log/include/tfm_log.h @@ -0,0 +1,68 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef GEMM_WRAPPER_LOG_H +#define GEMM_WRAPPER_LOG_H + +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> + +#define TFM_LOG_LEVEL_VERBOSE -2 +#define TFM_LOG_LEVEL_DEBUG -1 +#define TFM_LOG_LEVEL_INFO 0 +#define TFM_LOG_LEVEL_WARNING 1 +#define TFM_LOG_LEVEL_ERROR 2 +#define TFM_LOG_LEVEL_FATAL 3 + +static int s_log_level = TFM_LOG_LEVEL_INFO; + +static inline bool IsLogOn(int log_level) { return log_level >= s_log_level; } + +static inline void SetLogLevel(int log_level) { s_log_level = log_level; } + +#define TFMLOGD(fmt, ...) \ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_DEBUG)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +#define TFMLOGI(fmt, ...) 
\ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_INFO)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +#define TFMLOGE(fmt, ...) \ + do { \ + if (!IsLogOn(TFM_LOG_LEVEL_ERROR)) break; \ + printf(fmt "\n", ##__VA_ARGS__); \ + } while (0) + +static inline void PrintLogHexagon(const char* fmt, va_list ap) { + char buffer[200]; + const int count = snprintf(buffer, 200, fmt, ap); + buffer[count] = 0; + TFMLOGI("%s", buffer); +} + +static inline void LogDHexagon(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + PrintLogHexagon(fmt, ap); + va_end(ap); +} + +#endif diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h new file mode 100644 index 0000000000..a9c3296e9f --- /dev/null +++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/include/node_data_float.h @@ -0,0 +1,41 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#ifndef NODE_DATA_FLOAT_H
+#define NODE_DATA_FLOAT_H
+
+#include <inttypes.h>  // uint8_t; needed by C and C++ includers alike
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#define NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE 100
+
+struct NodeDataFloat {
+  int x;
+  int y;
+  int z;
+  int d;
+  int buf_size;    // capacity; soc_interface.c compares it to x * y * z * d
+  int array_size;  // soc_interface.c stores the payload size in bytes here
+  float* array_data;
+  uint8_t* byte_array_data;
+  char node_name[NODE_DATA_FLOAT_NODE_NAME_BUF_SIZE];
+};
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // NODE_DATA_FLOAT_H
diff --git a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c
index ebcbb963e8..7db8d4870c 100755
--- a/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c
+++ b/tensorflow/contrib/hvx/hexagon_controller/src_soc_interface/soc_interface.c
@@ -15,110 +15,338 @@ limitations under the License.
 
 #include "soc_interface.h"
 
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hexagon_controller.h"
+#include "hexagon_nn.h"
+#include "node_data_float.h"
+#include "tfm_log.h"
+
+// Bit of the soc_interface_SetDebugFlag() mask that switches on the
+// Inception dummy input data.
+const int64_t FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT = 0x01;
+
+// Version handed to the controller at init and when loading dummy input.
+static const int INCEPTION_VERSION = 3;
+
+// Backing store sliced up by soc_interface_SetOneNodeInputs(): *_index is the
+// next free element, *_max_count the number of elements allocated.
+static hexagon_nn_input* s_node_inputs_array;
+static int s_node_inputs_array_index;
+static int s_node_inputs_array_max_count;
+
+// Same bookkeeping for soc_interface_SetOneNodeOutputs().
+static hexagon_nn_output* s_node_outputs_array;
+static int s_node_outputs_array_index;
+static int s_node_outputs_array_max_count;
+
+// Logs the call and returns hexagon_controller_GetWrapperVersion().
 int soc_interface_GetWrapperVersion() {
-  // TODO(satok): implement
-  return -1;
+  TFMLOGD("GetWrapperVersion");
+  return hexagon_controller_GetWrapperVersion();
 }
 
+// Logs the call and returns hexagon_controller_GetHexagonBinaryVersion().
 int soc_interface_GetSocControllerVersion() {
-  // TODO(satok): implement
-  return -1;
+  TFMLOGD("GetSocControllerVersion");
+  return hexagon_controller_GetHexagonBinaryVersion();
 }
 
+// Initializes the Hexagon controller for INCEPTION_VERSION, then calls
+// hexagon_controller_GrowMemorySize(). Always returns true.
 bool soc_interface_Init() {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("Init");
+  hexagon_controller_InitHexagonWithMaxAttributes(
+      0, 100, INCEPTION_VERSION /* version */);
+  hexagon_controller_GrowMemorySize();
+  return true;
 }
 
+// De-initializes the Hexagon controller. Always returns true.
 bool soc_interface_Finalize() {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("Finalize");
+  hexagon_controller_DeInitHexagon();
+  return true;
 }
 
+// Executes the target graph. When the dummy-data debug switch is enabled,
+// hexagon_controller_InitInputNodeDataToInceptionDummyData() runs first.
+// Returns false if no target graph id has been set (id 0).
+// NOTE(review): the result of hexagon_controller_ExecuteGraphWithBuffer() is
+// not inspected -- confirm whether it can report a failed run.
 bool soc_interface_ExecuteGraph() {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("ExecuteGraph");
+  if (hexagon_controller_IsDbgUseInceptionDummyDataEnabled()) {
+    hexagon_controller_InitInputNodeDataToInceptionDummyData(
+        INCEPTION_VERSION /* version */);
+  }
+  const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+  if (graph_id == 0) {
+    TFMLOGE("Graph id has not been set yet.");
+    return false;
+  }
+  hexagon_controller_ExecuteGraphWithBuffer(graph_id, true);
+  return true;
 }
 
+// Tears down the target graph. Returns false if no target graph id has been
+// set (id 0).
 bool soc_interface_TeardownGraph() {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("TeardownGraph");
+  const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+  if (graph_id == 0) {
+    TFMLOGE("Graph id has not been set yet.");
+    return false;
+  }
+  hexagon_controller_Teardown(graph_id);
+  return true;
 }
 
+// Copies buf_size bytes of float data from buf into the controller's input
+// node buffer and records the x, y, z, d dimensions. Returns false when a
+// pointer is NULL, a dimension is negative, x * y * z * d exceeds the buffer
+// capacity, or buf_size is not exactly that many floats.
 bool soc_interface_FillInputNodeFloat(
-    int x, int y, int z, int d, const uint8_t* const buf, uint64_t buf_size) {
-  // TODO(satok): implement
-  return false;
+    int x, int y, int z, int d, const uint8_t* const buf,
+    uint64_t buf_size) {
+  TFMLOGD("FillInputNodeFloat");
+  struct NodeDataFloat* node_data_float =
+      hexagon_controller_GetInputNodeDataFloatBuffer();
+  if (node_data_float == NULL || buf == NULL) {
+    TFMLOGE("Input node buffer is not available.");
+    return false;
+  }
+  // Multiply one dimension at a time in 64 bits and bail out as soon as the
+  // capacity is exceeded, so the running product can never overflow.
+  const int dims[] = {x, y, z, d};
+  int64_t array_size = 1;
+  for (int i = 0; i < 4; ++i) {
+    if (dims[i] < 0) {
+      TFMLOGE("Invalid dimensions %d, %d, %d, %d", x, y, z, d);
+      return false;
+    }
+    array_size *= dims[i];
+    if (array_size > node_data_float->buf_size) {
+      TFMLOGE("Array size exceeds buf size %" PRId64 " > %d",
+              array_size, node_data_float->buf_size);
+      return false;
+    }
+  }
+  if (buf_size != (uint64_t)array_size * sizeof(float)) {
+    TFMLOGE("Invalid buf size!");
+    return false;
+  }
+  memcpy(node_data_float->byte_array_data, buf, buf_size);
+  node_data_float->x = x;
+  node_data_float->y = y;
+  node_data_float->z = z;
+  node_data_float->d = d;
+  // NOTE(review): this stores the size in bytes, not in elements -- confirm
+  // that the readers of array_size expect bytes.
+  node_data_float->array_size = buf_size;
+  return true;
 }
 
+// Points *buf at the controller's float output data for node_name and stores
+// its size in bytes in *buf_size. No copy is made. Returns false when an
+// out-parameter is NULL or the controller reports a negative element count.
 // TODO(satok): Remove and use runtime version
 bool soc_interface_ReadOutputNodeFloat(
     const char* const node_name, uint8_t** buf, uint64_t *buf_size) {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("ReadOutputNodeFloat");
+  if (buf == NULL || buf_size == NULL) {
+    TFMLOGE("Output parameters must not be NULL.");
+    return false;
+  }
+  int array_size = -1;
+  float* output_node_data_float =
+      hexagon_controller_GetOutputNodeDataFloatBuffer(node_name, &array_size);
+  if (array_size < 0) {
+    TFMLOGE("Failed to read data.");
+    return false;
+  }
+  *buf = (uint8_t*)output_node_data_float;
+  *buf_size = (uint64_t)array_size * sizeof(float);
+  return true;
 }
 
+// Sets up the graph for `version` through the controller and makes it the
+// target graph. Returns false when the controller hands back graph id 0.
 bool soc_interface_SetupGraphDummy(int version) {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("SetupGraphDummy");
+  const uint32_t graph_id = hexagon_controller_SetupGraph(version);
+  if (graph_id == 0) {
+    TFMLOGE("Failed to setup graph");
+    return false;
+  }
+  hexagon_controller_SetTargetGraphId(graph_id);
+  return true;
 }
 
+// Frees both node arrays and resets their bookkeeping, so that a stale
+// pointer is never reused or freed twice.
+static void ResetNodeInputAndNodeOutputArray() {
+  free(s_node_inputs_array);
+  free(s_node_outputs_array);
+  s_node_inputs_array = NULL;
+  s_node_outputs_array = NULL;
+  s_node_inputs_array_index = 0;
+  s_node_outputs_array_index = 0;
+  s_node_inputs_array_max_count = 0;
+  s_node_outputs_array_max_count = 0;
+}
+
+// Allocates the arrays that soc_interface_SetOneNodeInputs() and
+// soc_interface_SetOneNodeOutputs() hand out slices of. Arrays left over from
+// an earlier call are released first. Returns false, with nothing allocated,
+// when a count is negative or an allocation fails.
 bool soc_interface_AllocateNodeInputAndNodeOutputArray(
     int total_input_count, int total_output_count) {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("Allocate node inputs and node outputs array %d, %d",
+          total_input_count, total_output_count);
+  ResetNodeInputAndNodeOutputArray();
+  if (total_input_count < 0 || total_output_count < 0) {
+    TFMLOGE("Invalid node input / output count");
+    return false;
+  }
+  // calloc checks count * size for overflow and zero-fills, so any field the
+  // SetOneNode*() functions do not assign starts out as 0. A zero count may
+  // legitimately yield NULL, hence the count checks below.
+  s_node_inputs_array =
+      calloc(total_input_count, sizeof(*s_node_inputs_array));
+  s_node_outputs_array =
+      calloc(total_output_count, sizeof(*s_node_outputs_array));
+  if ((total_input_count > 0 && s_node_inputs_array == NULL) ||
+      (total_output_count > 0 && s_node_outputs_array == NULL)) {
+    TFMLOGE("Failed to allocate node inputs / outputs array");
+    ResetNodeInputAndNodeOutputArray();
+    return false;
+  }
+  s_node_inputs_array_max_count = total_input_count;
+  s_node_outputs_array_max_count = total_output_count;
+  return true;
 }
 
+// Frees the node input / output arrays and resets their bookkeeping. Safe to
+// call repeatedly. Always returns true.
 bool soc_interface_ReleaseNodeInputAndNodeOutputArray() {
-  // TODO(satok): implement
-  return false;
+  TFMLOGD("Release node inputs and node outputs array");
+  ResetNodeInputAndNodeOutputArray();
+  return true;
 }
 
+// Stores input_count (node_id[i], port[i]) pairs in the next free slots of the
+// inputs array and returns a pointer to the first one, or 0 when input_count
+// is negative or the pairs do not fit in what is left of the array.
 void* soc_interface_SetOneNodeInputs(
     int input_count, const int* const node_id, const int* const port) {
-  // TODO(satok): implement
-  return 0;
+  // Compare against the remaining room instead of adding to the index, which
+  // could overflow; the negative check keeps the index from moving backwards.
+  const int room = s_node_inputs_array_max_count - s_node_inputs_array_index;
+  if (input_count < 0 || input_count > room) {
+    TFMLOGE("input count exceeds limit");
+    return 0;
+  }
+  for (int i = 0; i < input_count; ++i) {
+    const int index = s_node_inputs_array_index + i;
+    s_node_inputs_array[index].src_id = node_id[i];
+    s_node_inputs_array[index].output_idx = port[i];
+  }
+  void* retval = (void*)(&s_node_inputs_array[s_node_inputs_array_index]);
+  s_node_inputs_array_index += input_count;
+  return retval;
 }
 
+// Stores output_count max_size values in the next free slots of the outputs
+// array and returns a pointer to the first one, or 0 when output_count is
+// negative or the values do not fit in what is left of the array.
 void* soc_interface_SetOneNodeOutputs(int output_count, int* max_size) {
-  // TODO(satok): implement
-  return 0;
+  const int room = s_node_outputs_array_max_count - s_node_outputs_array_index;
+  if (output_count < 0 || output_count > room) {
+    TFMLOGE("output count exceeds limit");
+    return 0;
+  }
+  for (int i = 0; i < output_count; ++i) {
+    const int index = s_node_outputs_array_index + i;
+    s_node_outputs_array[index].max_size = max_size[i];
+  }
+  void* retval = (void*)(&s_node_outputs_array[s_node_outputs_array_index]);
+  s_node_outputs_array_index += output_count;
+  return retval;
 }
 
 // Append const node to the graph
+// Adds the const node to the target graph via
+// hexagon_controller_AppendConstNode(). Returns false on a non-zero result.
 bool soc_interface_AppendConstNode(
     const char* const name, int node_id, int batch, int height, int width,
     int depth, const uint8_t* const data, int data_length) {
-  // TODO(satok): implement
-  return false;
+  const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+  const int retval = hexagon_controller_AppendConstNode(
+      name, graph_id, node_id, batch, height, width, depth, data, data_length);
+  if (retval != 0) {
+    TFMLOGE("Failed to append const node %d", node_id);
+    return false;
+  }
+  return true;
 }
 
 // Append node to the graph
+// Adds the node to the target graph via hexagon_controller_AppendNode().
+// Returns false on a non-zero result.
 bool soc_interface_AppendNode(
     const char* const name, int node_id, int ops_id, int padding_id,
     const void* const inputs, int inputs_count, const void* const outputs,
     int outputs_count) {
-  // TODO(satok): implement
-  return false;
+  const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+  const int retval = hexagon_controller_AppendNode(
+      name, graph_id, node_id, ops_id, padding_id,
+      (hexagon_nn_input*) inputs, inputs_count,
+      (hexagon_nn_output*) outputs, outputs_count);
+  if (retval != 0) {
+    TFMLOGE("Failed to append node %d", node_id);
+    return false;
+  }
+  return true;
 }
 
 // Instantiate graph
+// Asks the controller for a new graph and records its id as the target graph.
+// Returns false when that id is 0, which the other entry points in this file
+// treat as "no graph set".
 bool soc_interface_InstantiateGraph() {
-  // TODO(satok): implement
-  return false;
+  const uint32_t nn_id = hexagon_controller_InstantiateGraph();
+  hexagon_controller_SetTargetGraphId(nn_id);
+  if (nn_id == 0) {
+    TFMLOGE("Failed to instantiate graph");
+    return false;
+  }
+  return true;
 }
 
 // Construct graph
+// Returns the result of hexagon_controller_ConstructGraph() for the target
+// graph.
 bool soc_interface_ConstructGraph() {
-  // TODO(satok): implement
-  return false;
+  const uint32_t graph_id = hexagon_controller_GetTargetGraphId();
+  return hexagon_controller_ConstructGraph(graph_id);
 }
 
+// Forwards log_level to SetLogLevel().
 void soc_interface_SetLogLevel(int log_level) {
-  // TODO(satok): implement
+  SetLogLevel(log_level);
 }
 
+// Logs the flag mask and, if FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT is set,
+// enables the Inception dummy input data. Nothing here turns it off again
+// when the bit is clear.
 void soc_interface_SetDebugFlag(uint64_t flag) {
-  // TODO(satok): implement
+  TFMLOGI("Set debug flag 0x%" PRIx64, flag);
+  if ((flag & FLAG_ENABLE_INCEPTION_DUMMY_BINARY_INPUT) != 0) {
+    TFMLOGI("Enable always use panda data");
+    hexagon_controller_EnableDbgUseInceptionDummyData(true);
+  }
 }
|