aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc41
-rw-r--r--tensorflow/contrib/makefile/Makefile22
-rw-r--r--tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in20
-rw-r--r--tensorflow/contrib/makefile/tf_op_files.txt3
-rw-r--r--tensorflow/core/kernels/hexagon/graph_transferer.cc395
-rw-r--r--tensorflow/core/kernels/hexagon/graph_transferer.h34
-rw-r--r--tensorflow/core/kernels/hexagon/graph_transferer_test.cc26
-rw-r--r--tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc146
-rw-r--r--tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h18
-rw-r--r--tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc13
-rw-r--r--tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc7
-rw-r--r--tensorflow/core/kernels/i_remote_fused_graph_executor.h4
-rw-r--r--tensorflow/core/kernels/remote_fused_graph_execute_utils.cc63
-rw-r--r--tensorflow/core/kernels/remote_fused_graph_execute_utils.h29
-rw-r--r--tensorflow/core/platform/hexagon/soc_interface.h23
15 files changed, 694 insertions, 150 deletions
diff --git a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc
index 6ae7c4a742..6af608396a 100644
--- a/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc
+++ b/tensorflow/contrib/hvx/hvx_ops_support_checker/hvx_ops_support_checker_main.cc
@@ -33,10 +33,15 @@ limitations under the License.
#include "tensorflow/tools/graph_transforms/transform_utils.h"
namespace tensorflow {
+
namespace {
-static int ParseFlags(int argc, char* argv[], string* in_graph) {
+static int ParseFlags(int argc, char* argv[], string* in_graph,
+ bool* dump_all_nodes, bool* dump_shape_and_type) {
std::vector<Flag> flag_list = {
- Flag("in_graph", in_graph, "input graph file name"),
+ Flag("in_graph", in_graph, "Input graph file name to check hvx support."),
+ Flag("dump_all_nodes", dump_all_nodes, "Dump all nodes in the model."),
+ Flag("dump_shape_and_type", dump_shape_and_type,
+ "Dump shape and type of nodes"),
};
CHECK(Flags::Parse(&argc, argv, flag_list));
// We need to call this to set up global state for TensorFlow.
@@ -48,12 +53,25 @@ static int ParseFlags(int argc, char* argv[], string* in_graph) {
return 0;
}
-static void SummarizeNode(const NodeDef& node_def) {
+static void SummarizeNode(const NodeDef& node_def,
+ const bool dump_shape_and_type) {
LOG(INFO) << "Node(" << node_def.name() << ")";
LOG(INFO) << " op: " << node_def.op();
for (const string& input : node_def.input()) {
LOG(INFO) << " Input: " << input;
}
+ std::vector<DataType> data_types;
+ std::vector<TensorShape> shapes;
+ const Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
+ node_def, &data_types, &shapes);
+ if (data_types.empty() || shapes.empty()) {
+ return;
+ }
+ CHECK_EQ(data_types.size(), shapes.size());
+ for (int i = 0; i < data_types.size(); ++i) {
+ LOG(INFO) << " Output(" << i << "): " << DataType_Name(data_types.at(i))
+ << ", " << shapes.at(i).DebugString();
+ }
}
static void DumpRemoteFusedGraph(const NodeDef& node_def) {
@@ -89,10 +107,14 @@ static void DumpRemoteFusedGraph(const NodeDef& node_def) {
}
}
-static void CheckOpsSupport(const GraphDef& graph_def) {
+static void CheckOpsSupport(const GraphDef& graph_def,
+ const bool dump_all_nodes,
+ const bool dump_shape_and_type) {
const IGraphTransferOpsDefinitions& ops_definition =
HexagonOpsDefinitions::getInstance();
LOG(INFO) << "Checking " << graph_def.node_size() << " nodes";
+ LOG(INFO) << "dump_all_nodes = " << dump_all_nodes
+ << ", dump_shape_and_tpye = " << dump_shape_and_type;
std::unordered_set<string> unsupported_ops;
bool all_supported = true;
@@ -125,9 +147,9 @@ static void CheckOpsSupport(const GraphDef& graph_def) {
LOG(INFO) << count << " ops are not supported.";
}
- if (contains_remote_graph) {
+ if (contains_remote_graph || dump_all_nodes) {
for (const NodeDef& node : graph_def.node()) {
- SummarizeNode(node);
+ SummarizeNode(node, dump_shape_and_type);
}
}
}
@@ -137,7 +159,10 @@ static void CheckOpsSupport(const GraphDef& graph_def) {
int main(int argc, char** argv) {
tensorflow::string in_graph;
- const int ret = tensorflow::ParseFlags(argc, argv, &in_graph);
+ bool dump_all_nodes;
+ bool dump_shape_and_type;
+ const int ret = tensorflow::ParseFlags(argc, argv, &in_graph, &dump_all_nodes,
+ &dump_shape_and_type);
if (ret != 0) {
return ret;
}
@@ -146,6 +171,6 @@ int main(int argc, char** argv) {
TF_CHECK_OK(tensorflow::graph_transforms::LoadTextOrBinaryGraphFile(
in_graph, &graph_def));
- tensorflow::CheckOpsSupport(graph_def);
+ tensorflow::CheckOpsSupport(graph_def, dump_all_nodes, dump_shape_and_type);
return 0;
}
diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile
index 305ed0d11e..2150cfe9ea 100644
--- a/tensorflow/contrib/makefile/Makefile
+++ b/tensorflow/contrib/makefile/Makefile
@@ -279,6 +279,16 @@ ifeq ($(TARGET),ANDROID)
LIBS += -lhexagon_controller
LDFLAGS += -L$(HEXAGON_LIBS)
CXXFLAGS += -DUSE_HEXAGON_LIBS
+
+# CAVEAT: We should disable TENSORFLOW_DISABLE_META while running
+# quantized_matmul on Android because it crashes in
+# MultiThreadGemm in tensorflow/core/kernels/meta_support.cc
+# See http://b/33270149
+# TODO(satok): Remove once it's fixed
+ CXXFLAGS += -DTENSORFLOW_DISABLE_META
+
+# Declare __ANDROID_TYPES_FULL__ to enable required types for hvx
+ CXXFLAGS += -D__ANDROID_TYPES_FULL__
endif
ifdef ENABLE_EXPERIMENTAL_HEXNN_OPS
@@ -500,6 +510,18 @@ tensorflow/core/util/reporter.cc \
tensorflow/tools/benchmark/benchmark_model.cc \
tensorflow/tools/benchmark/benchmark_model_main.cc
+ifdef HEXAGON_LIBS
+ TF_CC_SRCS += \
+tensorflow/cc/framework/scope.cc \
+tensorflow/cc/framework/ops.cc \
+tensorflow/cc/ops/const_op.cc \
+tensorflow/core/kernels/hexagon/graph_transfer_utils.cc \
+tensorflow/core/kernels/hexagon/graph_transferer.cc \
+tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc \
+tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc \
+tensorflow/core/kernels/hexagon/hexagon_remote_fused_graph_executor_build.cc
+endif
+
# File names of the intermediate files target compilation generates.
TF_CC_OBJS := $(addprefix $(OBJDIR), $(TF_CC_SRCS:.cc=.o))
PBT_GEN_FILES := $(addprefix $(PBTGENDIR), $(PBT_CC_SRCS))
diff --git a/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in b/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in
index 2a6f66edcb..9aa81144fd 100644
--- a/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in
+++ b/tensorflow/contrib/makefile/sub_makefiles/hexagon_graph_execution/Makefile.in
@@ -34,27 +34,7 @@ $(wildcard $(GTEST_DIR)/src/*.cc) \
$(wildcard $(GTEST_DIR)/src/*.h) \
$(GTEST_HEADERS)
-# CAVEAT: We should disable TENSORFLOW_DISABLE_META while running
-# quantized_matmul on Android because it crashes in
-# MultiThreadGemm in tensorflow/core/kernels/meta_support.cc
-# TODO(satok): Remove once it's fixed
-CXXFLAGS += -DTENSORFLOW_DISABLE_META
-
-# Declare __ANDROID_TYPES_FULL__ to enable required types for hvx
-CXXFLAGS += -D__ANDROID_TYPES_FULL__
-
GRAPH_TRANSFER_SRCS := \
-tensorflow/cc/framework/scope.cc \
-tensorflow/cc/framework/ops.cc \
-tensorflow/cc/ops/const_op.cc \
-tensorflow/core/kernels/hexagon/graph_transfer_utils.cc \
-tensorflow/core/kernels/hexagon/graph_transferer.cc \
-tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc \
-tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc \
-tensorflow/core/kernels/hexagon/hexagon_remote_fused_graph_executor_build.cc \
-tensorflow/core/kernels/remote_fused_graph_execute_op.cc \
-tensorflow/core/kernels/remote_fused_graph_execute_utils.cc \
-tensorflow/core/ops/remote_fused_graph_ops.cc \
tensorflow/core/platform/posix/test.cc
GRAPH_EXECUTION_SRCS := \
diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt
index 857d6fa21b..c73ec0305b 100644
--- a/tensorflow/contrib/makefile/tf_op_files.txt
+++ b/tensorflow/contrib/makefile/tf_op_files.txt
@@ -202,12 +202,15 @@ tensorflow/core/kernels/quantized_reshape_op.cc
tensorflow/core/kernels/quantized_resize_bilinear_op.cc
tensorflow/core/kernels/requantization_range_op.cc
tensorflow/core/kernels/requantize.cc
+tensorflow/core/kernels/remote_fused_graph_execute_op.cc
+tensorflow/core/kernels/remote_fused_graph_execute_utils.cc
tensorflow/core/ops/training_ops.cc
tensorflow/core/ops/string_ops.cc
tensorflow/core/ops/state_ops.cc
tensorflow/core/ops/sparse_ops.cc
tensorflow/core/ops/sendrecv_ops.cc
tensorflow/core/ops/script_ops.cc
+tensorflow/core/ops/remote_fused_graph_ops.cc
tensorflow/core/ops/random_ops.cc
tensorflow/core/ops/random_grad.cc
tensorflow/core/ops/parsing_ops.cc
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.cc b/tensorflow/core/kernels/hexagon/graph_transferer.cc
index d927ef3efa..055108cd00 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer.cc
@@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph_constructor.h"
+#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/public/session.h"
@@ -43,10 +44,14 @@ const char INPUTS_NODE_PREFIX[] = "inputs_for_";
const char OUTPUTS_NODE_PREFIX[] = "outputs_for_";
const char DATA_NODE_PREFIX[] = "data_for_op_";
const char CONST_SHAPE_PREFIX[] = "const_shape_";
+const char CONST_VAL_PREFIX[] = "const_val_";
+const char CONST_TENSOR_PREFIX[] = "const_tensor_";
const char PADDING_ATTR_NAME[] = "padding";
const char STRIDES_ATTR_NAME[] = "strides";
+const char KEEP_DIMS_ATTR_NAME[] = "keep_dims";
const char KSIZE_ATTR_NAME[] = "ksize";
const char NULL_OUTPUT_NAME[] = "NULL";
+const char AGGREGATED_INPUT_NODE_NAME[] = "graph_transfer_aggregated_input";
const int PADDING_NA_ID = 0; // VALID = 1, SAME = 2
// This is a temporary workaround to support android build
@@ -58,6 +63,16 @@ static string ToString(T val) {
return stream.str();
}
+static Node* FindMutableNodeByName(const string& name, Graph* graph) {
+ const TensorId tid = ParseTensorName(name);
+ for (Node* node : graph->nodes()) {
+ if (node != nullptr && node->name() == tid.first) {
+ return node;
+ }
+ }
+ return nullptr;
+}
+
/**
* graph loading functions
* - LoadGraphFromProto
@@ -86,13 +101,22 @@ Status GraphTransferer::LoadGraphFromProto(
}
}
+ TF_RETURN_IF_ERROR(TransformGraphToAddAggregatedInputNode(
+ input_node_info_list, &graph, &shape_refiner));
+
std::unordered_multimap<string, const Node*> op_name_to_node_multimap(
graph.num_nodes());
for (const Node* const node : graph.nodes()) {
+ if (node == nullptr) {
+ continue;
+ }
CacheNode(*node);
}
for (const Node* const node : graph.nodes()) {
+ if (node == nullptr) {
+ continue;
+ }
VLOG(1) << "<Node> " << node->name();
for (const Node* const input_node : node->in_nodes()) {
const string& name = input_node->name();
@@ -102,6 +126,9 @@ Status GraphTransferer::LoadGraphFromProto(
}
for (const Node* const node : graph.nodes()) {
+ if (node == nullptr) {
+ continue;
+ }
status = RegisterNodeIfAllInputsAreCached(
ops_definitions, shape_refiner, *node, false, input_node_info_list,
output_node_names);
@@ -265,19 +292,16 @@ GraphTransferInfo& GraphTransferer::GetMutableGraphTransferInfo() {
return graph_transfer_info_;
}
-int GraphTransferer::CacheNode(const Node& node) {
+void GraphTransferer::CacheNode(const Node& node) {
if (node_name_to_id_cache_map_.count(node.name()) > 0) {
- VLOG(1) << "Emplace node to cache failed";
- // TODO(satok): check here?
- return -1;
+ return;
}
- VLOG(1) << "Cache node: " << node.name() << ", " << node.op_def().name();
node_name_cache_list_.emplace_back(&node);
+ const int node_id = node_name_cache_list_.size() - 1;
bool emplace_succeeded = false;
- std::tie(std::ignore, emplace_succeeded) = node_name_to_id_cache_map_.emplace(
- node.name(), node_name_cache_list_.size() - 1);
+ std::tie(std::ignore, emplace_succeeded) =
+ node_name_to_id_cache_map_.emplace(node.name(), node_id);
CHECK(emplace_succeeded);
- return node_name_cache_list_.size() - 1;
}
bool GraphTransferer::AreAllInputsCached(const Node& node) const {
@@ -291,22 +315,124 @@ bool GraphTransferer::AreAllInputsCached(const Node& node) const {
return true;
}
+Status GraphTransferer::TransformGraphToAddAggregatedInputNode(
+ const std::vector<std::pair<string, Tensor>>& input_node_info_list,
+ Graph* graph, ShapeRefiner* shape_refiner) {
+ // Transform a remote fused graph to add an aggregated input node which takes
+ // all inputs of the remote graph.
+ DataTypeVector input_data_types;
+ std::vector<DataType> data_types;
+ std::vector<TensorShape> shapes;
+ std::vector<string> input_nodes;
+ for (int i = 0; i < input_node_info_list.size(); ++i) {
+ Node* node = FindMutableNodeByName(input_node_info_list.at(i).first, graph);
+ CHECK_NOTNULL(node);
+ input_nodes.emplace_back(node->name());
+ input_data_types.emplace_back(input_node_info_list.at(i).second.dtype());
+ data_types.emplace_back(input_node_info_list.at(i).second.dtype());
+ shapes.emplace_back(input_node_info_list.at(i).second.shape());
+ }
+
+ NodeDef input_node_def;
+ auto builder =
+ NodeBuilder(AGGREGATED_INPUT_NODE_NAME, "RemoteFusedGraphExecute")
+ .Input(std::vector<NodeBuilder::NodeOut>{})
+ .Attr("Tinputs", DataTypeVector{})
+ .Attr("Toutputs", input_data_types)
+ .Attr("serialized_remote_fused_graph_execute_info", "")
+ .Attr(RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_DATA_TYPES,
+ data_types)
+ .Attr(RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_SHAPES, shapes);
+
+ Node* input_node;
+ TF_RETURN_IF_ERROR(builder.Finalize(graph, &input_node));
+ CHECK_NOTNULL(input_node);
+
+ bool refined;
+ TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(input_node, &refined));
+
+ shape_inference::InferenceContext* context =
+ shape_refiner->GetContext(input_node);
+ for (int i = 0; i < input_node_info_list.size(); ++i) {
+ shape_inference::ShapeHandle handle;
+ TF_RETURN_IF_ERROR(context->MakeShapeFromTensorShape(
+ input_node_info_list.at(i).second.shape(), &handle));
+ TF_RETURN_IF_ERROR(shape_refiner->SetShape(input_node, i, handle));
+ }
+
+ // Cache the aggregate input node first as it's consumed first.
+ CacheNode(*input_node);
+
+ std::vector<Node*> original_input_nodes(input_nodes.size());
+
+ for (int i = 0; i < input_nodes.size(); ++i) {
+ const string& node_name = input_nodes.at(i);
+ Node* original_input_node = FindMutableNodeByName(node_name, graph);
+ CHECK_NOTNULL(original_input_node);
+ CHECK_EQ(1, original_input_node->num_outputs()); // replaced by identity.
+ Node* created_node;
+ TF_RETURN_IF_ERROR(RemoteFusedGraphExecuteUtils::BuildIdentityOpNode(
+ node_name, AGGREGATED_INPUT_NODE_NAME, i, data_types.at(i), graph,
+ &created_node));
+ CHECK_NOTNULL(created_node);
+ std::vector<DataType> data_types;
+ std::vector<TensorShape> shapes;
+ Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
+ original_input_node->def(), &data_types, &shapes);
+ if (status.ok()) {
+ created_node->AddAttr(
+ RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_DATA_TYPES, data_types);
+ created_node->AddAttr(RemoteFusedGraphExecuteUtils::ATTR_OUTPUT_SHAPES,
+ shapes);
+ }
+ for (const Edge* out_edge : original_input_node->out_edges()) {
+ Node* dst = out_edge->dst();
+ int dst_port = out_edge->dst_input();
+ // Unused edge will be removed when removing node.
+ graph->AddEdge(created_node, 0, dst, dst_port);
+ }
+ original_input_nodes[i] = original_input_node;
+
+ TF_RETURN_IF_ERROR(shape_refiner->UpdateNode(created_node, &refined));
+
+ shape_inference::InferenceContext* context =
+ shape_refiner->GetContext(created_node);
+ CHECK_NOTNULL(context);
+
+ // Cache replaced input node next to the aggregated input node.
+ CacheNode(*created_node);
+ }
+
+ // Remove original input nodes after adding new input nodes to avoid
+ // reusing same pointer in Graph.
+ for (Node* original_input_node : original_input_nodes) {
+ graph->RemoveNode(original_input_node);
+ }
+
+ return Status::OK();
+}
+
Status GraphTransferer::RegisterNode(
const IGraphTransferOpsDefinitions& ops_definitions,
const ShapeRefiner& shape_refiner, const Node& node,
const std::vector<std::pair<string, Tensor>>& input_node_info_list,
const std::vector<string>& output_node_names) {
- VLOG(1) << "Register node: " << node.name();
+ VLOG(1) << "Register node: " << node.name() << ", " << std::hex
+ << node_name_to_id_cache_map_.at(node.name());
if (node.name() == SOURCE_NODE_NAME || node.name() == SINK_NODE_NAME) {
// Just ignore sink and source
- return Status();
- } else if (RemoteFusedGraphExecuteUtils::IsInputNode(input_node_info_list,
- node.name())) {
+ return Status::OK();
+ } else if (node.name() == AGGREGATED_INPUT_NODE_NAME) {
RegisterInputNode(ops_definitions, shape_refiner, node);
+ return Status::OK();
} else if (node.IsConstant()) {
RegisterConstantNode(shape_refiner, node);
+ } else if (IsPadNode(node)) {
+ RegisterPadNode(ops_definitions, shape_refiner, node);
} else if (HasPaddingAndStrides(node)) {
RegisterNodeWithPaddingAndStrides(ops_definitions, shape_refiner, node);
+ } else if (NeedsToAddRank(node)) {
+ RegisterNodeWithRank(ops_definitions, shape_refiner, node);
} else if (IsNodeFlattenReshape(node, shape_refiner)) {
RegisterFlattenNode(ops_definitions, shape_refiner, node);
} else if (ops_definitions.GetOpIdFor(node.type_string(), {}) !=
@@ -318,7 +444,7 @@ Status GraphTransferer::RegisterNode(
" has not been implemented yet.");
}
- return Status();
+ return Status::OK();
}
void GraphTransferer::RegisterConstantNode(const ShapeRefiner& shape_refiner,
@@ -361,8 +487,7 @@ void GraphTransferer::RegisterConstantNode(const ShapeRefiner& shape_refiner,
const TensorProto* proto = nullptr;
TF_CHECK_OK(GetNodeAttr(node.attrs(), "value", &proto));
Tensor const_tensor;
- // TODO(b/32704451): Don't just ignore this status!
- MakeTensorFromProto(*proto, &const_tensor).IgnoreError();
+ TF_CHECK_OK(MakeTensorFromProto(*proto, &const_tensor));
const_node_info.set_dtype(const_tensor.dtype());
if (data_size > 0) {
@@ -394,12 +519,82 @@ int GraphTransferer::RegisterConstantShape(const std::vector<int>& shape) {
return node_name_to_id_cache_map_[shape_name];
}
+int GraphTransferer::RegisterConstTensor(const Tensor& tensor,
+ const string& suffix) {
+ VLOG(1) << "Cache const tensor.";
+ const int dims = tensor.shape().dims();
+ CHECK(dims <= 4);
+ const string node_name = strings::StrCat(CONST_TENSOR_PREFIX, "_", suffix);
+ if (node_name_to_id_cache_map_.count(node_name) <= 0) {
+ node_name_cache_list_.emplace_back(nullptr);
+ const int id = node_name_cache_list_.size() - 1;
+ node_name_to_id_cache_map_.emplace(node_name, id);
+ GraphTransferInfo::ConstNodeInfo& const_node_info =
+ *graph_transfer_info_.add_const_node_info();
+ const_node_info.set_name(node_name);
+ const_node_info.set_node_id(id);
+ CHECK_EQ(4, SHAPE_ARRAY_SIZE);
+ for (int i = 0; i < SHAPE_ARRAY_SIZE; ++i) {
+ if (i < SHAPE_ARRAY_SIZE - dims) {
+ const_node_info.add_shape(1);
+ } else {
+ const_node_info.add_shape(
+ tensor.shape().dim_size(i - (SHAPE_ARRAY_SIZE - dims)));
+ }
+ }
+ const_node_info.set_dtype(tensor.dtype());
+ const_node_info.set_data(tensor.tensor_data().data(),
+ tensor.tensor_data().size());
+ }
+ return node_name_to_id_cache_map_[node_name];
+}
+
+int GraphTransferer::RegisterConstScalar(const DataType dt, const int val,
+ const int dst_id,
+ const int dst_input_count) {
+ VLOG(1) << "Cache const.";
+ const string val_name =
+ CONST_VAL_PREFIX + ToString(dst_id) + '_' + ToString(dst_input_count);
+ if (node_name_to_id_cache_map_.count(val_name) <= 0) {
+ node_name_cache_list_.emplace_back(nullptr);
+ const int id = node_name_cache_list_.size() - 1;
+ node_name_to_id_cache_map_.emplace(val_name, id);
+ GraphTransferInfo::ConstNodeInfo& const_node_info =
+ *graph_transfer_info_.add_const_node_info();
+ const_node_info.set_name(val_name);
+ const_node_info.set_node_id(id);
+ // TODO(satok): Do not assume rank is 4 here.
+ const_node_info.add_shape(static_cast<int64>(1));
+ const_node_info.add_shape(static_cast<int64>(1));
+ const_node_info.add_shape(static_cast<int64>(1));
+ const_node_info.add_shape(static_cast<int64>(1));
+ const_node_info.set_data(&val, DataTypeSize(dt));
+ }
+ return node_name_to_id_cache_map_[val_name];
+}
+
bool GraphTransferer::HasPaddingAndStrides(const Node& node) {
auto attrs = node.attrs();
return attrs.Find(PADDING_ATTR_NAME) != nullptr &&
attrs.Find(STRIDES_ATTR_NAME) != nullptr;
}
+bool GraphTransferer::NeedsToAddRank(const Node& node) {
+ const string& op_type = node.def().op();
+ if (op_type == "Transpose" || op_type == "ExpandDims") {
+ return true;
+ }
+ return false;
+}
+
+bool GraphTransferer::IsPadNode(const Node& node) {
+ const string& op_type = node.def().op();
+ if (op_type == "Pad") {
+ return true;
+ }
+ return false;
+}
+
bool GraphTransferer::IsNodeFlattenReshape(const Node& node,
const ShapeRefiner& shape_refiner) {
// Check if node is reshape op
@@ -473,15 +668,123 @@ void GraphTransferer::RegisterNodeWithPaddingAndStrides(
node.num_outputs(), true /* append_input */, true /* append_output */);
}
-void GraphTransferer::RegisterInputNode(
+void GraphTransferer::RegisterNodeWithRank(
const IGraphTransferOpsDefinitions& ops_definitions,
const ShapeRefiner& shape_refiner, const Node& node) {
- VLOG(1) << "Register input node: " << node.name();
CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
const int id = node_name_to_id_cache_map_[node.name()];
+ shape_inference::InferenceContext* context = shape_refiner.GetContext(&node);
+ const Node* input0_node;
+ TF_CHECK_OK(node.input_node(0, &input0_node));
+ CHECK_NOTNULL(input0_node);
+ std::vector<TensorShape> shapes;
+ Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
+ input0_node->def(), nullptr, &shapes);
+ CHECK_EQ(1, shapes.size()) << "Output size should be 1.";
+ const int const_val_id =
+ RegisterConstScalar(DT_INT32, shapes.at(0).dims(), id, node.num_inputs());
+ std::vector<int> extra_inputs{const_val_id};
+ // TODO(satok): Set correct data type if it's given.
+ const int op_type_id = ops_definitions.GetOpIdFor(node.type_string(), {});
+ CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount())
+ << "Op " << node.type_string() << " not found in map(id = " << op_type_id
+ << ")";
+ bool keep_dims = false;
+ int padding_id = PADDING_NA_ID;
+ if (context->GetAttr(KEEP_DIMS_ATTR_NAME, &keep_dims).ok()) {
+ padding_id = keep_dims ? Padding::SAME : Padding::VALID;
+ }
+
+ AppendNodeParamsWithIoParams(
+ shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
+ padding_id, node.num_inputs(), extra_inputs, node.num_outputs(),
+ true /* append_input */, true /* append_output */);
+}
+
+void GraphTransferer::RegisterPadNode(
+ const IGraphTransferOpsDefinitions& ops_definitions,
+ const ShapeRefiner& shape_refiner, const Node& node) {
+ static constexpr int PAD_WIDTH = 4;
+ static constexpr int PAD_HEIGHT = 2;
+ VLOG(1) << "Register generic node: " << node.name();
+ CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
+ const int id = node_name_to_id_cache_map_[node.name()];
+
+ // TODO(satok): Set correct data type if it's given.
+ const int op_type_id = ops_definitions.GetOpIdFor(node.type_string(), {});
+ CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount());
+
+ CHECK_EQ(2, node.num_inputs());
+
+ GraphTransferInfo::NodeInputInfo& node_input_info =
+ *graph_transfer_info_.add_node_input_info();
+ node_input_info.set_node_id(id);
+
+ AddNodeInputByInputIndex(node, 0, &node_input_info);
+
+ const Edge* edge = nullptr;
+ TF_CHECK_OK(node.input_edge(1, &edge));
+ const Node* input_node = edge->src();
+ CHECK_NOTNULL(input_node);
+ CHECK(input_node->IsConstant());
+
+ const TensorProto* tensor_proto = nullptr;
+ TF_CHECK_OK(GetNodeAttr(input_node->def(), "value", &tensor_proto));
+ CHECK_NOTNULL(tensor_proto);
+ Tensor const_tensor;
+ TF_CHECK_OK(MakeTensorFromProto(*tensor_proto, &const_tensor));
+ CHECK_EQ(2, const_tensor.shape().dims());
+ CHECK_EQ(PAD_HEIGHT, const_tensor.shape().dim_size(1));
+ if (const_tensor.shape().dim_size(0) == PAD_WIDTH) {
+ AddNodeInputByInputIndex(node, 1, &node_input_info);
+ } else if (const_tensor.shape().dim_size(0) < PAD_WIDTH) {
+ const int width = const_tensor.shape().dim_size(0);
+ const TensorProto* proto = nullptr;
+ TF_CHECK_OK(GetNodeAttr(input_node->def(), "value", &proto));
+ Tensor const_tensor;
+ TF_CHECK_OK(MakeTensorFromProto(*proto, &const_tensor));
+ CHECK_EQ(DT_INT32, const_tensor.dtype());
+ // reshape tensor input to be rank 4.
+ // TODO(satok): Never assume rank is 4.
+ Tensor new_const_tensor(const_tensor.dtype(), TensorShape{4, 2});
+ for (int i = 0; i < PAD_HEIGHT; ++i) {
+ for (int j = 0; j < PAD_WIDTH; ++j) {
+ if (j < PAD_WIDTH - width) {
+ new_const_tensor.matrix<int32>()(j, i) = 0;
+ } else {
+ new_const_tensor.matrix<int32>()(j, i) =
+ const_tensor.matrix<int32>()(j - (PAD_WIDTH - width), i);
+ }
+ }
+ }
+
+ const int id = RegisterConstTensor(
+ new_const_tensor,
+ strings::StrCat(input_node->name(), "_", node.name(), "_1"));
+
+ GraphTransferInfo::NodeInput& node_input =
+ *node_input_info.add_node_input();
+ node_input.set_node_id(id);
+ node_input.set_output_port(0);
+ } else {
+ CHECK(false);
+ }
+
+ AppendNodeParamsWithIoParams(
+ shape_refiner, node, node.name(), id, node.type_string(), op_type_id,
+ PADDING_NA_ID, node.num_inputs(), {}, node.num_outputs(),
+ false /* append_input */, true /* append_output */);
+}
+
+void GraphTransferer::RegisterInputNode(
+ const IGraphTransferOpsDefinitions& ops_definitions,
+ const ShapeRefiner& shape_refiner, const Node& node) {
const string op_type = node.type_string();
+ VLOG(1) << "Register input node: " << node.name() << ", " << op_type;
+ CHECK_EQ(node_name_to_id_cache_map_.count(node.name()), 1);
+ const int id = node_name_to_id_cache_map_[node.name()];
// TODO(satok): Set correct data type if it's given.
- const int op_type_id = ops_definitions.GetOpIdFor(op_type, {});
+ const int op_type_id = ops_definitions.GetOpIdFor("INPUT", {});
CHECK(op_type_id >= 0 && op_type_id < ops_definitions.GetTotalOpsCount())
<< "Op" << node.name() << ", " << op_type << " is not supported,"
<< op_type_id;
@@ -546,7 +849,6 @@ void GraphTransferer::AppendNodeParams(const string& name, const int id,
const int padding, const int inputs_size,
const std::vector<int>& extra_inputs,
const int outputs_size) {
- VLOG(1) << "Append node params: " << name;
GraphTransferInfo::NodeInfo& node_info =
*graph_transfer_info_.add_node_info();
node_info.set_name(name);
@@ -559,6 +861,23 @@ void GraphTransferer::AppendNodeParams(const string& name, const int id,
node_info.set_output_count(static_cast<int>(outputs_size));
}
+void GraphTransferer::AddNodeInputByInputIndex(
+ const Node& node, const int idx,
+ GraphTransferInfo::NodeInputInfo* node_input_info) {
+ const Edge* edge = nullptr;
+ TF_CHECK_OK(node.input_edge(idx, &edge));
+ const Node* input_node = edge->src();
+ CHECK_NOTNULL(input_node);
+ const int port = edge->src_output();
+
+ const std::string& op_name = input_node->name();
+ CHECK_GT(node_name_to_id_cache_map_.count(op_name), 0) << op_name;
+ const int src_id = node_name_to_id_cache_map_[op_name];
+ GraphTransferInfo::NodeInput& node_input = *node_input_info->add_node_input();
+ node_input.set_node_id(src_id);
+ node_input.set_output_port(port);
+}
+
void GraphTransferer::AppendNodeInputParams(
const int id, const Node& node, const std::vector<int>& extra_inputs) {
VLOG(1) << "Append input params: " << node.name() << ", " << node.num_inputs()
@@ -567,18 +886,7 @@ void GraphTransferer::AppendNodeInputParams(
*graph_transfer_info_.add_node_input_info();
node_input_info.set_node_id(id);
for (int i = 0; i < node.num_inputs(); ++i) {
- const Edge* edge = nullptr;
- TF_CHECK_OK(node.input_edge(i, &edge));
- const Node* input_node = edge->src();
- const int port = edge->src_output();
-
- const std::string& op_name = input_node->name();
- CHECK_GT(node_name_to_id_cache_map_.count(op_name), 0) << op_name;
- const int src_id = node_name_to_id_cache_map_[op_name];
- GraphTransferInfo::NodeInput& node_input =
- *node_input_info.add_node_input();
- node_input.set_node_id(src_id);
- node_input.set_output_port(port);
+ AddNodeInputByInputIndex(node, i, &node_input_info);
}
for (const int extra_input : extra_inputs) {
GraphTransferInfo::NodeInput& node_input =
@@ -596,9 +904,10 @@ void GraphTransferer::AppendNodeOutputParams(const ShapeRefiner& shape_refiner,
*graph_transfer_info_.add_node_output_info();
node_output_info.set_node_id(id);
+ std::vector<DataType> data_types;
std::vector<TensorShape> shapes;
Status status = RemoteFusedGraphExecuteUtils::GetOutputTensorShapeType(
- node.attrs(), nullptr, &shapes);
+ node.attrs(), &data_types, &shapes);
for (int i = 0; i < node.num_outputs(); ++i) {
int data_size = -1;
@@ -608,16 +917,20 @@ void GraphTransferer::AppendNodeOutputParams(const ShapeRefiner& shape_refiner,
shape_inference::InferenceContext* context =
shape_refiner.GetContext(&node);
- shape_inference::ShapeHandle shape_handle = context->output(output_index);
- const shape_inference::DimensionHandle num_elements_dim =
- context->NumElements(shape_handle);
- if (context->ValueKnown(num_elements_dim)) {
+
+ if (context != nullptr && context->ValueKnown(context->NumElements(
+ context->output(output_index)))) {
+ const shape_inference::DimensionHandle num_elements_dim =
+ context->NumElements(context->output(output_index));
const int64 num_output_elements = context->Value(num_elements_dim);
data_size = max_bytes_per_data * num_output_elements;
+ if (status.ok()) {
+ TF_CHECK_OK(status);
+ CHECK_EQ(shapes.at(i).num_elements(), num_output_elements);
+ }
} else {
TF_CHECK_OK(status);
// Use attribute attached to node
- CHECK_EQ(node.num_outputs(), shapes.size()) << node.name();
data_size = max_bytes_per_data * shapes.at(i).num_elements();
}
CHECK_GE(data_size, 0);
@@ -722,11 +1035,11 @@ bool GraphTransferer::TransferParamsComparator::operator()(
const int node_id0 = obj0.node_id();
const int node_id1 = obj1.node_id();
bool obj0_uses_obj1 = false;
- if (dependency_map_.count(node_id0)) {
+ if (dependency_map_.count(node_id0) > 0) {
obj0_uses_obj1 = dependency_map_.at(node_id0).count(node_id1) > 0;
}
bool obj1_uses_obj0 = false;
- if (dependency_map_.count(node_id1)) {
+ if (dependency_map_.count(node_id1) > 0) {
obj1_uses_obj0 = dependency_map_.at(node_id1).count(node_id0) > 0;
}
CHECK(!obj0_uses_obj1 || !obj1_uses_obj0);
@@ -735,7 +1048,9 @@ bool GraphTransferer::TransferParamsComparator::operator()(
} else if (obj1_uses_obj0) {
return true;
}
- return node_id0 > node_id1;
+ // If there is no dependency between two nodes, it expects that
+ // the execution order follows node id order.
+ return node_id0 < node_id1;
}
/* static */ void GraphTransferer::FillDependencyRec(
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer.h b/tensorflow/core/kernels/hexagon/graph_transferer.h
index fa12b22d75..64c60b87c6 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer.h
+++ b/tensorflow/core/kernels/hexagon/graph_transferer.h
@@ -88,6 +88,9 @@ class GraphTransferer {
// Dump verification string of parameters to verify with offline tools
void DumpVerificationStringOfNodeTransferParams() const;
+ static std::array<int64, SHAPE_ARRAY_SIZE> ToTensorShapeArray(
+ const TensorShape& shape);
+
private:
class TransferParamsComparator {
public:
@@ -98,10 +101,16 @@ class GraphTransferer {
const std::unordered_map<int, std::unordered_set<int>>& dependency_map_;
};
- int CacheNode(const Node& node);
+ void CacheNode(const Node& node);
bool AreAllInputsCached(const Node& node) const;
+ // Transform a remote fused graph to add an aggregated input node which takes
+ // all inputs of the remote graph.
+ Status TransformGraphToAddAggregatedInputNode(
+ const std::vector<std::pair<string, Tensor>>& input_node_info_list,
+ Graph* graph, ShapeRefiner* shape_refiner);
+
Status RegisterNode(
const IGraphTransferOpsDefinitions& ops_definitions,
const ShapeRefiner& shape_refiner, const Node& node,
@@ -113,8 +122,17 @@ class GraphTransferer {
int RegisterConstantShape(const std::vector<int>& shape);
+ int RegisterConstTensor(const Tensor& tensor, const string& suffix);
+
+ int RegisterConstScalar(const DataType dt, const int val, const int dst_id,
+ const int dst_input_count);
+
bool HasPaddingAndStrides(const Node& node);
+ bool NeedsToAddRank(const Node& node);
+
+ bool IsPadNode(const Node& node);
+
// Return true if the node is a reshape op which just flattens input
// TODO(satok): Remove this method once generic reshape op is implemented in
// SOC
@@ -125,6 +143,13 @@ class GraphTransferer {
const IGraphTransferOpsDefinitions& ops_definitions,
const ShapeRefiner& shape_refiner, const Node& node);
+ void RegisterNodeWithRank(const IGraphTransferOpsDefinitions& ops_definitions,
+ const ShapeRefiner& shape_refiner,
+ const Node& node);
+
+ void RegisterPadNode(const IGraphTransferOpsDefinitions& ops_definitions,
+ const ShapeRefiner& shape_refiner, const Node& node);
+
void RegisterInputNode(const IGraphTransferOpsDefinitions& ops_definitions,
const ShapeRefiner& shape_refiner,
const Node& node);
@@ -150,6 +175,10 @@ class GraphTransferer {
const std::vector<int>& extra_inputs,
const int outputs_size);
+ void AddNodeInputByInputIndex(
+ const Node& node, const int idx,
+ GraphTransferInfo::NodeInputInfo* node_input_info);
+
void AppendNodeInputParams(const int id, const Node& node,
const std::vector<int>& extra_inputs);
@@ -167,9 +196,6 @@ class GraphTransferer {
const int outputs_size, const bool append_input_params,
const bool append_output_params);
- static std::array<int64, SHAPE_ARRAY_SIZE> ToTensorShapeArray(
- const TensorShape& shape);
-
static string ToPaddingDebugString(int padding);
// Create dependency map
diff --git a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
index ebd4a90330..74ffc026f7 100644
--- a/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
+++ b/tensorflow/core/kernels/hexagon/graph_transferer_test.cc
@@ -25,6 +25,7 @@ limitations under the License.
#include "tensorflow/core/kernels/hexagon/i_graph_transfer_ops_definitions.h"
#include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/path.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/public/session.h"
@@ -47,21 +48,19 @@ class GraphTransfererTest : public ::testing::Test {
GraphTransferer gt_;
};
-static const std::vector<string> OP_TYPES{
- "INPUT", "OUTPUT", "Conv2D", "MaxPool", "NoOp", "Add", "Const", "Softmax"};
const RemoteFusedGraphExecuteUtils::TensorShapeMap EMPTY_OUTPUT_TENSOR_MAP;
class TestGraphTransferOpsDefinitions : public IGraphTransferOpsDefinitions {
public:
- int GetTotalOpsCount() const final { return OP_TYPES.size(); }
+ int GetTotalOpsCount() const final { return op_types_.size(); }
-int GetOpIdFor(const string& op_type, const DataTypeVector&) const final {
- for (int i = 0; i < OP_TYPES.size(); ++i) {
- if (OP_TYPES[i] == op_type) {
- return i;
+ int GetOpIdFor(const string& op_type, const DataTypeVector&) const final {
+ for (int i = 0; i < op_types_.size(); ++i) {
+ if (op_types_[i] == op_type) {
+ return i;
+ }
}
- }
- return -1;
+ return -1;
}
GraphTransferInfo::Destination GetTransferDestination() const final {
@@ -69,6 +68,9 @@ GraphTransferInfo::Destination GetTransferDestination() const final {
}
private:
+ const std::vector<string> op_types_{"INPUT", "OUTPUT", "Conv2D",
+ "MaxPool", "NoOp", "Add",
+ "Const", "Softmax", "Identity"};
} TEST_GRAPH_TRANSFER_OPS_DEFINITIONS;
static Output BuildAddOps(const Scope& scope, const Input& x, const Input& y) {
@@ -312,7 +314,7 @@ TEST_F(GraphTransfererTest, LoadAddGraphWithOutputTensorMap) {
const std::vector<string> output_node_names = {NAME_A_PLUS_B};
status = gt_.LoadGraphFromProto(TEST_GRAPH_TRANSFER_OPS_DEFINITIONS, def,
inputs, output_node_names, false);
- ASSERT_TRUE(status.ok());
+ TF_ASSERT_OK(status);
}
TEST_F(GraphTransfererTest, LoadConvGraph) {
@@ -330,7 +332,7 @@ TEST_F(GraphTransfererTest, LoadConvGraph) {
gt_.GetGraphTransferInfo().const_node_info_size();
ASSERT_EQ(2, const_node_count);
const int op_node_count = gt_.GetGraphTransferInfo().node_info_size();
- ASSERT_EQ(3, op_node_count);
+ ASSERT_EQ(4, op_node_count);
const GraphTransferInfo::NodeInfo* params_conv = FindNodeInfo(gt_, "conv");
ASSERT_TRUE(params_conv != nullptr);
const int id = params_conv->node_id();
@@ -356,7 +358,7 @@ TEST_F(GraphTransfererTest, LoadMaxPoolGraph) {
gt_.GetGraphTransferInfo().const_node_info_size();
ASSERT_EQ(2, const_node_count);
const int op_node_count = gt_.GetGraphTransferInfo().node_info_size();
- ASSERT_EQ(3, op_node_count);
+ ASSERT_EQ(4, op_node_count);
const GraphTransferInfo::NodeInfo* params_max_pool =
FindNodeInfo(gt_, "maxpool");
ASSERT_TRUE(params_max_pool != nullptr);
diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
index 518b399c37..660ffd268d 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.cc
@@ -27,6 +27,8 @@ namespace tensorflow {
constexpr const char* const INPUT_OP_NAME = "INPUT";
constexpr const char* const OUTPUT_OP_NAME = "OUTPUT";
+constexpr int ALIGNMENT_BYTES = 16;
+
const bool DBG_DUMP_VERIFICATION_STRING = false;
const int DBG_LEVEL = 0; // -2: verbose, -1: debug, 0: info
const bool DBG_USE_DUMMY_INPUT = false;
@@ -34,6 +36,22 @@ const bool DBG_USE_SAMPLE_INPUT = false;
const int64 FLAG_ENABLE_PANDA_BINARY_INPUT = 0x01;
const bool DBG_DUMP_INPUT_TENSOR_AS_FLOAT_DATA = false;
+static string AddPort(const string& node_name) {
+ if (node_name.find(':') != string::npos) {
+ return node_name;
+ } else {
+ return strings::StrCat(node_name, ":", 0);
+ }
+}
+
+static uint8* FindAlignedPointer(uint8* ptr) {
+ const uintptr_t data_ptr_int = reinterpret_cast<uintptr_t>(ptr);
+ const int shift_count =
+ (ALIGNMENT_BYTES - data_ptr_int % ALIGNMENT_BYTES) % ALIGNMENT_BYTES;
+ uint8* data_ptr = ptr + shift_count;
+ return data_ptr;
+}
+
/* static */ GraphTransferInfo::NodeInfo* HexagonControlWrapper::FindNodeInfo(
const string& name, GraphTransferInfo* graph_transfer_info) {
for (GraphTransferInfo::NodeInfo& node_info :
@@ -60,18 +78,57 @@ bool HexagonControlWrapper::Init(const RemoteFusedGraphExecuteInfo& info) {
std::vector<string> outputs;
RemoteFusedGraphExecuteUtils::BuildRemoteGraphInputsAndOutputsFromProto(
info, &inputs, &outputs);
- graph_transferer_.LoadGraphFromProto(
+ Status status = graph_transferer_.LoadGraphFromProto(
HexagonOpsDefinitions::getInstance(), info.remote_graph(), inputs,
outputs,
false // shape_inference_for_unknown_shape
- );
+ );
+ TF_CHECK_OK(status) << status;
} else {
// If graph transfer info is attached, just import it.
graph_transferer_.SetSerializedGraphTransferInfo(
info.serialized_executor_parameters());
}
execute_info_ = &info;
- return soc_interface_Init();
+ bool success = soc_interface_Init();
+ if (!success) {
+    LOG(ERROR) << "Hexagon initialization failed. See log output.";
+ return false;
+ }
+ const GraphTransferInfo& gt_info = graph_transferer_.GetGraphTransferInfo();
+ std::vector<int> input_sizes;
+ std::vector<int> output_sizes;
+ CHECK_NOTNULL(execute_info_);
+ for (int i = 0; i < execute_info_->graph_input_node_name_size(); ++i) {
+ const string& input = execute_info_->graph_input_node_name(i);
+ LOG(INFO) << "Add input: " << input << ", " << i;
+ CHECK(input_port_map_.emplace(AddPort(input), i).second);
+ const RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& shape_type =
+ execute_info_->default_graph_input_tensor_shape(i);
+ int64 buf_size = DataTypeSize(shape_type.dtype());
+ for (const TensorShapeProto::Dim& dim : shape_type.shape().dim()) {
+ buf_size *= dim.size();
+ }
+ input_sizes.emplace_back(static_cast<int>(buf_size));
+ }
+ for (int i = 0; i < execute_info_->graph_output_node_name_size(); ++i) {
+ const string& output = execute_info_->graph_output_node_name(i);
+ CHECK(output_port_map_.emplace(AddPort(output), i).second);
+ const RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& shape_type =
+ execute_info_->default_graph_output_tensor_shape(i);
+
+ int64 buf_size = DataTypeSize(shape_type.dtype());
+ for (const TensorShapeProto::Dim& dim : shape_type.shape().dim()) {
+ buf_size *= dim.size();
+ }
+ output_sizes.emplace_back(static_cast<int>(buf_size));
+ }
+
+ LOG(INFO) << "Allocate inout buffer";
+ success &= soc_interface_AllocateInOutNodeBuffers(
+ input_sizes.size(), input_sizes.data(), output_sizes.size(),
+ output_sizes.data());
+ return success;
}
bool HexagonControlWrapper::Finalize() { return soc_interface_Finalize(); }
@@ -86,9 +143,6 @@ bool HexagonControlWrapper::SetupGraph() {
GraphTransferInfo::NodeInfo* node_info =
FindNodeInfo(graph_input.name(), &graph_transfer_info);
CHECK_NE(node_info, nullptr);
- node_info->set_type_name(INPUT_OP_NAME);
- node_info->set_soc_op_id(
- HexagonOpsDefinitions::getInstance().GetOpIdFor(INPUT_OP_NAME, {}));
}
// Generate a new output node which is connected to graph output node
@@ -202,12 +256,8 @@ bool HexagonControlWrapper::SetupGraph() {
auto data = dummy_const_data_.emplace(
std::piecewise_construct, std::make_tuple(node_id), std::make_tuple());
CHECK(data.second);
- const int additional_bytes_for_alignment = 16;
- data.first->second.resize(data_size + additional_bytes_for_alignment - 1);
- const uintptr_t data_ptr_int =
- reinterpret_cast<uintptr_t>(data.first->second.data());
- const int shift_count = (16 - data_ptr_int % 16) % 16;
- uint8* data_ptr = data.first->second.data() + shift_count;
+ data.first->second.resize(data_size + ALIGNMENT_BYTES - 1);
+ uint8* data_ptr = FindAlignedPointer(data.first->second.data());
std::memcpy(data_ptr, params.data().data(), data_size);
soc_interface_AppendConstNode(params.name().c_str(),
node_id + NODE_ID_OFFSET, shape_0, shape_1,
@@ -267,27 +317,37 @@ bool HexagonControlWrapper::TeardownGraph() {
return soc_interface_TeardownGraph();
}
-bool HexagonControlWrapper::FillInputNode(const string& node_name,
- const ConstByteArray bytes) {
- uint64 byte_size;
- const int x = 1;
- const int y = 299;
- const int z = 299;
- const int d = 3;
+bool HexagonControlWrapper::FillInputNode(
+ const string& node_name,
+ const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>& shape,
+ const ConstByteArray bytes) {
+ const string tensor_name = AddPort(node_name);
+ CHECK(input_port_map_.count(tensor_name) > 0);
+ const int port = input_port_map_.at(tensor_name);
+ if (input_tensor_data_.count(port) <= 0) {
+ input_tensor_data_.emplace(port, std::vector<uint8>{});
+ }
+ std::vector<uint8>& input_tensor_data = input_tensor_data_.at(port);
+
+  // Hexagon only supports 32-bit dimensions
+ const int x = static_cast<int>(shape[0]);
+ const int y = static_cast<int>(shape[1]);
+ const int z = static_cast<int>(shape[2]);
+ const int d = static_cast<int>(shape[3]);
+
+ const uint64 byte_size = x * y * z * d * DataTypeSize(std::get<2>(bytes));
+ CHECK_EQ(byte_size, std::get<1>(bytes));
+ input_tensor_data.resize(byte_size + ALIGNMENT_BYTES);
+ uint8* data_ptr = FindAlignedPointer(input_tensor_data.data());
+
if (DBG_USE_DUMMY_INPUT) {
- const int array_length = x * y * z * d;
- byte_size = array_length * sizeof(float);
- dummy_input_float_.resize(array_length);
- std::memset(dummy_input_float_.data(), 0, byte_size);
+ std::memset(data_ptr, 0, byte_size);
} else {
- CHECK(std::get<2>(bytes) == DT_FLOAT);
- byte_size = std::get<1>(bytes);
- dummy_input_float_.resize(byte_size / sizeof(float));
- std::memcpy(dummy_input_float_.data(), std::get<0>(bytes), byte_size);
+ std::memcpy(data_ptr, std::get<0>(bytes), byte_size);
}
- return soc_interface_FillInputNodeFloat(
- x, y, z, d, reinterpret_cast<uint8*>(dummy_input_float_.data()),
- byte_size);
+
+ return soc_interface_FillInputNodeWithPort(port, x, y, z, d, data_ptr,
+ byte_size);
}
bool HexagonControlWrapper::ReadOutputNode(
@@ -304,26 +364,28 @@ bool HexagonControlWrapper::ReadOutputNode(
break;
}
}
- std::vector<IRemoteFusedGraphExecutor::ByteArray> outputs;
+ std::vector<ByteArray> outputs;
ReadOutputNode(node_name, &outputs);
CHECK_EQ(1, outputs.size());
- IRemoteFusedGraphExecutor::ByteArray& output = outputs[0];
+ ByteArray& output = outputs[0];
Tensor* output_tensor = tensor_allocator(output_shape);
CHECK(output_tensor->TotalBytes() >= std::get<1>(output))
<< output_tensor->TotalBytes() << ", " << std::get<1>(output);
- // TODO(satok): Avoid specifying float
- std::memcpy(output_tensor->flat<float>().data(), std::get<0>(output),
- std::get<1>(output));
+ TF_CHECK_OK(RemoteFusedGraphExecuteUtils::CopyByteArrayToTensor(
+ std::get<0>(output), std::get<1>(output), output_tensor));
}
bool HexagonControlWrapper::ReadOutputNode(
const string& node_name, std::vector<ByteArray>* const outputs) {
CHECK(outputs != nullptr);
ByteArray output;
- soc_interface_ReadOutputNodeFloat(node_name.c_str(), &std::get<0>(output),
- &std::get<1>(output));
+ const string tensor_name = AddPort(node_name);
+ CHECK(output_port_map_.count(tensor_name) > 0);
+ const int port = output_port_map_.at(tensor_name);
+ soc_interface_ReadOutputNodeWithPort(port, &std::get<0>(output),
+ &std::get<1>(output));
// TODO: Accept all results
- std::get<2>(output) = DT_FLOAT;
+ // std::get<2>(output) = DT_FLOAT;
outputs->emplace_back(output);
return true;
}
@@ -347,7 +409,9 @@ bool HexagonControlWrapper::FillInputNode(const string& node_name,
}
}
}
- FillInputNode(node_name, ba);
+ const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE> shape =
+ GraphTransferer::ToTensorShapeArray(tensor.shape());
+ FillInputNode(node_name, shape, ba);
return true;
}
@@ -360,7 +424,9 @@ bool HexagonControlWrapper::Finalize() { return false; }
bool HexagonControlWrapper::SetupGraph() { return false; }
bool HexagonControlWrapper::ExecuteGraph() { return false; }
bool HexagonControlWrapper::TeardownGraph() { return false; }
-bool HexagonControlWrapper::FillInputNode(const string&, const ConstByteArray) {
+bool HexagonControlWrapper::FillInputNode(
+ const string&, const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>&,
+ const ConstByteArray) {
return false;
}
bool HexagonControlWrapper::FillInputNode(const string&, const Tensor&) {
diff --git a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h
index 97448884e1..209ac9dbf4 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h
+++ b/tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h
@@ -16,6 +16,7 @@ limitations under the License.
#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_CONTROL_WRAPPER_H_
#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_HEXAGON_CONTROL_WRAPPER_H_
+#include <unordered_map>
#include <vector>
#include "tensorflow/core/framework/types.h"
@@ -32,6 +33,9 @@ namespace tensorflow {
*/
class HexagonControlWrapper final : public IRemoteFusedGraphExecutor {
public:
+ using ByteArray =
+ std::tuple<uint8* /* data */, uint64 /* size */, DataType /* type */>;
+
HexagonControlWrapper() = default;
int GetVersion() final;
bool Init(const RemoteFusedGraphExecuteInfo& info) final;
@@ -45,7 +49,13 @@ class HexagonControlWrapper final : public IRemoteFusedGraphExecutor {
bool ReadOutputNode(const string& node_name, std::vector<ByteArray>* outputs);
private:
- bool FillInputNode(const string& node_name, const ConstByteArray bytes);
+ using ConstByteArray = std::tuple<const uint8* /* data */, uint64 /* size */,
+ DataType /* type */>;
+
+ bool FillInputNode(
+ const string& node_name,
+ const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>& shape,
+ const ConstByteArray bytes);
// CAVEAT: Need offset as HVX library reserves some ids
static constexpr int NODE_ID_OFFSET = 0x10000;
@@ -57,11 +67,15 @@ class HexagonControlWrapper final : public IRemoteFusedGraphExecutor {
GraphTransferer graph_transferer_{};
// Dummy float array for input node.
// TODO(satok): Use actual data passed by FillInputNode and remove
- std::vector<float> dummy_input_float_{};
+ // std::vector<float> dummy_input_float_{};
+ std::unordered_map<int, std::vector<uint8>> input_tensor_data_{};
// Dummy byte array for cosnt node.
// TODO(satok): Remove
std::unordered_map<int, std::vector<uint8>> dummy_const_data_{};
+ std::unordered_map<string, int> input_port_map_{};
+ std::unordered_map<string, int> output_port_map_{};
+
TF_DISALLOW_COPY_AND_ASSIGN(HexagonControlWrapper);
};
diff --git a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
index 54ba101501..cb9091e29f 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
@@ -46,8 +46,7 @@ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
namespace tensorflow {
-using ByteArray = IRemoteFusedGraphExecutor::ByteArray;
-using ConstByteArray = IRemoteFusedGraphExecutor::ConstByteArray;
+using ByteArray = HexagonControlWrapper::ByteArray;
constexpr const char* const IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
constexpr const char* const MODEL_FILENAME =
@@ -87,8 +86,7 @@ static void DumpTop10Results(const int byte_size,
10 /* show top_n results */);
}
-static void DumpTop10Results(
- const std::vector<IRemoteFusedGraphExecutor::ByteArray>& outputs) {
+static void DumpTop10Results(const std::vector<ByteArray>& outputs) {
CHECK(outputs.size() == 1);
const int byte_size = std::get<1>(outputs.at(0));
const float* float_array =
@@ -96,9 +94,8 @@ static void DumpTop10Results(
DumpTop10Results(byte_size, float_array);
}
-static void CheckFirstResult(
- const std::vector<IRemoteFusedGraphExecutor::ByteArray>& outputs,
- const int expected_first_id) {
+static void CheckFirstResult(const std::vector<ByteArray>& outputs,
+ const int expected_first_id) {
EXPECT_GE(outputs.size(), 1);
const int byte_size = std::get<1>(outputs.at(0));
const int element_count = byte_size / sizeof(float);
@@ -240,7 +237,7 @@ static void RunInferenceByHexagonControlWrapper(
}
// 5-1. Read output node's outputs
- std::vector<IRemoteFusedGraphExecutor::ByteArray> outputs;
+ std::vector<ByteArray> outputs;
hexagon_control_wrapper.ReadOutputNode("softmax", &outputs);
// 5-2. Dump results
diff --git a/tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc b/tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc
index a4b79e6ec4..2b7585aed1 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_ops_definitions.cc
@@ -350,6 +350,8 @@ HexagonOpsDefinitions::BuildOpNameToSocOpTypeMap() {
#ifdef ENABLE_EXPERIMENTAL_HEXNN_OPS
EmplaceOpType("QuantizedMul", {}, SupportedOpType::QUANTIZED_MUL_8x8to32,
&op_map);
+ EmplaceOpType("QuantizedAdd", {}, SupportedOpType::QUANTIZED_ADD_8p8to32,
+ &op_map);
EmplaceOpType("Pad", {}, SupportedOpType::PAD_F, &op_map);
EmplaceOpType("SpaceToBatchND", {}, SupportedOpType::SPACE_TO_BATCH_ND_F,
&op_map),
@@ -359,6 +361,11 @@ HexagonOpsDefinitions::BuildOpNameToSocOpTypeMap() {
&op_map);
EmplaceOpType("ConcatV2", {}, SupportedOpType::CONCAT_V2_F, &op_map);
EmplaceOpType("Conv2DBackpropInput", {}, SupportedOpType::DECONV_F, &op_map);
+
+ EmplaceOpType("Tanh", {}, SupportedOpType::TANH_F, &op_map);
+ EmplaceOpType("Split", {}, SupportedOpType::SPLIT_F, &op_map);
+ EmplaceOpType("Transpose", {}, SupportedOpType::TRANSPOSE_F, &op_map);
+ EmplaceOpType("Concat", {}, SupportedOpType::CONCAT_F, &op_map);
#endif
return op_map;
};
diff --git a/tensorflow/core/kernels/i_remote_fused_graph_executor.h b/tensorflow/core/kernels/i_remote_fused_graph_executor.h
index fe62a259de..09d1f43ff1 100644
--- a/tensorflow/core/kernels/i_remote_fused_graph_executor.h
+++ b/tensorflow/core/kernels/i_remote_fused_graph_executor.h
@@ -25,10 +25,6 @@ namespace tensorflow {
class IRemoteFusedGraphExecutor {
public:
- using ByteArray =
- std::tuple<uint8* /* data */, uint64 /* size */, DataType /* type */>;
- using ConstByteArray = std::tuple<const uint8* /* data */, uint64 /* size */,
- DataType /* type */>;
using TensorAllocatorFunc = std::function<Tensor*(const TensorShape& shape)>;
IRemoteFusedGraphExecutor() = default;
diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc b/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc
index 103b2be691..dd9839d245 100644
--- a/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc
+++ b/tensorflow/core/kernels/remote_fused_graph_execute_utils.cc
@@ -1280,6 +1280,69 @@ RemoteFusedGraphExecuteUtils::FuseRemoteGraphByPlacedArguments(
return true;
}
+/* static */ Status RemoteFusedGraphExecuteUtils::CopyByteArrayToTensor(
+ const void* src_ptr, const int src_size, Tensor* tensor) {
+ CHECK(tensor->TotalBytes() >= src_size)
+ << tensor->TotalBytes() << ", " << src_size;
+ void* dst_ptr;
+ switch (tensor->dtype()) {
+ case DT_FLOAT:
+ dst_ptr = tensor->flat<float>().data();
+ break;
+ case DT_DOUBLE:
+ dst_ptr = tensor->flat<double>().data();
+ break;
+ case DT_INT32:
+ dst_ptr = tensor->flat<int32>().data();
+ break;
+ case DT_UINT8:
+ dst_ptr = tensor->flat<uint8>().data();
+ break;
+ case DT_INT16:
+ dst_ptr = tensor->flat<int16>().data();
+ break;
+ case DT_INT8:
+ dst_ptr = tensor->flat<int8>().data();
+ break;
+ case DT_STRING:
+ dst_ptr = tensor->flat<string>().data();
+ break;
+ case DT_INT64:
+ dst_ptr = tensor->flat<int64>().data();
+ break;
+ case DT_BOOL:
+ dst_ptr = tensor->flat<bool>().data();
+ break;
+ case DT_QINT8:
+ dst_ptr = tensor->flat<qint8>().data();
+ break;
+ case DT_QUINT8:
+ dst_ptr = tensor->flat<quint8>().data();
+ break;
+ case DT_QINT32:
+ dst_ptr = tensor->flat<qint32>().data();
+ break;
+ case DT_BFLOAT16:
+ dst_ptr = tensor->flat<bfloat16>().data();
+ break;
+ case DT_QINT16:
+ dst_ptr = tensor->flat<qint16>().data();
+ break;
+ case DT_QUINT16:
+ dst_ptr = tensor->flat<quint16>().data();
+ break;
+ case DT_UINT16:
+ dst_ptr = tensor->flat<uint16>().data();
+ break;
+ default:
+ CHECK(false) << "type " << tensor->dtype() << " is not supported.";
+ break;
+ }
+ CHECK_NOTNULL(dst_ptr);
+ std::memcpy(dst_ptr, src_ptr, src_size);
+ return Status::OK();
+}
+
/* static */ Status RemoteFusedGraphExecuteUtils::ReplaceInputNodeByPlaceHolder(
const string& input, const DataType type, const TensorShape& shape,
GraphDef* graph_def) {
diff --git a/tensorflow/core/kernels/remote_fused_graph_execute_utils.h b/tensorflow/core/kernels/remote_fused_graph_execute_utils.h
index a80fc79784..1d4423ed46 100644
--- a/tensorflow/core/kernels/remote_fused_graph_execute_utils.h
+++ b/tensorflow/core/kernels/remote_fused_graph_execute_utils.h
@@ -157,7 +157,7 @@ class RemoteFusedGraphExecuteUtils {
const std::vector<std::pair<string, Tensor>>& input_tensors,
const bool dry_run_inference, GraphDef* graph_def);
- // Build remote fused graph execute info
+ // Build remote fused graph execute info.
static Status BuildRemoteFusedGraphExecuteInfo(
const string& executor_name, const GraphDef& subgraph_def,
const std::vector<string>& inputs, const std::vector<string>& outputs,
@@ -165,31 +165,31 @@ class RemoteFusedGraphExecuteUtils {
DataTypeVector* input_types, DataTypeVector* output_types);
// Build remote fused graph execute op node by fusing specified subgraph
- // as remote fused graph execute info
+ // as remote fused graph execute info.
static Status BuildRemoteFusedGraphExecuteOpNode(
const string& node_name, const string& executor_name,
const GraphDef& subgraph_def, const std::vector<string>& inputs,
const std::vector<string>& outputs, const bool require_shape_type,
Graph* graph, Node** created_node);
- // Build Identity node to forward remote graph node output
+ // Build Identity node to forward remote graph node output.
static Status BuildIdentityOpNode(const string& node_name,
const string& input_node_name,
const int input_node_port,
const DataType dt, Graph* graph,
Node** created_node);
- // Create clusters of given nodes
+ // Create clusters of given nodes.
static Status ClusterizeNodes(const std::unordered_set<string>& node_names,
const GraphDef& graph_def,
std::vector<ClusterInfo>* cluster_infos);
- // Build GraphDef of a given cluster
+ // Build GraphDef of a given cluster.
static Status BuildClusterSubgraphDef(const ClusterInfo& cluster,
const GraphDef& graph_def,
GraphDef* subgraph_def);
- // Build a cluster by given border
+ // Build a cluster by given border.
// CAVEAT: The border must be consistent for one cluster.
static Status BuildClusterByBorder(const std::vector<string>& border_inputs,
const std::vector<string>& border_outputs,
@@ -211,7 +211,7 @@ class RemoteFusedGraphExecuteUtils {
const bool require_shape_type,
GraphDef* output_graph_def);
- // Fuse subgraph of specified nodes
+ // Fuse subgraph of specified nodes.
static Status FuseRemoteGraphByNodeNames(
const GraphDef& input_graph_def, const std::vector<string>& inputs,
const std::vector<string>& outputs,
@@ -220,7 +220,7 @@ class RemoteFusedGraphExecuteUtils {
const string& remote_fused_graph_executor_name,
const bool require_shape_type, GraphDef* output_graph_def);
- // Fuse subgraph of specified border
+ // Fuse subgraph of specified border.
static Status FuseRemoteGraphByBorder(
const GraphDef& input_graph_def, const std::vector<string>& inputs,
const std::vector<string>& outputs,
@@ -230,7 +230,7 @@ class RemoteFusedGraphExecuteUtils {
const string& remote_graph_executor_name, const bool require_shape_type,
GraphDef* output_graph_def);
- // Place arguments to fuse remote graph
+ // Place arguments to fuse remote graph.
static Status PlaceRemoteGraphArguments(
const std::vector<string>& inputs, const std::vector<string>& outputs,
const std::unordered_set<string>& fused_node_names,
@@ -239,7 +239,7 @@ class RemoteFusedGraphExecuteUtils {
const string& remote_fused_graph_node_name,
const string& remote_graph_executor_name, GraphDef* graph_def);
- // Fuse remote graph by placed arguments
+ // Fuse remote graph by placed arguments.
static Status FuseRemoteGraphByPlacedArguments(
const GraphDef& input_graph_def,
const std::vector<std::pair<string, Tensor>>& input_tensors,
@@ -249,6 +249,15 @@ class RemoteFusedGraphExecuteUtils {
const GraphDef& input_graph_def,
const std::vector<std::pair<string, Tensor>>& input_tensors);
+  // Copy a byte array into a tensor's data. Although tensor data should be
+  // updated with typed information in general, we can't guarantee that
+  // values returned from a remote processor carry type information, because
+  // the logic running on the remote processor may live in a separate binary
+  // that does not link TensorFlow libraries. To deal with this situation,
+  // the remote fused graph needs to overwrite the tensor data with a byte array.
+ static Status CopyByteArrayToTensor(const void* src_ptr, const int src_size,
+ Tensor* tensor);
+
private:
static void EmplaceTensorShapeType(const string& name, const Tensor& tensor,
TensorShapeMap* tensor_shape_map);
diff --git a/tensorflow/core/platform/hexagon/soc_interface.h b/tensorflow/core/platform/hexagon/soc_interface.h
index f4a3cdf4bd..ca37b63e2b 100644
--- a/tensorflow/core/platform/hexagon/soc_interface.h
+++ b/tensorflow/core/platform/hexagon/soc_interface.h
@@ -22,6 +22,8 @@ limitations under the License.
// naming conflicts.
#ifdef __cplusplus
extern "C" {
+#else
+#include <stdbool.h>
#endif // __cplusplus
// Returns the version of loaded hexagon wrapper shared library.
// You should assert that the version matches the expected version before
@@ -39,13 +41,30 @@ bool soc_interface_Finalize();
bool soc_interface_ExecuteGraph();
// Teardown graph setup
bool soc_interface_TeardownGraph();
+
+// Allocate buffers for input node and output node
+bool soc_interface_AllocateInOutNodeBuffers(int input_count, int* input_sizes,
+ int output_count,
+ int* output_sizes);
+
+// Send input data to SOC with port
+bool soc_interface_FillInputNodeWithPort(int port, int x, int y, int z, int d,
+ const uint8_t* const buf,
+ uint64_t buf_byte_size);
+
// Send input data to SOC
bool soc_interface_FillInputNodeFloat(int x, int y, int z, int d,
const uint8_t* const buf,
- uint64_t buf_size);
+ uint64_t buf_byte_size);
+
+// Load output data from SOC with port
+bool soc_interface_ReadOutputNodeWithPort(int port, uint8_t** buf,
+ uint64_t* buf_byte_size);
+
// Load output data from SOC
bool soc_interface_ReadOutputNodeFloat(const char* const node_name,
- uint8_t** buf, uint64_t* buf_size);
+ uint8_t** buf, uint64_t* buf_byte_size);
+
// Setup graph
// TODO(satok): Remove and use runtime version
bool soc_interface_setupDummyGraph(int version);