path: root/tensorflow/core/kernels/xsmm_conv2d_test.cc
diff options
Diffstat (limited to 'tensorflow/core/kernels/xsmm_conv2d_test.cc')
1 files changed, 327 insertions, 1 deletions
diff --git a/tensorflow/core/kernels/xsmm_conv2d_test.cc b/tensorflow/core/kernels/xsmm_conv2d_test.cc
index d81368314c..f4ab6896ae 100644
--- a/tensorflow/core/kernels/xsmm_conv2d_test.cc
+++ b/tensorflow/core/kernels/xsmm_conv2d_test.cc
@@ -15,13 +15,339 @@ limitations under the License.
#include "tensorflow/core/kernels/conv_ops.h"
#include "tensorflow/core/platform/test.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/graph/node_builder.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
+#include "include/libxsmm.h"
+#include "tensorflow/core/framework/fake_input.h"
namespace tensorflow {
namespace {
+typedef struct {  // Geometry of one 2-D convolution; every field is read by naive_conv_fp.
+ int nImg;  // number of images (mini-batch size, "N")
+ int nIfm;  // number of input feature maps ("C")
+ int nOfm;  // number of output feature maps ("K")
+ int ifhp;  // rows per input plane as stored (the test sets ifh + 2 * pad_h_in)
+ int ifwp;  // columns per input row as stored (the test sets ifw + 2 * pad_w_in)
+ int ifh;  // logical input height; filter taps on rows outside [0, ifh) are skipped
+ int ifw;  // logical input width; filter taps on columns outside [0, ifw) are skipped
+ int ofhp;  // rows per output plane as stored (the test sets ofh + 2 * pad_h_out)
+ int ofwp;  // columns per output row as stored (the test sets ofw + 2 * pad_w_out)
+ int ofh;  // number of output rows computed
+ int ofw;  // number of output columns computed
+ int pad_h;  // subtracted from oj * stride_h to get the first input row of a window
+ int pad_w;  // subtracted from oi * stride_w to get the first input column of a window
+ int pad_h_in;  // used only to offset the input base pointer
+ int pad_w_in;  // used only to offset the input base pointer
+ int pad_h_out;  // used only to offset the output base pointer
+ int pad_w_out;  // used only to offset the output base pointer
+ int kh;  // filter height ("R")
+ int kw;  // filter width ("S")
+ int stride_h;  // input rows advanced per output row
+ int stride_w;  // input columns advanced per output column
+} naive_conv_t;
+// Reorders a dense float buffer from N-C-H-W order into N-H-W-C order.
+//
+// nchw: source buffer, read through the LIBXSMM_VLA_* macros at
+//       (n, c, h, w) with extents (C, H, W).
+// nhwc: destination tensor; its flat<float>() view is written at every index
+//       in [0, N*H*W*C), so it must hold at least that many elements.
+// N, H, W, C: extents of the four logical dimensions.
+LIBXSMM_INLINE void naive_copy_NCHW_to_NHWC(const float* nchw, Tensor &nhwc, int N, int H, int W, int C)
+{
+  LIBXSMM_VLA_DECL(4, const float, input, nchw, C, H, W);
+  auto output = nhwc.flat<float>();
+  for (int n = 0; n < N; n++) {
+    for (int h = 0; h < H; h++) {
+      for (int w = 0; w < W; w++) {
+        for (int c = 0; c < C; c++) {
+          // Destination index is the row-major offset of (n, h, w, c).
+          output(n*H*W*C + h*W*C + w*C + c) =
+              LIBXSMM_VLA_ACCESS(4, input, n, c, h, w, C, H, W);
+        }
+      }
+    }
+  }
+}
+// Reorders a dense float filter buffer from K-C-R-S order into R-S-C-K order.
+//
+// kcrs: source buffer, read through the LIBXSMM_VLA_* macros at
+//       (k, c, r, s) with extents (C, R, S).
+// rsck: destination tensor; its flat<float>() view is written at every index
+//       in [0, R*S*C*K), so it must hold at least that many elements.
+// R, S, C, K: extents of the four logical dimensions.
+LIBXSMM_INLINE void naive_copy_KCRS_to_RSCK(const float* kcrs, Tensor &rsck, int R, int S, int C, int K)
+{
+  LIBXSMM_VLA_DECL(4, const float, input, kcrs, C, R, S);
+  auto output = rsck.flat<float>();
+  for (int r = 0; r < R; r++) {
+    for (int s = 0; s < S; s++) {
+      for (int c = 0; c < C; c++) {
+        for (int k = 0; k < K; k++) {
+          // Destination index is the row-major offset of (r, s, c, k).
+          output(r*S*C*K + s*C*K + c*K + k) =
+              LIBXSMM_VLA_ACCESS(4, input, k, c, r, s, C, R, S);
+        }
+      }
+    }
+  }
+}
+// Sets buf[0 .. size) to 0.0f. Does nothing when size <= 0.
+LIBXSMM_INLINE void zero_buf(float* buf, long size) {
+  // The counter has the same type as `size`: an `int` counter would overflow
+  // before reaching a `size` larger than INT_MAX.
+  for (long i = 0; i < size; ++i) {
+    buf[i] = 0.0f;
+  }
+}
+// Copies src[0 .. size) into the flat<float>() view of `dst`, element i to
+// index i. `dst` must hold at least `size` elements.
+LIBXSMM_INLINE void copy_buf(Tensor &dst,float *src,long size) {
+  auto output = dst.flat<float>();
+  for (long i = 0; i < size; ++i) {
+    output(i) = src[i];
+  }
+}
+// Fills buf[0 .. size) with test data:
+//   initOne != 0       -> every element is 1.0f;
+//   else initPos != 0  -> each element is drand48();
+//   otherwise          -> each element is 0.05 - drand48()/10.0.
+LIBXSMM_INLINE void init_buf(float* buf, long size, int initPos, int initOne)
+{
+  // No pre-zeroing pass is needed: every element in range is assigned below.
+  // The counter is `long` to match `size`.
+  for (long i = 0; i < size; ++i) {
+    buf[i] = static_cast<float>(
+        (initOne != 0) ? 1.0
+                       : ((initPos != 0) ? drand48()
+                                         : (0.05 - drand48()/10.0)));
+  }
+}
+// Reference (naive) forward convolution that accumulates into `output`.
+//
+// param:  convolution geometry; every field is read.
+// input:  indexed as (img, ifm, row, col) with extents (nIfm, ifhp, ifwp).
+// output: indexed as (img, ofm, row, col) with extents (nOfm, ofhp, ofwp).
+//         Results are added with +=, so the caller must clear the buffer
+//         first to obtain a plain convolution.
+// filter: indexed as (ofm, ifm, kj, ki) with extents (nIfm, kh, kw).
+//
+// Filter taps that land outside rows [0, ifh) or columns [0, ifw) of the
+// input are skipped.
+LIBXSMM_INLINE void naive_conv_fp(naive_conv_t* param, const float* input, float* output, const float* filter)
+{
+  const int nImg = param->nImg;
+  const int nIfm = param->nIfm;
+  const int nOfm = param->nOfm;
+  const int ifhp = param->ifhp;
+  const int ifwp = param->ifwp;
+  const int ofhp = param->ofhp;
+  const int ofwp = param->ofwp;
+  const int ifh = param->ifh;
+  const int ifw = param->ifw;
+  const int ofh = param->ofh;
+  const int ofw = param->ofw;
+  const int pad_h = param->pad_h;
+  const int pad_w = param->pad_w;
+  const int pad_h_in = param->pad_h_in;
+  const int pad_w_in = param->pad_w_in;
+  const int pad_h_out = param->pad_h_out;
+  const int pad_w_out = param->pad_w_out;
+  const int kh = param->kh;
+  const int kw = param->kw;
+  const int stride_h = param->stride_h;
+  const int stride_w = param->stride_w;
+
+  // Base pointers skip the border of each buffer: pad_h whole rows (each
+  // ifwp / ofwp elements wide) followed by pad_w columns. The row padding
+  // must be the factor multiplied by the row width; using the column padding
+  // there only happens to give the same offset when both paddings are equal.
+  LIBXSMM_VLA_DECL(4, float, output_t, output + (pad_h_out * ofwp + pad_w_out), nOfm, ofhp, ofwp);
+  LIBXSMM_VLA_DECL(4, const float, input_t, input + (pad_h_in * ifwp + pad_w_in), nIfm, ifhp, ifwp);
+  LIBXSMM_VLA_DECL(4, const float, filter_t, filter, nIfm, kh, kw);
+
+  for (int img = 0; img < nImg; ++img) {
+    for (int ofm = 0; ofm < nOfm; ++ofm) {
+      for (int ifm = 0; ifm < nIfm; ++ifm) {
+        for (int oj = 0; oj < ofh; ++oj) {
+          // First input row covered by the filter window for output row oj.
+          const int ij = oj * stride_h - pad_h;
+          for (int oi = 0; oi < ofw; ++oi) {
+            // First input column covered by the window for output column oi.
+            const int ii = oi * stride_w - pad_w;
+            for (int kj = 0; kj < kh; ++kj) {
+              if (ij + kj < 0 || ij + kj >= ifh) continue;
+              for (int ki = 0; ki < kw; ++ki) {
+                if (ii + ki < 0 || ii + ki >= ifw) continue;
+                LIBXSMM_VLA_ACCESS(4, output_t, img, ofm, oj, oi, nOfm, ofhp, ofwp) +=
+                    LIBXSMM_VLA_ACCESS(4, input_t, img, ifm, ij + kj, ii + ki, nIfm, ifhp, ifwp)
+                    * LIBXSMM_VLA_ACCESS(4, filter_t, ofm, ifm, kj, ki, nIfm, kh, kw);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
void RunXsmmVsGeneric() {}  // Empty placeholder: has no body and is not called by any code visible in this file.
-TEST(XsmmConv2DTest, Basic) {}
+// Test fixture that builds a float Conv2D op on top of OpsTestBase.
+class XsmmConv2DTest : public OpsTestBase {
+ protected:
+  // Defines a "Conv2D" node named "xsmm" with two DT_FLOAT inputs, strides
+  // {1, stride, stride, 1} and "VALID" padding, then initializes the op.
+  // A failure to build the node definition aborts via TF_CHECK_OK; a failure
+  // to initialize the op is reported through TF_ASSERT_OK.
+  void MakeOp(int stride) {
+    TF_CHECK_OK(NodeDefBuilder("xsmm", "Conv2D")
+                    .Input(FakeInput(DT_FLOAT))
+                    .Input(FakeInput(DT_FLOAT))
+                    .Attr("strides", {1, stride, stride, 1})
+                    .Attr("padding", "VALID")
+                    .Finalize(node_def()));
+    TF_ASSERT_OK(InitOp());
+  }
+};
+// Runs the Conv2D op (stride 1, "VALID" padding) on pseudo-random data and
+// checks its output against naive_conv_fp to within 1e-5.
+TEST_F(XsmmConv2DTest, Basic) {
+  MakeOp(1);
+
+  int ifw = 14;   /* input width, "W" */
+  int ifh = 14;   /* input height, "H" */
+  int nImg = 32;  /* mini-batch size, "N" */
+  int nIfm = 64;  /* number of input feature maps, "C" */
+  int nOfm = 64;  /* number of output feature maps, "K" */
+  int kh = 3;     /* filter height, "R" */
+  int kw = 3;     /* filter width, "S" */
+  int pad = 0;    /* padding of the input (used for pad_h/pad_w and pad_*_in) */
+  int stride = 1; /* stride when accessing inputs */
+
+  int stride_w = stride;
+  int stride_h = stride;
+  int pad_h = pad;
+  int pad_w = pad;
+  int pad_h_in = pad_h;
+  int pad_w_in = pad_w;
+  int pad_h_out = 0;
+  int pad_w_out = 0;
+
+  /* deriving some values for naive code */
+  int ofh = (ifh + 2 * pad_h - kh) / stride_h + 1;
+  int ofw = (ifw + 2 * pad_w - kw) / stride_w + 1;
+  int ifhp = ifh + 2 * pad_h_in;
+  int ifwp = ifw + 2 * pad_w_in;
+  int ofhp = ofh + 2 * pad_h_out;
+  int ofwp = ofw + 2 * pad_w_out;
+
+  // Element counts of the three reference buffers, computed once and reused
+  // for allocation and initialization.
+  const long input_elems = static_cast<long>(nImg) * nIfm * ifhp * ifwp;
+  const long output_elems = static_cast<long>(nImg) * nOfm * ofhp * ofwp;
+  const long filter_elems = static_cast<long>(nOfm) * nIfm * kh * kw;
+
+  /* allocate data (second argument 2097152 = 2 * 1024 * 1024) */
+  float *naive_input = static_cast<float*>(
+      libxsmm_aligned_malloc(input_elems * sizeof(float), 2097152));
+  float *naive_output = static_cast<float*>(
+      libxsmm_aligned_malloc(output_elems * sizeof(float), 2097152));
+  float *naive_filter = static_cast<float*>(
+      libxsmm_aligned_malloc(filter_elems * sizeof(float), 2097152));
+
+  /* initialize data; the output starts at zero because naive_conv_fp
+     accumulates into it */
+  init_buf(naive_input, input_elems, 0, 0);
+  zero_buf(naive_output, output_elems);
+  init_buf(naive_filter, filter_elems, 0, 0);
+
+  // Re-lay the reference data out in the order the op consumes:
+  // image as N-H-W-C, filter as R-S-C-K.
+  Tensor image(DT_FLOAT, {nImg, ifhp, ifwp, nIfm});
+  Tensor filter(DT_FLOAT, {kh, kw, nIfm, nOfm});
+  naive_copy_NCHW_to_NHWC(naive_input, image, nImg, ifhp, ifwp, nIfm);
+  naive_copy_KCRS_to_RSCK(naive_filter, filter, kh, kw, nIfm, nOfm);
+
+  // Run naive convolution
+  naive_conv_t naive_param;
+  naive_param.nImg = nImg;
+  naive_param.nIfm = nIfm;
+  naive_param.nOfm = nOfm;
+  naive_param.ifhp = ifhp;
+  naive_param.ifwp = ifwp;
+  naive_param.ofhp = ofhp;
+  naive_param.ofwp = ofwp;
+  naive_param.ifh = ifh;
+  naive_param.ifw = ifw;
+  naive_param.ofh = ofh;
+  naive_param.ofw = ofw;
+  naive_param.pad_h = pad_h;
+  naive_param.pad_w = pad_w;
+  naive_param.pad_h_in = pad_h_in;
+  naive_param.pad_w_in = pad_w_in;
+  naive_param.pad_h_out = pad_h_out;
+  naive_param.pad_w_out = pad_w_out;
+  naive_param.kh = kh;
+  naive_param.kw = kw;
+  naive_param.stride_h = stride_h;
+  naive_param.stride_w = stride_w;
+  naive_conv_fp(&naive_param, naive_input, naive_output, naive_filter);
+
+  AddInputFromArray<float>(image.shape(), image.flat<float>());
+  AddInputFromArray<float>(filter.shape(), filter.flat<float>());
+
+  // Run Op (TF)
+  // NOTE(review): if this assertion fails the test returns here and the three
+  // naive_* buffers are never freed.
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Check the output.
+  Tensor expected(DT_FLOAT, {nImg, ofhp, ofwp, nOfm});
+  naive_copy_NCHW_to_NHWC(naive_output, expected, nImg, ofhp, ofwp, nOfm);
+  test::ExpectTensorNear<float>(expected, *GetOutput(0), 1e-5);
+
+  libxsmm_free(naive_input);
+  libxsmm_free(naive_output);
+  libxsmm_free(naive_filter);
+}
+TEST(XsmmConv2DTest, Basic) {
+ auto num_threads =
+ ctx->device()->tensorflow_cpu_worker_threads()->num_threads;
+ // See libxsmm_dnn.h for this struct definition.
+ libxsmm_dnn_conv_desc desc;
+ desc.N = batch;
+ desc.C = in_depth;
+ desc.H = input_rows;
+ desc.W = input_cols;
+ desc.K = out_depth;
+ desc.R = filter_rows;
+ desc.S = filter_cols;
+ desc.u = stride_rows;
+ desc.v = stride_cols;
+ desc.pad_h = pad_rows;
+ desc.pad_w = pad_cols;
+ desc.pad_h_in = pad_rows; // libxsmm supports only physical padding for now
+ desc.pad_w_in = pad_cols; // libxsmm supports only physical padding for now
+ desc.pad_h_out = 0;
+ desc.pad_w_out = 0;
+ desc.threads = num_threads;
+ desc.buffer_format = LIBXSMM_DNN_CONV_FORMAT_NHWC;
+ desc.fuse_ops = LIBXSMM_DNN_CONV_FUSE_NONE;
+ desc.datatype_in = LIBXSMM_DNN_DATATYPE_F32;
+ desc.datatype_out = LIBXSMM_DNN_DATATYPE_F32;
+ if (!CanUseXsmmConv2D(desc, data_format)) {
+ return false;
+ }
+ auto input_ptr = input.template flat<float>().data();
+ auto filter_ptr = filter.template flat<float>().data();
+ auto output_ptr = output->template flat<float>().data();
+ bool success = functor::XsmmFwdConv2D<CPUDevice, float>()(
+ ctx, desc, input_ptr, filter_ptr, output_ptr);
+ return success;
} // namespace
} // namespace tensorflow