aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/compute/hs/gen/transpose.c
diff options
context:
space:
mode:
author Allan MacKinnon <allanmac@google.com> 2018-06-19 13:57:04 -0700
committer Skia Commit-Bot <skia-commit-bot@chromium.org> 2018-06-20 01:19:18 +0000
commit 4359d529121fc1f39f882693d641c0133d138d41 (patch)
tree d2c3239162e68d24d5c2cebc8a4f6659860cc2a0 /src/compute/hs/gen/transpose.c
parent 47c29fa64b3ffc1eec7723d40e9862b2d2a8443f (diff)
Skia Compute core files
Bug: skia: Change-Id: I4bba49cf20eff013e581800a3f114c85acd8498c Reviewed-on: https://skia-review.googlesource.com/135782 Reviewed-by: Mike Klein <mtklein@google.com> Reviewed-by: Mike Reed <reed@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/compute/hs/gen/transpose.c')
-rw-r--r-- src/compute/hs/gen/transpose.c 172
1 files changed, 172 insertions, 0 deletions
diff --git a/src/compute/hs/gen/transpose.c b/src/compute/hs/gen/transpose.c
new file mode 100644
index 0000000000..f99e966ff7
--- /dev/null
+++ b/src/compute/hs/gen/transpose.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ *
+ */
+
+//
+//
+//
+
+#include "transpose.h"
+#include "macros.h"
+
+// hsg_transpose() drives a transpose through callbacks: passes cc = 1..cols_log2 each emit pfn_blend() calls for row pairs, then pfn_remap() is called once per row.
+// Rows must be an even number. This is enforced elsewhere.
+// The `blend` and `remap` arguments are opaque payloads forwarded unchanged as the last argument of pfn_blend() and pfn_remap() respectively.
+// The transpose requires (cols_log2 * rows/2) row-pair blends.
+// Scratch space is two uint32_t[rows] mapping arrays obtained through the ALLOCA macro (defined outside this file; presumably stack-allocated -- TODO confirm).
+void
+hsg_transpose(uint32_t const cols_log2,
+ uint32_t const rows,
+ void * blend,
+ void * remap,
+ void (*pfn_blend)(uint32_t const cols_log2,
+ uint32_t const row_ll, // lower-left
+ uint32_t const row_ur, // upper-right
+ void * blend),
+ void (*pfn_remap)(uint32_t const row_from,
+ uint32_t const row_to,
+ void * remap))
+{
+ // get the mapping arrays: map_curr holds the current tag of every row, map_next receives the tags produced by the pass in progress
+ uint32_t * map_curr = ALLOCA(rows * sizeof(*map_curr));
+ uint32_t * map_next = ALLOCA(rows * sizeof(*map_next));
+
+ // init the mapping array: every row starts out tagged with its own index
+ for (uint32_t ii=0; ii<rows; ii++)
+ map_curr[ii] = ii;
+
+ // successively transpose rows using blends: one pass per cc in [1,cols_log2]
+ for (uint32_t cc=1; cc<=cols_log2; cc++)
+ {
+ uint32_t const mask = BITS_TO_MASK(cc); // BITS_TO_MASK is defined outside this file -- presumably the low cc bits set; TODO confirm
+
+ for (uint32_t ii=0; ii<rows; ii++)
+ {
+ uint32_t const left = map_curr[ii]; // current tag of row ii
+ uint32_t const stay = left & ~mask; // same tag with the masked bits cleared
+
+ if (left != stay) // will be swapped away
+ {
+ for (uint32_t jj=0; jj<rows; jj++) // linear search for the partner row whose current tag equals `stay`
+ {
+ if (map_curr[jj] == stay)
+ {
+ map_next[jj] = stay; // partner row jj keeps the cleared tag
+ map_next[ii] = stay + (rows << (cc-1)); // row ii is re-tagged as stay + rows * 2^(cc-1)
+
+ pfn_blend(cc,ii,jj,blend); // log2,left,right,payload
+
+ break; // stop at the first match
+ }
+ }
+ }
+ }
+
+ uint32_t * tmp = map_curr; // NOTE(review): map_next is only written for rows that took part in a pair above; presumably every row is paired in each pass -- confirm
+
+ map_curr = map_next; // this pass's output becomes the next pass's input
+ map_next = tmp;
+ }
+
+ // write out the remapping: each row ii is reported together with its final tag shifted right by cols_log2
+ for (uint32_t ii=0; ii<rows; ii++)
+ pfn_remap(ii,map_curr[ii] >> cols_log2,remap); // row_from,row_to,payload
+}
+
+//
+// test it! -- standalone debug harness; everything below is compiled only when HS_TRANSPOSE_DEBUG is defined
+//
+
+#ifdef HS_TRANSPOSE_DEBUG
+
+#include <stdio.h>
+
+static uint32_t cols; // columns per row, set in main() to (1 << cols_log2) and read by the debug callbacks -- implicit on SIMD/GPU
+
+static // debug remap callback handed to hsg_transpose(): traces the pair on stdout, then records it in the table r
+void
+hsg_debug_remap(uint32_t const row_from, // value stored into the table
+ uint32_t const row_to, // index of the table slot that is written
+ uint32_t * const r) // remap table; NOTE(review): typed uint32_t* while hsg_transpose() declares this callback parameter as void* -- confirm the pointer types are accepted by the target compilers
+{
+ fprintf(stdout,"REMAP( %3u, %3u )\n",row_from,row_to); // trace the remap
+
+ r[row_to] = row_from; // hsg_debug_print() later reads r[rr] as the row of m to show on output line rr
+}
+
+static // debug blend callback handed to hsg_transpose(): traces the call on stdout, then exchanges column groups between two rows of m
+void
+hsg_debug_blend(uint32_t const cols_log2, // selects the column-index bit (cols_log2-1) that decides which elements move
+ uint32_t const row_ll, // lower-left
+ uint32_t const row_ur, // upper-right
+ uint32_t * m) // matrix storage indexed as m[row*cols + col]; NOTE(review): typed uint32_t* while hsg_transpose() declares this callback parameter as void* -- confirm
+{
+ fprintf(stdout,"BLEND( %u, %3u, %3u )\n",cols_log2,row_ll,row_ur); // trace the blend
+
+ uint32_t * const ll = ALLOCA(cols * sizeof(*m)); // scratch copy of row_ll
+ uint32_t * const ur = ALLOCA(cols * sizeof(*m)); // scratch copy of row_ur
+
+ memcpy(ll,m+row_ll*cols,cols * sizeof(*m)); // snapshot both rows first so each output row is built from unmodified inputs
+ memcpy(ur,m+row_ur*cols,cols * sizeof(*m));
+
+ for (uint32_t ii=0; ii<cols; ii++) // rebuild row_ll: columns with the selected bit set keep their own value, the others take row_ur's value from the column with that bit flipped
+ m[row_ll*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii] : ur[ii^(1<<cols_log2-1)]; // note: `-` binds tighter than `>>` and `<<`, so the shift count is (cols_log2-1)
+
+ for (uint32_t ii=0; ii<cols; ii++) // rebuild row_ur: columns with the selected bit set take row_ll's value from the column with that bit flipped, the others keep their own value
+ m[row_ur*cols+ii] = ((ii >> cols_log2-1) & 1) ? ll[ii^(1<<cols_log2-1)] : ur[ii];
+}
+
+static // prints the matrix to stdout, one text line per row, visiting rows in the order given by the remap table
+void
+hsg_debug_print(uint32_t const rows, // number of lines to print
+ uint32_t const * const m, // matrix storage, `cols` entries per row (cols is the file-scope variable)
+ uint32_t const * const r) // r[rr] is the row of m printed as output line rr
+{
+ for (uint32_t rr=0; rr<rows; rr++) {
+ for (uint32_t cc=0; cc<cols; cc++)
+ fprintf(stdout,"%4u ",m[r[rr]*cols + cc]); // element at column cc of the remapped row
+ fprintf(stdout,"\n"); // end of this row's line
+ }
+}
+
+int // debug entry point: optional argv[1] = cols_log2, optional argv[2] = rows; returns 0 on success, 1 if rows is odd
+main(int argc, char * argv[])
+{
+ uint32_t const cols_log2 = (argc <= 1) ? 3 : strtoul(argv[1],NULL,0); // defaults to 3; NOTE(review): strtoul result is narrowed to uint32_t without error checking
+ uint32_t const rows = (argc <= 2) ? 6 : strtoul(argv[2],NULL,0); // defaults to 6
+
+ if (rows & 1) // hsg_transpose() requires an even number of rows
+ return 1; // was a bare `return;`, which is invalid in a function returning int and left the exit status undefined
+
+ cols = 1 << cols_log2; // file-scope column count used by the debug callbacks and printer
+
+ uint32_t * const m = ALLOCA(cols * rows * sizeof(*m)); // matrix storage, cols entries per row
+ uint32_t * const r = ALLOCA( rows * sizeof(*r)); // remap table, one entry per row
+
+ for (uint32_t rr=0; rr<rows; rr++) {
+ r[rr] = rr; // identity remap so the first print shows rows in storage order
+ for (uint32_t cc=0; cc<cols; cc++)
+ m[rr*cols+cc] = cc*rows+rr; // each element holds its column-major index
+ }
+
+ hsg_debug_print(rows,m,r); // matrix before the transpose
+
+ hsg_transpose(cols_log2,rows,
+ m,r,
+ hsg_debug_blend,
+ hsg_debug_remap);
+
+ hsg_debug_print(rows,m,r); // matrix after the blends, shown through the remap table
+
+ return 0;
+}
+
+#endif
+
+//
+//
+//