diff options
author | Allan MacKinnon <allanmac@google.com> | 2018-06-19 13:57:04 -0700 |
---|---|---|
committer | Skia Commit-Bot <skia-commit-bot@chromium.org> | 2018-06-20 01:19:18 +0000 |
commit | 4359d529121fc1f39f882693d641c0133d138d41 (patch) | |
tree | d2c3239162e68d24d5c2cebc8a4f6659860cc2a0 /src/compute/hs/gen/transpose.c | |
parent | 47c29fa64b3ffc1eec7723d40e9862b2d2a8443f (diff) |
Skia Compute core files
Bug: skia:
Change-Id: I4bba49cf20eff013e581800a3f114c85acd8498c
Reviewed-on: https://skia-review.googlesource.com/135782
Reviewed-by: Mike Klein <mtklein@google.com>
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
Diffstat (limited to 'src/compute/hs/gen/transpose.c')
-rw-r--r-- | src/compute/hs/gen/transpose.c | 172 |
1 files changed, 172 insertions, 0 deletions
diff --git a/src/compute/hs/gen/transpose.c b/src/compute/hs/gen/transpose.c new file mode 100644 index 0000000000..f99e966ff7 --- /dev/null +++ b/src/compute/hs/gen/transpose.c @@ -0,0 +1,172 @@ +/* + * Copyright 2018 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + */ + +// +// +// + +#include "transpose.h" +#include "macros.h" + +// +// Rows must be an even number. This is enforced elsewhere. +// +// The transpose requires (cols_log2 * rows/2) row-pair blends. +// +void +hsg_transpose(uint32_t const cols_log2, + uint32_t const rows, + void * blend, + void * remap, + void (*pfn_blend)(uint32_t const cols_log2, + uint32_t const row_ll, // lower-left + uint32_t const row_ur, // upper-right + void * blend), + void (*pfn_remap)(uint32_t const row_from, + uint32_t const row_to, + void * remap)) +{ + // get mapping array + uint32_t * map_curr = ALLOCA(rows * sizeof(*map_curr)); + uint32_t * map_next = ALLOCA(rows * sizeof(*map_next)); + + // init the mapping array + for (uint32_t ii=0; ii<rows; ii++) + map_curr[ii] = ii; + + // successively transpose rows using blends + for (uint32_t cc=1; cc<=cols_log2; cc++) + { + uint32_t const mask = BITS_TO_MASK(cc); + + for (uint32_t ii=0; ii<rows; ii++) + { + uint32_t const left = map_curr[ii]; + uint32_t const stay = left & ~mask; + + if (left != stay) // will be swapped away + { + for (uint32_t jj=0; jj<rows; jj++) + { + if (map_curr[jj] == stay) + { + map_next[jj] = stay; + map_next[ii] = stay + (rows << (cc-1)); + + pfn_blend(cc,ii,jj,blend); // log2,left,right,payload + + break; + } + } + } + } + + uint32_t * tmp = map_curr; + + map_curr = map_next; + map_next = tmp; + } + + // write out the remapping + for (uint32_t ii=0; ii<rows; ii++) + pfn_remap(ii,map_curr[ii] >> cols_log2,remap); +} + +// +// test it! 
//

#ifdef HS_TRANSPOSE_DEBUG

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//
// Number of columns in the debug matrix.  Set once by main() and read
// by the blend/print helpers below.
//
static uint32_t cols; // implicit on SIMD/GPU

//
// pfn_remap callback: logs the move and records it in the row
// permutation.
//
//   row_from : row index reported by hsg_transpose()
//   row_to   : destination slot for that row
//   remap    : payload pointer; main() passes its uint32_t r[rows] array
//
// The payload is taken as `void *` so that this function's type matches
// the callback type declared by hsg_transpose().
//
static
void
hsg_debug_remap(uint32_t const row_from,
                uint32_t const row_to,
                void *         remap)
{
  uint32_t * const r = remap;

  fprintf(stdout,"REMAP( %3u, %3u )\n",row_from,row_to);

  r[row_to] = row_from;
}

//
// pfn_blend callback: logs the blend and applies it to two rows of the
// row-major debug matrix.
//
//   cols_log2 : selects the column bit (cols_log2 - 1) used to choose
//               between the two source rows; hsg_transpose() passes its
//               pass number here, which starts at 1
//   row_ll    : first row of the pair  (lower-left)
//   row_ur    : second row of the pair (upper-right)
//   blend     : payload pointer; main() passes its uint32_t m[rows*cols]
//
// Both rows are snapshotted first so that each output element is built
// from the pre-blend contents.
//
static
void
hsg_debug_blend(uint32_t const cols_log2,
                uint32_t const row_ll, // lower-left
                uint32_t const row_ur, // upper-right
                void *         blend)
{
  uint32_t * const m = blend;

  fprintf(stdout,"BLEND( %u, %3u, %3u )\n",cols_log2,row_ll,row_ur);

  size_t const     row_bytes = cols * sizeof(*m);
  uint32_t * const out_ll    = m + row_ll * cols;
  uint32_t * const out_ur    = m + row_ur * cols;

  uint32_t * const ll = ALLOCA(row_bytes);
  uint32_t * const ur = ALLOCA(row_bytes);

  memcpy(ll,out_ll,row_bytes);
  memcpy(ur,out_ur,row_bytes);

  // the column bit that decides which source row feeds each element
  uint32_t const bit = 1u << (cols_log2 - 1);

  for (uint32_t ii=0; ii<cols; ii++)
    {
      if (ii & bit)
        {
          out_ll[ii] = ll[ii];
          out_ur[ii] = ll[ii ^ bit];
        }
      else
        {
          out_ll[ii] = ur[ii ^ bit];
          out_ur[ii] = ur[ii];
        }
    }
}

//
// Prints the matrix one row per line, visiting rows in the order given
// by the permutation r[].
//
static
void
hsg_debug_print(uint32_t const         rows,
                uint32_t const * const m,
                uint32_t const * const r)
{
  for (uint32_t rr=0; rr<rows; rr++) {
    for (uint32_t cc=0; cc<cols; cc++)
      fprintf(stdout,"%4u ",m[r[rr]*cols + cc]);
    fprintf(stdout,"\n");
  }
}

//
// Debug driver.
//
//   argv[1] : cols_log2 (default 3), must be < 32
//   argv[2] : rows      (default 6), must be even
//
// Fills a rows x cols matrix with m[rr][cc] = cc*rows+rr, prints it,
// runs hsg_transpose() with the debug callbacks, and prints it again
// through the resulting row permutation.
//
// Returns 0 on success and EXIT_FAILURE when the arguments are rejected.
//
// NOTE(review): the scratch buffers come from ALLOCA (presumably the
// stack -- see macros.h), so keep the dimensions small.
//
int
main(int argc, char * argv[])
{
  // parse at full width so out-of-range values are rejected, not truncated
  unsigned long const cols_log2_arg = (argc <= 1) ? 3 : strtoul(argv[1],NULL,0);
  unsigned long const rows_arg      = (argc <= 2) ? 6 : strtoul(argv[2],NULL,0);

  // a shift count >= 32 is undefined for a 32-bit operand, and an odd
  // row count violates the precondition of hsg_transpose()
  if ((cols_log2_arg >= 32) || (rows_arg > UINT32_MAX) || (rows_arg & 1)) {
    fprintf(stderr,"usage: transpose [cols_log2 < 32] [rows (even)]\n");
    return EXIT_FAILURE;
  }

  uint32_t const cols_log2 = (uint32_t)cols_log2_arg;
  uint32_t const rows      = (uint32_t)rows_arg;

  cols = 1u << cols_log2;

  uint32_t * const m = ALLOCA(cols * rows * sizeof(*m));
  uint32_t * const r = ALLOCA(       rows * sizeof(*r));

  for (uint32_t rr=0; rr<rows; rr++) {
    r[rr] = rr;
    for (uint32_t cc=0; cc<cols; cc++)
      m[rr*cols+cc] = cc*rows+rr;
  }

  hsg_debug_print(rows,m,r);

  hsg_transpose(cols_log2,rows,
                m,r,
                hsg_debug_blend,
                hsg_debug_remap);

  hsg_debug_print(rows,m,r);

  return 0;
}

#endif

//
//
//