aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl')
-rw-r--r--src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl260
1 files changed, 130 insertions, 130 deletions
diff --git a/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl b/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
index 378d51d8d7..7f48978782 100644
--- a/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
+++ b/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
@@ -1,130 +1,130 @@
-/*
- * Copyright 2018 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can
- * be found in the LICENSE file.
- *
- */
-
-//
-// NOTE THAT THE SEGMENT TTCK KERNEL IS ENTIRELY DEPENDENT ON THE
-// LAYOUT OF THE TTCK KEY. IF THE TTCK KEY IS ALTERED THEN THIS
-// KERNEL WILL NEED TO BE UPDATED
-//
-
-#include "tile.h"
-#include "atomic_cl.h"
-#include "device_cl_12.h"
-
-//
-//
-//
-
-#define HS_KEYS_PER_SLAB (HS_KEYS_PER_LANE * HS_LANES_PER_WARP)
-#define HS_LANE_MASK (HS_LANES_PER_WARP - 1)
-
-//
-//
-//
-
-#define SKC_YX_NEQ(row,prev) \
- (((as_uint2(r##row).hi ^ as_uint2(r##prev).hi) & SKC_TTCK_HI_MASK_YX) != 0)
-
-//
-//
-//
-
-__kernel
-__attribute__((intel_reqd_sub_group_size(HS_LANES_PER_WARP)))
-void
-skc_kernel_segment_ttck(__global HS_KEY_TYPE * SKC_RESTRICT const vout,
- __global uint * SKC_RESTRICT const indices,
- __global SKC_ATOMIC_UINT volatile * SKC_RESTRICT const atomics)
-{
- uint const global_id = get_global_id(0);
- uint const gmem_base = (global_id >> HS_LANES_PER_WARP_LOG2) * HS_KEYS_PER_SLAB;
- uint const gmem_idx = gmem_base + (global_id & HS_LANE_MASK);
- uint const lane_idx = gmem_base + (global_id & HS_LANE_MASK) * HS_KEYS_PER_LANE;
-
- //
- // LOAD ALL THE ROWS
- //
-#undef HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev) \
- HS_KEY_TYPE const r##row = (vout + gmem_idx)[prev * HS_LANES_PER_WARP];
-
- HS_SLAB_ROWS();
-
- //
- // LOAD LAST REGISTER FROM COLUMN TO LEFT
- //
- uint diffs = 0;
- uint2 r0 = r1;
-
- if (gmem_base > 0) {
- // if this is the first key in any slab but the first then it
- // broadcast loads the last key in previous slab
- r0.hi = as_uint2(vout[gmem_base - 1]).hi;
- } else if (get_sub_group_local_id() == 0) {
- // if this is the first lane in the first slab
- diffs = 1;
- }
-
- // now shuffle in the last key from the column to the left
- r0.hi = intel_sub_group_shuffle_up(r0.hi,as_uint2(HS_REG_LAST(r)).hi,1);
-
- //
- // FIND ALL DIFFERENCES IN SLAB
- //
- uint valid = 0;
-
-#undef HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev) \
- valid |= ((r##row != SKC_ULONG_MAX) << prev);
-
- HS_SLAB_ROWS();
-
-#undef HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev) \
- diffs |= (SKC_YX_NEQ(row,prev) << prev);
-
- HS_SLAB_ROWS();
-
- //
- // SUM UP THE DIFFERENCES
- //
- uint const valid_diffs = valid & diffs;
- uint const count = popcount(valid_diffs);
- uint const inclusive = sub_group_scan_inclusive_add(count);
- uint const exclusive = inclusive - count;
-
- //
- // RESERVE SPACE IN THE INDICES ARRAY
- //
- uint next = 0;
-
- if (get_sub_group_local_id() == HS_LANES_PER_WARP-1)
- next = atomic_add(atomics+1,inclusive); // FIXME -- need a symbolic offset
-
- // distribute base across subgroup
- next = exclusive + sub_group_broadcast(next,HS_LANES_PER_WARP-1);
-
- //
- // STORE THE INDICES
- //
-#undef HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev) \
- if (valid_diffs & (1 << prev)) \
- indices[next++] = lane_idx + prev;
-
- HS_SLAB_ROWS();
-
- //
- // TRANSPOSE THE SLAB AND STORE IT
- //
- HS_TRANSPOSE_SLAB();
-}
-
-//
-//
-//
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ *
+ */
+
+//
+// NOTE THAT THE SEGMENT TTCK KERNEL IS ENTIRELY DEPENDENT ON THE
+// LAYOUT OF THE TTCK KEY. IF THE TTCK KEY IS ALTERED THEN THIS
+// KERNEL WILL NEED TO BE UPDATED
+//
+
+#include "tile.h"
+#include "atomic_cl.h"
+#include "kernel_cl_12.h"
+
+//
+//
+//
+
+#define HS_KEYS_PER_SLAB (HS_KEYS_PER_LANE * HS_LANES_PER_WARP)
+#define HS_LANE_MASK (HS_LANES_PER_WARP - 1)
+
+//
+//
+//
+
+#define SKC_YX_NEQ(row,prev) \
+ (((as_uint2(r##row).hi ^ as_uint2(r##prev).hi) & SKC_TTCK_HI_MASK_YX) != 0)
+
+//
+//
+//
+
+__kernel
+__attribute__((intel_reqd_sub_group_size(HS_LANES_PER_WARP)))
+void
+skc_kernel_segment_ttck(__global HS_KEY_TYPE * SKC_RESTRICT const vout,
+ __global uint * SKC_RESTRICT const indices,
+ __global SKC_ATOMIC_UINT volatile * SKC_RESTRICT const atomics)
+{
+ uint const global_id = get_global_id(0);
+ uint const gmem_base = (global_id >> HS_LANES_PER_WARP_LOG2) * HS_KEYS_PER_SLAB;
+ uint const gmem_idx = gmem_base + (global_id & HS_LANE_MASK);
+ uint const lane_idx = gmem_base + (global_id & HS_LANE_MASK) * HS_KEYS_PER_LANE;
+
+ //
+ // LOAD ALL THE ROWS
+ //
+#undef HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev) \
+ HS_KEY_TYPE const r##row = (vout + gmem_idx)[prev * HS_LANES_PER_WARP];
+
+ HS_SLAB_ROWS();
+
+ //
+ // LOAD LAST REGISTER FROM COLUMN TO LEFT
+ //
+ uint diffs = 0;
+ uint2 r0 = r1;
+
+ if (gmem_base > 0) {
+ // if this is the first key in any slab but the first then it
+ // broadcast loads the last key in previous slab
+ r0.hi = as_uint2(vout[gmem_base - 1]).hi;
+ } else if (get_sub_group_local_id() == 0) {
+ // if this is the first lane in the first slab
+ diffs = 1;
+ }
+
+ // now shuffle in the last key from the column to the left
+ r0.hi = intel_sub_group_shuffle_up(r0.hi,as_uint2(HS_REG_LAST(r)).hi,1);
+
+ //
+ // FIND ALL DIFFERENCES IN SLAB
+ //
+ uint valid = 0;
+
+#undef HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev) \
+ valid |= ((r##row != SKC_ULONG_MAX) << prev);
+
+ HS_SLAB_ROWS();
+
+#undef HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev) \
+ diffs |= (SKC_YX_NEQ(row,prev) << prev);
+
+ HS_SLAB_ROWS();
+
+ //
+ // SUM UP THE DIFFERENCES
+ //
+ uint const valid_diffs = valid & diffs;
+ uint const count = popcount(valid_diffs);
+ uint const inclusive = sub_group_scan_inclusive_add(count);
+ uint const exclusive = inclusive - count;
+
+ //
+ // RESERVE SPACE IN THE INDICES ARRAY
+ //
+ uint next = 0;
+
+ if (get_sub_group_local_id() == HS_LANES_PER_WARP-1)
+ next = atomic_add(atomics+1,inclusive); // FIXME -- need a symbolic offset
+
+ // distribute base across subgroup
+ next = exclusive + sub_group_broadcast(next,HS_LANES_PER_WARP-1);
+
+ //
+ // STORE THE INDICES
+ //
+#undef HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev) \
+ if (valid_diffs & (1 << prev)) \
+ indices[next++] = lane_idx + prev;
+
+ HS_SLAB_ROWS();
+
+ //
+ // TRANSPOSE THE SLAB AND STORE IT
+ //
+ HS_TRANSPOSE_SLAB();
+}
+
+//
+//
+//