1 files changed, 130 insertions, 130 deletions
diff --git a/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl b/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
index 378d51d8d7..7f48978782 100644
--- a/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
+++ b/src/compute/skc/platforms/cl_12/kernels/segment_ttck.cl
@@ -1,130 +1,130 @@
-/*
- * Copyright 2018 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can
- * be found in the LICENSE file.
- *
- */
-
-//
-// NOTE THAT THE SEGMENT TTCK KERNEL IS ENTIRELY DEPENDENT ON THE
-// LAYOUT OF THE TTCK KEY.  IF THE TTCK KEY IS ALTERED THEN THIS
-// KERNEL WILL NEED TO BE UPDATED
-//
-
-#include "tile.h"
-#include "atomic_cl.h"
-#include "device_cl_12.h"
-
-//
-//
-//
-
-#define HS_KEYS_PER_SLAB  (HS_KEYS_PER_LANE * HS_LANES_PER_WARP)
-#define HS_LANE_MASK      (HS_LANES_PER_WARP - 1)
-
-//
-//
-//
-
-#define SKC_YX_NEQ(row,prev)                \
-  (((as_uint2(r##row).hi ^ as_uint2(r##prev).hi) & SKC_TTCK_HI_MASK_YX) != 0)
-
-//
-//
-//
-
-__kernel
-__attribute__((intel_reqd_sub_group_size(HS_LANES_PER_WARP)))
-void
-skc_kernel_segment_ttck(__global HS_KEY_TYPE              * SKC_RESTRICT const vout,
-                        __global uint                     * SKC_RESTRICT const indices,
-                        __global SKC_ATOMIC_UINT volatile * SKC_RESTRICT const atomics)
-{
-  uint const global_id = get_global_id(0);
-  uint const gmem_base = (global_id >> HS_LANES_PER_WARP_LOG2) * HS_KEYS_PER_SLAB;
-  uint const gmem_idx  = gmem_base + (global_id & HS_LANE_MASK);
-  uint const lane_idx  = gmem_base + (global_id & HS_LANE_MASK) * HS_KEYS_PER_LANE;
-
-  //
-  // LOAD ALL THE ROWS
-  //
-#undef  HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev)                                           \
-  HS_KEY_TYPE const r##row = (vout + gmem_idx)[prev * HS_LANES_PER_WARP];
-
-  HS_SLAB_ROWS();
-
-  //
-  // LOAD LAST REGISTER FROM COLUMN TO LEFT
-  //
-  uint  diffs = 0;
-  uint2 r0    = r1;
-
-  if (gmem_base > 0) {
-    // if this is the first key in any slab but the first then it
-    // broadcast loads the last key in previous slab
-    r0.hi = as_uint2(vout[gmem_base - 1]).hi;
-  } else if (get_sub_group_local_id() == 0) {
-    // if this is the first lane in the first slab
-    diffs = 1;
-  }
-
-  // now shuffle in the last key from the column to the left
-  r0.hi = intel_sub_group_shuffle_up(r0.hi,as_uint2(HS_REG_LAST(r)).hi,1);
-
-  //
-  // FIND ALL DIFFERENCES IN SLAB
-  //
-  uint valid = 0;
-
-#undef  HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev)                   \
-  valid |= ((r##row != SKC_ULONG_MAX) << prev);
-
-  HS_SLAB_ROWS();
-
-#undef  HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev)                   \
-  diffs |= (SKC_YX_NEQ(row,prev) << prev);
-
-  HS_SLAB_ROWS();
-
-  //
-  // SUM UP THE DIFFERENCES
-  //
-  uint const valid_diffs = valid & diffs;
-  uint const count       = popcount(valid_diffs);
-  uint const inclusive   = sub_group_scan_inclusive_add(count);
-  uint const exclusive   = inclusive - count;
-
-  //
-  // RESERVE SPACE IN THE INDICES ARRAY
-  //
-  uint next = 0;
-
-  if (get_sub_group_local_id() == HS_LANES_PER_WARP-1)
-    next = atomic_add(atomics+1,inclusive); // FIXME -- need a symbolic offset
-
-  // distribute base across subgroup
-  next = exclusive + sub_group_broadcast(next,HS_LANES_PER_WARP-1);
-
-  //
-  // STORE THE INDICES
-  //
-#undef  HS_SLAB_ROW
-#define HS_SLAB_ROW(row,prev)                   \
-  if (valid_diffs & (1 << prev))                \
-    indices[next++] = lane_idx + prev;
-
-  HS_SLAB_ROWS();
-
-  //
-  // TRANSPOSE THE SLAB AND STORE IT
-  //
-  HS_TRANSPOSE_SLAB();
-}
-
-//
-//
-//
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ *
+ */
+
+//
+// NOTE THAT THE SEGMENT TTCK KERNEL IS ENTIRELY DEPENDENT ON THE
+// LAYOUT OF THE TTCK KEY.  IF THE TTCK KEY IS ALTERED THEN THIS
+// KERNEL WILL NEED TO BE UPDATED
+//
+
+#include "tile.h"
+#include "atomic_cl.h"
+#include "kernel_cl_12.h"
+
+//
+//
+//
+
+#define HS_KEYS_PER_SLAB  (HS_KEYS_PER_LANE * HS_LANES_PER_WARP)
+#define HS_LANE_MASK      (HS_LANES_PER_WARP - 1)
+
+//
+//
+//
+
+#define SKC_YX_NEQ(row,prev)                \
+  (((as_uint2(r##row).hi ^ as_uint2(r##prev).hi) & SKC_TTCK_HI_MASK_YX) != 0)
+
+//
+//
+//
+
+__kernel
+__attribute__((intel_reqd_sub_group_size(HS_LANES_PER_WARP)))
+void
+skc_kernel_segment_ttck(__global HS_KEY_TYPE              * SKC_RESTRICT const vout,
+                        __global uint                     * SKC_RESTRICT const indices,
+                        __global SKC_ATOMIC_UINT volatile * SKC_RESTRICT const atomics)
+{
+  uint const global_id = get_global_id(0);
+  uint const gmem_base = (global_id >> HS_LANES_PER_WARP_LOG2) * HS_KEYS_PER_SLAB;
+  uint const gmem_idx  = gmem_base + (global_id & HS_LANE_MASK);
+  uint const lane_idx  = gmem_base + (global_id & HS_LANE_MASK) * HS_KEYS_PER_LANE;
+
+  //
+  // LOAD ALL THE ROWS
+  //
+#undef  HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev)                                           \
+  HS_KEY_TYPE const r##row = (vout + gmem_idx)[prev * HS_LANES_PER_WARP];
+
+  HS_SLAB_ROWS();
+
+  //
+  // LOAD LAST REGISTER FROM COLUMN TO LEFT
+  //
+  uint  diffs = 0;
+  uint2 r0    = r1;
+
+  if (gmem_base > 0) {
+    // if this is the first key in any slab but the first then it
+    // broadcast loads the last key in previous slab
+    r0.hi = as_uint2(vout[gmem_base - 1]).hi;
+  } else if (get_sub_group_local_id() == 0) {
+    // if this is the first lane in the first slab
+    diffs = 1;
+  }
+
+  // now shuffle in the last key from the column to the left
+  r0.hi = intel_sub_group_shuffle_up(r0.hi,as_uint2(HS_REG_LAST(r)).hi,1);
+
+  //
+  // FIND ALL DIFFERENCES IN SLAB
+  //
+  uint valid = 0;
+
+#undef  HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev)                   \
+  valid |= ((r##row != SKC_ULONG_MAX) << prev);
+
+  HS_SLAB_ROWS();
+
+#undef  HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev)                   \
+  diffs |= (SKC_YX_NEQ(row,prev) << prev);
+
+  HS_SLAB_ROWS();
+
+  //
+  // SUM UP THE DIFFERENCES
+  //
+  uint const valid_diffs = valid & diffs;
+  uint const count       = popcount(valid_diffs);
+  uint const inclusive   = sub_group_scan_inclusive_add(count);
+  uint const exclusive   = inclusive - count;
+
+  //
+  // RESERVE SPACE IN THE INDICES ARRAY
+  //
+  uint next = 0;
+
+  if (get_sub_group_local_id() == HS_LANES_PER_WARP-1)
+    next = atomic_add(atomics+1,inclusive); // FIXME -- need a symbolic offset
+
+  // distribute base across subgroup
+  next = exclusive + sub_group_broadcast(next,HS_LANES_PER_WARP-1);
+
+  //
+  // STORE THE INDICES
+  //
+#undef  HS_SLAB_ROW
+#define HS_SLAB_ROW(row,prev)                   \
+  if (valid_diffs & (1 << prev))                \
+    indices[next++] = lane_idx + prev;
+
+  HS_SLAB_ROWS();
+
+  //
+  // TRANSPOSE THE SLAB AND STORE IT
+  //
+  HS_TRANSPOSE_SLAB();
+}
+
+//
+//
+//