blob: 726b0a790777cb2593d6451673ab274f905d7116 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can
* be found in the LICENSE file.
*
*/
//
//
//
#include "device_cl_12.h"
//
// BEST TO RUN THESE KERNELS ON AN OUT-OF-ORDER COMMAND QUEUE (CQ)
//
__kernel
SKC_BP_INIT_IDS_KERNEL_ATTRIBS
void
skc_kernel_block_pool_init_ids(__global uint * const ids, uint const bp_size)
{
  //
  // Initialize the pool's id array with a sequence: the work-item
  // with global id N stores N * SKC_DEVICE_SUBBLOCKS_PER_BLOCK into
  // ids[N].
  //
  //   ids     : array to fill; entries [0, bp_size) are written
  //   bp_size : number of entries to initialize
  //
  // FIXME -- TUNE FOR ARCH -- evaluate if it's much faster to
  // accomplish this with fewer threads and using either IPC and/or
  // vector stores -- it should be on certain architectures!
  //
  uint const slot = get_global_id(0);

  //
  // work-items whose id falls at or past bp_size have nothing to
  // store
  //
  if (slot >= bp_size)
    return;

  ids[slot] = slot * SKC_DEVICE_SUBBLOCKS_PER_BLOCK;
}
//
//
//
__kernel
SKC_BP_INIT_ATOMICS_KERNEL_ATTRIBS
void
skc_kernel_block_pool_init_atomics(__global uint * const bp_atomics, uint const bp_size)
{
  //
  // Each work-item stores (its lane index * bp_size) into
  // bp_atomics[lane index].  The kernel is meant to be launched with
  // two threads so that the result is [ 0, bp_size ].
  //
  // The lane index is the sub-group local id when either sub-group
  // extension macro is defined, otherwise the work-group local id.
  //
  // NOTE(review): the original comment here said a "version test"
  // squelches a bug with the Intel OpenCL CPU compiler declaring it
  // supports the cl_intel_subgroups extension, but only the extension
  // macros are tested below -- confirm whether a version check is
  // missing.
  //
#if defined(cl_intel_subgroups) || defined (cl_khr_subgroups)
  uint const lane = get_sub_group_local_id();
#else
  uint const lane = get_local_id(0);
#endif

  // lane 0 stores 0, lane 1 stores bp_size
  uint const initial = lane * bp_size;

  bp_atomics[lane] = initial;
}
//
//
//
|