/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can
* be found in the LICENSE file.
*
*/
//
//
//

#include "tile.h"
#include "raster.h"
#include "atomic_cl.h"
#include "block_pool_cl.h"
#include "raster_builder_cl_12.h"
#include "device_cl_12.h"

//
// There is a fixed-size meta table per raster cohort that we use to
// perform a mostly coalesced sizing and allocation of blocks.
//
// This code is simple and fast.
//
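//
// Each lane's meta record is a single 4-dword slot: it is loaded as
// { blocks, offset, pk, rk } and rewritten in place as { blocks,
// offset, nodes, keys } -- the "in" and "out" views of the
// skc_raster_cohort_meta_inout union evidently alias the same
// storage (compare the two debug printfs below).
//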
__kernel
SKC_RASTERS_ALLOC_KERNEL_ATTRIBS
void
skc_kernel_rasters_alloc(__global SKC_ATOMIC_UINT volatile * const bp_atomics,
                         __global skc_block_id_t     const * const bp_ids,
                         skc_uint                            const bp_mask,    // pow2 modulo mask for block pool ring
                         __global skc_block_id_t           * const map,
                         __global skc_uint                 * const metas,
                         __global skc_uint           const * const raster_ids, // FIXME -- CONSTANT
                         skc_uint                            const count)
{
  // access to the meta extent is linear
  skc_uint const gid       = get_global_id(0);
  skc_bool const is_active = gid < count;

  //
  // init with defaults for all lanes -- inactive lanes must still
  // participate in the scan and broadcast below, so they contribute
  // zero extra blocks
  //
  union skc_raster_cohort_meta_inout meta         = { .in.u32v4 = { 0, 0, 0, 0 } };
  skc_uint                           raster_id    = SKC_UINT_MAX;
  skc_uint                           extra_blocks = 0;
  if (is_active)
    {
      // load meta_in
      meta.in.u32v4 = vload4(gid,metas);

      // load raster_id as early as possible
      raster_id = raster_ids[gid];

#if 0
      printf("%3u + %5u, %5u, %5u, %5u\n",
             gid,
             meta.in.blocks,
             meta.in.offset,
             meta.in.pk,
             meta.in.rk);
#endif

      // how many blocks will the ttpb blocks consume?
      extra_blocks = ((meta.in.pk * SKC_TILE_RATIO + SKC_DEVICE_SUBBLOCKS_PER_BLOCK - SKC_TILE_RATIO) /
                      SKC_DEVICE_SUBBLOCKS_PER_BLOCK);
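
      // the expression above is a ceiling division: assuming
      // SKC_DEVICE_SUBBLOCKS_PER_BLOCK is a pow2 multiple of
      // SKC_TILE_RATIO, it computes
      //
      //   extra_blocks = ceil(pk * SKC_TILE_RATIO / SKC_DEVICE_SUBBLOCKS_PER_BLOCK)
      //
      // e.g. with pk=5, SKC_TILE_RATIO=2 and
      // SKC_DEVICE_SUBBLOCKS_PER_BLOCK=8: (10 + 8 - 2) / 8 = 2 blocks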

      // total keys
      meta.out.keys += meta.in.pk;
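
      // note: out.keys occupies the same dword as in.rk in the union,
      // so this evidently accumulates keys = rk + pk -- the total
      // number of keys the raster will store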

      // how many blocks do we need to store the keys in the head and trailing nodes?
      skc_uint const hn = ((SKC_RASTER_HEAD_DWORDS + meta.out.keys + SKC_RASTER_NODE_DWORDS - 2) /
                           (SKC_RASTER_NODE_DWORDS - 1));
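
      // this is also a ceiling division: each block holds
      // (SKC_RASTER_NODE_DWORDS - 1) payload dwords -- one dword is
      // evidently reserved, presumably for the link to the next node
      // -- and the head block spends SKC_RASTER_HEAD_DWORDS of its
      // payload on the header, so
      //
      //   hn = ceil((SKC_RASTER_HEAD_DWORDS + keys) / (SKC_RASTER_NODE_DWORDS - 1))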

      // increment blocks
      extra_blocks += hn;

      // how many nodes trail the head?
      meta.out.nodes = hn - 1;

      // update blocks
      meta.out.blocks += extra_blocks;

#if 0
      printf("%3u - %5u, %5u, %5u, %5u\n",
             gid,
             meta.out.blocks,
             meta.out.offset,
             meta.out.nodes,
             meta.out.keys);
#endif
    }

  //
  // allocate blocks from block pool
  //
  // first perform a prefix sum on the subgroup to reduce atomic
  // operation traffic
  //
  // note this idiom can be implemented with vectors, subgroups or
  // workgroups
  //
  skc_uint const prefix = SKC_RASTERS_ALLOC_INCLUSIVE_ADD(extra_blocks);
  skc_uint       reads  = 0;

  // last lane performs the block pool allocation with an atomic increment
  if (SKC_RASTERS_ALLOC_LOCAL_ID() == SKC_RASTERS_ALLOC_GROUP_SIZE - 1) {
    reads = SKC_ATOMIC_ADD_GLOBAL_RELAXED_DEVICE(bp_atomics+SKC_BP_ATOMIC_OFFSET_READS,prefix); // ring_reads
  }

  // broadcast block pool base to all lanes
  reads = SKC_RASTERS_ALLOC_BROADCAST(reads,SKC_RASTERS_ALLOC_GROUP_SIZE - 1);

  // update base for each lane
  reads += prefix - extra_blocks;
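
  // a worked example of the idiom above, assuming a 4-lane group with
  // extra_blocks = { 2, 0, 3, 1 }:
  //
  //   inclusive scan:  prefix = { 2, 2, 5, 6 }
  //   last lane:       atomically adds 6 to ring_reads and receives
  //                    the pre-increment base, e.g. base = 100
  //   broadcast:       reads = { 100, 100, 100, 100 }
  //   per-lane base:   reads += prefix - extra_blocks
  //                    -> { 100, 102, 102, 105 }
  //
  // so each lane claims a contiguous run of ids from the block pool
  // ring with a single atomic for the whole group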

  //
  // store meta header
  //
  if (is_active)
    {
      // store headers back to meta extent
      vstore4(meta.out.u32v4,gid,metas);

      // store reads
      metas[SKC_RASTER_COHORT_META_OFFSET_READS + gid] = reads;
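
      // the per-raster base read index is presumably consumed by
      // downstream kernels, telling them where this raster's span of
      // block ids begins in the block pool ring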

      // get block_id of each raster head
      skc_block_id_t const block_id = bp_ids[reads & bp_mask];
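
      // the ring index wraps via the pow2 mask, e.g. a 4096-entry
      // ring has bp_mask = 0xFFF so reads of 4095 and 4096 map to
      // slots 4095 and 0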

      // update map
      map[raster_id] = block_id;

#if 0
      printf("alloc: %u / %u\n",raster_id,block_id);
#endif
    }
}

//
//
//