/*
* Copyright 2017 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can
* be found in the LICENSE file.
*
*/
//
//
//

#include "tile.h"
#include "raster.h"
#include "atomic_cl.h"
#include "block_pool_cl.h"
#include "raster_builder_cl_12.h"
#include "device_cl_12.h"

//
// There is a fixed-size meta table per raster cohort that we use to
// perform a mostly coalesced sizing and allocation of blocks.
//
// This code is simple and fast.
//
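//
// Each lane's meta record is a single 4-dword slot: it is loaded as
// { blocks, offset, pk, rk } and rewritten in place as { blocks,
// offset, nodes, keys } -- the "in" and "out" views of the
// skc_raster_cohort_meta_inout union evidently alias the same
// storage (compare the two debug printfs below).
//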
__kernel
SKC_RASTERS_ALLOC_KERNEL_ATTRIBS
void
skc_kernel_rasters_alloc(__global SKC_ATOMIC_UINT volatile * const bp_atomics,
                         __global skc_block_id_t     const * const bp_ids,
                         skc_uint                            const bp_mask,    // pow2 modulo mask for block pool ring
                         __global skc_block_id_t           * const map,
                         __global skc_uint                 * const metas,
                         __global skc_uint           const * const raster_ids, // FIXME -- CONSTANT
                         skc_uint                            const count)
{
  // access to the meta extent is linear
  skc_uint const gid       = get_global_id(0);
  skc_bool const is_active = gid < count;

  //
  // init with defaults for all lanes -- inactive lanes must still
  // participate in the scan and broadcast below, so they contribute
  // zero extra blocks
  //
  union skc_raster_cohort_meta_inout meta         = { .in.u32v4 = { 0, 0, 0, 0 } };
  skc_uint                           raster_id    = SKC_UINT_MAX;
  skc_uint                           extra_blocks = 0;
  if (is_active)
    {
      // load meta_in
      meta.in.u32v4 = vload4(gid,metas);

      // load raster_id as early as possible
      raster_id = raster_ids[gid];

#if 0
      printf("%3u + %5u, %5u, %5u, %5u\n",
             gid,
             meta.in.blocks,
             meta.in.offset,
             meta.in.pk,
             meta.in.rk);
#endif

      // how many blocks will the ttpb blocks consume?
      extra_blocks = ((meta.in.pk * SKC_TILE_RATIO + SKC_DEVICE_SUBBLOCKS_PER_BLOCK - SKC_TILE_RATIO) /
                      SKC_DEVICE_SUBBLOCKS_PER_BLOCK);
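
      // the expression above is a ceiling division: assuming
      // SKC_DEVICE_SUBBLOCKS_PER_BLOCK is a pow2 multiple of
      // SKC_TILE_RATIO, it computes
      //
      //   extra_blocks = ceil(pk * SKC_TILE_RATIO / SKC_DEVICE_SUBBLOCKS_PER_BLOCK)
      //
      // e.g. with pk=5, SKC_TILE_RATIO=2 and
      // SKC_DEVICE_SUBBLOCKS_PER_BLOCK=8: (10 + 8 - 2) / 8 = 2 blocks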

      // total keys
      meta.out.keys += meta.in.pk;
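
      // note: out.keys occupies the same dword as in.rk in the union,
      // so this evidently accumulates keys = rk + pk -- the total
      // number of keys the raster will store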

      // how many blocks do we need to store the keys in the head and trailing nodes?
      skc_uint const hn = ((SKC_RASTER_HEAD_DWORDS + meta.out.keys + SKC_RASTER_NODE_DWORDS - 2) /
                           (SKC_RASTER_NODE_DWORDS - 1));
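
      // this is also a ceiling division: each block holds
      // (SKC_RASTER_NODE_DWORDS - 1) payload dwords -- one dword is
      // evidently reserved, presumably for the link to the next node
      // -- and the head block spends SKC_RASTER_HEAD_DWORDS of its
      // payload on the header, so
      //
      //   hn = ceil((SKC_RASTER_HEAD_DWORDS + keys) / (SKC_RASTER_NODE_DWORDS - 1))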

      // increment blocks
      extra_blocks += hn;

      // how many nodes trail the head?
      meta.out.nodes = hn - 1;

      // update blocks
      meta.out.blocks += extra_blocks;

#if 0
      printf("%3u - %5u, %5u, %5u, %5u\n",
             gid,
             meta.out.blocks,
             meta.out.offset,
             meta.out.nodes,
             meta.out.keys);
#endif
    }

  //
  // allocate blocks from block pool
  //
  // first perform a prefix sum on the subgroup to reduce atomic
  // operation traffic
  //
  // note this idiom can be implemented with vectors, subgroups or
  // workgroups
  //
  skc_uint const prefix = SKC_RASTERS_ALLOC_INCLUSIVE_ADD(extra_blocks);
  skc_uint       reads  = 0;

  // last lane performs the block pool allocation with an atomic increment
  if (SKC_RASTERS_ALLOC_LOCAL_ID() == SKC_RASTERS_ALLOC_GROUP_SIZE - 1) {
    reads = SKC_ATOMIC_ADD_GLOBAL_RELAXED_DEVICE(bp_atomics+SKC_BP_ATOMIC_OFFSET_READS,prefix); // ring_reads
  }

  // broadcast block pool base to all lanes
  reads = SKC_RASTERS_ALLOC_BROADCAST(reads,SKC_RASTERS_ALLOC_GROUP_SIZE - 1);

  // update base for each lane
  reads += prefix - extra_blocks;
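
  // a worked example of the idiom above, assuming a 4-lane group with
  // extra_blocks = { 2, 0, 3, 1 }:
  //
  //   inclusive scan:  prefix = { 2, 2, 5, 6 }
  //   last lane:       atomically adds 6 to ring_reads and receives
  //                    the pre-increment base, e.g. base = 100
  //   broadcast:       reads = { 100, 100, 100, 100 }
  //   per-lane base:   reads += prefix - extra_blocks
  //                    -> { 100, 102, 102, 105 }
  //
  // so each lane claims a contiguous run of ids from the block pool
  // ring with a single atomic for the whole group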

  //
  // store meta header
  //
  if (is_active)
    {
      // store headers back to meta extent
      vstore4(meta.out.u32v4,gid,metas);

      // store reads
      metas[SKC_RASTER_COHORT_META_OFFSET_READS + gid] = reads;
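
      // the per-raster base read index is presumably consumed by
      // downstream kernels, telling them where this raster's span of
      // block ids begins in the block pool ring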

      // get block_id of each raster head
      skc_block_id_t const block_id = bp_ids[reads & bp_mask];
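
      // the ring index wraps via the pow2 mask, e.g. a 4096-entry
      // ring has bp_mask = 0xFFF so reads of 4095 and 4096 map to
      // slots 4095 and 0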

      // update map
      map[raster_id] = block_id;

#if 0
      printf("alloc: %u / %u\n",raster_id,block_id);
#endif
    }
}

//
//
//