aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/compute/skc/platforms/cl_12/composition_cl_12.c
blob: 96e7834b03dc3d56f45c8513429762ecd76867ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <stdlib.h>
#include <stdio.h>

#include "hs/cl/hs_cl_launcher.h"

#include "common/cl/assert_cl.h"

#include "composition_cl_12.h"
#include "config_cl.h"

#include "context.h"
#include "raster.h"
#include "handle.h"

#include "runtime_cl_12.h"

#include "common.h"
#include "tile.h"

//
// TTCK (32-BIT COMPARE) v1:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   18  |  7  |  7  |
//
//
// TTCK (32-BIT COMPARE) v2:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   15  |  9  |  8  |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          27          |    1   |    1   |   18  |  9  |  8  |
//

//
// A TTCK key: 64 bits carrying a block id, flags, layer id and tile
// coordinate. The three anonymous structs are overlapping views of
// the same bits (see the bit-width diagrams above); field widths are
// supplied by the SKC_TTCK_* macros defined elsewhere.
//
union skc_ttck
{
  skc_ulong   u64;   // whole key as a single 64-bit scalar
  skc_uint2   u32v2; // key as a { lo, hi } 32-bit vector

  // fully decomposed view
  struct {
    skc_uint  id         : SKC_TTCK_LO_BITS_ID;     // payload/TTSB/TTPB block id
    skc_uint  prefix     : SKC_TTCK_LO_BITS_PREFIX; // prefix flag
    skc_uint  escape     : SKC_TTCK_LO_BITS_ESCAPE; // escape flag
    skc_uint  layer_lo   : SKC_TTCK_LO_BITS_LAYER;  // low bits of layer id
    skc_uint  layer_hi   : SKC_TTCK_HI_BITS_LAYER;  // high bits of layer id
    skc_uint  x          : SKC_TTCK_HI_BITS_X;      // tile x coordinate
    skc_uint  y          : SKC_TTCK_HI_BITS_Y;      // tile y coordinate
  };

  // view exposing the full layer id as one field
  struct {
    skc_ulong na0        : SKC_TTCK_LO_BITS_ID_PREFIX_ESCAPE; // skip id/prefix/escape
    skc_ulong layer      : SKC_TTCK_BITS_LAYER;               // complete layer id
    skc_ulong na1        : SKC_TTCK_HI_BITS_YX;               // skip y:x
  };

  // view exposing the packed y:x coordinate as one field
  struct {
    skc_uint  na2;                                 // skip entire low word
    skc_uint  na3        : SKC_TTCK_HI_BITS_LAYER; // skip high layer bits
    skc_uint  yx         : SKC_TTCK_HI_BITS_YX;    // packed y:x coordinate
  };
};

//
// FIXME -- accept floats on host but convert to subpixel offsets
// before appending to command ring
//

#define SKC_PLACE_CMD_TX_CONVERT(f)  0
#define SKC_PLACE_CMD_TY_CONVERT(f)  0

//
// COMPOSITION PLACE
//
// This is a snapshot of the host-side command queue.
//
// Note that the composition command extent could be implemented as
// either a mapped buffer or simply copied to an ephemeral extent.
//
// This implementation may vary between compute platforms.
//

struct skc_composition_place
{
  struct skc_composition_impl      * impl; // backpointer to owning composition impl

  cl_command_queue                   cq;   // command queue this place op runs on

  struct skc_extent_phw1g_tdrNs_snap cmds; // snapshot of host-side place commands

  skc_subbuf_id_t                    id;   // suballocation id used to free this struct
};

//
// Forward declarations
//

static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
                             skc_bool                      const block);

//
//
//

static
void
skc_composition_pfn_release(struct skc_composition_impl * const impl)
{
  // drop one reference; only the final release tears everything down
  if (--impl->composition->ref_count != 0)
    return;

  //
  // otherwise, dispose of all resources
  //

  // the unsealed state is a safe state to dispose of resources
  skc_composition_unseal_block(impl,true); // block

  struct skc_runtime * const runtime = impl->runtime;

  // free host composition
  skc_runtime_host_perm_free(runtime,impl->composition);

  // release the cq
  skc_runtime_release_cq_in_order(runtime,impl->cq);

  // release kernels
  cl(ReleaseKernel(impl->kernels.place));
  cl(ReleaseKernel(impl->kernels.segment));

  // release extents
  skc_extent_phw1g_tdrNs_free(runtime,&impl->cmds.extent);
  skc_extent_phrw_free       (runtime,&impl->saved.extent);
  skc_extent_phr_pdrw_free   (runtime,&impl->atomics);

  skc_extent_pdrw_free       (runtime,&impl->keys);
  skc_extent_pdrw_free       (runtime,&impl->offsets);

  // free composition impl last -- everything above hangs off it
  skc_runtime_host_perm_free(runtime,impl);
}

//
//
//

static
void
skc_composition_place_grid_pfn_dispose(skc_grid_t const grid)
{
  // dispose of a completed place grid: return its cq, free the command
  // snapshot and the place struct, then drop the impl reference taken
  // when the snapshot was created
  struct skc_composition_place * const place   = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl    = place->impl;
  struct skc_runtime           * const runtime = impl->runtime;

  // release cq
  skc_runtime_release_cq_in_order(runtime,place->cq);

  // unmap the snapshot (could be a copy)
  skc_extent_phw1g_tdrNs_snap_free(runtime,&place->cmds);

  // release place struct
  skc_runtime_host_temp_free(runtime,place,place->id);

  // release impl
  skc_composition_pfn_release(impl);
}

//
//
//

// scheduler-side continuation: the atomics read-back finished, so the
// place grid is done
static
void
skc_composition_place_read_complete(skc_grid_t const grid)
{
  skc_grid_complete(grid);
}

//
// OpenCL event callback for the atomics read-back -- get off the CL
// callback thread as fast as possible and hand the completion over to
// the context command scheduler
//
static
void
skc_composition_place_read_cb(cl_event event, cl_int status, skc_grid_t const grid)
{
  SKC_CL_CB(status);

  struct skc_composition_place * const place = skc_grid_get_data(grid);

  SKC_SCHEDULER_SCHEDULE(place->impl->runtime->scheduler,
                         skc_composition_place_read_complete,
                         grid);
}

static
void
skc_composition_place_grid_pfn_execute(skc_grid_t const grid)
{
  //
  // FILLS EXPAND
  //
  // need result of cmd counts before launching RASTERIZE grids
  //
  // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
  // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
  // - or launch a device-wide grid that feeds itself but that's unsatisfying
  //
  struct skc_composition_place * const place   = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl    = place->impl;
  struct skc_runtime           * const runtime = impl->runtime;

  // one work item per snapshotted place command
  skc_uint  const work_size = skc_extent_ring_snap_count(place->cmds.snap);
  // clip disabled for now -- covers the whole surface
  skc_uint4 const clip      = { 0, 0, SKC_UINT_MAX, SKC_UINT_MAX };

  // initialize kernel args
  cl(SetKernelArg(impl->kernels.place,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
  cl(SetKernelArg(impl->kernels.place,1,SKC_CL_ARG(impl->atomics.drw)));
  cl(SetKernelArg(impl->kernels.place,2,SKC_CL_ARG(impl->keys.drw)));
  cl(SetKernelArg(impl->kernels.place,3,SKC_CL_ARG(place->cmds.drN)));
  cl(SetKernelArg(impl->kernels.place,4,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.place,5,SKC_CL_ARG(clip))); // FIXME -- convert the clip to yx0/yx1 format
  cl(SetKernelArg(impl->kernels.place,6,SKC_CL_ARG(work_size)));

  // launch kernel
  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_PLACE,
                            place->cq,
                            impl->kernels.place,
                            work_size,
                            0,NULL,NULL);
  //
  // copy atomics back after every place launch
  //
  cl_event complete;

  skc_extent_phr_pdrw_read(&impl->atomics,place->cq,&complete);

  // continue the pipeline from the read-back completion callback
  cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_place_read_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue so the work actually starts
  cl(Flush(place->cq));
}

//
//
//

//
// Snapshot the host command ring into a new place operation and force
// the place grid to start executing. Takes a composition reference
// that is dropped when the place grid is disposed.
//
static
void
skc_composition_snap(struct skc_composition_impl * const impl)
{
  skc_composition_retain(impl->composition);

  skc_subbuf_id_t id;

  struct skc_composition_place * const place = skc_runtime_host_temp_alloc(impl->runtime,
                                                                           SKC_MEM_FLAGS_READ_WRITE,
                                                                           sizeof(*place),&id,NULL);

  // save the subbuf id
  place->id = id;

  // save backpointer
  place->impl = impl;

  // set grid data
  skc_grid_set_data(impl->grids.place,place);

  // acquire command queue
  place->cq = skc_runtime_acquire_cq_in_order(impl->runtime);

  // checkpoint the ring
  skc_extent_ring_checkpoint(&impl->cmds.ring);

  // make a snapshot
  skc_extent_phw1g_tdrNs_snap_init(impl->runtime,&impl->cmds.ring,&place->cmds);

  // allocate the snapshot and enqueue its transfer (could be a copy)
  skc_extent_phw1g_tdrNs_snap_alloc(impl->runtime,
                                    &impl->cmds.extent,
                                    &place->cmds,
                                    place->cq,
                                    NULL);

  skc_grid_force(impl->grids.place);
}

//
//
//

static
void
skc_composition_pfn_seal(struct skc_composition_impl * const impl)
{
  // return if sealing or sealed
  if (impl->state >= SKC_COMPOSITION_STATE_SEALING)
    return;

  struct skc_runtime   * const runtime   = impl->runtime;
  struct skc_scheduler * const scheduler = runtime->scheduler;

  //
  // otherwise, wait for UNSEALING > UNSEALED transition
  //
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
    {
      SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
    }
  else // or we were already unsealed
    {
      // flush if there is work in progress
      skc_uint const count = skc_extent_ring_wip_count(&impl->cmds.ring);

      if (count > 0) {
        skc_composition_snap(impl);
      }
    }

  //
  // now unsealed so we need to start sealing...
  //
  impl->state = SKC_COMPOSITION_STATE_SEALING;

  //
  // the seal operation implies we should force start all dependencies
  // that are still in a ready state
  //
  skc_grid_force(impl->grids.sort);
}

//
//
//

// scheduler-side continuation: the sort/segment pipeline has finished
// and its atomics have been read back, so the composition is sealed
void
skc_composition_sort_execute_complete(struct skc_composition_impl * const impl)
{
  // we're sealed
  impl->state = SKC_COMPOSITION_STATE_SEALED;

  // this grid is done
  skc_grid_complete(impl->grids.sort);
}

//
// OpenCL event callback for the sort stage's atomics read-back --
// immediately bounce the completion onto the context command scheduler
// so no real work runs on the CL callback thread
//
static
void
skc_composition_sort_execute_cb(cl_event event, cl_int status, struct skc_composition_impl * const impl)
{
  SKC_CL_CB(status);

  struct skc_scheduler * const scheduler = impl->runtime->scheduler;

  SKC_SCHEDULER_SCHEDULE(scheduler,skc_composition_sort_execute_complete,impl);
}

static
void
skc_composition_sort_grid_pfn_execute(skc_grid_t const grid)
{
  // sort all accumulated TTCK keys and segment them by tile, then read
  // the atomics back so the sealed composition knows its key count
  struct skc_composition_impl * const impl    = skc_grid_get_data(grid);
  struct skc_runtime          * const runtime = impl->runtime;

  // we should be sealing
  assert(impl->state == SKC_COMPOSITION_STATE_SEALING);

  struct skc_place_atomics * const atomics = impl->atomics.hr;

#ifndef NDEBUG
  fprintf(stderr,"composition sort: %u\n",atomics->keys);
#endif

  if (atomics->keys > 0)
    {
      uint32_t keys_padded_in, keys_padded_out;

      // pad the key count up to what the sorter requires
      hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);

      // sort the keys in place on the device
      hs_cl_sort(impl->runtime->hs,
                 impl->cq,
                 0,NULL,NULL,
                 impl->keys.drw,
                 NULL,
                 atomics->keys,
                 keys_padded_in,
                 keys_padded_out,
                 false);

      cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(impl->keys.drw)));
      cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(impl->offsets.drw)));
      cl(SetKernelArg(impl->kernels.segment,2,SKC_CL_ARG(impl->atomics.drw)));

      // find start of each tile
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK,
                                impl->cq,
                                impl->kernels.segment,
                                atomics->keys,
                                0,NULL,NULL);
    }

  cl_event complete;

  // next stage needs to know number of key segments
  skc_extent_phr_pdrw_read(&impl->atomics,impl->cq,&complete);

  // register a callback
  cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_sort_execute_cb,impl));
  cl(ReleaseEvent(complete));

  // flush cq
  cl(Flush(impl->cq));
}

//
//
//

static
void
skc_composition_raster_release(struct skc_composition_impl * const impl)
{
  //
  // reference counts to rasters can only be released when the
  // composition is unsealed and the atomics are reset.
  //
  skc_runtime_raster_device_release(impl->runtime,
                                    impl->saved.extent.hrw,
                                    impl->saved.count);
  // reset count
  impl->saved.count = 0;
}

//
//
//

//
// Transition the composition to the UNSEALED state, pumping the
// scheduler until any in-flight SEALING/UNSEALING work and render
// locks have drained. If 'block' is false and another unseal is in
// flight, returns without waiting for it.
//
static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
                             skc_bool                      const block)
{
  // return if already unsealed
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALED)
    return;

  //
  // otherwise, we're going to need to pump the scheduler
  //
  struct skc_scheduler * const scheduler = impl->runtime->scheduler;

  //
  // wait for UNSEALING > UNSEALED transition
  //
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
    {
      if (block) {
        SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
      }
      return;
    }

  //
  // wait for SEALING > SEALED transition ...
  //
  if (impl->state == SKC_COMPOSITION_STATE_SEALING)
    {
      // wait if sealing
      SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_SEALED);
    }

  // wait for rendering locks to be released
  SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->lock_count > 0);

  //
  // no need to visit UNSEALING state with this implementation
  //

  // acquire a new grid for the next seal's sort stage
  impl->grids.sort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                          NULL,  // the composition state guards this
                                          impl,
                                          NULL,  // no waiting
                                          skc_composition_sort_grid_pfn_execute,
                                          NULL); // no dispose

  // mark composition as unsealed
  impl->state = SKC_COMPOSITION_STATE_UNSEALED;
}

//
// can only be called on a composition that was just unsealed
//
static
void
skc_composition_reset(struct skc_composition_impl * const impl)
{
  // zero the atomics
  skc_extent_phr_pdrw_zero(&impl->atomics,impl->cq,NULL);

  // flush it
  cl(Flush(impl->cq));

  // release all the rasters
  skc_composition_raster_release(impl);
}

//
// Unseal the composition (optionally blocking) and, when requested,
// reset it -- zeroing the atomics and dropping raster references.
//
static
void
skc_composition_unseal_block_reset(struct skc_composition_impl * const impl,
                                   skc_bool                      const block,
                                   skc_bool                      const reset)
{
  skc_composition_unseal_block(impl,block);

  if (!reset)
    return;

  skc_composition_reset(impl);
}

//
//
//

// public unseal entry point -- non-blocking variant of unseal+reset
static
void
skc_composition_pfn_unseal(struct skc_composition_impl * const impl, skc_bool const reset)
{
  skc_composition_unseal_block_reset(impl,false,reset);
}

//
// only needs to create a grid
//

static
void
skc_composition_place_create(struct skc_composition_impl * const impl)
{
  // acquire a grid
  impl->grids.place = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                           &impl->grids.place,
                                           NULL,
                                           NULL, // no waiting
                                           skc_composition_place_grid_pfn_execute,
                                           skc_composition_place_grid_pfn_dispose);

  // assign happens-after relationship: sort waits on this place grid
  skc_grid_happens_after_grid(impl->grids.sort,impl->grids.place);
}


//
// Place 'count' rasters onto layers, optionally translated by the
// (txs,tys) subpixel offsets (either array may be NULL for zero
// offsets). Blocks until the composition is unsealed, validates and
// retains the rasters, then copies place commands into the ring --
// snapshotting whenever the ring's work-in-progress region fills.
//
static
skc_err
skc_composition_pfn_place(struct skc_composition_impl * const impl,
                          skc_raster_t          const *       rasters,
                          skc_layer_id          const *       layer_ids,
                          skc_float             const *       txs,
                          skc_float             const *       tys,
                          skc_uint                            count)
{
  // block and yield if not unsealed
  skc_composition_unseal_block(impl,true);

  //
  // validate and retain all rasters
  //
  skc_err err;

  err = skc_runtime_handle_device_validate_retain(impl->runtime,
                                                  SKC_TYPED_HANDLE_TYPE_IS_RASTER,
                                                  rasters,
                                                  count);
  if (err)
    return err;

  skc_runtime_handle_device_retain(impl->runtime,rasters,count);

  //
  // save the stripped handles
  //
  skc_raster_t * saved = impl->saved.extent.hrw;

  saved             += impl->saved.count;
  impl->saved.count += count;

  for (skc_uint ii=0; ii<count; ii++) {
    saved[ii] = SKC_TYPED_HANDLE_TO_HANDLE(*rasters++);
  }

  //
  // - declare the place grid happens after the raster
  // - copy place commands into ring
  //
  do {
    skc_uint rem;

    // find out how much room is left in the ring's snap
    // if the place ring is full -- let it drain
    SKC_SCHEDULER_WAIT_WHILE(impl->runtime->scheduler,(rem = skc_extent_ring_wip_rem(&impl->cmds.ring)) == 0);

    // append commands
    skc_uint avail = min(rem,count);

    // decrement count
    count -= avail;

    // launch a place kernel after copying commands?
    skc_bool const is_wip_full = (avail == rem);

    // if there is no place grid then create one
    if (impl->grids.place == NULL)
      {
        skc_composition_place_create(impl);
      }

    //
    // FIXME -- OPTIMIZATION? -- the ring_wip_index_inc() test can
    // be avoided by splitting into at most two intervals. It should
    // be plenty fast as is though so leave for now.
    //
    union skc_cmd_place * const cmds = impl->cmds.extent.hw1;

    // four specializations so NULL tx/ty arrays are never dereferenced
    if ((txs == NULL) && (tys == NULL))
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster, *layer_ids++, 0, 0 };
          }
      }
    else if (txs == NULL)
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     0,
                                     SKC_PLACE_CMD_TY_CONVERT(*tys++) };
          }
      }
    else if (tys == NULL)
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     SKC_PLACE_CMD_TX_CONVERT(*txs++),
                                     0 };
          }
      }
    else
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     SKC_PLACE_CMD_TX_CONVERT(*txs++),
                                     SKC_PLACE_CMD_TY_CONVERT(*tys++) };
          }
      }

    // launch place kernel?
    if (is_wip_full) {
      skc_composition_snap(impl);
    }
  } while (count > 0);

  return SKC_ERR_SUCCESS;
}

//
//
//

static
void
skc_composition_pfn_bounds(struct skc_composition_impl * const impl, skc_int bounds[4])
{
  //
  // FIXME -- not implemented yet
  //
  // impl bounds will be copied back after sealing
  //
  // placeholder: report an unbounded region
  bounds[0] = SKC_INT_MIN;
  bounds[1] = SKC_INT_MIN;
  bounds[2] = SKC_INT_MAX;
  bounds[3] = SKC_INT_MAX;
}

//
//
//

//
// Take a reference AND a render lock -- the lock keeps the
// composition from being unsealed while a render is in flight.
//
void
skc_composition_retain_and_lock(struct skc_composition * const composition)
{
  skc_composition_retain(composition);

  ++composition->impl->lock_count;
}

void
skc_composition_unlock_and_release(struct skc_composition * const composition)
{
  composition->impl->lock_count -= 1;

  skc_composition_pfn_release(composition->impl);
}

//
//
//

//
// Create the OpenCL 1.2 composition: allocates the host object and
// its impl, wires up the pfn vtable, acquires a cq and kernels, sizes
// the command ring and extents from the runtime config, and leaves
// the composition unsealed with zeroed atomics.
//
skc_err
skc_composition_cl_12_create(struct skc_context       * const context,
                             struct skc_composition * * const composition)
{
  struct skc_runtime * const runtime = context->runtime;

  // retain the context
  // skc_context_retain(context);

  // allocate impl
  struct skc_composition_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl));

  // allocate composition
  (*composition)            = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**composition));

  (*composition)->context   = context;
  (*composition)->impl      = impl;
  (*composition)->ref_count = 1;

  // wire up the public function table
  (*composition)->place     = skc_composition_pfn_place;
  (*composition)->unseal    = skc_composition_pfn_unseal;
  (*composition)->seal      = skc_composition_pfn_seal;
  (*composition)->bounds    = skc_composition_pfn_bounds;
  (*composition)->release   = skc_composition_pfn_release;

  // initialize impl
  impl->composition   = (*composition);
  impl->runtime       = runtime;

  SKC_ASSERT_STATE_INIT(impl,SKC_COMPOSITION_STATE_SEALED);

  impl->lock_count    = 0;

  impl->grids.sort    = NULL;
  impl->grids.place   = NULL;

  // acquire command queue for sealing/unsealing
  impl->cq            = skc_runtime_acquire_cq_in_order(runtime);

  // acquire kernels
  impl->kernels.place   = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PLACE);
  impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK);

  // get config
  struct skc_config const * const config = runtime->config;

  // initialize ring size with config values
  skc_extent_ring_init(&impl->cmds.ring,
                       config->composition.cmds.elem_count,
                       config->composition.cmds.snap_count,
                       sizeof(union skc_cmd_place));

  skc_extent_phw1g_tdrNs_alloc(runtime,&impl->cmds.extent ,sizeof(union skc_cmd_place) * config->composition.cmds.elem_count);
  skc_extent_phrw_alloc       (runtime,&impl->saved.extent,sizeof(skc_raster_t)        * config->composition.raster_ids.elem_count);
  skc_extent_phr_pdrw_alloc   (runtime,&impl->atomics     ,sizeof(struct skc_place_atomics));

  skc_extent_pdrw_alloc       (runtime,&impl->keys        ,sizeof(skc_ttxk_t)          * config->composition.keys.elem_count);
  skc_extent_pdrw_alloc       (runtime,&impl->offsets     ,sizeof(skc_uint)            * (1u << SKC_TTCK_HI_BITS_YX)); // 1MB

  // nothing saved
  impl->saved.count = 0;

  // unseal the composition, zero the atomics, etc.
  skc_composition_unseal_block_reset(impl,false,true);

  return SKC_ERR_SUCCESS;
}

//
//
//