diff options
Diffstat (limited to 'src/compute/skc/platforms/cl_12/composition_cl_12.c')
-rw-r--r-- | src/compute/skc/platforms/cl_12/composition_cl_12.c | 823 |
1 files changed, 823 insertions, 0 deletions
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

//
//
//

#include <stdlib.h>
#include <stdio.h>

#include "hs/cl/hs_cl_launcher.h"

#include "common/cl/assert_cl.h"

#include "composition_cl_12.h"
#include "config_cl.h"

#include "context.h"
#include "raster.h"
#include "handle.h"

#include "runtime_cl_12.h"

#include "common.h"
#include "tile.h"

//
// TTCK (32-BIT COMPARE) v1:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   18  |  7  |  7  |
//
//
// TTCK (32-BIT COMPARE) v2:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          30          |    1   |    1   |   15  |  9  |  8  |
//
//
// TTCK (64-BIT COMPARE) -- achieves 4K x 4K with an 8x16 tile:
//
//  0                                                           63
//  | PAYLOAD/TTSB/TTPB ID | PREFIX | ESCAPE | LAYER |  X  |  Y  |
//  +----------------------+--------+--------+-------+-----+-----+
//  |          27          |    1   |    1   |   18  |  9  |  8  |
//

//
// A TTCK key viewed three ways: as a raw 64-bit value, as the
// individual bit-fields, and as wider aggregate fields (layer, yx)
// that span the 32-bit word boundary.  The SKC_TTCK_* bit counts are
// defined by the active key layout above.
//
union skc_ttck
{
  skc_ulong u64;
  skc_uint2 u32v2;

  struct {
    skc_uint id       : SKC_TTCK_LO_BITS_ID;
    skc_uint prefix   : SKC_TTCK_LO_BITS_PREFIX;
    skc_uint escape   : SKC_TTCK_LO_BITS_ESCAPE;
    skc_uint layer_lo : SKC_TTCK_LO_BITS_LAYER;
    skc_uint layer_hi : SKC_TTCK_HI_BITS_LAYER;
    skc_uint x        : SKC_TTCK_HI_BITS_X;
    skc_uint y        : SKC_TTCK_HI_BITS_Y;
  };

  struct {
    skc_ulong na0   : SKC_TTCK_LO_BITS_ID_PREFIX_ESCAPE;
    skc_ulong layer : SKC_TTCK_BITS_LAYER;
    skc_ulong na1   : SKC_TTCK_HI_BITS_YX;
  };

  struct {
    skc_uint na2;
    skc_uint na3 : SKC_TTCK_HI_BITS_LAYER;
    skc_uint yx  : SKC_TTCK_HI_BITS_YX;
  };
};

//
// FIXME -- accept floats on host but convert to subpixel offsets
// before appending to command ring
//

#define SKC_PLACE_CMD_TX_CONVERT(f)  0
#define SKC_PLACE_CMD_TY_CONVERT(f)  0

//
// COMPOSITION PLACE
//
// This is a snapshot of the host-side command queue.
//
// Note that the composition command extent could be implemented as
// either a mapped buffer or simply copied to an ephemeral extent.
//
// This implementation may vary between compute platforms.
//

struct skc_composition_place
{
  struct skc_composition_impl      * impl; // backpointer to the owning composition impl

  cl_command_queue                   cq;   // in-order queue this place launch runs on

  struct skc_extent_phw1g_tdrNs_snap cmds; // snapshot of the place command ring

  skc_subbuf_id_t                    id;   // temp-allocator suballocation id for this struct
};

//
// Forward declarations
//

static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
                             skc_bool                      const block);

//
//
//

//
// Drop a reference to the composition.  On the final release, force
// the composition back to the unsealed state (a safe state for
// disposal), then free all device extents, kernels, the cq, and the
// host-side composition and impl structs.
//
// NOTE(review): ref_count decrement is not atomic -- presumably all
// composition API calls are confined to one thread; confirm.
//
static
void
skc_composition_pfn_release(struct skc_composition_impl * const impl)
{
  if (--impl->composition->ref_count != 0)
    return;

  //
  // otherwise, dispose of all resources
  //

  // the unsealed state is a safe state to dispose of resources
  skc_composition_unseal_block(impl,true); // block

  struct skc_runtime * const runtime = impl->runtime;

  // free host composition
  skc_runtime_host_perm_free(runtime,impl->composition);

  // release the cq
  skc_runtime_release_cq_in_order(runtime,impl->cq);

  // release kernels
  cl(ReleaseKernel(impl->kernels.place));
  cl(ReleaseKernel(impl->kernels.segment));

  // release extents
  skc_extent_phw1g_tdrNs_free(runtime,&impl->cmds.extent);
  skc_extent_phrw_free       (runtime,&impl->saved.extent);
  skc_extent_phr_pdrw_free   (runtime,&impl->atomics);

  skc_extent_pdrw_free       (runtime,&impl->keys);
  skc_extent_pdrw_free       (runtime,&impl->offsets);

  // free composition impl
  skc_runtime_host_perm_free(runtime,impl);
}

//
//
//

//
// Grid dispose callback for a place launch: tear down the snapshot's
// resources and drop the reference the snapshot held on the impl.
//
static
void
skc_composition_place_grid_pfn_dispose(skc_grid_t const grid)
{
  struct skc_composition_place * const place   = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl    = place->impl;
  struct skc_runtime           * const runtime = impl->runtime;

  // release cq
  skc_runtime_release_cq_in_order(runtime,place->cq);

  // unmap the snapshot (could be a copy)
  skc_extent_phw1g_tdrNs_snap_free(runtime,&place->cmds);

  // release place struct
  skc_runtime_host_temp_free(runtime,place,place->id);

  // release impl
  skc_composition_pfn_release(impl);
}

//
//
//

//
// Scheduler thunk: runs on the scheduler thread once the atomics
// read-back has completed and marks the place grid done.
//
static
void
skc_composition_place_read_complete(skc_grid_t const grid)
{
  skc_grid_complete(grid);
}

//
// OpenCL event callback for the atomics read-back.  Callbacks run on
// a driver thread, so do no real work here -- just hand off to the
// context scheduler.
//
static
void
skc_composition_place_read_cb(cl_event event, cl_int status, skc_grid_t const grid)
{
  SKC_CL_CB(status);

  struct skc_composition_place * const place     = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl      = place->impl;
  struct skc_runtime           * const runtime   = impl->runtime;
  struct skc_scheduler         * const scheduler = runtime->scheduler;

  // as quickly as possible, enqueue next stage in pipeline to context command scheduler
  SKC_SCHEDULER_SCHEDULE(scheduler,skc_composition_place_read_complete,grid);
}

//
// Grid execute callback for a place launch: bind the PLACE kernel
// args, enqueue it over the snapshot's command count, then read the
// atomic counters back to the host and register a completion
// callback.
//
static
void
skc_composition_place_grid_pfn_execute(skc_grid_t const grid)
{
  //
  // FILLS EXPAND
  //
  // need result of cmd counts before launching RASTERIZE grids
  //
  // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
  // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
  // - or launch a device-wide grid that feeds itself but that's unsatisfying
  //
  struct skc_composition_place * const place   = skc_grid_get_data(grid);
  struct skc_composition_impl  * const impl    = place->impl;
  struct skc_runtime           * const runtime = impl->runtime;

  skc_uint  const work_size = skc_extent_ring_snap_count(place->cmds.snap);
  skc_uint4 const clip      = { 0, 0, SKC_UINT_MAX, SKC_UINT_MAX };

  // initialize kernel args
  cl(SetKernelArg(impl->kernels.place,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
  cl(SetKernelArg(impl->kernels.place,1,SKC_CL_ARG(impl->atomics.drw)));
  cl(SetKernelArg(impl->kernels.place,2,SKC_CL_ARG(impl->keys.drw)));
  cl(SetKernelArg(impl->kernels.place,3,SKC_CL_ARG(place->cmds.drN)));
  cl(SetKernelArg(impl->kernels.place,4,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.place,5,SKC_CL_ARG(clip))); // FIXME -- convert the clip to yx0/yx1 format
  cl(SetKernelArg(impl->kernels.place,6,SKC_CL_ARG(work_size)));

  // launch kernel
  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_PLACE,
                            place->cq,
                            impl->kernels.place,
                            work_size,
                            0,NULL,NULL);
  //
  // copy atomics back after every place launch
  //
  cl_event complete;

  skc_extent_phr_pdrw_read(&impl->atomics,place->cq,&complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_place_read_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(place->cq));
}

//
//
//

//
// Snapshot the current window of the place command ring and force the
// place grid to start executing.  Retains the composition for the
// lifetime of the snapshot (released in grid dispose).
//
static
void
skc_composition_snap(struct skc_composition_impl * const impl)
{
  skc_composition_retain(impl->composition);

  skc_subbuf_id_t id;

  struct skc_composition_place * const place = skc_runtime_host_temp_alloc(impl->runtime,
                                                                           SKC_MEM_FLAGS_READ_WRITE,
                                                                           sizeof(*place),&id,NULL);

  // save the subbuf id
  place->id = id;

  // save backpointer
  place->impl = impl;

  // set grid data
  skc_grid_set_data(impl->grids.place,place);

  // acquire command queue
  place->cq = skc_runtime_acquire_cq_in_order(impl->runtime);

  // checkpoint the ring
  skc_extent_ring_checkpoint(&impl->cmds.ring);

  // make a snapshot
  skc_extent_phw1g_tdrNs_snap_init(impl->runtime,&impl->cmds.ring,&place->cmds);

  // acquire the device-side snapshot (could be a map or a copy)
  skc_extent_phw1g_tdrNs_snap_alloc(impl->runtime,
                                    &impl->cmds.extent,
                                    &place->cmds,
                                    place->cq,
                                    NULL);

  skc_grid_force(impl->grids.place);
}

//
//
//

//
// Begin sealing the composition: flush any work-in-progress place
// commands, transition to SEALING, and force the sort grid (which
// completes the SEALING -> SEALED transition).  No-op if already
// sealing/sealed.
//
static
void
skc_composition_pfn_seal(struct skc_composition_impl * const impl)
{
  // return if sealing or sealed
  if (impl->state >= SKC_COMPOSITION_STATE_SEALING)
    return;

  struct skc_runtime   * const runtime   = impl->runtime;
  struct skc_scheduler * const scheduler = runtime->scheduler;

  //
  // otherwise, wait for UNSEALING > UNSEALED transition
  //
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
    {
      SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
    }
  else // or we were already unsealed
    {
      // flush if there is work in progress
      skc_uint const count = skc_extent_ring_wip_count(&impl->cmds.ring);

      if (count > 0) {
        skc_composition_snap(impl);
      }
    }

  //
  // now unsealed so we need to start sealing...
  //
  impl->state = SKC_COMPOSITION_STATE_SEALING;

  //
  // the seal operation implies we should force start all dependencies
  // that are still in a ready state
  //
  skc_grid_force(impl->grids.sort);
}

//
//
//

//
// Scheduler thunk: the key sort + segmentation pipeline has drained,
// so mark the composition SEALED and complete the sort grid.
// NOTE(review): not static, unlike sibling callbacks -- presumably
// unintentional; confirm no external references before narrowing.
//
void
skc_composition_sort_execute_complete(struct skc_composition_impl * const impl)
{
  // we're sealed
  impl->state = SKC_COMPOSITION_STATE_SEALED;

  // this grid is done
  skc_grid_complete(impl->grids.sort);
}

//
// OpenCL event callback for the post-sort atomics read-back; hands
// off to the scheduler (no real work on the driver thread).
//
static
void
skc_composition_sort_execute_cb(cl_event event, cl_int status, struct skc_composition_impl * const impl)
{
  SKC_CL_CB(status);

  // as quickly as possible, enqueue next stage in pipeline to context command scheduler
  SKC_SCHEDULER_SCHEDULE(impl->runtime->scheduler,skc_composition_sort_execute_complete,impl);
}

//
// Grid execute callback for sealing: sort all accumulated TTCK keys
// (HotSort), launch the SEGMENT_TTCK kernel to find each tile's key
// range, then read the atomics back and register the completion
// callback.
//
static
void
skc_composition_sort_grid_pfn_execute(skc_grid_t const grid)
{
  struct skc_composition_impl * const impl = skc_grid_get_data(grid);

  // we should be sealing
  assert(impl->state == SKC_COMPOSITION_STATE_SEALING);

  struct skc_place_atomics * const atomics = impl->atomics.hr;

#ifndef NDEBUG
  fprintf(stderr,"composition sort: %u\n",atomics->keys);
#endif

  if (atomics->keys > 0)
    {
      uint32_t keys_padded_in, keys_padded_out;

      hs_pad(atomics->keys,&keys_padded_in,&keys_padded_out);

      hs_sort(impl->cq,
              impl->keys.drw,
              impl->keys.drw,
              atomics->keys,
              keys_padded_in,
              keys_padded_out,
              false);

      cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(impl->keys.drw)));
      cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(impl->offsets.drw)));
      cl(SetKernelArg(impl->kernels.segment,2,SKC_CL_ARG(impl->atomics.drw)));

      // find start of each tile
      skc_device_enqueue_kernel(impl->runtime->device,
                                SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK,
                                impl->cq,
                                impl->kernels.segment,
                                atomics->keys,
                                0,NULL,NULL);
    }

  cl_event complete;

  // next stage needs to know number of key segments
  skc_extent_phr_pdrw_read(&impl->atomics,impl->cq,&complete);

  // register a callback
  cl(SetEventCallback(complete,CL_COMPLETE,skc_composition_sort_execute_cb,impl));
  cl(ReleaseEvent(complete));

  // flush cq
  cl(Flush(impl->cq));
}

//
//
//

//
// Release the device-side references on all rasters saved by place
// commands and reset the saved count.
//
static
void
skc_composition_raster_release(struct skc_composition_impl * const impl)
{
  //
  // reference counts to rasters can only be released when the
  // composition is unsealed and the atomics are reset.
  //
  skc_runtime_raster_device_release(impl->runtime,
                                    impl->saved.extent.hrw,
                                    impl->saved.count);
  // reset count
  impl->saved.count = 0;
}

//
//
//

//
// Drive the composition back to the UNSEALED state, pumping the
// scheduler through any in-flight SEALING work and waiting for
// renderers to drop their locks.  If 'block' is false and another
// unseal is already in flight, returns without waiting.
//
static
void
skc_composition_unseal_block(struct skc_composition_impl * const impl,
                             skc_bool                      const block)
{
  // return if already unsealed
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALED)
    return;

  //
  // otherwise, we're going to need to pump the scheduler
  //
  struct skc_scheduler * const scheduler = impl->runtime->scheduler;

  //
  // wait for UNSEALING > UNSEALED transition
  //
  if (impl->state == SKC_COMPOSITION_STATE_UNSEALING)
    {
      if (block) {
        SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_UNSEALED);
      }
      return;
    }

  //
  // wait for SEALING > SEALED transition ...
  //
  if (impl->state == SKC_COMPOSITION_STATE_SEALING)
    {
      // wait if sealing
      SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->state != SKC_COMPOSITION_STATE_SEALED);
    }

  // wait for rendering locks to be released
  SKC_SCHEDULER_WAIT_WHILE(scheduler,impl->lock_count > 0);

  //
  // no need to visit UNSEALING state with this implementation
  //

  // acquire a new grid
  impl->grids.sort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                          NULL, // the composition state guards this
                                          impl,
                                          NULL, // no waiting
                                          skc_composition_sort_grid_pfn_execute,
                                          NULL); // no dispose

  // mark composition as unsealed
  impl->state = SKC_COMPOSITION_STATE_UNSEALED;
}

//
// can only be called on a composition that was just unsealed
//
static
void
skc_composition_reset(struct skc_composition_impl * const impl)
{
  // zero the atomics
  skc_extent_phr_pdrw_zero(&impl->atomics,impl->cq,NULL);

  // flush it
  cl(Flush(impl->cq));

  // release all the rasters
  skc_composition_raster_release(impl);
}

//
// Unseal and optionally reset in one step.
//
static
void
skc_composition_unseal_block_reset(struct skc_composition_impl * const impl,
                                   skc_bool                      const block,
                                   skc_bool                      const reset)
{
  skc_composition_unseal_block(impl,block);

  if (reset) {
    skc_composition_reset(impl);
  }
}

//
//
//

//
// Public unseal entry point (non-blocking).
//
static
void
skc_composition_pfn_unseal(struct skc_composition_impl * const impl, skc_bool const reset)
{
  skc_composition_unseal_block_reset(impl,false,reset);
}

//
// only needs to create a grid
//

static
void
skc_composition_place_create(struct skc_composition_impl * const impl)
{
  // acquire a grid
  impl->grids.place = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
                                           &impl->grids.place,
                                           NULL,
                                           NULL, // no waiting
                                           skc_composition_place_grid_pfn_execute,
                                           skc_composition_place_grid_pfn_dispose);

  // assign happens-after relationship
  skc_grid_happens_after_grid(impl->grids.sort,impl->grids.place);
}

//
// Public place entry point: validate and retain 'count' rasters, then
// append one place command per raster (with optional tx/ty subpixel
// offsets) to the command ring, snapshotting and launching whenever
// the ring's work-in-progress window fills.  Blocks until the
// composition is unsealed.  Returns SKC_ERR_SUCCESS or a validation
// error.
//
static
skc_err
skc_composition_pfn_place(struct skc_composition_impl * const impl,
                          skc_raster_t          const *       rasters,
                          skc_layer_id          const *       layer_ids,
                          skc_float             const *       txs,
                          skc_float             const *       tys,
                          skc_uint                            count)
{
  // block and yield if not unsealed
  skc_composition_unseal_block(impl,true);

  //
  // validate and retain all rasters
  //
  skc_err err;

  err = skc_runtime_handle_device_validate_retain(impl->runtime,
                                                  SKC_TYPED_HANDLE_TYPE_IS_RASTER,
                                                  rasters,
                                                  count);
  if (err)
    return err;

  skc_runtime_handle_device_retain(impl->runtime,rasters,count);

  //
  // save the stripped handles
  //
  skc_raster_t * saved = impl->saved.extent.hrw;

  saved             += impl->saved.count;
  impl->saved.count += count;

  for (skc_uint ii=0; ii<count; ii++) {
    saved[ii] = SKC_TYPED_HANDLE_TO_HANDLE(*rasters++);
  }

  //
  // - declare the place grid happens after the raster
  // - copy place commands into ring
  //
  do {
    skc_uint rem;

    // find out how much room is left in the ring's snap
    // if the place ring is full -- let it drain
    SKC_SCHEDULER_WAIT_WHILE(impl->runtime->scheduler,(rem = skc_extent_ring_wip_rem(&impl->cmds.ring)) == 0);

    // append commands
    skc_uint avail = min(rem,count);

    // decrement count
    count -= avail;

    // launch a place kernel after copying commands?
    skc_bool const is_wip_full = (avail == rem);

    // if there is no place grid then create one
    if (impl->grids.place == NULL)
      {
        skc_composition_place_create(impl);
      }

    //
    // FIXME -- OPTIMIZATION? -- the ring_wip_index_inc() test can
    // be avoided by splitting into at most two intervals. It should
    // be plenty fast as is though so leave for now.
    //
    union skc_cmd_place * const cmds = impl->cmds.extent.hw1;

    if ((txs == NULL) && (tys == NULL))
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster, *layer_ids++, 0, 0 };
          }
      }
    else if (txs == NULL)
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     0,
                                     SKC_PLACE_CMD_TY_CONVERT(*tys++) };
          }
      }
    else if (tys == NULL)
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     SKC_PLACE_CMD_TX_CONVERT(*txs++),
                                     0 };
          }
      }
    else
      {
        while (avail-- > 0)
          {
            skc_raster_t const raster = *saved++;

            skc_grid_happens_after_handle(impl->grids.place,raster);

            cmds[skc_extent_ring_wip_index_inc(&impl->cmds.ring)] =
              (union skc_cmd_place){ raster,
                                     *layer_ids++,
                                     SKC_PLACE_CMD_TX_CONVERT(*txs++),
                                     SKC_PLACE_CMD_TY_CONVERT(*tys++) };
          }
      }

    // launch place kernel?
    if (is_wip_full) {
      skc_composition_snap(impl);
    }
  } while (count > 0);

  return SKC_ERR_SUCCESS;
}

//
//
//

//
// Report the composition's bounds.  Currently a stub that returns the
// maximal (unbounded) extent.
//
static
void
skc_composition_pfn_bounds(struct skc_composition_impl * const impl, skc_int bounds[4])
{
  //
  // FIXME -- not implemented yet
  //
  // impl bounds will be copied back after sealing
  //
  bounds[0] = SKC_INT_MIN;
  bounds[1] = SKC_INT_MIN;
  bounds[2] = SKC_INT_MAX;
  bounds[3] = SKC_INT_MAX;
}

//
//
//

//
// Retain the composition and take a render lock; while lock_count > 0
// the composition cannot be unsealed.
//
void
skc_composition_retain_and_lock(struct skc_composition * const composition)
{
  skc_composition_retain(composition);

  composition->impl->lock_count += 1;
}

//
// Drop the render lock and release the reference taken by
// skc_composition_retain_and_lock().
//
void
skc_composition_unlock_and_release(struct skc_composition * const composition)
{
  composition->impl->lock_count -= 1;

  skc_composition_pfn_release(composition->impl);
}

//
//
//

//
// Create a composition and its CL 1.2 impl: allocate the host
// structs, wire up the pfn vtable, acquire a cq and the PLACE /
// SEGMENT_TTCK kernels, size the command ring and extents from the
// runtime config, then unseal + reset so it's ready for place().
//
skc_err
skc_composition_cl_12_create(struct skc_context     * const context,
                             struct skc_composition * * const composition)
{
  struct skc_runtime * const runtime = context->runtime;

  // retain the context
  // skc_context_retain(context);

  // allocate impl
  struct skc_composition_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl));

  // allocate composition
  (*composition) = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**composition));

  (*composition)->context   = context;
  (*composition)->impl      = impl;
  (*composition)->ref_count = 1;

  (*composition)->place     = skc_composition_pfn_place;
  (*composition)->unseal    = skc_composition_pfn_unseal;
  (*composition)->seal      = skc_composition_pfn_seal;
  (*composition)->bounds    = skc_composition_pfn_bounds;
  (*composition)->release   = skc_composition_pfn_release;

  // intialize impl
  impl->composition = (*composition);
  impl->runtime     = runtime;

  SKC_ASSERT_STATE_INIT(impl,SKC_COMPOSITION_STATE_SEALED);

  impl->lock_count  = 0;

  impl->grids.sort  = NULL;
  impl->grids.place = NULL;

  // acquire command queue for sealing/unsealing
  impl->cq = skc_runtime_acquire_cq_in_order(runtime);

  // acquire kernels
  impl->kernels.place   = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PLACE);
  impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTCK);

  // get config
  struct skc_config const * const config = runtime->config;

  // initialize ring size with config values
  skc_extent_ring_init(&impl->cmds.ring,
                       config->composition.cmds.elem_count,
                       config->composition.cmds.snap_count,
                       sizeof(union skc_cmd_place));

  skc_extent_phw1g_tdrNs_alloc(runtime,&impl->cmds.extent ,sizeof(union skc_cmd_place) * config->composition.cmds.elem_count);
  skc_extent_phrw_alloc       (runtime,&impl->saved.extent,sizeof(skc_raster_t)        * config->composition.raster_ids.elem_count);
  skc_extent_phr_pdrw_alloc   (runtime,&impl->atomics     ,sizeof(struct skc_place_atomics));

  skc_extent_pdrw_alloc       (runtime,&impl->keys        ,sizeof(skc_ttxk_t) * config->composition.keys.elem_count);
  skc_extent_pdrw_alloc       (runtime,&impl->offsets     ,sizeof(skc_uint)   * (1u << SKC_TTCK_HI_BITS_YX)); // 1MB

  // nothing saved
  impl->saved.count = 0;

  // unseal the composition, zero the atomics, etc.
  skc_composition_unseal_block_reset(impl,false,true);

  return SKC_ERR_SUCCESS;
}

//
//
//