Diffstat (limited to 'src/compute/skc/platforms/cl_12/raster_builder_cl_12.c')
-rw-r--r--  src/compute/skc/platforms/cl_12/raster_builder_cl_12.c | 1349
1 file changed, 1349 insertions(+), 0 deletions(-)
diff --git a/src/compute/skc/platforms/cl_12/raster_builder_cl_12.c b/src/compute/skc/platforms/cl_12/raster_builder_cl_12.c
new file mode 100644
index 0000000000..33992cbdfb
--- /dev/null
+++ b/src/compute/skc/platforms/cl_12/raster_builder_cl_12.c
@@ -0,0 +1,1349 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ *
+ */
+
+//
+//
+//
+
+// get rid of these
+#include <stdio.h>
+#include <stdlib.h>
+
+//
+//
+//
+
+#include "hs/cl/hs_cl_launcher.h"
+
+#include "common/cl/assert_cl.h"
+
+#include "context.h"
+#include "grid.h"
+#include "raster.h"
+#include "extent_ring.h"
+#include "raster_builder.h"
+
+#include "tile.h"
+
+#include "config_cl.h"
+#include "runtime_cl_12.h"
+#include "extent_cl_12.h"
+#include "raster_builder_cl_12.h"
+
+//
+// RASTERIZATION SUB-PIPELINE
+// --------------------------
+//
+// Phase 1: expand commands
+//
+// Phase 2: rasterize
+//
+// Phase 3: sort & segment || release paths
+//
+// Phase 4: prefix
+//
+// Phase 5: release rasters
+//
+// RASTER COHORT
+// ==============
+//
+// BUILDER RASTERIZER POST PROCESSING
+// <-----------------------------------------------> <------------> <--------------------------------------------------------------------->
+//
+// fill cmds transforms raster clips path release rasterize cmds cohort map raster release TTSB TTSK cohort atomics context atomics
+// --------- ---------- ------------ ------------ -------------- ---------- -------------- ---- ---- -------------- ---------------
+// 1,2 1,2 1,2 1,2 2 1-4 1,2,3,4 2-4 2-4 2-4 global
+//
+//
+// NOTES: FINE-GRAINED SVM
+// -----------------------
+//
+// 1) In a fine-grained system we know the exact number of
+// rasterize cmds per segment type before phase 1
+//
+// 2) A raster that's "under construction" shouldn't be rasterized
+// until it is complete. This implies that a raster is not part
+// of a cohort until it is complete. The raster builder must
+// handle raster promises being "forced" to completion -- this is
+// likely the result of composition construction and subsequent
+// rendering to a surface.
+//
+// 3) The raster cohort rasterizer state retains the fill cmd,
+// transform, raster clip and path release "ring" extents.
+//
+// 4) The rasterize cmd extent sizes (line, quad, cubic, rational
+// quad, rational cubic) are known ahead of time.
+//
+// 5) The raster cohort post processor is standalone and retains the
+// raster_map, cohort atomics, TTSK_RYX extent, and raster
+// references until complete.
+//
+
+//
+// Notes:
+//
+// - Could have a pipeline stage before expansion that counts the
+//   exact number of line/quad/cubic commands, but the command buffers
+//   are relatively small (64-bit commands * # of path segments).
+//
+
+// raster
+// cohort atomics path_ids raster_ids transforms clips cmds_fill cmds_l/q/c ttsk_ryx
+//
+//
+// BEGIN ^
+// |
+// EXPAND |
+// |
+// RASTERIZE |
+// |
+// SORT || RELEASE PATHS |
+// |
+// PREFIX |
+// |
+// RELEASE RASTERS |
+// |
+// END v
+//
+//
+// BEGIN
+//
+// EXPAND -- PRODUCES: one or more extents of rasterization commands
+//
+// RASTERIZE -- DEPENDENCY: requires size of command extents before launching
+// -- PRODUCES: an extent of ttsk_ryx keys
+//
+// SORT || RELEASE PATHS -- DEPENDENCY: requires size of key extent before launching
+// -- PRODUCES: sorted array of keys
+//
+// PREFIX -- DEPENDENCY: none -- can execute after SORT because grid size is number of rasters
+//
+// RELEASE RASTERS -- DEPENDENCY: none -- can execute after prefix
+//
+// END
+//
+
+// ------------------------
+//
+// DEPENDENCY is cleanly implemented with a host callback or device kernel launcher
+//
+// Can this hide resource acquisition? Yes. But there are two cases:
+//
+// 1. acquisition of resources occurs on the host thread and lack of
+// resources drains the host command queue until resources are
+// available (OpenCL 2.x)
+//
+// 2. the host commands lazily acquire resources (OpenCL 1.2)
+//
+// ------------------------
+//
+// How to express?
+//
+// Each substage launches its successors. This supports both dependency models.
+//
+// If OpenCL 1.2 then the substage can't be launched until the prior
+// stage's event is complete. So this requires registering a callback
+// to invoke the substage.
+//
+// ------------------------
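+
+//
+// For illustration only (not part of this file): a minimal sketch of
+// the OpenCL 1.2 substage-chaining model described above. The
+// stage_next struct and scheduler_enqueue() helper are hypothetical --
+// each stage registers a completion callback that hands its successor
+// to a host-side scheduler thread:
+//
+#if 0
+typedef void (*stage_pfn_t)(void * payload);
+
+struct stage_next
+{
+  stage_pfn_t pfn;     // successor stage entry point
+  void *      payload; // stage state (e.g. the cohort)
+};
+
+static
+void
+stage_complete_cb(cl_event event, cl_int status, void * data)
+{
+  struct stage_next * const next = data;
+
+  // do minimal work on the callback thread -- just hand the
+  // successor off to the host-side scheduler
+  scheduler_enqueue(next->pfn,next->payload);
+}
+
+static
+void
+stage_launch(cl_command_queue cq, cl_kernel kernel, size_t work_size, struct stage_next * next)
+{
+  cl_event complete;
+
+  // enqueue this stage's kernel and capture its completion event
+  cl(EnqueueNDRangeKernel(cq,kernel,1,NULL,&work_size,NULL,0,NULL,&complete));
+
+  // when the kernel completes, schedule the successor on the host
+  cl(SetEventCallback(complete,CL_COMPLETE,stage_complete_cb,next));
+  cl(ReleaseEvent(complete));
+
+  // flush the queue or the stage may never begin executing
+  cl(Flush(cq));
+}
+#endif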
+
+//
+// BUILD
+//
+
+struct skc_raster_builder_impl
+{
+ struct skc_raster_builder * raster_builder;
+ struct skc_runtime * runtime;
+
+ skc_grid_t cohort;
+
+ // these are all durable/perm extents
+ struct skc_extent_phrwg_thr1s path_ids; // read/write by host
+ struct skc_extent_phw1g_tdrNs transforms; // write once by host + read by device
+ struct skc_extent_phw1g_tdrNs clips; // write once by host + read by device
+ struct skc_extent_phw1g_tdrNs fill_cmds; // write once by host + read by device
+ struct skc_extent_phrwg_tdrNs raster_ids; // read/write by host + read by device
+
+ struct {
+ cl_kernel fills_expand;
+ cl_kernel rasterize_all;
+ cl_kernel segment;
+ cl_kernel rasters_alloc;
+ cl_kernel prefix;
+ } kernels;
+};
+
+//
+// RASTER COHORT
+//
+// This sub-pipeline snapshots the raster builder and then acquires
+// and releases host and device resources as necessary (as late as
+// possible).
+//
+// Note that the cohort extents are ephemeral and are only used by
+// one or more stages of the rasterization sub-pipeline.
+//
+// The pipeline implementation may vary between compute platforms.
+//
+
+struct skc_raster_cohort
+{
+ struct skc_raster_builder_impl * impl;
+
+ struct skc_extent_phrwg_thr1s_snap path_ids; // read/write by host
+ struct skc_extent_phw1g_tdrNs_snap transforms; // write once by host + read by device
+ struct skc_extent_phw1g_tdrNs_snap clips; // write once by host + read by device
+ struct skc_extent_phw1g_tdrNs_snap fill_cmds; // write once by host + read by device
+ struct skc_extent_phrwg_tdrNs_snap raster_ids; // read/write by host + read by device
+
+ cl_command_queue cq;
+
+ // sub-pipeline atomics
+ struct skc_extent_thr_tdrw atomics;
+
+ // path primitives are expanded into line/quad/cubic/rational cmds
+ struct skc_extent_tdrw cmds;
+
+ // rasterization output
+ struct skc_extent_tdrw keys;
+ // struct skc_extent_thrw_tdrw keys;
+
+ // post-sort extent with metadata for each raster
+ struct skc_extent_tdrw metas;
+ // struct skc_extent_thrw_tdrw metas;
+
+ // subbuf id
+ skc_subbuf_id_t id;
+
+ //
+ // pipeline also uses the following global resources:
+ //
+ // - command queue from global factory
+ // - global block pool and its atomics
+ // - global path and raster host id map
+ // - temporary host and device allocations
+ //
+};
+
+//
+// TTRK (64-BIT COMPARE)
+//
+// 0 63
+// | TTSB ID | X | Y | COHORT ID |
+// +---------+------+------+-----------+
+// | 27 | 12 | 12 | 13 |
+//
+//
+// TTRK (32-BIT COMPARE)
+//
+// 0 63
+// | TTSB ID | N/A | X | Y | COHORT ID |
+// +---------+-----+------+------+-----------+
+// | 27 | 5 | 12 | 12 | 8 |
+//
+
+//
+// TTRK is a sortable intermediate key format for TTSK
+//
+// We're going to use the 32-bit comparison version for now
+//
+
+union skc_ttrk
+{
+ skc_ulong u64;
+ skc_uint2 u32v2;
+
+ struct {
+ skc_uint block : SKC_TTXK_LO_BITS_ID;
+ skc_uint na0 : SKC_TTRK_LO_BITS_NA;
+ skc_uint x : SKC_TTXK_HI_BITS_X;
+ skc_uint y : SKC_TTXK_HI_BITS_Y;
+ skc_uint cohort : SKC_TTRK_HI_BITS_COHORT;
+ };
+
+ struct {
+ skc_uint na1;
+ skc_uint yx : SKC_TTXK_HI_BITS_YX;
+ skc_uint na2 : SKC_TTRK_HI_BITS_COHORT;
+ };
+
+ struct {
+ skc_uint na3;
+ skc_uint na4 : SKC_TTXK_HI_BITS_X;
+ skc_uint cohort_y : SKC_TTRK_HI_BITS_COHORT_Y;
+ };
+};
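+
+//
+// For illustration only (assuming the 32-bit-compare layout above):
+// packing a TTRK key from its components. Because x, y and cohort
+// occupy the high dword in increasing significance, an ascending
+// sort of .u64 orders keys by cohort, then y, then x, then TTSB
+// block id.
+//
+#if 0
+static
+union skc_ttrk
+skc_ttrk_make(skc_uint const block, skc_uint const x, skc_uint const y, skc_uint const cohort)
+{
+  union skc_ttrk key = { .u64 = 0UL };
+
+  key.block  = block;  // TTSB block id : bits  0-26
+  key.x      = x;      // tile x        : bits 32-43
+  key.y      = y;      // tile y        : bits 44-55
+  key.cohort = cohort; // cohort id     : bits 56-63
+
+  return key;
+}
+#endif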
+
+//
+//
+//
+
+static
+void
+skc_raster_builder_pfn_release(struct skc_raster_builder_impl * const impl)
+{
+ // decrement reference count
+ if (--impl->raster_builder->refcount != 0)
+ return;
+
+ //
+  // otherwise, dispose of the raster builder and its impl
+ //
+ struct skc_runtime * const runtime = impl->runtime;
+
+ // free the raster builder
+ skc_runtime_host_perm_free(runtime,impl->raster_builder);
+
+ // free durable/perm extents
+ skc_extent_phrwg_thr1s_free(runtime,&impl->path_ids);
+ skc_extent_phw1g_tdrNs_free(runtime,&impl->transforms);
+ skc_extent_phw1g_tdrNs_free(runtime,&impl->clips);
+ skc_extent_phw1g_tdrNs_free(runtime,&impl->fill_cmds);
+ skc_extent_phrwg_tdrNs_free(runtime,&impl->raster_ids);
+
+ // release kernels
+ cl(ReleaseKernel(impl->kernels.fills_expand));
+ cl(ReleaseKernel(impl->kernels.rasterize_all));
+
+#if 0
+ cl(ReleaseKernel(impl->kernels.rasterize_lines));
+ cl(ReleaseKernel(impl->kernels.rasterize_quads));
+ cl(ReleaseKernel(impl->kernels.rasterize_cubics));
+#endif
+
+ cl(ReleaseKernel(impl->kernels.segment));
+ cl(ReleaseKernel(impl->kernels.rasters_alloc));
+ cl(ReleaseKernel(impl->kernels.prefix));
+
+ // free the impl
+ skc_runtime_host_perm_free(runtime,impl);
+}
+
+//
+//
+//
+
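+//
+// Release a snapshot's [from,to) range of raster handles from the
+// ring, handling wrap-around: e.g. with size = 8, from = 6 and
+// to = 2, the ranges [6,8) and [0,2) are released.
+//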
+static
+void
+skc_raster_builder_rasters_release(struct skc_runtime * const runtime,
+ skc_raster_t const * const rasters,
+ skc_uint const size,
+ skc_uint const from,
+ skc_uint const to)
+{
+ if (from <= to) // no wrap
+ {
+ skc_raster_t const * rasters_from = rasters + from;
+ skc_uint count_from = to - from;
+
+ skc_grid_deps_unmap(runtime->deps,rasters_from,count_from);
+ skc_runtime_raster_device_release(runtime,rasters_from,count_from);
+ }
+ else // from > to implies wrap
+ {
+ skc_raster_t const * rasters_lo = rasters + from;
+ skc_uint count_lo = size - from;
+
+ skc_grid_deps_unmap(runtime->deps,rasters_lo,count_lo);
+ skc_runtime_raster_device_release(runtime,rasters_lo,count_lo);
+
+ skc_grid_deps_unmap(runtime->deps,rasters,to);
+ skc_runtime_raster_device_release(runtime,rasters,to);
+ }
+}
+
+static
+void
+skc_raster_builder_paths_release(struct skc_runtime * const runtime,
+ struct skc_extent_phrwg_thr1s_snap * const snap)
+{
+ // release lo
+ skc_runtime_path_device_release(runtime,snap->hr1.lo,snap->count.lo);
+
+ // release hi
+ if (snap->count.hi)
+ skc_runtime_path_device_release(runtime,snap->hr1.hi,snap->count.hi);
+}
+
+static
+void
+skc_raster_builder_cohort_grid_pfn_dispose(skc_grid_t const grid)
+{
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids -
+ // raster_ids a
+ // transforms -
+ // clips -
+ // fill_cmds -
+ // cq a
+ // cohort atomics a
+ // cmds -
+ // keys a
+ // meta a
+ //
+
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+ struct skc_raster_builder_impl * const impl = cohort->impl;
+ struct skc_runtime * const runtime = impl->runtime;
+
+ //
+ // release paths -- FIXME -- Note that releasing paths can be
+ // performed after rasterization is complete
+ //
+
+ // snap alloc the paths -- this host snap simply sets up pointers
+ skc_extent_phrwg_thr1s_snap_alloc(runtime,&impl->path_ids,&cohort->path_ids);
+
+  // release the paths
+ skc_raster_builder_paths_release(runtime,&cohort->path_ids);
+
+  // free the path ids snap
+ skc_extent_phrwg_thr1s_snap_free(runtime,&cohort->path_ids);
+
+ //
+ // release rasters
+ //
+ skc_uint const size = cohort->raster_ids.snap->ring->size.pow2;
+ skc_uint const from = skc_extent_ring_snap_from(cohort->raster_ids.snap);
+ skc_uint const to = skc_extent_ring_snap_to(cohort->raster_ids.snap);
+
+ // unmap and release raster ids
+ skc_raster_builder_rasters_release(runtime,impl->raster_ids.hrw,size,from,to);
+
+ // release cohort's remaining allocated resources
+ skc_extent_phrwg_tdrNs_snap_free(runtime,&cohort->raster_ids);
+ skc_runtime_release_cq_in_order(runtime,cohort->cq);
+ skc_extent_thr_tdrw_free(runtime,&cohort->atomics);
+ skc_extent_tdrw_free(runtime,&cohort->keys);
+ skc_extent_tdrw_free(runtime,&cohort->metas);
+ // skc_extent_thrw_tdrw_free(runtime,&cohort->keys);
+ // skc_extent_thrw_tdrw_free(runtime,&cohort->metas);
+ skc_runtime_host_temp_free(runtime,cohort,cohort->id);
+
+ // release the raster builder
+ skc_raster_builder_pfn_release(impl);
+
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids -
+ // raster_ids -
+ // transforms -
+ // clips -
+ // fill_cmds -
+ // cq -
+ // cohort atomics -
+ // cmds -
+ // keys -
+ // meta -
+ //
+}
+
+//
+//
+//
+
+static
+void
+skc_raster_cohort_prefix_release(skc_grid_t const grid)
+{
+ // FIXME -- note that pfn_dispose can be accomplished here
+
+ // release the grid
+ skc_grid_complete(grid);
+}
+
+static
+void
+skc_raster_cohort_prefix_cb(cl_event event, cl_int status, skc_grid_t const grid)
+{
+ SKC_CL_CB(status);
+
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+ struct skc_scheduler * const scheduler = cohort->impl->runtime->scheduler;
+
+ // as quickly as possible, enqueue next stage in pipeline to context command scheduler
+ SKC_SCHEDULER_SCHEDULE(scheduler,skc_raster_cohort_prefix_release,grid);
+}
+
+//
+//
+//
+
+#if 0
+static
+int cmp64(const void * ptr_a, const void * ptr_b)
+{
+ skc_ulong const a = *(const skc_ulong *)ptr_a;
+ skc_ulong const b = *(const skc_ulong *)ptr_b;
+
+ if (a < b) return -1;
+ if (a > b) return +1;
+ else return 0;
+}
+#endif
+
+//
+//
+//
+
+static
+void
+skc_raster_cohort_sort_prefix(skc_grid_t const grid)
+{
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids i
+ // raster_ids i
+ // transforms a
+ // clips a
+ // fill_cmds -
+ // cq a
+ // cohort atomics a
+ // cmds a
+ // keys a
+ // meta -
+ //
+
+ // use the backpointers
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+ struct skc_raster_builder_impl * const impl = cohort->impl;
+ struct skc_runtime * const runtime = impl->runtime;
+
+ // release transforms
+ skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->transforms);
+
+ // release clips
+ skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->clips);
+
+ // release expanded cmds
+ skc_extent_tdrw_free(runtime,&cohort->cmds);
+
+  // alloc the snapshot -- could be zero-sized
+ skc_extent_phrwg_tdrNs_snap_alloc(runtime,
+ &impl->raster_ids,
+ &cohort->raster_ids,
+ cohort->cq,NULL);
+
+  // the number of rasters in the snapshot -- will never be zero
+ skc_uint const rasters = skc_extent_ring_snap_count(cohort->raster_ids.snap);
+
+ // acquire fixed-size device-side extent
+ skc_extent_tdrw_alloc(runtime,
+ &cohort->metas,
+ sizeof(struct skc_raster_cohort_meta));
+
+ // skc_extent_thrw_tdrw_alloc(runtime,
+ // &cohort->metas,
+ // sizeof(struct skc_raster_cohort_meta));
+
+ // zero the metas
+ skc_extent_tdrw_zero(&cohort->metas,cohort->cq,NULL);
+
+ // get the read-only host copy of the device atomics
+ struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;
+
+ //
+ // SORT
+ //
+ if (atomics->keys > 0)
+ {
+#ifndef NDEBUG
+ fprintf(stderr,"raster cohort sort: %u\n",atomics->keys);
+#endif
+
+ //
+ //
+ //
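+    // HotSort operates on fixed-size slabs, so hs_pad() computes the
+    // padded input/output key counts required before the in-place
+    // hs_sort() below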
+ uint32_t keys_padded_in, keys_padded_out;
+
+ hs_pad(atomics->keys,&keys_padded_in,&keys_padded_out);
+
+ hs_sort(cohort->cq,
+ cohort->keys.drw,
+ cohort->keys.drw,
+ atomics->keys,
+ keys_padded_in,
+ keys_padded_out,
+ false);
+
+ cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(cohort->keys.drw)));
+ cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(cohort->metas.drw)));
+
+#ifndef NDEBUG
+ fprintf(stderr,"post-sort\n");
+#endif
+
+ // find start of each tile
+ skc_device_enqueue_kernel(runtime->device,
+ SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK,
+ cohort->cq,
+ impl->kernels.segment,
+ atomics->keys,
+ 0,NULL,NULL);
+
+#ifndef NDEBUG
+ fprintf(stderr,"post-segment\n");
+#endif
+
+ //
+ // DELETE ALL THIS WHEN READY
+ //
+
+#if 0
+ //
+ //
+ //
+ cl(Finish(cohort->cq));
+
+ // map keys to host
+ union skc_ttrk * const keys = skc_extent_thrw_tdrw_map(&cohort->keys,
+ cohort->cq,
+ NULL);
+ // map meta to host
+ struct skc_raster_cohort_meta * const metas = skc_extent_thrw_tdrw_map(&cohort->metas,
+ cohort->cq,
+ NULL);
+ // block until done
+ cl(Finish(cohort->cq));
+
+ // sort keys
+ qsort(keys,atomics->keys,sizeof(*keys),cmp64);
+
+ // mask to determine if rk id is a new block
+ skc_uint const subblock_mask = runtime->config->block.subblocks - 1;
+
+ //
+ // some counters
+ //
+ union skc_raster_cohort_meta_in meta_in = {
+ .blocks = 0,
+ .offset = 0,
+ .pk = 0,
+ .rk = 0
+ };
+
+ // get first key
+ union skc_ttrk curr = keys[0];
+
+ skc_uint ii=0, jj=0;
+
+ // for all TTRK keys
+ while (true)
+ {
+ // increment ttrk count
+ meta_in.rk += 1;
+
+ // was this a new block?
+ if ((curr.u32v2.lo & subblock_mask) == 0)
+ meta_in.blocks += 1;
+
+ // break if we're out of keys
+ if (++ii >= atomics->keys)
+ break;
+
+ // otherwise, process next key
+ union skc_ttrk const next = keys[ii];
+
+ // if new cohort then save curr meta and init next meta
+ if (next.cohort != curr.cohort)
+ {
+ fprintf(stderr,"[ %u, %u, %u, %u ]\n",
+ meta_in.blocks,
+ meta_in.offset,
+ meta_in.pk,
+ meta_in.rk);
+
+ // store back to buffer
+ metas->inout[curr.cohort].in = meta_in;
+
+ // update meta_in
+ meta_in.blocks = 0;
+ meta_in.offset = ii;
+ meta_in.pk = 0;
+ meta_in.rk = 0;
+ }
+ // otherwise, if same y but new x then increment TTPK count
+ else if ((next.y == curr.y) && (next.x != curr.x))
+ {
+ meta_in.pk += 1;
+
+#if 0
+ fprintf(stderr,"%3u : %3u : ( %3u, %3u ) -> ( %3u )\n",
+ jj++,curr.cohort,curr.y,curr.x,next.x);
+#endif
+ }
+
+#if 0
+ fprintf(stderr,"( %3u, %3u )\n",next.y,next.x);
+#endif
+
+ curr = next;
+ }
+
+ fprintf(stderr,"[ %u, %u, %u, %u ]\n",
+ meta_in.blocks,
+ meta_in.offset,
+ meta_in.pk,
+ meta_in.rk);
+
+ // store back to buffer
+ metas->inout[curr.cohort].in = meta_in;
+
+
+ // unmap
+ skc_extent_thrw_tdrw_unmap(&cohort->keys,
+ keys,
+ cohort->cq,
+ NULL);
+
+ // unmap
+ skc_extent_thrw_tdrw_unmap(&cohort->metas,
+ metas,
+ cohort->cq,
+ NULL);
+#endif
+ }
+
+#ifndef NDEBUG
+ fprintf(stderr,"rasters_alloc: %u\n",rasters);
+#endif
+
+ //
+ // RASTER ALLOC/INIT
+ //
+ cl(SetKernelArg(impl->kernels.rasters_alloc,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
+ cl(SetKernelArg(impl->kernels.rasters_alloc,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
+ cl(SetKernelArg(impl->kernels.rasters_alloc,2,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
+ cl(SetKernelArg(impl->kernels.rasters_alloc,3,SKC_CL_ARG(runtime->handle_pool.map.drw)));
+ cl(SetKernelArg(impl->kernels.rasters_alloc,4,SKC_CL_ARG(cohort->metas.drw)));
+ cl(SetKernelArg(impl->kernels.rasters_alloc,5,SKC_CL_ARG(cohort->raster_ids.drN)));
+ cl(SetKernelArg(impl->kernels.rasters_alloc,6,SKC_CL_ARG(rasters)));
+
+ skc_device_enqueue_kernel(runtime->device,
+ SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC,
+ cohort->cq,
+ impl->kernels.rasters_alloc,
+ rasters,
+ 0,NULL,NULL);
+
+#ifndef NDEBUG
+ fprintf(stderr,"post-alloc\n");
+#endif
+
+ //
+ // PREFIX
+ //
+ cl(SetKernelArg(impl->kernels.prefix,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
+ cl(SetKernelArg(impl->kernels.prefix,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
+ cl(SetKernelArg(impl->kernels.prefix,2,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
+ cl(SetKernelArg(impl->kernels.prefix,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
+
+ cl(SetKernelArg(impl->kernels.prefix,4,SKC_CL_ARG(cohort->keys.drw)));
+ cl(SetKernelArg(impl->kernels.prefix,5,SKC_CL_ARG(runtime->handle_pool.map.drw)));
+
+ cl(SetKernelArg(impl->kernels.prefix,6,SKC_CL_ARG(cohort->metas.drw)));
+ cl(SetKernelArg(impl->kernels.prefix,7,SKC_CL_ARG(rasters)));
+
+ cl_event complete;
+
+ skc_device_enqueue_kernel(runtime->device,
+ SKC_DEVICE_KERNEL_ID_PREFIX,
+ cohort->cq,
+ impl->kernels.prefix,
+ rasters,
+ 0,NULL,
+ &complete);
+
+ cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_prefix_cb,grid));
+ cl(ReleaseEvent(complete));
+
+#ifndef NDEBUG
+ fprintf(stderr,"post-prefix\n");
+#endif
+
+ // flush command queue
+ cl(Flush(cohort->cq));
+
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids a
+ // raster_ids a
+ // transforms -
+ // clips -
+ // fill_cmds -
+ // cq a
+ // cohort atomics a
+ // cmds -
+ // keys a
+ // meta a
+ //
+}
+
+static
+void
+skc_raster_cohort_rasterize_cb(cl_event event, cl_int status, skc_grid_t const grid)
+{
+ SKC_CL_CB(status);
+
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+
+ // as quickly as possible, enqueue next stage in pipeline to context command scheduler
+ SKC_SCHEDULER_SCHEDULE(cohort->impl->runtime->scheduler,skc_raster_cohort_sort_prefix,grid);
+}
+
+static
+void
+skc_raster_cohort_rasterize(skc_grid_t const grid)
+{
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids i
+ // raster_ids i
+ // transforms i
+ // clips i
+ // fill_cmds s
+ // cq a
+ // cohort atomics a
+ // cmds a
+ // cmds_quad a
+ // cmds_cubic a
+ // keys -
+ // meta -
+
+ // use the backpointers
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+ struct skc_raster_builder_impl * const impl = cohort->impl;
+ struct skc_runtime * const runtime = impl->runtime;
+
+ //
+ // RELEASED RESOURCES
+ //
+ // cmds snap
+ //
+
+ // release the cmds extent and snap since it's only used by the expand stage
+ skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->fill_cmds);
+
+ //
+ // NEW ALLOCATED RESOURCES
+ //
+ // transforms snap
+ // clips snap
+ // ttrk keys
+ //
+ skc_extent_phw1g_tdrNs_snap_alloc(runtime,
+ &impl->transforms,
+ &cohort->transforms,
+ cohort->cq,NULL);
+
+ skc_extent_phw1g_tdrNs_snap_alloc(runtime,
+ &impl->clips,
+ &cohort->clips,
+ cohort->cq,NULL);
+
+ // acquire device-side extent
+ skc_extent_tdrw_alloc(runtime,
+ &cohort->keys,
+ sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);
+
+ // skc_extent_thrw_tdrw_alloc(runtime,
+ // &cohort->keys,
+ // sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);
+
+ //
+ // acquire out-of-order command queue
+ //
+ // and launch up to 3 kernels
+ //
+ // for each kernel:
+ //
+ // set runtime "global" kernel args:
+ //
+ // - block pool atomics
+ // - block pool extent
+ //
+ // set cohort "local" kernel args:
+ //
+ // - atomics
+ // - cmds
+ //
+ // enqueue barrier
+ // enqueue copy back of atomics on the command queue
+ // set callback on copy back event
+ // release command queue
+ //
+ struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;
+
+ if (atomics->cmds > 0)
+ {
+ cl(SetKernelArg(impl->kernels.rasterize_all,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,1,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,2,SKC_CL_ARG(runtime->block_pool.ids.drw)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
+
+ cl(SetKernelArg(impl->kernels.rasterize_all,4,SKC_CL_ARG(cohort->atomics.drw)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,5,SKC_CL_ARG(cohort->keys.drw)));
+
+ cl(SetKernelArg(impl->kernels.rasterize_all,6,SKC_CL_ARG(cohort->transforms.drN)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,7,SKC_CL_ARG(cohort->clips.drN)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,8,SKC_CL_ARG(cohort->cmds.drw)));
+ cl(SetKernelArg(impl->kernels.rasterize_all,9,SKC_CL_ARG(atomics->cmds)));
+
+ skc_device_enqueue_kernel(runtime->device,
+ SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL,
+ cohort->cq,
+ impl->kernels.rasterize_all,
+ atomics->cmds,
+ 0,NULL,NULL);
+ }
+
+ //
+  // copyback number of TTRK keys
+ //
+ cl_event complete;
+
+ skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);
+
+ cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_rasterize_cb,grid));
+ cl(ReleaseEvent(complete));
+
+ // flush command queue
+ cl(Flush(cohort->cq));
+
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids i
+ // raster_ids i
+ // transforms a
+ // clips a
+ // fill_cmds -
+ // cq a
+ // cohort atomics a
+ // cmds a
+ // keys a
+ // meta -
+}
+
+static
+void
+skc_raster_cohort_fills_expand_cb(cl_event event, cl_int status, skc_grid_t const grid)
+{
+ SKC_CL_CB(status);
+
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+
+ // as quickly as possible, enqueue next stage in pipeline to context command scheduler
+ SKC_SCHEDULER_SCHEDULE(cohort->impl->runtime->scheduler,skc_raster_cohort_rasterize,grid);
+}
+
+static
+void
+skc_raster_builder_cohort_grid_pfn_execute(skc_grid_t const grid)
+{
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids i
+ // raster_ids i
+ // transforms i
+ // clips i
+ // fill_cmds i
+ // cq -
+ // cohort atomics -
+ // cmds -
+ // keys -
+ // meta -
+ //
+
+ // allocate the cohort
+ struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);
+
+ // get impl
+ struct skc_raster_builder_impl * const impl = cohort->impl;
+ struct skc_runtime * const runtime = impl->runtime;
+
+ // acquire in-order cq
+ cohort->cq = skc_runtime_acquire_cq_in_order(runtime);
+
+ // alloc the snapshot -- could be zero-sized
+ skc_extent_phw1g_tdrNs_snap_alloc(runtime,
+ &impl->fill_cmds,
+ &cohort->fill_cmds,
+ cohort->cq,NULL);
+
+ // flush the cq to get the fill running
+ // cl(Flush(cohort->cq));
+
+ // create split atomics
+ skc_extent_thr_tdrw_alloc(runtime,&cohort->atomics,sizeof(struct skc_raster_cohort_atomic));
+
+ // zero the atomics
+ skc_extent_thr_tdrw_zero(&cohort->atomics,cohort->cq,NULL);
+
+ // get config
+ struct skc_config const * const config = runtime->config;
+
+ // acquire device-side extents
+ skc_extent_tdrw_alloc(runtime,
+ &cohort->cmds,
+ sizeof(union skc_cmd_rasterize) * config->raster_cohort.expand.cmds);
+
+ //
+ // FILLS EXPAND
+ //
+ // need result of cmd counts before launching RASTERIZE grids
+ //
+ // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
+ // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
+ // - or launch a device-wide grid that feeds itself but that's unsatisfying
+ //
+
+ // how many commands? could be zero
+ skc_uint const work_size = skc_extent_ring_snap_count(cohort->fill_cmds.snap);
+
+ if (work_size > 0)
+ {
+ cl(SetKernelArg(impl->kernels.fills_expand,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
+ cl(SetKernelArg(impl->kernels.fills_expand,1,SKC_CL_ARG(cohort->atomics.drw)));
+ cl(SetKernelArg(impl->kernels.fills_expand,2,SKC_CL_ARG(runtime->handle_pool.map.drw)));
+ cl(SetKernelArg(impl->kernels.fills_expand,3,SKC_CL_ARG(cohort->fill_cmds.drN)));
+ cl(SetKernelArg(impl->kernels.fills_expand,4,SKC_CL_ARG(cohort->cmds.drw)));
+
+ skc_device_enqueue_kernel(runtime->device,
+ SKC_DEVICE_KERNEL_ID_FILLS_EXPAND,
+ cohort->cq,
+ impl->kernels.fills_expand,
+ work_size,
+ 0,NULL,NULL);
+ }
+
+ //
+ // copyback number of rasterization commands
+ //
+ cl_event complete;
+
+ skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);
+
+ cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_fills_expand_cb,grid));
+ cl(ReleaseEvent(complete));
+
+ // flush command queue
+ cl(Flush(cohort->cq));
+
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids i
+ // raster_ids i
+ // transforms i
+ // clips i
+ // fill_cmds s
+ // cq a
+ // cohort atomics a
+ // cmds a
+ // keys -
+ // meta -
+ //
+}
+
+//
+// move grid into waiting state
+//
+// this entails allocating a cohort from the temporary extent
+//
+
+static
+void
+skc_raster_builder_cohort_grid_pfn_waiting(skc_grid_t const grid)
+{
+ // get the impl
+ struct skc_raster_builder_impl * const impl = skc_grid_get_data(grid);
+ struct skc_runtime * const runtime = impl->runtime;
+
+ // retain the raster builder
+ impl->raster_builder->refcount += 1;
+
+ // allocate the ephemeral/temp cohort
+ skc_subbuf_id_t id;
+
+ struct skc_raster_cohort * const cohort =
+ skc_runtime_host_temp_alloc(runtime,
+ SKC_MEM_FLAGS_READ_WRITE,
+ sizeof(*cohort),
+ &id,
+ NULL);
+
+ // save the id and backpointer
+ cohort->id = id;
+ cohort->impl = impl;
+
+ // set grid data -- replaces impl
+ skc_grid_set_data(grid,cohort);
+
+ //
+ // ACQUIRE RESOURCES FOR THE COHORT
+ //
+
+ struct skc_raster_builder * const raster_builder = impl->raster_builder;
+
+ // immediately take snapshots of all rings -- these are very inexpensive operations
+ skc_extent_phrwg_thr1s_snap_init(runtime,&raster_builder->path_ids .ring,&cohort->path_ids);
+ skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->transforms.ring,&cohort->transforms);
+ skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->clips .ring,&cohort->clips);
+ skc_extent_phw1g_tdrNs_snap_init(runtime,&raster_builder->fill_cmds .ring,&cohort->fill_cmds);
+ skc_extent_phrwg_tdrNs_snap_init(runtime,&raster_builder->raster_ids.ring,&cohort->raster_ids);
+
+ //
+ // ALLOCATED RESOURCES
+ //
+ // path_ids i
+ // raster_ids i
+ // transforms i
+ // clips i
+ // fill_cmds i
+ // cq -
+ // cohort atomics -
+ // cmds -
+ // keys -
+ // meta -
+ //
+}
+
+//
+//
+//
+
+static
+void
+skc_raster_builder_cohort_create(struct skc_raster_builder_impl * const impl)
+{
+ // attach a grid
+ impl->cohort = SKC_GRID_DEPS_ATTACH(impl->runtime->deps,
+ &impl->cohort,
+ impl,
+ skc_raster_builder_cohort_grid_pfn_waiting,
+ skc_raster_builder_cohort_grid_pfn_execute,
+ skc_raster_builder_cohort_grid_pfn_dispose);
+}
+
+//
+//
+//
+
+static
+skc_err
+skc_raster_builder_pfn_add(struct skc_raster_builder_impl * const impl,
+ skc_path_t const * paths,
+ skc_uint count)
+{
+  // validate and retain the paths
+ skc_err err;
+
+ err = skc_runtime_handle_device_validate_retain(impl->runtime,
+ SKC_TYPED_HANDLE_TYPE_IS_PATH,
+ paths,
+ count);
+
+ if (err)
+ return err;
+
+ skc_runtime_handle_device_retain(impl->runtime,paths,count);
+
+ // make sure there is a grid
+ if (impl->cohort == NULL) {
+ skc_raster_builder_cohort_create(impl);
+ }
+
+  // declare that the rasterization grid happens after the paths
+ while (count-- > 0)
+ skc_grid_happens_after_handle(impl->cohort,SKC_TYPED_HANDLE_TO_HANDLE(*paths++));
+
+ return SKC_ERR_SUCCESS;
+}
+
+//
+//
+//
+
+static
+void
+skc_raster_builder_pfn_end(struct skc_raster_builder_impl * const impl, skc_raster_t * const raster)
+{
+  //
+  // acquire a host-managed raster handle and bump its reference
+  // count to 2 -- one reference will be released (reducing the count
+  // to 1) once the raster is completely rasterized
+  //
+ *raster = skc_runtime_handle_device_acquire(impl->runtime);
+
+ // make sure there is a grid
+ if (impl->cohort == NULL) {
+ skc_raster_builder_cohort_create(impl);
+ }
+
+ // map a handle to a grid
+ skc_grid_map(impl->cohort,*raster);
+}
+
+//
+// snapshot the ring and lazily start the grid
+//
+// FIXME -- might want to revisit this and settle on an even more
+// opaque implementation. Some options:
+//
+// - never let the SKC API expose a forced grid start
+// - make snapshots kick off a forced grid start
+// - be lazy all the time everywhere
+//
+
+static
+void
+skc_raster_builder_pfn_start(struct skc_raster_builder_impl * const impl)
+{
+ skc_grid_t const cohort = impl->cohort;
+
+ if (cohort != NULL) {
+ skc_grid_start(cohort);
+ }
+}
+
+//
+// NOTE: THIS MIGHT BE REMOVED
+//
+
+static
+void
+skc_raster_builder_pfn_force(struct skc_raster_builder_impl * const impl)
+{
+ skc_grid_t const cohort = impl->cohort;
+
+ if (cohort != NULL) {
+ skc_grid_force(cohort);
+ }
+}
+
+//
+//
+//
+
+skc_err
+skc_raster_builder_cl_12_create(struct skc_context * const context,
+ struct skc_raster_builder * * const raster_builder)
+{
+ struct skc_runtime * const runtime = context->runtime;
+
+ // allocate raster builder
+ (*raster_builder) = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(**raster_builder));
+
+ // refcount
+ (*raster_builder)->refcount = 1;
+
+ // state
+ SKC_ASSERT_STATE_INIT((*raster_builder),SKC_RASTER_BUILDER_STATE_READY);
+
+ // allocate runtime raster builder
+ struct skc_raster_builder_impl * const impl = skc_runtime_host_perm_alloc(runtime,SKC_MEM_FLAGS_READ_WRITE,sizeof(*impl));
+
+ // save the impl
+ (*raster_builder)->impl = impl;
+
+  // initialize impl
+ impl->raster_builder = (*raster_builder);
+ impl->runtime = runtime;
+ impl->cohort = NULL;
+
+ // get config
+ struct skc_config const * const config = runtime->config;
+
+ skc_extent_phrwg_thr1s_alloc(runtime,&impl->path_ids ,sizeof(skc_path_t ) * config->raster_cohort.path_ids .elem_count);
+ skc_extent_phw1g_tdrNs_alloc(runtime,&impl->transforms,sizeof(union skc_transform) * config->raster_cohort.transforms.elem_count);
+ skc_extent_phw1g_tdrNs_alloc(runtime,&impl->clips ,sizeof(union skc_path_clip) * config->raster_cohort.clips .elem_count);
+ skc_extent_phw1g_tdrNs_alloc(runtime,&impl->fill_cmds ,sizeof(union skc_cmd_fill ) * config->raster_cohort.fill .elem_count);
+ skc_extent_phrwg_tdrNs_alloc(runtime,&impl->raster_ids,sizeof(skc_raster_t ) * config->raster_cohort.raster_ids.elem_count);
+
+ // retain the context
+ //skc_context_retain(context);
+
+ (*raster_builder)->context = context;
+
+ (*raster_builder)->add = skc_raster_builder_pfn_add;
+ (*raster_builder)->end = skc_raster_builder_pfn_end;
+ (*raster_builder)->start = skc_raster_builder_pfn_start;
+ (*raster_builder)->force = skc_raster_builder_pfn_force;
+ (*raster_builder)->release = skc_raster_builder_pfn_release;
+
+ // initialize raster builder with host-writable buffers
+ (*raster_builder)->path_ids .extent = impl->path_ids.hrw;
+ (*raster_builder)->transforms.extent = impl->transforms.hw1;
+ (*raster_builder)->clips .extent = impl->clips.hw1;
+ (*raster_builder)->fill_cmds .extent = impl->fill_cmds.hw1;
+ (*raster_builder)->raster_ids.extent = impl->raster_ids.hrw;
+
+ //
+ // the rings perform bookkeeping on the extents
+ //
+ // the ring snapshotting and checkpointing are necessary because
+ // another part of the API can _force_ the raster cohort to flush
+ // its work-in-progress commands but only up to a checkpointed
+ // boundary
+ //
+ skc_extent_ring_init(&(*raster_builder)->path_ids.ring,
+ config->raster_cohort.path_ids.elem_count,
+ config->raster_cohort.path_ids.snap_count,
+ sizeof(skc_path_t));
+
+ skc_extent_ring_init(&(*raster_builder)->transforms.ring,
+ config->raster_cohort.transforms.elem_count,
+ config->raster_cohort.transforms.snap_count,
+ sizeof(union skc_transform));
+
+ skc_extent_ring_init(&(*raster_builder)->clips.ring,
+ config->raster_cohort.clips.elem_count,
+ config->raster_cohort.clips.snap_count,
+ sizeof(union skc_path_clip));
+
+ skc_extent_ring_init(&(*raster_builder)->fill_cmds.ring,
+ config->raster_cohort.fill.elem_count,
+ config->raster_cohort.fill.snap_count,
+ sizeof(union skc_cmd_fill));
+
+ skc_extent_ring_init(&(*raster_builder)->raster_ids.ring,
+ config->raster_cohort.raster_ids.elem_count,
+ config->raster_cohort.raster_ids.snap_count,
+ sizeof(skc_raster_t));
+
+ //
+ // acquire kernels
+ //
+ impl->kernels.fills_expand = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_FILLS_EXPAND);
+ impl->kernels.rasterize_all = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL);
+
+#if 0
+ impl->kernels.rasterize_lines = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_LINES);
+ impl->kernels.rasterize_quads = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_QUADS);
+ impl->kernels.rasterize_cubics = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERIZE_CUBICS);
+#endif
+
+ impl->kernels.segment = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK);
+ impl->kernels.rasters_alloc = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC);
+ impl->kernels.prefix = skc_device_acquire_kernel(runtime->device, SKC_DEVICE_KERNEL_ID_PREFIX);
+
+ return SKC_ERR_SUCCESS;
+}
+
+//
+//
+//