aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/compute/skc/platforms/cl_12/runtime_cl_12.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/compute/skc/platforms/cl_12/runtime_cl_12.c')
-rw-r--r--src/compute/skc/platforms/cl_12/runtime_cl_12.c314
1 files changed, 314 insertions, 0 deletions
diff --git a/src/compute/skc/platforms/cl_12/runtime_cl_12.c b/src/compute/skc/platforms/cl_12/runtime_cl_12.c
new file mode 100644
index 0000000000..fca13edbbd
--- /dev/null
+++ b/src/compute/skc/platforms/cl_12/runtime_cl_12.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright 2017 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ *
+ */
+
+//
+//
+//
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+//
+//
+//
+
+#include "context.h"
+#include "block.h"
+#include "grid.h"
+#include "common/cl/assert_cl.h"
+#include "config_cl.h"
+#include "runtime_cl.h"
+#include "runtime_cl_12.h"
+#include "export_cl_12.h"
+
+//
+//
+//
+
+static
+void
+skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq)
+{
+ // save size
+ runtime->block_pool.size = &runtime->config->block_pool;
+
+ // create block extent
+ skc_extent_pdrw_alloc(runtime,
+ &runtime->block_pool.blocks,
+ runtime->block_pool.size->pool_size *
+ runtime->config->block.bytes);
+
+ // allocate block pool ids
+ skc_extent_pdrw_alloc(runtime,
+ &runtime->block_pool.ids,
+ runtime->block_pool.size->ring_pow2 * sizeof(skc_uint));
+
+ // allocate block pool atomics
+ skc_extent_phr_pdrw_alloc(runtime,
+ &runtime->block_pool.atomics,
+ sizeof(union skc_block_pool_atomic));
+
+ // acquire pool id and atomic initialization kernels
+ cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS);
+ cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS);
+
+ // init ids
+ cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw));
+ cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));
+
+ // the kernel grid is shaped by the target device -- always 2 for atomics
+ skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS,
+ cq,k0,runtime->block_pool.size->pool_size,
+ 0,NULL,NULL);
+
+ // init atomics
+ cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw));
+ cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size)));
+
+ // the kernel grid is shaped by the target device
+ skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS,
+ cq,k1,2,
+ 0,NULL,NULL);
+
+ // kickstart kernel execution
+ cl(Flush(cq));
+
+ // release kernels
+ cl(ReleaseKernel(k0));
+ cl(ReleaseKernel(k1));
+}
+
+static
+void
+skc_block_pool_dispose(struct skc_runtime * const runtime)
+{
+ skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics);
+ skc_extent_pdrw_free (runtime,&runtime->block_pool.ids);
+ skc_extent_pdrw_free (runtime,&runtime->block_pool.blocks);
+}
+
+//
+//
+//
+
+static
+bool
+skc_runtime_yield(struct skc_runtime * const runtime)
+{
+ return skc_scheduler_yield(runtime->scheduler);
+}
+
+static
+void
+skc_runtime_wait(struct skc_runtime * const runtime)
+{
+ skc_scheduler_wait(runtime->scheduler);
+}
+
+//
+//
+//
+
+skc_err
+skc_runtime_cl_12_create(struct skc_context * const context,
+ char const * const target_platform_substring,
+ char const * const target_device_substring,
+ cl_context_properties context_properties[])
+{
+ // allocate the runtime
+ struct skc_runtime * const runtime = malloc(sizeof(*runtime));
+
+ // acquire OpenCL ids and context for target device
+ skc_err err = skc_runtime_cl_create(&runtime->cl,
+ target_platform_substring,
+ target_device_substring,
+ context_properties);
+
+ // create device
+ skc_device_create(runtime);
+
+ // create the host and device allocators
+ skc_allocator_host_create(runtime);
+ skc_allocator_device_create(runtime);
+
+ // how many slots in the scheduler?
+ runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size);
+
+ // allocate deps structure
+ runtime->deps = skc_grid_deps_create(runtime,
+ runtime->scheduler,
+ runtime->config->block_pool.pool_size);
+
+ // initialize cq pool
+ skc_cq_pool_create(runtime,
+ &runtime->cq_pool,
+ runtime->config->cq_pool.type,
+ runtime->config->cq_pool.size);
+
+ // acquire in-order cq
+ cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);
+
+ // initialize block pool
+ skc_block_pool_create(runtime,cq);
+
+ // intialize handle pool
+ skc_handle_pool_create(runtime,
+ &runtime->handle_pool,
+ runtime->config->handle_pool.size,
+ runtime->config->handle_pool.width,
+ runtime->config->handle_pool.recs);
+
+ //
+ // initialize pfns
+ //
+ // FIXME -- at this point we will have identified which device we've
+ // targeted and will load a DLL (or select from a built-in library)
+ // that contains all the pfns.
+ //
+ context->runtime = runtime;
+
+ context->yield = skc_runtime_yield;
+ context->wait = skc_runtime_wait;
+
+ context->path_builder = skc_path_builder_cl_12_create;
+ context->path_retain = skc_runtime_path_host_retain;
+ context->path_release = skc_runtime_path_host_release;
+ context->path_flush = skc_runtime_path_host_flush;
+
+ context->raster_builder = skc_raster_builder_cl_12_create;
+ context->raster_retain = skc_runtime_raster_host_retain;
+ context->raster_release = skc_runtime_raster_host_release;
+ context->raster_flush = skc_runtime_raster_host_flush;
+
+ context->composition = skc_composition_cl_12_create;
+ context->styling = skc_styling_cl_12_create;
+
+ context->surface = skc_surface_cl_12_create;
+
+ // block on pool creation
+ cl(Finish(cq));
+
+ // dispose of in-order cq
+ skc_runtime_release_cq_in_order(runtime,cq);
+
+ return err;
+};
+
+//
+//
+//
+
+skc_err
+skc_runtime_cl_12_dispose(struct skc_context * const context)
+{
+ //
+ // FIXME -- incomplete
+ //
+ fprintf(stderr,"%s incomplete!\n",__func__);
+
+ struct skc_runtime * runtime = context->runtime;
+
+ skc_allocator_device_dispose(runtime);
+ skc_allocator_host_dispose(runtime);
+
+ skc_scheduler_dispose(context->runtime,context->runtime->scheduler);
+
+ skc_grid_deps_dispose(context->runtime->deps);
+
+ skc_cq_pool_dispose(runtime,&runtime->cq_pool);
+
+ skc_block_pool_dispose(context->runtime);
+
+ // skc_handle_pool_dispose(context->runtime);
+
+ return SKC_ERR_SUCCESS;
+}
+
+//
+// TEMPORARY BENCHMARK
+//
+
+#if 1
+
+#include <windows.h>
+
+#define SKC_FRAMES_MASK 0x7F
+#define SKC_FRAMES (SKC_FRAMES_MASK + 1)
+
+void
+skc_runtime_cl_12_debug(struct skc_context * const context)
+{
+#ifdef NDEBUG
+ static skc_uint frames=0;
+ static LARGE_INTEGER StartingTime={0}, EndingTime;
+
+ if ((frames++ & SKC_FRAMES_MASK) != SKC_FRAMES_MASK)
+ return;
+
+ QueryPerformanceCounter(&EndingTime);
+
+ LARGE_INTEGER ElapsedMicroseconds, Frequency;
+
+ ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart;
+
+ QueryPerformanceFrequency(&Frequency);
+
+ double const msecs_total = 1000.0 * ElapsedMicroseconds.QuadPart / Frequency.QuadPart;
+ double const msecs_frame = msecs_total / SKC_FRAMES;
+
+ printf("Frames / Total / Per : %u / %.3f / %.3f\n",
+ SKC_FRAMES,msecs_total,msecs_frame);
+#endif
+
+ struct skc_runtime * const runtime = context->runtime;
+
+ // acquire out-of-order cq
+ cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime);
+
+ // copy atomics to host
+ skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL);
+
+ // block until complete
+ cl(Finish(cq));
+
+ // dispose of out-of-order cq
+ skc_runtime_release_cq_in_order(runtime,cq);
+
+ union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr;
+
+ skc_uint const available = bp_atomic->writes - bp_atomic->reads;
+ skc_uint const inuse = runtime->config->block_pool.pool_size - available;
+
+ fprintf(stderr,"w/r/f/a: %9u - %9u = %9u : %6.2f MB\n",
+ bp_atomic->writes,
+ bp_atomic->reads,
+ available,
+ (inuse * runtime->config->block.bytes) / (1024.0*1024.0));
+
+ if (available >= (1<<27))
+ {
+ fprintf(stderr,"block pool corrupted!\n");
+ exit(-1);
+ }
+
+ //
+ //
+ //
+#ifdef NDEBUG
+ QueryPerformanceCounter(&StartingTime);
+#endif
+}
+
+#endif
+
+//
+//
+//
+