diff options
Diffstat (limited to 'src/compute/skc/platforms/cl_12/runtime_cl_12.c')
-rw-r--r-- | src/compute/skc/platforms/cl_12/runtime_cl_12.c | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/src/compute/skc/platforms/cl_12/runtime_cl_12.c b/src/compute/skc/platforms/cl_12/runtime_cl_12.c new file mode 100644 index 0000000000..fca13edbbd --- /dev/null +++ b/src/compute/skc/platforms/cl_12/runtime_cl_12.c @@ -0,0 +1,314 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + * + */ + +// +// +// + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> + +// +// +// + +#include "context.h" +#include "block.h" +#include "grid.h" +#include "common/cl/assert_cl.h" +#include "config_cl.h" +#include "runtime_cl.h" +#include "runtime_cl_12.h" +#include "export_cl_12.h" + +// +// +// + +static +void +skc_block_pool_create(struct skc_runtime * const runtime, cl_command_queue cq) +{ + // save size + runtime->block_pool.size = &runtime->config->block_pool; + + // create block extent + skc_extent_pdrw_alloc(runtime, + &runtime->block_pool.blocks, + runtime->block_pool.size->pool_size * + runtime->config->block.bytes); + + // allocate block pool ids + skc_extent_pdrw_alloc(runtime, + &runtime->block_pool.ids, + runtime->block_pool.size->ring_pow2 * sizeof(skc_uint)); + + // allocate block pool atomics + skc_extent_phr_pdrw_alloc(runtime, + &runtime->block_pool.atomics, + sizeof(union skc_block_pool_atomic)); + + // acquire pool id and atomic initialization kernels + cl_kernel k0 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS); + cl_kernel k1 = skc_device_acquire_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS); + + // init ids + cl(SetKernelArg(k0,0,sizeof(runtime->block_pool.ids.drw),&runtime->block_pool.ids.drw)); + cl(SetKernelArg(k0,1,SKC_CL_ARG(runtime->block_pool.size->pool_size))); + + // the kernel grid is shaped by the target device -- always 2 for atomics + skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_IDS, + cq,k0,runtime->block_pool.size->pool_size, + 0,NULL,NULL); + + // init atomics + cl(SetKernelArg(k1,0,sizeof(runtime->block_pool.atomics.drw),&runtime->block_pool.atomics.drw)); + cl(SetKernelArg(k1,1,SKC_CL_ARG(runtime->block_pool.size->pool_size))); + + // the kernel grid is shaped by the target device + skc_device_enqueue_kernel(runtime->device,SKC_DEVICE_KERNEL_ID_BLOCK_POOL_INIT_ATOMICS, + cq,k1,2, + 0,NULL,NULL); + + // kickstart kernel execution + cl(Flush(cq)); + + // release kernels + cl(ReleaseKernel(k0)); + cl(ReleaseKernel(k1)); +} + +static +void +skc_block_pool_dispose(struct skc_runtime * const runtime) +{ + skc_extent_phr_pdrw_free(runtime,&runtime->block_pool.atomics); + skc_extent_pdrw_free (runtime,&runtime->block_pool.ids); + skc_extent_pdrw_free (runtime,&runtime->block_pool.blocks); +} + +// +// +// + +static +bool +skc_runtime_yield(struct skc_runtime * const runtime) +{ + return skc_scheduler_yield(runtime->scheduler); +} + +static +void +skc_runtime_wait(struct skc_runtime * const runtime) +{ + skc_scheduler_wait(runtime->scheduler); +} + +// +// +// + +skc_err +skc_runtime_cl_12_create(struct skc_context * const context, + char const * const target_platform_substring, + char const * const target_device_substring, + cl_context_properties context_properties[]) +{ + // allocate the runtime + struct skc_runtime * const runtime = malloc(sizeof(*runtime)); + + // acquire OpenCL ids and context for target device + skc_err err = skc_runtime_cl_create(&runtime->cl, + target_platform_substring, + target_device_substring, + context_properties); + + // create device + skc_device_create(runtime); + + // create the host and device allocators + skc_allocator_host_create(runtime); + skc_allocator_device_create(runtime); + + // how many slots in the scheduler? + runtime->scheduler = skc_scheduler_create(runtime,runtime->config->scheduler.size); + + // allocate deps structure + runtime->deps = skc_grid_deps_create(runtime, + runtime->scheduler, + runtime->config->block_pool.pool_size); + + // initialize cq pool + skc_cq_pool_create(runtime, + &runtime->cq_pool, + runtime->config->cq_pool.type, + runtime->config->cq_pool.size); + + // acquire in-order cq + cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime); + + // initialize block pool + skc_block_pool_create(runtime,cq); + + // intialize handle pool + skc_handle_pool_create(runtime, + &runtime->handle_pool, + runtime->config->handle_pool.size, + runtime->config->handle_pool.width, + runtime->config->handle_pool.recs); + + // + // initialize pfns + // + // FIXME -- at this point we will have identified which device we've + // targeted and will load a DLL (or select from a built-in library) + // that contains all the pfns. + // + context->runtime = runtime; + + context->yield = skc_runtime_yield; + context->wait = skc_runtime_wait; + + context->path_builder = skc_path_builder_cl_12_create; + context->path_retain = skc_runtime_path_host_retain; + context->path_release = skc_runtime_path_host_release; + context->path_flush = skc_runtime_path_host_flush; + + context->raster_builder = skc_raster_builder_cl_12_create; + context->raster_retain = skc_runtime_raster_host_retain; + context->raster_release = skc_runtime_raster_host_release; + context->raster_flush = skc_runtime_raster_host_flush; + + context->composition = skc_composition_cl_12_create; + context->styling = skc_styling_cl_12_create; + + context->surface = skc_surface_cl_12_create; + + // block on pool creation + cl(Finish(cq)); + + // dispose of in-order cq + skc_runtime_release_cq_in_order(runtime,cq); + + return err; +}; + +// +// +// + +skc_err +skc_runtime_cl_12_dispose(struct skc_context * const context) +{ + // + // FIXME -- incomplete + // + fprintf(stderr,"%s incomplete!\n",__func__); + + struct skc_runtime * runtime = context->runtime; + + skc_allocator_device_dispose(runtime); + skc_allocator_host_dispose(runtime); + + skc_scheduler_dispose(context->runtime,context->runtime->scheduler); + + skc_grid_deps_dispose(context->runtime->deps); + + skc_cq_pool_dispose(runtime,&runtime->cq_pool); + + skc_block_pool_dispose(context->runtime); + + // skc_handle_pool_dispose(context->runtime); + + return SKC_ERR_SUCCESS; +} + +// +// TEMPORARY BENCHMARK +// + +#if 1 + +#include <windows.h> + +#define SKC_FRAMES_MASK 0x7F +#define SKC_FRAMES (SKC_FRAMES_MASK + 1) + +void +skc_runtime_cl_12_debug(struct skc_context * const context) +{ +#ifdef NDEBUG + static skc_uint frames=0; + static LARGE_INTEGER StartingTime={0}, EndingTime; + + if ((frames++ & SKC_FRAMES_MASK) != SKC_FRAMES_MASK) + return; + + QueryPerformanceCounter(&EndingTime); + + LARGE_INTEGER ElapsedMicroseconds, Frequency; + + ElapsedMicroseconds.QuadPart = EndingTime.QuadPart - StartingTime.QuadPart; + + QueryPerformanceFrequency(&Frequency); + + double const msecs_total = 1000.0 * ElapsedMicroseconds.QuadPart / Frequency.QuadPart; + double const msecs_frame = msecs_total / SKC_FRAMES; + + printf("Frames / Total / Per : %u / %.3f / %.3f\n", + SKC_FRAMES,msecs_total,msecs_frame); +#endif + + struct skc_runtime * const runtime = context->runtime; + + // acquire out-of-order cq + cl_command_queue cq = skc_runtime_acquire_cq_in_order(runtime); + + // copy atomics to host + skc_extent_phr_pdrw_read(&runtime->block_pool.atomics,cq,NULL); + + // block until complete + cl(Finish(cq)); + + // dispose of out-of-order cq + skc_runtime_release_cq_in_order(runtime,cq); + + union skc_block_pool_atomic const * const bp_atomic = runtime->block_pool.atomics.hr; + + skc_uint const available = bp_atomic->writes - bp_atomic->reads; + skc_uint const inuse = runtime->config->block_pool.pool_size - available; + + fprintf(stderr,"w/r/f/a: %9u - %9u = %9u : %6.2f MB\n", + bp_atomic->writes, + bp_atomic->reads, + available, + (inuse * runtime->config->block.bytes) / (1024.0*1024.0)); + + if (available >= (1<<27)) + { + fprintf(stderr,"block pool corrupted!\n"); + exit(-1); + } + + // + // + // +#ifdef NDEBUG + QueryPerformanceCounter(&StartingTime); +#endif +} + +#endif + +// +// +// + |