aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/compute/hs/cl/hs_cl_launcher.h
blob: 049657cc2fe3d63d94e9475c46dfb1f8b7230fe3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can
 * be found in the LICENSE file.
 *
 */

#pragma once

//
//
//

#include <CL/opencl.h>
#include <stdint.h>
#include <stdbool.h>

//
// Describes the algorithm's configuration for the target
// architecture.  hs_create() takes a pointer to one of these.
//

struct hs_info
{
  // words-per-key (1 = uint, 2 = ulong)
  uint32_t words;

  // keys-per-lane
  uint32_t keys;

  // lanes-per-warp
  uint32_t lanes;
};

//
// Entry point that takes the OpenCL context and device to work with.
//
// NOTE(review): presumably initializes the sorter and fills *info
// with the configuration for the target architecture -- the
// definition is not visible from this header, so confirm.
//

void hs_create(cl_context context, cl_device_id device_id, struct hs_info *const info);

//
// NOTE(review): presumably releases whatever hs_create() set up --
// the definition is not visible from this header, so confirm.
//
// The parameter list is spelled (void) on purpose: before C23 an
// empty list "()" declares a function with unspecified parameters,
// so a call passing stray arguments would compile without a
// diagnostic.  (void) makes this a real prototype.
//

void
hs_release(void);

//
// Size the buffers for a sort of `count` keys.
//
// The two padded counts are presumably returned through the pointer
// arguments; hs_sort() takes parameters with the same names.
// NOTE(review): the definition is not visible from this header --
// confirm the units and whether the pointers may be NULL.
//

void
hs_pad(const uint32_t  count,
       uint32_t *const count_padded_in,
       uint32_t *const count_padded_out);

//
// Sort the keys held in the vin buffer, storing the result in the
// vout buffer.  vin and vout may be the same buffer.
//
// cq is expected to be an out-of-order command queue.
//
// If one is necessary, enqueue a barrier before calling hs_sort().
// A final barrier will be enqueued before hs_sort() returns.
//

void
hs_sort(cl_command_queue cq,
        cl_mem           vin,
        cl_mem           vout,
        const uint32_t   count,
        const uint32_t   count_padded_in,
        const uint32_t   count_padded_out,
        const bool       linearize);

//
//
//