src/core/SkTaskGroup.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

/*
 * Copyright 2014 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkOnce.h"
#include "SkSemaphore.h"
#include "SkSpinlock.h"
#include "SkTArray.h"
#include "SkTDArray.h"
#include "SkTaskGroup.h"
#include "SkThreadUtils.h"

#if defined(SK_BUILD_FOR_WIN32)
    static void query_num_cores(int* num_cores) {
        SYSTEM_INFO sysinfo;
        GetNativeSystemInfo(&sysinfo);
        *num_cores = sysinfo.dwNumberOfProcessors;
    }
#else
    #include <unistd.h>
    static void query_num_cores(int* num_cores) {
        *num_cores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif

// We cache sk_num_cores() so we only query the OS once.
SK_DECLARE_STATIC_ONCE(g_query_num_cores_once);
int sk_num_cores() {
    static int num_cores = 0;
    SkOnce(&g_query_num_cores_once, query_num_cores, &num_cores);
    SkASSERT(num_cores > 0);
    return num_cores;
}

namespace {

class ThreadPool : SkNoncopyable {
public:
    static void Add(std::function<void(void)> fn, SkAtomic<int32_t>* pending) {
        if (!gGlobal) {
            return fn();
        }
        gGlobal->add(fn, pending);
    }

    static void Batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
        if (!gGlobal) {
            for (int i = 0; i < N; i++) { fn(i); }
            return;
        }
        gGlobal->batch(N, fn, pending);
    }

    static void Wait(SkAtomic<int32_t>* pending) {
        if (!gGlobal) {  // If we have no threads, the work must already be done.
            SkASSERT(pending->load(sk_memory_order_relaxed) == 0);
            return;
        }
        // Acquire pairs with decrement release here or in Loop.
        while (pending->load(sk_memory_order_acquire) > 0) {
            // Lend a hand until our SkTaskGroup of interest is done.
            Work work;
            {
                // We're stealing work opportunistically,
                // so we never call fWorkAvailable.wait(), which could sleep us if there's no work.
                // This means fWorkAvailable is only an upper bound on fWork.count().
                AutoLock lock(&gGlobal->fWorkLock);
                if (gGlobal->fWork.empty()) {
                    // Someone has picked up all the work (including ours).  How nice of them!
                    // (They may still be working on it, so we can't assert *pending == 0 here.)
                    continue;
                }
                work = gGlobal->fWork.back();
                gGlobal->fWork.pop_back();
            }
            // This Work isn't necessarily part of our SkTaskGroup of interest, but that's fine.
            // We threads gotta stick together.  We're always making forward progress.
            work.fn();
            work.pending->fetch_add(-1, sk_memory_order_release);  // Pairs with load above.
        }
    }

private:
    struct AutoLock {
        AutoLock(SkSpinlock* lock) : fLock(lock) { fLock->acquire(); }
        ~AutoLock() { fLock->release(); }
    private:
        SkSpinlock* fLock;
    };

    struct Work {
        std::function<void(void)> fn; // A function to call
        SkAtomic<int32_t>* pending;   // then decrement pending afterwards.
    };

    explicit ThreadPool(int threads) {
        if (threads == -1) {
            threads = sk_num_cores();
        }
        for (int i = 0; i < threads; i++) {
            fThreads.push(new SkThread(&ThreadPool::Loop, this));
            fThreads.top()->start();
        }
    }

    ~ThreadPool() {
        SkASSERT(fWork.empty());  // All SkTaskGroups should be destroyed by now.

        // Send a poison pill to each thread.
        SkAtomic<int> dummy(0);
        for (int i = 0; i < fThreads.count(); i++) {
            this->add(nullptr, &dummy);
        }
        // Wait for them all to swallow the pill and die.
        for (int i = 0; i < fThreads.count(); i++) {
            fThreads[i]->join();
        }
        SkASSERT(fWork.empty());  // Can't hurt to double check.
        fThreads.deleteAll();
    }

    void add(std::function<void(void)> fn, SkAtomic<int32_t>* pending) {
        Work work = { fn, pending };
        pending->fetch_add(+1, sk_memory_order_relaxed);  // No barrier needed.
        {
            AutoLock lock(&fWorkLock);
            fWork.push_back(work);
        }
        fWorkAvailable.signal(1);
    }

    void batch(int N, std::function<void(int)> fn, SkAtomic<int32_t>* pending) {
        pending->fetch_add(+N, sk_memory_order_relaxed);  // No barrier needed.
        {
            AutoLock lock(&fWorkLock);
            for (int i = 0; i < N; i++) {
                Work work = { [i, fn]() { fn(i); }, pending };
                fWork.push_back(work);
            }
        }
        fWorkAvailable.signal(N);
    }

    static void Loop(void* arg) {
        ThreadPool* pool = (ThreadPool*)arg;
        Work work;
        while (true) {
            // Sleep until there's work available, and claim one unit of Work as we wake.
            pool->fWorkAvailable.wait();
            {
                AutoLock lock(&pool->fWorkLock);
                if (pool->fWork.empty()) {
                    // Someone in Wait() stole our work (fWorkAvailable is an upper bound).
                    // Well, that's fine, back to sleep for us.
                    continue;
                }
                work = pool->fWork.back();
                pool->fWork.pop_back();
            }
            if (!work.fn) {
                return;  // Poison pill.  Time... to die.
            }
            work.fn();
            work.pending->fetch_add(-1, sk_memory_order_release);  // Pairs with load in Wait().
        }
    }

    // fWorkLock must be held when reading or modifying fWork.
    SkSpinlock      fWorkLock;
    SkTArray<Work>  fWork;

    // A thread-safe upper bound for fWork.count().
    //
    // We'd have it be an exact count but for the loop in Wait():
    // we never want that to block, so it can't call fWorkAvailable.wait(),
    // and that's the only way to decrement fWorkAvailable.
    // So fWorkAvailable may overcount actual the work available.
    // We make do, but this means some worker threads may wake spuriously.
    SkSemaphore fWorkAvailable;

    // These are only changed in a single-threaded context.
    SkTDArray<SkThread*> fThreads;
    static ThreadPool* gGlobal;

    friend struct SkTaskGroup::Enabler;
};
ThreadPool* ThreadPool::gGlobal = nullptr;

}  // namespace

SkTaskGroup::Enabler::Enabler(int threads) {
    SkASSERT(ThreadPool::gGlobal == nullptr);
    if (threads != 0) {
        ThreadPool::gGlobal = new ThreadPool(threads);
    }
}

SkTaskGroup::Enabler::~Enabler() { delete ThreadPool::gGlobal; }

SkTaskGroup::SkTaskGroup() : fPending(0) {}

void SkTaskGroup::wait()                            { ThreadPool::Wait(&fPending); }
void SkTaskGroup::add(std::function<void(void)> fn) { ThreadPool::Add(fn, &fPending); }
void SkTaskGroup::batch(int N, std::function<void(int)> fn) {
    ThreadPool::Batch(N, fn, &fPending);
}