aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/utils/SkTextureCompressor_LATC.cpp
blob: 937aec809e4c182bacd8bde581dc5fb8ae6536ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
/*
 * Copyright 2014 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "SkTextureCompressor_LATC.h"
#include "SkTextureCompressor_Blitter.h"
#include "SkTextureCompressor_Utils.h"

#include "SkBlitter.h"
#include "SkEndian.h"

// Compression options. In general, the slow version is much more accurate, but
// much slower. The fast option is much faster, but much less accurate. YMMV.
#define COMPRESS_LATC_SLOW 0
#define COMPRESS_LATC_FAST 1

////////////////////////////////////////////////////////////////////////////////

// Generates an LATC palette. LATC constructs
// a palette of eight colors from LUM0 and LUM1 using the algorithm:
//
// LUM0,              if lum0 > lum1 and code(x,y) == 0
// LUM1,              if lum0 > lum1 and code(x,y) == 1
// (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
// (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
//
// LUM0,              if lum0 <= lum1 and code(x,y) == 0
// LUM1,              if lum0 <= lum1 and code(x,y) == 1
// (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
// (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
// 0,                 if lum0 <= lum1 and code(x,y) == 6
// 255,               if lum0 <= lum1 and code(x,y) == 7

static const int kLATCPaletteSize = 8;
static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
    palette[0] = lum0;
    palette[1] = lum1;
    if (lum0 > lum1) {
        for (int i = 1; i < 7; i++) {
            palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
        }
    } else {
        for (int i = 1; i < 5; i++) {
            palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
        }
        palette[6] = 0;
        palette[7] = 255;
    }
}

////////////////////////////////////////////////////////////////////////////////

#if COMPRESS_LATC_SLOW

////////////////////////////////////////////////////////////////////////////////
//
// Utility Functions
//
////////////////////////////////////////////////////////////////////////////////

// Absolute difference between two values. More correct than SkTAbs(a - b)
// because it works on unsigned values.
template <typename T> inline T abs_diff(const T &a, const T &b) {
    return (a > b) ? (a - b) : (b - a);
}

static bool is_extremal(uint8_t pixel) {
    return 0 == pixel || 255 == pixel;
}

typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);

// This function is used by both R11 EAC and LATC to compress 4x4 blocks
// of 8-bit alpha into 64-bit values that comprise the compressed data.
// For both formats, we need to make sure that the dimensions of the 
// src pixels are divisible by 4, and copy 4x4 blocks one at a time
// for compression.
static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
                                     int width, int height, int rowBytes,
                                     A84x4To64BitProc proc) {
    // Make sure that our data is well-formed enough to be considered for compression
    if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
        return false;
    }

    int blocksX = width >> 2;
    int blocksY = height >> 2;

    uint8_t block[16];
    uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
    for (int y = 0; y < blocksY; ++y) {
        for (int x = 0; x < blocksX; ++x) {
            // Load block
            for (int k = 0; k < 4; ++k) {
                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
            }

            // Compress it
            *encPtr = proc(block);
            ++encPtr;
        }
        src += 4 * rowBytes;
    }

    return true;
}

////////////////////////////////////////////////////////////////////////////////
//
// LATC compressor
//
////////////////////////////////////////////////////////////////////////////////

// LATC compressed texels down into square 4x4 blocks
static const int kLATCBlockSize = 4;
static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;

// Compress a block by using the bounding box of the pixels. It is assumed that
// there are no extremal pixels in this block otherwise we would have used
// compressBlockBBIgnoreExtremal.
static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
    uint8_t minVal = 255;
    uint8_t maxVal = 0;
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        minVal = SkTMin(pixels[i], minVal);
        maxVal = SkTMax(pixels[i], maxVal);
    }

    SkASSERT(!is_extremal(minVal));
    SkASSERT(!is_extremal(maxVal));

    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, maxVal, minVal);

    uint64_t indices = 0;
    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {

        // Find the best palette index
        uint8_t bestError = abs_diff(pixels[i], palette[0]);
        uint8_t idx = 0;
        for (int j = 1; j < kLATCPaletteSize; ++j) {
            uint8_t error = abs_diff(pixels[i], palette[j]);
            if (error < bestError) {
                bestError = error;
                idx = j;
            }
        }

        indices <<= 3;
        indices |= idx;
    }

    return
        SkEndian_SwapLE64(
            static_cast<uint64_t>(maxVal) |
            (static_cast<uint64_t>(minVal) << 8) |
            (indices << 16));
}

// Compress a block by using the bounding box of the pixels without taking into
// account the extremal values. The generated palette will contain extremal values
// and fewer points along the line segment to interpolate.
static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
    uint8_t minVal = 255;
    uint8_t maxVal = 0;
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        if (is_extremal(pixels[i])) {
            continue;
        }

        minVal = SkTMin(pixels[i], minVal);
        maxVal = SkTMax(pixels[i], maxVal);
    }

    SkASSERT(!is_extremal(minVal));
    SkASSERT(!is_extremal(maxVal));

    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, minVal, maxVal);

    uint64_t indices = 0;
    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {

        // Find the best palette index
        uint8_t idx = 0;
        if (is_extremal(pixels[i])) {
            if (0xFF == pixels[i]) {
                idx = 7;
            } else if (0 == pixels[i]) {
                idx = 6;
            } else {
                SkFAIL("Pixel is extremal but not really?!");
            }
        } else {
            uint8_t bestError = abs_diff(pixels[i], palette[0]);
            for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
                uint8_t error = abs_diff(pixels[i], palette[j]);
                if (error < bestError) {
                    bestError = error;
                    idx = j;
                }
            }
        }

        indices <<= 3;
        indices |= idx;
    }

    return
        SkEndian_SwapLE64(
            static_cast<uint64_t>(minVal) |
            (static_cast<uint64_t>(maxVal) << 8) |
            (indices << 16));
}


// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
// values LUM0 and LUM1, and an index into the generated palette. Details of how
// the palette is generated can be found in the comments of generatePalette above.
//
// We choose which palette type to use based on whether or not 'pixels' contains
// any extremal values (0 or 255). If there are extremal values, then we use the
// palette that has the extremal values built in. Otherwise, we use the full bounding
// box.

static uint64_t compress_latc_block(const uint8_t pixels[]) {
    // Collect unique pixels
    int nUniquePixels = 0;
    uint8_t uniquePixels[kLATCPixelsPerBlock];
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        bool foundPixel = false;
        for (int j = 0; j < nUniquePixels; ++j) {
            foundPixel = foundPixel || uniquePixels[j] == pixels[i];
        }

        if (!foundPixel) {
            uniquePixels[nUniquePixels] = pixels[i];
            ++nUniquePixels;
        }
    }

    // If there's only one unique pixel, then our compression is easy.
    if (1 == nUniquePixels) {
        return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));

    // Similarly, if there are only two unique pixels, then our compression is
    // easy again: place the pixels in the block header, and assign the indices
    // with one or zero depending on which pixel they belong to.
    } else if (2 == nUniquePixels) {
        uint64_t outBlock = 0;
        for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
            int idx = 0;
            if (pixels[i] == uniquePixels[1]) {
                idx = 1;
            }

            outBlock <<= 3;
            outBlock |= idx;
        }
        outBlock <<= 16;
        outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
        return SkEndian_SwapLE64(outBlock);
    }

    // Count non-maximal pixel values
    int nonExtremalPixels = 0;
    for (int i = 0; i < nUniquePixels; ++i) {
        if (!is_extremal(uniquePixels[i])) {
            ++nonExtremalPixels;
        }
    }

    // If all the pixels are nonmaximal then compute the palette using
    // the bounding box of all the pixels.
    if (nonExtremalPixels == nUniquePixels) {
        // This is really just for correctness, in all of my tests we
        // never take this step. We don't lose too much perf here because
        // most of the processing in this function is worth it for the 
        // 1 == nUniquePixels optimization.
        return compress_latc_block_bb(pixels);
    } else {
        return compress_latc_block_bb_ignore_extremal(pixels);
    }
}

#endif  // COMPRESS_LATC_SLOW

////////////////////////////////////////////////////////////////////////////////

#if COMPRESS_LATC_FAST

// Take the top three bits of each index and pack them into the low 12
// bits of the integer.
static inline uint32_t pack_index(uint32_t x) {
    // Pack it in...
#if defined (SK_CPU_BENDIAN)
    return
        (x >> 24) |
        ((x >> 13) & 0x38) |
        ((x >> 2) & 0x1C0) |
        ((x << 9) & 0xE00);
#else
    return
        (x & 0x7) |
        ((x >> 5) & 0x38) |
        ((x >> 10) & 0x1C0) |
        ((x >> 15) & 0xE00);
#endif
}

// Converts each 8-bit byte in the integer into an LATC index, and then packs
// the indices into the low 12 bits of the integer.
static inline uint32_t convert_index(uint32_t x) {
    // Since the palette is 
    // 255, 0, 219, 182, 146, 109, 73, 36
    // we need to map the high three bits of each byte in the integer
    // from
    // 0 1 2 3 4 5 6 7
    // to
    // 1 7 6 5 4 3 2 0
    //
    // This first operation takes the mapping from
    // 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
    x = 0x07070707 - SkTextureCompressor::ConvertToThreeBitIndex(x);

    // mask is 1 if index is non-zero
    const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;

    // add mask:
    // 7 6 5 4 3 2 1 0 --> 8 7 6 5 4 3 2 0
    x = (x + mask);

    // Handle overflow:
    // 8 7 6 5 4 3 2 0 --> 9 7 6 5 4 3 2 0
    x |= (x >> 3) & 0x01010101;

    // Mask out high bits:
    // 9 7 6 5 4 3 2 0 --> 1 7 6 5 4 3 2 0
    x &= 0x07070707;
    
    return pack_index(x);
}

typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);
template<PackIndicesProc packIndicesProc>
static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
    *(reinterpret_cast<uint64_t*>(*dstPtr)) =
        SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
    *dstPtr += 8;
}

inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
    uint64_t result = 0;
    for (int i = 0; i < 4; ++i) {
        const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
        result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
    }
    return result;
}

inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
    // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
    // a LOT faster.
    uint8_t transposed[16];
    for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
            transposed[j*4+i] = indices[i*rowBytes + j];
        }
    }

    return PackRowMajor(transposed, 4);
}

static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
                                 int width, int height, int rowBytes) {

    if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
        return false;
    }

    uint8_t** dstPtr = &dst;
    for (int y = 0; y < height; y += 4) {
        for (int x = 0; x < width; x += 4) {
            compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
        }
    }

    return true;
}

void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
    compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
}

#endif  // COMPRESS_LATC_FAST

void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
    uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
    uint8_t lum0 = block & 0xFF;
    uint8_t lum1 = (block >> 8) & 0xFF;

    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, lum0, lum1);

    block >>= 16;
    for (int j = 0; j < 4; ++j) {
        for (int i = 0; i < 4; ++i) {
            dst[i] = palette[block & 0x7];
            block >>= 3;
        }
        dst += dstRowBytes;
    }
}

// This is the type passed as the CompressorType argument of the compressed
// blitter for the LATC format. The static functions required to be in this
// struct are documented in SkTextureCompressor_Blitter.h
struct CompressorLATC {
    static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) {
        compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
    }

    static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src,
                                            int srcRowBytes) {
        compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes);
    }

#if PEDANTIC_BLIT_RECT
    static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes,
                                   const uint8_t* mask) {
        // Pack the mask
        uint64_t cmpMask = 0;
        for (int i = 0; i < 4; ++i) {
            const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes));
            cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i;
        }
        cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header

        uint64_t cmpSrc;
        uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc);
        compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes);

        // Mask out header
        cmpSrc = cmpSrc & cmpMask;

        // Read destination encoding
        uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst);

        // If the destination is the encoding for a blank block, then we need
        // to properly set the header
        if (0 == cmpDst) {
            *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL);
        }

        // Set the new indices
        *cmpDst &= ~cmpMask;
        *cmpDst |= cmpSrc;
    }
#endif  // PEDANTIC_BLIT_RECT
};

////////////////////////////////////////////////////////////////////////////////

namespace SkTextureCompressor {

bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
#if COMPRESS_LATC_FAST
    return compress_4x4_a8_latc(dst, src, width, height, rowBytes);
#elif COMPRESS_LATC_SLOW
    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
#else
#error "Must choose either fast or slow LATC compression"
#endif
}

SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer,
                             SkTBlitterAllocator* allocator) {
    if ((width % 4) != 0 || (height % 4) != 0) {
        return NULL;
    }

#if COMPRESS_LATC_FAST
    // Memset the output buffer to an encoding that decodes to zero. We must do this
    // in order to avoid having uninitialized values in the buffer if the blitter
    // decides not to write certain scanlines (and skip entire rows of blocks).
    // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero,
    // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine.
    // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2
    sk_bzero(outputBuffer, width * height / 2);

    return allocator->createT<
        SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* >
        (width, height, outputBuffer);
#elif COMPRESS_LATC_SLOW
    // TODO (krajcevski)
    return NULL;
#endif
}

void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) {
    for (int j = 0; j < height; j += 4) {
        for (int i = 0; i < width; i += 4) {
            decompress_latc_block(dst + i, dstRowBytes, src);
            src += 8;
        }
        dst += 4 * dstRowBytes;
    }
}

}  // SkTextureCompressor