1 /*
2  * Copyright (C) 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*
25  * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26  */
27 
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30 
31 #include "util/bitscan.h"
32 #include "util/format_srgb.h"
33 #include "util/half_float.h"
34 #include "util/u_math.h"
35 
36 #define BLOCK_SIZE 4
37 #define N_PARTITIONS 64
38 #define BLOCK_BYTES 16
39 
/* Describes the bit layout of one BPTC UNORM block mode. The entries of
 * bptc_unorm_modes are initialised positionally, so the field order here
 * must not change.
 */
struct bptc_unorm_mode {
   /* Number of subsets (1 to 3); every subset has two endpoints. */
   int n_subsets;
   /* Width in bits of the partition number field (0 if there is none). */
   int n_partition_bits;
   /* True if a 2-bit rotation field is stored in the block. */
   bool has_rotation_bits;
   /* True if a 1-bit index selection field is stored in the block. */
   bool has_index_selection_bit;
   /* Stored bits per color component of each endpoint, without p-bit. */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is fixed at 255. */
   int n_alpha_bits;
   /* True if every endpoint is followed by its own p-bit. */
   bool has_endpoint_pbits;
   /* True if each subset has one p-bit shared by both of its endpoints. */
   bool has_shared_pbits;
   /* Bits per texel of the primary index (anchor texels store one less). */
   int n_index_bits;
   /* Bits per texel of the secondary index; 0 if there is none. */
   int n_secondary_index_bits;
};
52 
/* One entry of the bitfields list of a float mode. The list is walked in
 * order by extract_float_endpoints and ends at the first entry whose
 * endpoint is -1.
 */
struct bptc_float_bitfield {
   /* Endpoint number, or -1 to terminate the list. */
   int8_t endpoint;
   /* NOTE(review): presumably the color component (0-2) that receives the
    * bits - the use is outside the visible code, confirm.
    */
   uint8_t component;
   /* NOTE(review): presumably the bit position within the endpoint value
    * at which the field is placed - confirm.
    */
   uint8_t offset;
   /* Number of bits read from the block for this field. */
   uint8_t n_bits;
   /* When set the extracted bits are processed one by one.
    * NOTE(review): presumably to reverse their order - confirm.
    */
   bool reverse;
};
60 
/* Describes the bit layout of one BPTC float block mode. The entries of
 * bptc_float_modes are initialised positionally, so the field order here
 * must not change.
 */
struct bptc_float_mode {
   /* True for table entries that only mark a reserved mode; all other
    * fields of such an entry are zero-initialised.
    */
   bool reserved;
   /* NOTE(review): presumably whether the later endpoints are stored as
    * deltas from the first - the use is outside the visible code, confirm.
    */
   bool transformed_endpoints;
   /* Width of the partition field. When non-zero,
    * extract_float_endpoints handles four endpoints instead of two.
    */
   int n_partition_bits;
   /* NOTE(review): presumably the precision of the base endpoint - confirm. */
   int n_endpoint_bits;
   /* NOTE(review): presumably the bits per texel index - confirm. */
   int n_index_bits;
   /* NOTE(review): presumably the per-component delta widths - confirm. */
   int n_delta_bits[3];
   /* Bit fields in block order, terminated by an entry with endpoint -1. */
   struct bptc_float_bitfield bitfields[24];
};
70 
/* State for writing a bit stream. Not referenced in the visible part of
 * this file.
 * NOTE(review): presumably buf collects pending bits, pos counts the bits
 * held in buf and dst is the output cursor - confirm against the users.
 */
struct bit_writer {
   uint8_t buf;
   int pos;
   uint8_t *dst;
};
76 
/* Layout of the eight UNORM block modes. The decoder picks entry
 * ffs(block[0]) - 1, i.e. the position of the lowest set bit in the first
 * byte of the block.
 *
 * Columns follow the field order of struct bptc_unorm_mode:
 * n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
 * n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
 * n_index_bits, n_secondary_index_bits.
 */
static const struct bptc_unorm_mode
bptc_unorm_modes[] = {
   /* 0 */ { 3, 4, false, false, 4, 0, true,  false, 3, 0 },
   /* 1 */ { 2, 6, false, false, 6, 0, false, true,  3, 0 },
   /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
   /* 3 */ { 2, 6, false, false, 7, 0, true,  false, 2, 0 },
   /* 4 */ { 1, 0, true,  true,  5, 6, false, false, 2, 3 },
   /* 5 */ { 1, 0, true,  false, 7, 8, false, false, 2, 2 },
   /* 6 */ { 1, 0, false, false, 7, 7, true,  false, 4, 0 },
   /* 7 */ { 2, 6, false, false, 5, 5, true,  false, 2, 0 }
};
88 
/* Layout of the float block modes.
 *
 * Each entry follows the field order of struct bptc_float_mode:
 * reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
 * n_index_bits, n_delta_bits[3], then the list of bit fields. Every bit
 * field is { endpoint, component, offset, n_bits, reverse } and the list
 * ends with { -1 }. Reserved entries set only the reserved flag.
 *
 * NOTE(review): the comment above each entry presumably gives the mode bit
 * pattern found in the block; the code that indexes this table is outside
 * the visible part of the file - confirm.
 */
static const struct bptc_float_mode
bptc_float_modes[] = {
   /* 00 */
   { false, true, 5, 10, 3, { 5, 5, 5 },
     { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
       { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01 */
   { false, true, 5, 7, 3, { 6, 6, 6 },
     { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
       { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
       { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
       { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 00010 */
   { false, true, 5, 11, 3, { 5, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00011 */
   { false, false, 0, 10, 4, { 10, 10, 10 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
       { -1 } }
   },
   /* 00110 */
   { false, true, 5, 11, 3, { 4, 5, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 00111 */
   { false, true, 0, 11, 4, { 9, 9, 9 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
       { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
       { -1 } }
   },
   /* 01010 */
   { false, true, 5, 11, 3, { 4, 4, 5 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
       { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
       { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
       { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01011 */
   { false, true, 0, 12, 4, { 8, 8, 8 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
       { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
       { -1 } }
   },
   /* 01110 */
   { false, true, 5, 9, 3, { 5, 5, 5 },
     { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
       { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 01111 */
   { false, true, 0, 16, 4, { 4, 4, 4 },
     { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
       { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
       { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
       { -1 } }
   },
   /* 10010 */
   { false, true, 5, 8, 3, { 6, 5, 5 },
     { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
       { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
       { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 10011 */
   { true /* reserved */ },
   /* 10110 */
   { false, true, 5, 8, 3, { 5, 6, 5 },
     { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
       { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 10111 */
   { true /* reserved */ },
   /* 11010 */
   { false, true, 5, 8, 3, { 5, 5, 6 },
     { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
       { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
       { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
       { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
       { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
       { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
       { -1 } }
   },
   /* 11011 */
   { true /* reserved */ },
   /* 11110 */
   { false, false, 5, 6, 3, { 6, 6, 6 },
     { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
       { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
       { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
       { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
       { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
       { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
       { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
       { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
       { -1 } }
   },
   /* 11111 */
   { true /* reserved */ },
};
239 
/* This partition table is used when the mode has two subsets. It is indexed
 * by the partition number read from the block. Each partition is
 * represented by a 32-bit value which gives 2 bits per texel within the
 * block: the subset (0 or 1) of texel t is (value >> (2 * t)) & 3.
 */
static const uint32_t
partition_table1[N_PARTITIONS] = {
   0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
   0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
   0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
   0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
   0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
   0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
   0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
   0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
   0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
   0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
   0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
   0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
   0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
   0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
   0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
   0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
};
264 
/* This partition table is used when the mode has three subsets. It is
 * indexed by the partition number and packs 2 bits per texel in the same
 * way as partition_table1; in this case the values can be 0, 1 or 2.
 */
static const uint32_t
partition_table2[N_PARTITIONS] = {
   0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
   0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
   0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
   0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
   0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
   0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
   0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
   0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
   0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
   0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
   0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
   0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
   0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
   0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
   0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
   0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
};
287 
/* Texel numbers of the anchor texels, indexed as
 * anchor_indices[row][partition_num]. Texel 0 is always treated as an
 * anchor by the decoder and is therefore not listed here; the rows below
 * cover the anchors of the remaining subsets.
 */
static const uint8_t
anchor_indices[][N_PARTITIONS] = {
   /* Anchor index values for the second subset of two-subset partitioning */
   {
      0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
      0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
      0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
      0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
   },

   /* Anchor index values for the second subset of three-subset partitioning */
   {
      0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
      0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
      0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
      0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
   },

   /* Anchor index values for the third subset of three-subset
    * partitioning
    */
   {
      0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
      0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
      0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
      0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
   }
};
316 
/* Reads an n_bits wide bit field that starts offset bits into block.
 *
 * Bits are numbered from the least-significant bit of block[0] upwards and
 * the field is returned with its first bit in bit 0 of the result. A field
 * may straddle any number of byte boundaries.
 *
 * A field of zero (or negative) width yields 0 without touching the
 * buffer; this keeps callers that pass a width of 0 for absent fields from
 * reading a byte they do not own and avoids shifting by a negative count.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int bit = 0;

   while (n_bits > 0) {
      /* Take as many bits as the current byte still has to offer. */
      const int bits_left_in_byte = 8 - bit_index;
      const int n_bits_in_byte =
         n_bits < bits_left_in_byte ? n_bits : bits_left_in_byte;

      result |= ((block[byte_index] >> bit_index) &
                 ((1 << n_bits_in_byte) - 1)) << bit;

      n_bits -= n_bits_in_byte;
      bit += n_bits_in_byte;

      /* Every following byte is consumed from its lowest bit. */
      byte_index++;
      bit_index = 0;
   }

   return result;
}
343 
/* Expands an n_bits wide quantity into a byte by moving it to the top of
 * the byte and copying its most-significant bits into the unused
 * least-significant bits, repeating the pattern until the byte is full.
 *
 * For widths of 4 to 8 bits one copy is enough and the result equals
 * byte << (8 - n_bits) | byte >> (2 * n_bits - 8). Narrower widths need
 * several copies; the single-expression form would shift by a negative
 * count for them. A width of 8 or more returns byte unchanged and a
 * non-positive width returns 0.
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   unsigned aligned;
   unsigned result;
   int shift;

   if (n_bits <= 0)
      return 0;

   if (n_bits >= 8)
      return byte;

   aligned = (unsigned) byte << (8 - n_bits);
   result = aligned;

   for (shift = n_bits; shift < 8; shift += n_bits)
      result |= aligned >> shift;

   return (uint8_t) result;
}
353 
354 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])355 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
356                         const uint8_t *block,
357                         int bit_offset,
358                         uint8_t endpoints[][4])
359 {
360    int component;
361    int subset;
362    int endpoint;
363    int pbit;
364    int n_components;
365 
366    /* Extract each color component */
367    for (component = 0; component < 3; component++) {
368       for (subset = 0; subset < mode->n_subsets; subset++) {
369          for (endpoint = 0; endpoint < 2; endpoint++) {
370             endpoints[subset * 2 + endpoint][component] =
371                extract_bits(block, bit_offset, mode->n_color_bits);
372             bit_offset += mode->n_color_bits;
373          }
374       }
375    }
376 
377    /* Extract the alpha values */
378    if (mode->n_alpha_bits > 0) {
379       for (subset = 0; subset < mode->n_subsets; subset++) {
380          for (endpoint = 0; endpoint < 2; endpoint++) {
381             endpoints[subset * 2 + endpoint][3] =
382                extract_bits(block, bit_offset, mode->n_alpha_bits);
383             bit_offset += mode->n_alpha_bits;
384          }
385       }
386 
387       n_components = 4;
388    } else {
389       for (subset = 0; subset < mode->n_subsets; subset++)
390          for (endpoint = 0; endpoint < 2; endpoint++)
391             endpoints[subset * 2 + endpoint][3] = 255;
392 
393       n_components = 3;
394    }
395 
396    /* Add in the p-bits */
397    if (mode->has_endpoint_pbits) {
398       for (subset = 0; subset < mode->n_subsets; subset++) {
399          for (endpoint = 0; endpoint < 2; endpoint++) {
400             pbit = extract_bits(block, bit_offset, 1);
401             bit_offset += 1;
402 
403             for (component = 0; component < n_components; component++) {
404                endpoints[subset * 2 + endpoint][component] <<= 1;
405                endpoints[subset * 2 + endpoint][component] |= pbit;
406             }
407          }
408       }
409    } else if (mode->has_shared_pbits) {
410       for (subset = 0; subset < mode->n_subsets; subset++) {
411          pbit = extract_bits(block, bit_offset, 1);
412          bit_offset += 1;
413 
414          for (endpoint = 0; endpoint < 2; endpoint++) {
415             for (component = 0; component < n_components; component++) {
416                endpoints[subset * 2 + endpoint][component] <<= 1;
417                endpoints[subset * 2 + endpoint][component] |= pbit;
418             }
419          }
420       }
421    }
422 
423    /* Expand the n-bit values to a byte */
424    for (subset = 0; subset < mode->n_subsets; subset++) {
425       for (endpoint = 0; endpoint < 2; endpoint++) {
426          for (component = 0; component < 3; component++) {
427             endpoints[subset * 2 + endpoint][component] =
428                expand_component(endpoints[subset * 2 + endpoint][component],
429                                 mode->n_color_bits +
430                                 mode->has_endpoint_pbits +
431                                 mode->has_shared_pbits);
432          }
433 
434          if (mode->n_alpha_bits > 0) {
435             endpoints[subset * 2 + endpoint][3] =
436                expand_component(endpoints[subset * 2 + endpoint][3],
437                                 mode->n_alpha_bits +
438                                 mode->has_endpoint_pbits +
439                                 mode->has_shared_pbits);
440          }
441       }
442    }
443 
444    return bit_offset;
445 }
446 
447 static bool
is_anchor(int n_subsets,int partition_num,int texel)448 is_anchor(int n_subsets,
449           int partition_num,
450           int texel)
451 {
452    if (texel == 0)
453       return true;
454 
455    switch (n_subsets) {
456    case 1:
457       return false;
458    case 2:
459       return anchor_indices[0][partition_num] == texel;
460    case 3:
461       return (anchor_indices[1][partition_num] == texel ||
462               anchor_indices[2][partition_num] == texel);
463    default:
464       assert(false);
465       return false;
466    }
467 }
468 
469 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)470 count_anchors_before_texel(int n_subsets,
471                            int partition_num,
472                            int texel)
473 {
474    int count = 1;
475 
476    if (texel == 0)
477       return 0;
478 
479    switch (n_subsets) {
480    case 1:
481       break;
482    case 2:
483       if (texel > anchor_indices[0][partition_num])
484          count++;
485       break;
486    case 3:
487       if (texel > anchor_indices[1][partition_num])
488          count++;
489       if (texel > anchor_indices[2][partition_num])
490          count++;
491       break;
492    default:
493       assert(false);
494       return 0;
495    }
496 
497    return count;
498 }
499 
/* Blends the endpoint values a and b according to a texel index.
 *
 * index_bits selects the weight table (2, 3 or 4 bit indices) and index
 * picks the weight of b out of 64 from it; a gets the remainder. The sum is
 * rounded by adding 32 before the shift by 6.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by index width; widths 0 and 1 have no table. */
   static const uint8_t *weights_by_width[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const uint8_t *table = weights_by_width[index_bits];
   const int weight_b = table[index];
   const int weight_a = 64 - weight_b;

   return (weight_a * a + weight_b * b + 32) >> 6;
}
518 
/* Applies the rotation field to a decoded RGBA texel in place.
 *
 * Rotation 0 leaves the texel alone; rotation n (n > 0) exchanges
 * component n - 1 with the alpha component result[3].
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *swapped = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *swapped;
      *swapped = alpha;
   }
}
534 
535 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)536 fetch_rgba_unorm_from_block(const uint8_t *block,
537                             uint8_t *result,
538                             int texel)
539 {
540    int mode_num = ffs(block[0]);
541    const struct bptc_unorm_mode *mode;
542    int bit_offset, secondary_bit_offset;
543    int partition_num;
544    int subset_num;
545    int rotation;
546    int index_selection;
547    int index_bits;
548    int indices[2];
549    int index;
550    int anchors_before_texel;
551    bool anchor;
552    uint8_t endpoints[3 * 2][4];
553    uint32_t subsets;
554    int component;
555 
556    if (mode_num == 0) {
557       /* According to the spec this mode is reserved and shouldn't be used. */
558       memset(result, 0, 4);
559       return;
560    }
561 
562    mode = bptc_unorm_modes + mode_num - 1;
563    bit_offset = mode_num;
564 
565    partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
566    bit_offset += mode->n_partition_bits;
567 
568    switch (mode->n_subsets) {
569    case 1:
570       subsets = 0;
571       break;
572    case 2:
573       subsets = partition_table1[partition_num];
574       break;
575    case 3:
576       subsets = partition_table2[partition_num];
577       break;
578    default:
579       assert(false);
580       return;
581    }
582 
583    if (mode->has_rotation_bits) {
584       rotation = extract_bits(block, bit_offset, 2);
585       bit_offset += 2;
586    } else {
587       rotation = 0;
588    }
589 
590    if (mode->has_index_selection_bit) {
591       index_selection = extract_bits(block, bit_offset, 1);
592       bit_offset++;
593    } else {
594       index_selection = 0;
595    }
596 
597    bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
598 
599    anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
600                                                      partition_num, texel);
601 
602    /* Calculate the offset to the secondary index */
603    secondary_bit_offset = (bit_offset +
604                            BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
605                            mode->n_subsets +
606                            mode->n_secondary_index_bits * texel -
607                            anchors_before_texel);
608 
609    /* Calculate the offset to the primary index for this texel */
610    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
611 
612    subset_num = (subsets >> (texel * 2)) & 3;
613 
614    anchor = is_anchor(mode->n_subsets, partition_num, texel);
615 
616    index_bits = mode->n_index_bits;
617    if (anchor)
618       index_bits--;
619    indices[0] = extract_bits(block, bit_offset, index_bits);
620 
621    if (mode->n_secondary_index_bits) {
622       index_bits = mode->n_secondary_index_bits;
623       if (anchor)
624          index_bits--;
625       indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
626    }
627 
628    index = indices[index_selection];
629    index_bits = (index_selection ?
630                  mode->n_secondary_index_bits :
631                  mode->n_index_bits);
632 
633    for (component = 0; component < 3; component++)
634       result[component] = interpolate(endpoints[subset_num * 2][component],
635                                       endpoints[subset_num * 2 + 1][component],
636                                       index,
637                                       index_bits);
638 
639    /* Alpha uses the opposite index from the color components */
640    if (mode->n_secondary_index_bits && !index_selection) {
641       index = indices[1];
642       index_bits = mode->n_secondary_index_bits;
643    } else {
644       index = indices[0];
645       index_bits = mode->n_index_bits;
646    }
647 
648    result[3] = interpolate(endpoints[subset_num * 2][3],
649                            endpoints[subset_num * 2 + 1][3],
650                            index,
651                            index_bits);
652 
653    apply_rotation(rotation, result);
654 }
655 
656 static void
decompress_rgba_unorm_block(int src_width,int src_height,const uint8_t * block,uint8_t * dst_row,int dst_rowstride)657 decompress_rgba_unorm_block(int src_width, int src_height,
658                             const uint8_t *block,
659                             uint8_t *dst_row, int dst_rowstride)
660 {
661    int mode_num = ffs(block[0]);
662    const struct bptc_unorm_mode *mode;
663    int bit_offset_head, bit_offset, secondary_bit_offset;
664    int partition_num;
665    int subset_num;
666    int rotation;
667    int index_selection;
668    int index_bits;
669    int indices[2];
670    int index;
671    int anchors_before_texel;
672    bool anchor;
673    uint8_t endpoints[3 * 2][4];
674    uint32_t subsets;
675    int component;
676    unsigned x, y;
677 
678    if (mode_num == 0) {
679       /* According to the spec this mode is reserved and shouldn't be used. */
680       for(y = 0; y < src_height; y += 1) {
681          uint8_t *result = dst_row;
682          memset(result, 0, 4 * src_width);
683          dst_row += dst_rowstride;
684       }
685       return;
686    }
687 
688    mode = bptc_unorm_modes + mode_num - 1;
689    bit_offset_head = mode_num;
690 
691    partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692    bit_offset_head += mode->n_partition_bits;
693 
694    switch (mode->n_subsets) {
695    case 1:
696       subsets = 0;
697       break;
698    case 2:
699       subsets = partition_table1[partition_num];
700       break;
701    case 3:
702       subsets = partition_table2[partition_num];
703       break;
704    default:
705       assert(false);
706       return;
707    }
708 
709    if (mode->has_rotation_bits) {
710       rotation = extract_bits(block, bit_offset_head, 2);
711       bit_offset_head += 2;
712    } else {
713       rotation = 0;
714    }
715 
716    if (mode->has_index_selection_bit) {
717       index_selection = extract_bits(block, bit_offset_head, 1);
718       bit_offset_head++;
719    } else {
720       index_selection = 0;
721    }
722 
723    bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
724 
725    for(y = 0; y < src_height; y += 1) {
726       uint8_t *result = dst_row;
727       for(x = 0; x < src_width; x += 1) {
728          int texel;
729          texel = x + y * 4;
730          bit_offset = bit_offset_head;
731 
732          anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
733                                                            partition_num,
734                                                            texel);
735 
736          /* Calculate the offset to the secondary index */
737          secondary_bit_offset = (bit_offset +
738                                  BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
739                                  mode->n_subsets +
740                                  mode->n_secondary_index_bits * texel -
741                                  anchors_before_texel);
742 
743          /* Calculate the offset to the primary index for this texel */
744          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
745 
746          subset_num = (subsets >> (texel * 2)) & 3;
747 
748          anchor = is_anchor(mode->n_subsets, partition_num, texel);
749 
750          index_bits = mode->n_index_bits;
751          if (anchor)
752             index_bits--;
753          indices[0] = extract_bits(block, bit_offset, index_bits);
754 
755          if (mode->n_secondary_index_bits) {
756             index_bits = mode->n_secondary_index_bits;
757             if (anchor)
758                index_bits--;
759             indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
760          }
761 
762          index = indices[index_selection];
763          index_bits = (index_selection ?
764                        mode->n_secondary_index_bits :
765                        mode->n_index_bits);
766 
767          for (component = 0; component < 3; component++)
768             result[component] = interpolate(endpoints[subset_num * 2][component],
769                                             endpoints[subset_num * 2 + 1][component],
770                                             index,
771                                             index_bits);
772 
773          /* Alpha uses the opposite index from the color components */
774          if (mode->n_secondary_index_bits && !index_selection) {
775             index = indices[1];
776             index_bits = mode->n_secondary_index_bits;
777          } else {
778             index = indices[0];
779             index_bits = mode->n_index_bits;
780          }
781 
782          result[3] = interpolate(endpoints[subset_num * 2][3],
783                                  endpoints[subset_num * 2 + 1][3],
784                                  index,
785                                  index_bits);
786 
787          apply_rotation(rotation, result);
788          result += 4;
789       }
790       dst_row += dst_rowstride;
791    }
792 }
793 
794 static void
decompress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)795 decompress_rgba_unorm(int width, int height,
796                       const uint8_t *src, int src_rowstride,
797                       uint8_t *dst, int dst_rowstride)
798 {
799    int src_row_diff;
800    int y, x;
801 
802    if (src_rowstride >= width * 4)
803       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
804    else
805       src_row_diff = 0;
806 
807    for (y = 0; y < height; y += BLOCK_SIZE) {
808       for (x = 0; x < width; x += BLOCK_SIZE) {
809          decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810                                      MIN2(height - y, BLOCK_SIZE),
811                                      src,
812                                      dst + x * 4 + y * dst_rowstride,
813                                      dst_rowstride);
814          src += BLOCK_BYTES;
815       }
816       src += src_row_diff;
817    }
818 }
819 
/*
 * Expand a signed quantized endpoint of n_endpoint_bits bits.
 *
 * Values that already use 16 or more bits are returned untouched and zero
 * stays zero.  Otherwise the magnitude is either saturated to 0x7fff (when it
 * is at or above the largest magnitude minus one step) or rescaled, and the
 * original sign is re-applied.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int unquantized;

   if (n_endpoint_bits >= 16)
      return value;

   if (value == 0)
      return 0;

   magnitude = value < 0 ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      unquantized = 0x7fff;
   else
      unquantized = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -unquantized : unquantized;
}
848 
/*
 * Expand an unsigned quantized endpoint of n_endpoint_bits bits.
 *
 * Endpoints of 15 bits or more and the value zero are returned unchanged.
 * The all-ones value maps to 0xffff; everything else is rescaled.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   if (n_endpoint_bits < 15 && value != 0) {
      if (value == (1 << n_endpoint_bits) - 1)
         return 0xffff;

      return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
   }

   return value;
}
863 
864 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)865 extract_float_endpoints(const struct bptc_float_mode *mode,
866                         const uint8_t *block,
867                         int bit_offset,
868                         int32_t endpoints[][3],
869                         bool is_signed)
870 {
871    const struct bptc_float_bitfield *bitfield;
872    int endpoint, component;
873    int n_endpoints;
874    int value;
875    int i;
876 
877    if (mode->n_partition_bits)
878       n_endpoints = 4;
879    else
880       n_endpoints = 2;
881 
882    memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
883 
884    for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
885       value = extract_bits(block, bit_offset, bitfield->n_bits);
886       bit_offset += bitfield->n_bits;
887 
888       if (bitfield->reverse) {
889          for (i = 0; i < bitfield->n_bits; i++) {
890             if (value & (1 << i))
891                endpoints[bitfield->endpoint][bitfield->component] |=
892                   1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
893          }
894       } else {
895          endpoints[bitfield->endpoint][bitfield->component] |=
896             value << bitfield->offset;
897       }
898    }
899 
900    if (mode->transformed_endpoints) {
901       /* The endpoints are specified as signed offsets from e0 */
902       for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
903          for (component = 0; component < 3; component++) {
904             value = util_sign_extend(endpoints[endpoint][component],
905                                      mode->n_delta_bits[component]);
906             endpoints[endpoint][component] =
907                ((endpoints[0][component] + value) &
908                 ((1 << mode->n_endpoint_bits) - 1));
909          }
910       }
911    }
912 
913    if (is_signed) {
914       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
915          for (component = 0; component < 3; component++) {
916             value = util_sign_extend(endpoints[endpoint][component],
917                                      mode->n_endpoint_bits);
918             endpoints[endpoint][component] =
919                signed_unquantize(value, mode->n_endpoint_bits);
920          }
921       }
922    } else {
923       for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
924          for (component = 0; component < 3; component++) {
925             endpoints[endpoint][component] =
926                unsigned_unquantize(endpoints[endpoint][component],
927                                    mode->n_endpoint_bits);
928          }
929       }
930    }
931 
932    return bit_offset;
933 }
934 
/*
 * Final scaling step for an interpolated unsigned value: multiply by 31/64
 * using integer arithmetic.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
940 
/*
 * Final scaling step for an interpolated signed value: the magnitude is
 * multiplied by 31/32 and, for negative inputs, bit 15 is set as a sign bit.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   if (value >= 0)
      return value * 31 / 32;

   return (-value * 31 / 32) | 0x8000;
}
949 
950 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)951 fetch_rgb_float_from_block(const uint8_t *block,
952                            float *result,
953                            int texel,
954                            bool is_signed)
955 {
956    int mode_num;
957    const struct bptc_float_mode *mode;
958    int bit_offset;
959    int partition_num;
960    int subset_num;
961    int index_bits;
962    int index;
963    int anchors_before_texel;
964    int32_t endpoints[2 * 2][3];
965    uint32_t subsets;
966    int n_subsets;
967    int component;
968    int32_t value;
969 
970    if (block[0] & 0x2) {
971       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
972       bit_offset = 5;
973    } else {
974       mode_num = block[0] & 3;
975       bit_offset = 2;
976    }
977 
978    mode = bptc_float_modes + mode_num;
979 
980    if (mode->reserved) {
981       memset(result, 0, sizeof result[0] * 3);
982       result[3] = 1.0f;
983       return;
984    }
985 
986    bit_offset = extract_float_endpoints(mode, block, bit_offset,
987                                         endpoints, is_signed);
988 
989    if (mode->n_partition_bits) {
990       partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
991       bit_offset += mode->n_partition_bits;
992 
993       subsets = partition_table1[partition_num];
994       n_subsets = 2;
995    } else {
996       partition_num = 0;
997       subsets = 0;
998       n_subsets = 1;
999    }
1000 
1001    anchors_before_texel =
1002       count_anchors_before_texel(n_subsets, partition_num, texel);
1003 
1004    /* Calculate the offset to the primary index for this texel */
1005    bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1006 
1007    subset_num = (subsets >> (texel * 2)) & 3;
1008 
1009    index_bits = mode->n_index_bits;
1010    if (is_anchor(n_subsets, partition_num, texel))
1011       index_bits--;
1012    index = extract_bits(block, bit_offset, index_bits);
1013 
1014    for (component = 0; component < 3; component++) {
1015       value = interpolate(endpoints[subset_num * 2][component],
1016                           endpoints[subset_num * 2 + 1][component],
1017                           index,
1018                           mode->n_index_bits);
1019 
1020       if (is_signed)
1021          value = finish_signed_unquantize(value);
1022       else
1023          value = finish_unsigned_unquantize(value);
1024 
1025       result[component] = _mesa_half_to_float(value);
1026    }
1027 
1028    result[3] = 1.0f;
1029 }
1030 
1031 static void
decompress_rgb_float_block(unsigned src_width,unsigned src_height,const uint8_t * block,float * dst_row,unsigned dst_rowstride,bool is_signed)1032 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1033                            const uint8_t *block,
1034                            float *dst_row, unsigned dst_rowstride,
1035                            bool is_signed)
1036 {
1037    int mode_num;
1038    const struct bptc_float_mode *mode;
1039    int bit_offset_head, bit_offset;
1040    int partition_num;
1041    int subset_num;
1042    int index_bits;
1043    int index;
1044    int anchors_before_texel;
1045    int32_t endpoints[2 * 2][3];
1046    uint32_t subsets;
1047    int n_subsets;
1048    int component;
1049    int32_t value;
1050    unsigned x, y;
1051 
1052    if (block[0] & 0x2) {
1053       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1054       bit_offset_head = 5;
1055    } else {
1056       mode_num = block[0] & 3;
1057       bit_offset_head = 2;
1058    }
1059 
1060    mode = bptc_float_modes + mode_num;
1061 
1062    if (mode->reserved) {
1063       for(y = 0; y < src_height; y += 1) {
1064          float *result = dst_row;
1065          memset(result, 0, sizeof result[0] * 4 * src_width);
1066          for(x = 0; x < src_width; x += 1) {
1067             result[3] = 1.0f;
1068             result += 4;
1069          }
1070          dst_row += dst_rowstride / sizeof dst_row[0];
1071       }
1072       return;
1073    }
1074 
1075    bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1076                                         endpoints, is_signed);
1077 
1078    if (mode->n_partition_bits) {
1079       partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1080       bit_offset_head += mode->n_partition_bits;
1081 
1082       subsets = partition_table1[partition_num];
1083       n_subsets = 2;
1084    } else {
1085       partition_num = 0;
1086       subsets = 0;
1087       n_subsets = 1;
1088    }
1089 
1090    for(y = 0; y < src_height; y += 1) {
1091       float *result = dst_row;
1092       for(x = 0; x < src_width; x += 1) {
1093          int texel;
1094 
1095          bit_offset = bit_offset_head;
1096 
1097          texel = x + y * 4;
1098 
1099          anchors_before_texel =
1100             count_anchors_before_texel(n_subsets, partition_num, texel);
1101 
1102          /* Calculate the offset to the primary index for this texel */
1103          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1104 
1105          subset_num = (subsets >> (texel * 2)) & 3;
1106 
1107          index_bits = mode->n_index_bits;
1108          if (is_anchor(n_subsets, partition_num, texel))
1109             index_bits--;
1110          index = extract_bits(block, bit_offset, index_bits);
1111 
1112          for (component = 0; component < 3; component++) {
1113             value = interpolate(endpoints[subset_num * 2][component],
1114                                 endpoints[subset_num * 2 + 1][component],
1115                                 index,
1116                                 mode->n_index_bits);
1117 
1118             if (is_signed)
1119                value = finish_signed_unquantize(value);
1120             else
1121                value = finish_unsigned_unquantize(value);
1122 
1123             result[component] = _mesa_half_to_float(value);
1124          }
1125 
1126          result[3] = 1.0f;
1127          result += 4;
1128       }
1129       dst_row += dst_rowstride / sizeof dst_row[0];
1130    }
1131 }
1132 
1133 static void
decompress_rgb_float(int width,int height,const uint8_t * src,int src_rowstride,float * dst,int dst_rowstride,bool is_signed)1134 decompress_rgb_float(int width, int height,
1135                       const uint8_t *src, int src_rowstride,
1136                       float *dst, int dst_rowstride, bool is_signed)
1137 {
1138    int src_row_diff;
1139    int y, x;
1140 
1141    if (src_rowstride >= width * 4)
1142       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1143    else
1144       src_row_diff = 0;
1145 
1146    for (y = 0; y < height; y += BLOCK_SIZE) {
1147       for (x = 0; x < width; x += BLOCK_SIZE) {
1148          decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1149                                     MIN2(height - y, BLOCK_SIZE),
1150                                     src,
1151                                     (dst + x * 4 +
1152                                      (y * dst_rowstride / sizeof dst[0])),
1153                                     dst_rowstride, is_signed);
1154          src += BLOCK_BYTES;
1155       }
1156       src += src_row_diff;
1157    }
1158 }
1159 
1160 static void
decompress_rgb_fp16_block(unsigned src_width,unsigned src_height,const uint8_t * block,uint16_t * dst_row,unsigned dst_rowstride,bool is_signed)1161 decompress_rgb_fp16_block(unsigned src_width, unsigned src_height,
1162                           const uint8_t *block,
1163                           uint16_t *dst_row, unsigned dst_rowstride,
1164                           bool is_signed)
1165 {
1166    int mode_num;
1167    const struct bptc_float_mode *mode;
1168    int bit_offset_head, bit_offset;
1169    int partition_num;
1170    int subset_num;
1171    int index_bits;
1172    int index;
1173    int anchors_before_texel;
1174    int32_t endpoints[2 * 2][3];
1175    uint32_t subsets;
1176    int n_subsets;
1177    int component;
1178    int32_t value;
1179    unsigned x, y;
1180 
1181    if (block[0] & 0x2) {
1182       mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1183       bit_offset_head = 5;
1184    } else {
1185       mode_num = block[0] & 3;
1186       bit_offset_head = 2;
1187    }
1188 
1189    mode = bptc_float_modes + mode_num;
1190 
1191    if (mode->reserved) {
1192       for(y = 0; y < src_height; y += 1) {
1193          uint16_t *result = dst_row;
1194          memset(result, 0, sizeof result[0] * 4 * src_width);
1195          for(x = 0; x < src_width; x += 1) {
1196             result[3] = 1.0f;
1197             result += 4;
1198          }
1199          dst_row += dst_rowstride / sizeof dst_row[0];
1200       }
1201       return;
1202    }
1203 
1204    bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1205                                         endpoints, is_signed);
1206 
1207    if (mode->n_partition_bits) {
1208       partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1209       bit_offset_head += mode->n_partition_bits;
1210 
1211       subsets = partition_table1[partition_num];
1212       n_subsets = 2;
1213    } else {
1214       partition_num = 0;
1215       subsets = 0;
1216       n_subsets = 1;
1217    }
1218 
1219    for(y = 0; y < src_height; y += 1) {
1220       uint16_t *result = dst_row;
1221       for(x = 0; x < src_width; x += 1) {
1222          int texel;
1223 
1224          bit_offset = bit_offset_head;
1225 
1226          texel = x + y * 4;
1227 
1228          anchors_before_texel =
1229             count_anchors_before_texel(n_subsets, partition_num, texel);
1230 
1231          /* Calculate the offset to the primary index for this texel */
1232          bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1233 
1234          subset_num = (subsets >> (texel * 2)) & 3;
1235 
1236          index_bits = mode->n_index_bits;
1237          if (is_anchor(n_subsets, partition_num, texel))
1238             index_bits--;
1239          index = extract_bits(block, bit_offset, index_bits);
1240 
1241          for (component = 0; component < 3; component++) {
1242             value = interpolate(endpoints[subset_num * 2][component],
1243                                 endpoints[subset_num * 2 + 1][component],
1244                                 index,
1245                                 mode->n_index_bits);
1246 
1247             if (is_signed)
1248                value = finish_signed_unquantize(value);
1249             else
1250                value = finish_unsigned_unquantize(value);
1251 
1252             result[component] = (uint16_t)value;
1253          }
1254 
1255          result[3] = FP16_ONE;
1256          result += 4;
1257       }
1258       dst_row += dst_rowstride / sizeof dst_row[0];
1259    }
1260 }
1261 
1262 static void
decompress_rgb_fp16(int width,int height,const uint8_t * src,int src_rowstride,uint16_t * dst,int dst_rowstride,bool is_signed)1263 decompress_rgb_fp16(int width, int height,
1264                     const uint8_t *src, int src_rowstride,
1265                     uint16_t *dst, int dst_rowstride, bool is_signed)
1266 {
1267    int src_row_diff;
1268    int y, x;
1269 
1270    if (src_rowstride >= width * 4)
1271       src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1272    else
1273       src_row_diff = 0;
1274 
1275    for (y = 0; y < height; y += BLOCK_SIZE) {
1276       for (x = 0; x < width; x += BLOCK_SIZE) {
1277          decompress_rgb_fp16_block(MIN2(width - x, BLOCK_SIZE),
1278                                    MIN2(height - y, BLOCK_SIZE),
1279                                    src,
1280                                    (dst + x * 4 +
1281                                     (y * dst_rowstride / sizeof dst[0])),
1282                                    dst_rowstride, is_signed);
1283          src += BLOCK_BYTES;
1284       }
1285       src += src_row_diff;
1286    }
1287 }
1288 
1289 static void
write_bits(struct bit_writer * writer,int n_bits,int value)1290 write_bits(struct bit_writer *writer, int n_bits, int value)
1291 {
1292    do {
1293       if (n_bits + writer->pos >= 8) {
1294          *(writer->dst++) = writer->buf | (value << writer->pos);
1295          writer->buf = 0;
1296          value >>= (8 - writer->pos);
1297          n_bits -= (8 - writer->pos);
1298          writer->pos = 0;
1299       } else {
1300          writer->buf |= value << writer->pos;
1301          writer->pos += n_bits;
1302          break;
1303       }
1304    } while (n_bits > 0);
1305 }
1306 
/*
 * Compute the mean luminance and mean alpha of a block of 4-byte RGBA texels.
 *
 * Luminance is taken as the plain sum R + G + B.  Consecutive texel rows are
 * src_rowstride bytes apart.  Both averages use truncating integer division
 * by the texel count.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   int luminance_total = 0;
   int alpha_total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      const uint8_t *texel = src + row * src_rowstride;

      for (col = 0; col < width; col++, texel += 4) {
         luminance_total += texel[0] + texel[1] + texel[2];
         alpha_total += texel[3];
      }
   }

   *average_luminance = luminance_total / n_texels;
   *average_alpha = alpha_total / n_texels;
}
1327 
/*
 * Choose two RGBA endpoints for a block of 4-byte RGBA texels.
 *
 * The colour and alpha endpoints are picked independently.  Texels whose
 * luminance (R + G + B) is below average_luminance are averaged into colour
 * endpoint 0 and the remaining texels into colour endpoint 1.  Alpha values
 * are split the same way around average_alpha.  When one side of a split is
 * empty, both endpoints are set to the average over all texels.
 *
 * Afterwards each endpoint pair is swapped if necessary so that the first
 * texel of the block lies on the same side of the midpoint as endpoint 0,
 * which keeps the most-significant bit of the first index zero.
 *
 * width, height:   size of the block in texels
 * src:             first texel of the block
 * src_rowstride:   bytes between consecutive texel rows
 * endpoints:       receives endpoints[0] and endpoints[1] as RGBA
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* Partition on the alpha channel itself (p[3]), not on blue */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      /* All texels fell on one side: use a single averaged colour */
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      /* All texels fell on one side: use a single averaged alpha */
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1424 
1425 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1426 write_rgb_indices_unorm(struct bit_writer *writer,
1427                         int src_width, int src_height,
1428                         const uint8_t *src, int src_rowstride,
1429                         uint8_t endpoints[][4])
1430 {
1431    int luminance;
1432    int endpoint_luminances[2];
1433    int endpoint;
1434    int index;
1435    int y, x;
1436 
1437    for (endpoint = 0; endpoint < 2; endpoint++) {
1438       endpoint_luminances[endpoint] =
1439          endpoints[endpoint][0] +
1440          endpoints[endpoint][1] +
1441          endpoints[endpoint][2];
1442    }
1443 
1444    /* If the endpoints have the same luminance then we'll just use index 0 for
1445     * all of the texels */
1446    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1447       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1448       return;
1449    }
1450 
1451    for (y = 0; y < src_height; y++) {
1452       for (x = 0; x < src_width; x++) {
1453          luminance = src[0] + src[1] + src[2];
1454 
1455          index = ((luminance - endpoint_luminances[0]) * 3 /
1456                   (endpoint_luminances[1] - endpoint_luminances[0]));
1457          if (index < 0)
1458             index = 0;
1459          else if (index > 3)
1460             index = 3;
1461 
1462          assert(x != 0 || y != 0 || index < 2);
1463 
1464          write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1465 
1466          src += 4;
1467       }
1468 
1469       /* Pad the indices out to the block size */
1470       if (src_width < BLOCK_SIZE)
1471          write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1472 
1473       src += src_rowstride - src_width * 4;
1474    }
1475 
1476    /* Pad the indices out to the block size */
1477    if (src_height < BLOCK_SIZE)
1478       write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1479 }
1480 
1481 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1482 write_alpha_indices_unorm(struct bit_writer *writer,
1483                           int src_width, int src_height,
1484                           const uint8_t *src, int src_rowstride,
1485                           uint8_t endpoints[][4])
1486 {
1487    int index;
1488    int y, x;
1489 
1490    /* If the endpoints have the same alpha then we'll just use index 0 for
1491     * all of the texels */
1492    if (endpoints[0][3] == endpoints[1][3]) {
1493       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1494       return;
1495    }
1496 
1497    for (y = 0; y < src_height; y++) {
1498       for (x = 0; x < src_width; x++) {
1499          index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1500                   ((int) endpoints[1][3] - endpoints[0][3]));
1501          if (index < 0)
1502             index = 0;
1503          else if (index > 7)
1504             index = 7;
1505 
1506          assert(x != 0 || y != 0 || index < 4);
1507 
1508          /* The first index has one less bit */
1509          write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1510 
1511          src += 4;
1512       }
1513 
1514       /* Pad the indices out to the block size */
1515       if (src_width < BLOCK_SIZE)
1516          write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1517 
1518       src += src_rowstride - src_width * 4;
1519    }
1520 
1521    /* Pad the indices out to the block size */
1522    if (src_height < BLOCK_SIZE)
1523       write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1524 }
1525 
1526 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1527 compress_rgba_unorm_block(int src_width, int src_height,
1528                           const uint8_t *src, int src_rowstride,
1529                           uint8_t *dst)
1530 {
1531    int average_luminance, average_alpha;
1532    uint8_t endpoints[2][4];
1533    struct bit_writer writer;
1534    int component, endpoint;
1535 
1536    get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1537                                      &average_luminance, &average_alpha);
1538    get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1539                             average_luminance, average_alpha,
1540                             endpoints);
1541 
1542    writer.dst = dst;
1543    writer.pos = 0;
1544    writer.buf = 0;
1545 
1546    write_bits(&writer, 5, 0x10); /* mode 4 */
1547    write_bits(&writer, 2, 0); /* rotation 0 */
1548    write_bits(&writer, 1, 0); /* index selection bit */
1549 
1550    /* Write the color endpoints */
1551    for (component = 0; component < 3; component++)
1552       for (endpoint = 0; endpoint < 2; endpoint++)
1553          write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1554 
1555    /* Write the alpha endpoints */
1556    for (endpoint = 0; endpoint < 2; endpoint++)
1557       write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1558 
1559    write_rgb_indices_unorm(&writer,
1560                            src_width, src_height,
1561                            src, src_rowstride,
1562                            endpoints);
1563    write_alpha_indices_unorm(&writer,
1564                              src_width, src_height,
1565                              src, src_rowstride,
1566                              endpoints);
1567 }
1568 
1569 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1570 compress_rgba_unorm(int width, int height,
1571                     const uint8_t *src, int src_rowstride,
1572                     uint8_t *dst, int dst_rowstride)
1573 {
1574    int dst_row_diff;
1575    int y, x;
1576 
1577    if (dst_rowstride >= width * 4)
1578       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1579    else
1580       dst_row_diff = 0;
1581 
1582    for (y = 0; y < height; y += BLOCK_SIZE) {
1583       for (x = 0; x < width; x += BLOCK_SIZE) {
1584          compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1585                                    MIN2(height - y, BLOCK_SIZE),
1586                                    src + x * 4 + y * src_rowstride,
1587                                    src_rowstride,
1588                                    dst);
1589          dst += BLOCK_BYTES;
1590       }
1591       dst += dst_row_diff;
1592    }
1593 }
1594 
/*
 * Return the mean luminance of a block of 3-float RGB texels, where
 * luminance is the plain sum R + G + B.  src_rowstride is in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, src += 3)
         total += src[0] + src[1] + src[2];

      /* Step over whatever the stride leaves after the texels of this row */
      src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1612 
/*
 * Clamp value to [-65504, 65504] when is_signed is set and to [0, 65504]
 * otherwise.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper = 65504.0f;
   const float lower = is_signed ? -65504.0f : 0.0f;

   if (value > upper)
      return upper;

   if (value < lower)
      return lower;

   return value;
}
1631 
/*
 * Choose two RGB endpoints for a block of 3-float RGB texels.
 *
 * Texels whose luminance (R + G + B) is below average_luminance are averaged
 * into endpoint 0 and the remaining texels into endpoint 1.  If either group
 * is empty, both endpoints are the average of all texels.  The endpoints are
 * then clamped with clamp_value() and swapped if necessary so that the first
 * texel of the block lies on the same side of the midpoint as endpoint 0.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float sums[2][3];
   float endpoint_luminances[2];
   float midpoint;
   float spare[3];
   int n_below_average = 0;
   int row, col, e, c;

   memset(sums, 0, sizeof sums);

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         const float luminance = texel[0] + texel[1] + texel[2];

         e = (luminance < average_luminance) ? 0 : 1;
         if (e == 0)
            n_below_average++;

         for (c = 0; c < 3; c++)
            sums[e][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_below_average == 0 || n_below_average == n_texels) {
      /* One group is empty: fall back to a single averaged colour */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = sums[0][c] / n_below_average;
         endpoints[1][c] = sums[1][c] / (n_texels - n_below_average);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (e = 0; e < 2; e++) {
      for (c = 0; c < 3; c++)
         endpoints[e][c] = clamp_value(endpoints[e][c], is_signed);
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */
   for (e = 0; e < 2; e++) {
      endpoint_luminances[e] =
         endpoints[e][0] + endpoints[e][1] + endpoints[e][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(spare, endpoints[0], sizeof spare);
      memcpy(endpoints[0], endpoints[1], sizeof spare);
      memcpy(endpoints[1], spare, sizeof spare);
   }
}
1707 
1708 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1709 write_rgb_indices_float(struct bit_writer *writer,
1710                         int src_width, int src_height,
1711                         const float *src, int src_rowstride,
1712                         float endpoints[][3])
1713 {
1714    float luminance;
1715    float endpoint_luminances[2];
1716    int endpoint;
1717    int index;
1718    int y, x;
1719 
1720    for (endpoint = 0; endpoint < 2; endpoint++) {
1721       endpoint_luminances[endpoint] =
1722          endpoints[endpoint][0] +
1723          endpoints[endpoint][1] +
1724          endpoints[endpoint][2];
1725    }
1726 
1727    /* If the endpoints have the same luminance then we'll just use index 0 for
1728     * all of the texels */
1729    if (endpoint_luminances[0] == endpoint_luminances[1]) {
1730       write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1731       return;
1732    }
1733 
1734    for (y = 0; y < src_height; y++) {
1735       for (x = 0; x < src_width; x++) {
1736          luminance = src[0] + src[1] + src[2];
1737 
1738          index = ((luminance - endpoint_luminances[0]) * 15 /
1739                   (endpoint_luminances[1] - endpoint_luminances[0]));
1740          if (index < 0)
1741             index = 0;
1742          else if (index > 15)
1743             index = 15;
1744 
1745          assert(x != 0 || y != 0 || index < 8);
1746 
1747          write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1748 
1749          src += 3;
1750       }
1751 
1752       /* Pad the indices out to the block size */
1753       if (src_width < BLOCK_SIZE)
1754          write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1755 
1756       src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1757    }
1758 
1759    /* Pad the indices out to the block size */
1760    if (src_height < BLOCK_SIZE)
1761       write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1762 }
1763 
/*
 * Converts one float endpoint component into the integer value that is
 * stored in the block (the caller writes it with 10 bits).
 *
 * Unsigned: values that are zero or negative become 0; anything else is
 * converted to a half float whose bit pattern is scaled by 64/31 and then
 * shifted right by 6.
 *
 * Signed: the value is converted to a half float, the magnitude bits are
 * scaled by 32/31 and shifted right by 6, and a negative input is returned
 * as the negated magnitude masked to 10 bits.
 *
 * NOTE(review): the 31-based scaling looks like the inverse of the decoder's
 * unquantisation step - confirm against the decoder.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   int bits;
   int magnitude;
   bool negative;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      bits = _mesa_float_to_half(value);

      return (64 * bits / 31) >> 6;
   }

   bits = _mesa_float_to_half(value);

   /* Split the half float into its sign and magnitude */
   negative = (bits & 0x8000) != 0;
   magnitude = negative ? (bits & 0x7fff) : bits;

   magnitude = (32 * magnitude / 31) >> 6;

   if (negative)
      return -magnitude & ((1 << 10) - 1);

   return magnitude;
}
1793 
1794 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1795 compress_rgb_float_block(int src_width, int src_height,
1796                          const float *src, int src_rowstride,
1797                          uint8_t *dst,
1798                          bool is_signed)
1799 {
1800    float average_luminance;
1801    float endpoints[2][3];
1802    struct bit_writer writer;
1803    int component, endpoint;
1804    int endpoint_value;
1805 
1806    average_luminance =
1807       get_average_luminance_float(src_width, src_height, src, src_rowstride);
1808    get_endpoints_float(src_width, src_height, src, src_rowstride,
1809                        average_luminance, endpoints, is_signed);
1810 
1811    writer.dst = dst;
1812    writer.pos = 0;
1813    writer.buf = 0;
1814 
1815    write_bits(&writer, 5, 3); /* mode 3 */
1816 
1817    /* Write the endpoints */
1818    for (endpoint = 0; endpoint < 2; endpoint++) {
1819       for (component = 0; component < 3; component++) {
1820          endpoint_value =
1821             get_endpoint_value(endpoints[endpoint][component], is_signed);
1822          write_bits(&writer, 10, endpoint_value);
1823       }
1824    }
1825 
1826    write_rgb_indices_float(&writer,
1827                            src_width, src_height,
1828                            src, src_rowstride,
1829                            endpoints);
1830 }
1831 
1832 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1833 compress_rgb_float(int width, int height,
1834                    const float *src, int src_rowstride,
1835                    uint8_t *dst, int dst_rowstride,
1836                    bool is_signed)
1837 {
1838    int dst_row_diff;
1839    int y, x;
1840 
1841    if (dst_rowstride >= width * 4)
1842       dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1843    else
1844       dst_row_diff = 0;
1845 
1846    for (y = 0; y < height; y += BLOCK_SIZE) {
1847       for (x = 0; x < width; x += BLOCK_SIZE) {
1848          compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1849                                   MIN2(height - y, BLOCK_SIZE),
1850                                   src + x * 3 +
1851                                   y * src_rowstride / sizeof (float),
1852                                   src_rowstride,
1853                                   dst,
1854                                   is_signed);
1855          dst += BLOCK_BYTES;
1856       }
1857       dst += dst_row_diff;
1858    }
1859 }
1860 
1861 #endif
1862