1 /*
2 * Copyright (C) 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * Included by texcompress_bptc and gallium to define BPTC decoding routines.
26 */
27
28 #ifndef TEXCOMPRESS_BPTC_TMP_H
29 #define TEXCOMPRESS_BPTC_TMP_H
30
31 #include "util/bitscan.h"
32 #include "util/format_srgb.h"
33 #include "util/half_float.h"
34 #include "util/u_math.h"
35
36 #define BLOCK_SIZE 4
37 #define N_PARTITIONS 64
38 #define BLOCK_BYTES 16
39
/* Static layout description of one BPTC UNORM block mode.
 *
 * The field order is significant: the bptc_unorm_modes table fills this
 * struct with positional initializers.
 */
struct bptc_unorm_mode {
   /* Number of subsets (endpoint pairs) in the block: 1, 2 or 3. */
   int n_subsets;
   /* Width of the partition number field; 0 when there is none. */
   int n_partition_bits;
   /* True when a 2-bit channel rotation field follows the partition. */
   bool has_rotation_bits;
   /* True when a 1-bit index selection field is present. */
   bool has_index_selection_bit;
   /* Stored bits per color endpoint component (before any p-bit). */
   int n_color_bits;
   /* Stored bits per alpha endpoint; 0 means alpha is forced to 255. */
   int n_alpha_bits;
   /* True when every endpoint carries its own p-bit. */
   bool has_endpoint_pbits;
   /* True when the two endpoints of a subset share a single p-bit. */
   bool has_shared_pbits;
   /* Bits per texel in the primary index array. */
   int n_index_bits;
   /* Bits per texel in the secondary index array; 0 when absent. */
   int n_secondary_index_bits;
};
52
/* Describes one run of consecutive block bits that contributes to a single
 * endpoint component of a BPTC float block.
 *
 * The field order is significant: the bptc_float_modes table uses positional
 * initializers.
 */
struct bptc_float_bitfield {
   /* Destination endpoint number; -1 terminates a list of bitfields. */
   int8_t endpoint;
   /* Destination color component (0, 1 or 2). */
   uint8_t component;
   /* Bit position within the destination value of the field's lowest bit. */
   uint8_t offset;
   /* Number of bits consumed from the block. */
   uint8_t n_bits;
   /* True when the bits are stored in reversed order in the block. */
   bool reverse;
};
60
61 struct bptc_float_mode {
62 bool reserved;
63 bool transformed_endpoints;
64 int n_partition_bits;
65 int n_endpoint_bits;
66 int n_index_bits;
67 int n_delta_bits[3];
68 struct bptc_float_bitfield bitfields[24];
69 };
70
/* State for emitting a stream of bits into a byte buffer.
 *
 * NOTE(review): the code that drives this struct is not visible in this part
 * of the file; the field descriptions below are inferred from the names and
 * should be confirmed against the writer routines.
 */
struct bit_writer {
   /* Presumably the partially assembled byte not yet stored to dst. */
   uint8_t buf;
   /* Presumably the number of bits already placed in buf. */
   int pos;
   /* Output pointer the bytes are written through. */
   uint8_t *dst;
};
76
77 static const struct bptc_unorm_mode
78 bptc_unorm_modes[] = {
79 /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
80 /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
81 /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
82 /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
83 /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
84 /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
85 /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
86 /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
87 };
88
89 static const struct bptc_float_mode
90 bptc_float_modes[] = {
91 /* 00 */
92 { false, true, 5, 10, 3, { 5, 5, 5 },
93 { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
94 { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
95 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
96 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
97 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
98 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
99 { 3, 2, 3, 1, false },
100 { -1 } }
101 },
102 /* 01 */
103 { false, true, 5, 7, 3, { 6, 6, 6 },
104 { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
105 { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
106 { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
107 { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
108 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
109 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
110 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
111 { 2, 0, 0, 6, false },
112 { 3, 0, 0, 6, false },
113 { -1 } }
114 },
115 /* 00010 */
116 { false, true, 5, 11, 3, { 5, 4, 4 },
117 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
118 { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
119 { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
120 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
121 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
122 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
123 { -1 } }
124 },
125 /* 00011 */
126 { false, false, 0, 10, 4, { 10, 10, 10 },
127 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
128 { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
129 { -1 } }
130 },
131 /* 00110 */
132 { false, true, 5, 11, 3, { 4, 5, 4 },
133 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
134 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
135 { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
136 { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
137 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
138 { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
139 { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
140 { -1 } }
141 },
142 /* 00111 */
143 { false, true, 0, 11, 4, { 9, 9, 9 },
144 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
145 { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
146 { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
147 { -1 } }
148 },
149 /* 01010 */
150 { false, true, 5, 11, 3, { 4, 4, 5 },
151 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
152 { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
153 { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
154 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
155 { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
156 { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
157 { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
158 { -1 } }
159 },
160 /* 01011 */
161 { false, true, 0, 12, 4, { 8, 8, 8 },
162 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
163 { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
164 { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
165 { -1 } }
166 },
167 /* 01110 */
168 { false, true, 5, 9, 3, { 5, 5, 5 },
169 { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
170 { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
171 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
172 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
173 { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
174 { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
175 { 3, 2, 3, 1, false },
176 { -1 } }
177 },
178 /* 01111 */
179 { false, true, 0, 16, 4, { 4, 4, 4 },
180 { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
181 { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
182 { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
183 { -1 } }
184 },
185 /* 10010 */
186 { false, true, 5, 8, 3, { 6, 5, 5 },
187 { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
188 { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
189 { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
190 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
191 { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
192 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
193 { 3, 0, 0, 6, false },
194 { -1 } }
195 },
196 /* 10011 */
197 { true /* reserved */ },
198 /* 10110 */
199 { false, true, 5, 8, 3, { 5, 6, 5 },
200 { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
201 { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
202 { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
203 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
204 { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
205 { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
206 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
207 { -1 } }
208 },
209 /* 10111 */
210 { true /* reserved */ },
211 /* 11010 */
212 { false, true, 5, 8, 3, { 5, 5, 6 },
213 { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
214 { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
215 { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
216 { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
217 { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
218 { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
219 { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
220 { -1 } }
221 },
222 /* 11011 */
223 { true /* reserved */ },
224 /* 11110 */
225 { false, false, 5, 6, 3, { 6, 6, 6 },
226 { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
227 { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
228 { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
229 { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
230 { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
231 { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
232 { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
233 { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
234 { -1 } }
235 },
236 /* 11111 */
237 { true /* reserved */ },
238 };
239
240 /* This partition table is used when the mode has two subsets. Each
241 * partition is represented by a 32-bit value which gives 2 bits per texel
242 * within the block. The value of the two bits represents which subset to use
243 * (0 or 1).
244 */
245 static const uint32_t
246 partition_table1[N_PARTITIONS] = {
247 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
248 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
249 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
250 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
251 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
252 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
253 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
254 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
255 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
256 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
257 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
258 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
259 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
260 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
261 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
262 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
263 };
264
265 /* This partition table is used when the mode has three subsets. In this case
266 * the values can be 0, 1 or 2.
267 */
268 static const uint32_t
269 partition_table2[N_PARTITIONS] = {
270 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
271 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
272 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
273 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
274 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
275 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
276 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
277 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
278 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
279 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
280 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
281 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
282 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
283 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
284 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
285 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
286 };
287
288 static const uint8_t
289 anchor_indices[][N_PARTITIONS] = {
290 /* Anchor index values for the second subset of two-subset partitioning */
291 {
292 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
293 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
294 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
295 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
296 },
297
298 /* Anchor index values for the second subset of three-subset partitioning */
299 {
300 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
301 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
302 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
303 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
304 },
305
306 /* Anchor index values for the third subset of three-subset
307 * partitioning
308 */
309 {
310 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
311 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
312 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
313 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
314 }
315 };
316
/* Reads an n_bits wide field from block, starting at bit position offset.
 *
 * Bits are numbered from the least-significant bit of block[0]; the field
 * may straddle any number of byte boundaries. The first bit read ends up in
 * bit 0 of the result.
 *
 * A width of zero or less yields 0 without reading from the block. The
 * caller must make sure that the requested field lies inside the block.
 */
static int
extract_bits(const uint8_t *block,
             int offset,
             int n_bits)
{
   int byte_index = offset / 8;
   int bit_index = offset % 8;
   int result = 0;
   int shift = 0;

   while (n_bits > 0) {
      /* Consume whatever is left of the current byte, capped at the number
       * of bits still wanted.
       */
      const int available = 8 - bit_index;
      const int take = n_bits < available ? n_bits : available;

      result |= ((block[byte_index] >> bit_index) & ((1 << take) - 1)) << shift;

      n_bits -= take;
      shift += take;

      /* Every byte after the first is read from its lowest bit. */
      byte_index++;
      bit_index = 0;
   }

   return result;
}
343
/* Widens an n_bits value to a full byte: the value is moved to the top of
 * the byte and its most-significant bits are replicated into the vacated
 * low bits. n_bits is expected to be in the range the unorm modes produce
 * (at least 4, at most 8).
 */
static uint8_t
expand_component(uint8_t byte,
                 int n_bits)
{
   const int pad_bits = 8 - n_bits;
   const int shifted_up = byte << pad_bits;
   const int replicated = byte >> (n_bits - pad_bits);

   return (uint8_t) (shifted_up | replicated);
}
353
354 static int
extract_unorm_endpoints(const struct bptc_unorm_mode * mode,const uint8_t * block,int bit_offset,uint8_t endpoints[][4])355 extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
356 const uint8_t *block,
357 int bit_offset,
358 uint8_t endpoints[][4])
359 {
360 int component;
361 int subset;
362 int endpoint;
363 int pbit;
364 int n_components;
365
366 /* Extract each color component */
367 for (component = 0; component < 3; component++) {
368 for (subset = 0; subset < mode->n_subsets; subset++) {
369 for (endpoint = 0; endpoint < 2; endpoint++) {
370 endpoints[subset * 2 + endpoint][component] =
371 extract_bits(block, bit_offset, mode->n_color_bits);
372 bit_offset += mode->n_color_bits;
373 }
374 }
375 }
376
377 /* Extract the alpha values */
378 if (mode->n_alpha_bits > 0) {
379 for (subset = 0; subset < mode->n_subsets; subset++) {
380 for (endpoint = 0; endpoint < 2; endpoint++) {
381 endpoints[subset * 2 + endpoint][3] =
382 extract_bits(block, bit_offset, mode->n_alpha_bits);
383 bit_offset += mode->n_alpha_bits;
384 }
385 }
386
387 n_components = 4;
388 } else {
389 for (subset = 0; subset < mode->n_subsets; subset++)
390 for (endpoint = 0; endpoint < 2; endpoint++)
391 endpoints[subset * 2 + endpoint][3] = 255;
392
393 n_components = 3;
394 }
395
396 /* Add in the p-bits */
397 if (mode->has_endpoint_pbits) {
398 for (subset = 0; subset < mode->n_subsets; subset++) {
399 for (endpoint = 0; endpoint < 2; endpoint++) {
400 pbit = extract_bits(block, bit_offset, 1);
401 bit_offset += 1;
402
403 for (component = 0; component < n_components; component++) {
404 endpoints[subset * 2 + endpoint][component] <<= 1;
405 endpoints[subset * 2 + endpoint][component] |= pbit;
406 }
407 }
408 }
409 } else if (mode->has_shared_pbits) {
410 for (subset = 0; subset < mode->n_subsets; subset++) {
411 pbit = extract_bits(block, bit_offset, 1);
412 bit_offset += 1;
413
414 for (endpoint = 0; endpoint < 2; endpoint++) {
415 for (component = 0; component < n_components; component++) {
416 endpoints[subset * 2 + endpoint][component] <<= 1;
417 endpoints[subset * 2 + endpoint][component] |= pbit;
418 }
419 }
420 }
421 }
422
423 /* Expand the n-bit values to a byte */
424 for (subset = 0; subset < mode->n_subsets; subset++) {
425 for (endpoint = 0; endpoint < 2; endpoint++) {
426 for (component = 0; component < 3; component++) {
427 endpoints[subset * 2 + endpoint][component] =
428 expand_component(endpoints[subset * 2 + endpoint][component],
429 mode->n_color_bits +
430 mode->has_endpoint_pbits +
431 mode->has_shared_pbits);
432 }
433
434 if (mode->n_alpha_bits > 0) {
435 endpoints[subset * 2 + endpoint][3] =
436 expand_component(endpoints[subset * 2 + endpoint][3],
437 mode->n_alpha_bits +
438 mode->has_endpoint_pbits +
439 mode->has_shared_pbits);
440 }
441 }
442 }
443
444 return bit_offset;
445 }
446
447 static bool
is_anchor(int n_subsets,int partition_num,int texel)448 is_anchor(int n_subsets,
449 int partition_num,
450 int texel)
451 {
452 if (texel == 0)
453 return true;
454
455 switch (n_subsets) {
456 case 1:
457 return false;
458 case 2:
459 return anchor_indices[0][partition_num] == texel;
460 case 3:
461 return (anchor_indices[1][partition_num] == texel ||
462 anchor_indices[2][partition_num] == texel);
463 default:
464 assert(false);
465 return false;
466 }
467 }
468
469 static int
count_anchors_before_texel(int n_subsets,int partition_num,int texel)470 count_anchors_before_texel(int n_subsets,
471 int partition_num,
472 int texel)
473 {
474 int count = 1;
475
476 if (texel == 0)
477 return 0;
478
479 switch (n_subsets) {
480 case 1:
481 break;
482 case 2:
483 if (texel > anchor_indices[0][partition_num])
484 count++;
485 break;
486 case 3:
487 if (texel > anchor_indices[1][partition_num])
488 count++;
489 if (texel > anchor_indices[2][partition_num])
490 count++;
491 break;
492 default:
493 assert(false);
494 return 0;
495 }
496
497 return count;
498 }
499
/* Blends the two endpoint values a and b using the fixed 6-bit weight that
 * belongs to index. index_bits (2, 3 or 4) selects the weight table and
 * index must be below 1 << index_bits.
 */
static int32_t
interpolate(int32_t a, int32_t b,
            int index,
            int index_bits)
{
   static const uint8_t two_bit_weights[] = { 0, 21, 43, 64 };
   static const uint8_t three_bit_weights[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
   static const uint8_t four_bit_weights[] =
      { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
   /* Indexed by index_bits; there are no 0- or 1-bit indices */
   static const uint8_t *const weight_tables[] = {
      NULL, NULL, two_bit_weights, three_bit_weights, four_bit_weights
   };
   const uint8_t *table = weight_tables[index_bits];
   const int w = table[index];

   /* Weights are in 1/64ths; the + 32 rounds to nearest */
   return ((64 - w) * a + w * b + 32) >> 6;
}
518
/* Undoes the channel rotation of a decoded texel: for a non-zero rotation
 * the alpha value is exchanged with color channel rotation - 1. A rotation
 * of 0 leaves the texel untouched.
 */
static void
apply_rotation(int rotation,
               uint8_t *result)
{
   if (rotation != 0) {
      uint8_t *channel = &result[rotation - 1];
      const uint8_t alpha = result[3];

      result[3] = *channel;
      *channel = alpha;
   }
}
534
535 static void
fetch_rgba_unorm_from_block(const uint8_t * block,uint8_t * result,int texel)536 fetch_rgba_unorm_from_block(const uint8_t *block,
537 uint8_t *result,
538 int texel)
539 {
540 int mode_num = ffs(block[0]);
541 const struct bptc_unorm_mode *mode;
542 int bit_offset, secondary_bit_offset;
543 int partition_num;
544 int subset_num;
545 int rotation;
546 int index_selection;
547 int index_bits;
548 int indices[2];
549 int index;
550 int anchors_before_texel;
551 bool anchor;
552 uint8_t endpoints[3 * 2][4];
553 uint32_t subsets;
554 int component;
555
556 if (mode_num == 0) {
557 /* According to the spec this mode is reserved and shouldn't be used. */
558 memset(result, 0, 4);
559 return;
560 }
561
562 mode = bptc_unorm_modes + mode_num - 1;
563 bit_offset = mode_num;
564
565 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
566 bit_offset += mode->n_partition_bits;
567
568 switch (mode->n_subsets) {
569 case 1:
570 subsets = 0;
571 break;
572 case 2:
573 subsets = partition_table1[partition_num];
574 break;
575 case 3:
576 subsets = partition_table2[partition_num];
577 break;
578 default:
579 assert(false);
580 return;
581 }
582
583 if (mode->has_rotation_bits) {
584 rotation = extract_bits(block, bit_offset, 2);
585 bit_offset += 2;
586 } else {
587 rotation = 0;
588 }
589
590 if (mode->has_index_selection_bit) {
591 index_selection = extract_bits(block, bit_offset, 1);
592 bit_offset++;
593 } else {
594 index_selection = 0;
595 }
596
597 bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
598
599 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
600 partition_num, texel);
601
602 /* Calculate the offset to the secondary index */
603 secondary_bit_offset = (bit_offset +
604 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
605 mode->n_subsets +
606 mode->n_secondary_index_bits * texel -
607 anchors_before_texel);
608
609 /* Calculate the offset to the primary index for this texel */
610 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
611
612 subset_num = (subsets >> (texel * 2)) & 3;
613
614 anchor = is_anchor(mode->n_subsets, partition_num, texel);
615
616 index_bits = mode->n_index_bits;
617 if (anchor)
618 index_bits--;
619 indices[0] = extract_bits(block, bit_offset, index_bits);
620
621 if (mode->n_secondary_index_bits) {
622 index_bits = mode->n_secondary_index_bits;
623 if (anchor)
624 index_bits--;
625 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
626 }
627
628 index = indices[index_selection];
629 index_bits = (index_selection ?
630 mode->n_secondary_index_bits :
631 mode->n_index_bits);
632
633 for (component = 0; component < 3; component++)
634 result[component] = interpolate(endpoints[subset_num * 2][component],
635 endpoints[subset_num * 2 + 1][component],
636 index,
637 index_bits);
638
639 /* Alpha uses the opposite index from the color components */
640 if (mode->n_secondary_index_bits && !index_selection) {
641 index = indices[1];
642 index_bits = mode->n_secondary_index_bits;
643 } else {
644 index = indices[0];
645 index_bits = mode->n_index_bits;
646 }
647
648 result[3] = interpolate(endpoints[subset_num * 2][3],
649 endpoints[subset_num * 2 + 1][3],
650 index,
651 index_bits);
652
653 apply_rotation(rotation, result);
654 }
655
656 static void
decompress_rgba_unorm_block(int src_width,int src_height,const uint8_t * block,uint8_t * dst_row,int dst_rowstride)657 decompress_rgba_unorm_block(int src_width, int src_height,
658 const uint8_t *block,
659 uint8_t *dst_row, int dst_rowstride)
660 {
661 int mode_num = ffs(block[0]);
662 const struct bptc_unorm_mode *mode;
663 int bit_offset_head, bit_offset, secondary_bit_offset;
664 int partition_num;
665 int subset_num;
666 int rotation;
667 int index_selection;
668 int index_bits;
669 int indices[2];
670 int index;
671 int anchors_before_texel;
672 bool anchor;
673 uint8_t endpoints[3 * 2][4];
674 uint32_t subsets;
675 int component;
676 unsigned x, y;
677
678 if (mode_num == 0) {
679 /* According to the spec this mode is reserved and shouldn't be used. */
680 for(y = 0; y < src_height; y += 1) {
681 uint8_t *result = dst_row;
682 memset(result, 0, 4 * src_width);
683 dst_row += dst_rowstride;
684 }
685 return;
686 }
687
688 mode = bptc_unorm_modes + mode_num - 1;
689 bit_offset_head = mode_num;
690
691 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
692 bit_offset_head += mode->n_partition_bits;
693
694 switch (mode->n_subsets) {
695 case 1:
696 subsets = 0;
697 break;
698 case 2:
699 subsets = partition_table1[partition_num];
700 break;
701 case 3:
702 subsets = partition_table2[partition_num];
703 break;
704 default:
705 assert(false);
706 return;
707 }
708
709 if (mode->has_rotation_bits) {
710 rotation = extract_bits(block, bit_offset_head, 2);
711 bit_offset_head += 2;
712 } else {
713 rotation = 0;
714 }
715
716 if (mode->has_index_selection_bit) {
717 index_selection = extract_bits(block, bit_offset_head, 1);
718 bit_offset_head++;
719 } else {
720 index_selection = 0;
721 }
722
723 bit_offset_head = extract_unorm_endpoints(mode, block, bit_offset_head, endpoints);
724
725 for(y = 0; y < src_height; y += 1) {
726 uint8_t *result = dst_row;
727 for(x = 0; x < src_width; x += 1) {
728 int texel;
729 texel = x + y * 4;
730 bit_offset = bit_offset_head;
731
732 anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
733 partition_num,
734 texel);
735
736 /* Calculate the offset to the secondary index */
737 secondary_bit_offset = (bit_offset +
738 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
739 mode->n_subsets +
740 mode->n_secondary_index_bits * texel -
741 anchors_before_texel);
742
743 /* Calculate the offset to the primary index for this texel */
744 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
745
746 subset_num = (subsets >> (texel * 2)) & 3;
747
748 anchor = is_anchor(mode->n_subsets, partition_num, texel);
749
750 index_bits = mode->n_index_bits;
751 if (anchor)
752 index_bits--;
753 indices[0] = extract_bits(block, bit_offset, index_bits);
754
755 if (mode->n_secondary_index_bits) {
756 index_bits = mode->n_secondary_index_bits;
757 if (anchor)
758 index_bits--;
759 indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
760 }
761
762 index = indices[index_selection];
763 index_bits = (index_selection ?
764 mode->n_secondary_index_bits :
765 mode->n_index_bits);
766
767 for (component = 0; component < 3; component++)
768 result[component] = interpolate(endpoints[subset_num * 2][component],
769 endpoints[subset_num * 2 + 1][component],
770 index,
771 index_bits);
772
773 /* Alpha uses the opposite index from the color components */
774 if (mode->n_secondary_index_bits && !index_selection) {
775 index = indices[1];
776 index_bits = mode->n_secondary_index_bits;
777 } else {
778 index = indices[0];
779 index_bits = mode->n_index_bits;
780 }
781
782 result[3] = interpolate(endpoints[subset_num * 2][3],
783 endpoints[subset_num * 2 + 1][3],
784 index,
785 index_bits);
786
787 apply_rotation(rotation, result);
788 result += 4;
789 }
790 dst_row += dst_rowstride;
791 }
792 }
793
794 static void
decompress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)795 decompress_rgba_unorm(int width, int height,
796 const uint8_t *src, int src_rowstride,
797 uint8_t *dst, int dst_rowstride)
798 {
799 int src_row_diff;
800 int y, x;
801
802 if (src_rowstride >= width * 4)
803 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
804 else
805 src_row_diff = 0;
806
807 for (y = 0; y < height; y += BLOCK_SIZE) {
808 for (x = 0; x < width; x += BLOCK_SIZE) {
809 decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
810 MIN2(height - y, BLOCK_SIZE),
811 src,
812 dst + x * 4 + y * dst_rowstride,
813 dst_rowstride);
814 src += BLOCK_BYTES;
815 }
816 src += src_row_diff;
817 }
818 }
819
/* Scales a signed endpoint value of n_endpoint_bits precision up to the
 * signed 16-bit range used for interpolation. Values that already have at
 * least 16 bits and zero are returned unchanged; the largest magnitude maps
 * to 0x7fff.
 */
static int
signed_unquantize(int value, int n_endpoint_bits)
{
   int magnitude;
   int unquantized;

   if (n_endpoint_bits >= 16 || value == 0)
      return value;

   /* Work on the magnitude and restore the sign at the end */
   magnitude = value < 0 ? -value : value;

   if (magnitude >= (1 << (n_endpoint_bits - 1)) - 1)
      unquantized = 0x7fff;
   else
      unquantized = ((magnitude << 15) + 0x4000) >> (n_endpoint_bits - 1);

   return value < 0 ? -unquantized : unquantized;
}
848
/* Scales an unsigned endpoint value of n_endpoint_bits precision up to the
 * unsigned 16-bit range used for interpolation. Values that already have at
 * least 15 bits and zero are returned unchanged; the all-ones value maps to
 * 0xffff.
 */
static int
unsigned_unquantize(int value, int n_endpoint_bits)
{
   int scaled;

   if (n_endpoint_bits >= 15 || value == 0)
      return value;

   if (value == (1 << n_endpoint_bits) - 1)
      return 0xffff;

   scaled = (value << 15) + 0x4000;

   return scaled >> (n_endpoint_bits - 1);
}
863
864 static int
extract_float_endpoints(const struct bptc_float_mode * mode,const uint8_t * block,int bit_offset,int32_t endpoints[][3],bool is_signed)865 extract_float_endpoints(const struct bptc_float_mode *mode,
866 const uint8_t *block,
867 int bit_offset,
868 int32_t endpoints[][3],
869 bool is_signed)
870 {
871 const struct bptc_float_bitfield *bitfield;
872 int endpoint, component;
873 int n_endpoints;
874 int value;
875 int i;
876
877 if (mode->n_partition_bits)
878 n_endpoints = 4;
879 else
880 n_endpoints = 2;
881
882 memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
883
884 for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
885 value = extract_bits(block, bit_offset, bitfield->n_bits);
886 bit_offset += bitfield->n_bits;
887
888 if (bitfield->reverse) {
889 for (i = 0; i < bitfield->n_bits; i++) {
890 if (value & (1 << i))
891 endpoints[bitfield->endpoint][bitfield->component] |=
892 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
893 }
894 } else {
895 endpoints[bitfield->endpoint][bitfield->component] |=
896 value << bitfield->offset;
897 }
898 }
899
900 if (mode->transformed_endpoints) {
901 /* The endpoints are specified as signed offsets from e0 */
902 for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
903 for (component = 0; component < 3; component++) {
904 value = util_sign_extend(endpoints[endpoint][component],
905 mode->n_delta_bits[component]);
906 endpoints[endpoint][component] =
907 ((endpoints[0][component] + value) &
908 ((1 << mode->n_endpoint_bits) - 1));
909 }
910 }
911 }
912
913 if (is_signed) {
914 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
915 for (component = 0; component < 3; component++) {
916 value = util_sign_extend(endpoints[endpoint][component],
917 mode->n_endpoint_bits);
918 endpoints[endpoint][component] =
919 signed_unquantize(value, mode->n_endpoint_bits);
920 }
921 }
922 } else {
923 for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
924 for (component = 0; component < 3; component++) {
925 endpoints[endpoint][component] =
926 unsigned_unquantize(endpoints[endpoint][component],
927 mode->n_endpoint_bits);
928 }
929 }
930 }
931
932 return bit_offset;
933 }
934
/* Final scaling step for an interpolated unsigned value: multiplies by
 * 31/64 to obtain the bit pattern handed to the half-float conversion.
 */
static int32_t
finish_unsigned_unquantize(int32_t value)
{
   const int32_t scaled = value * 31;

   return scaled / 64;
}
940
/* Final scaling step for an interpolated signed value: the magnitude is
 * multiplied by 31/32 and, for negative inputs, bit 15 is set as the sign
 * bit of the resulting half-float bit pattern.
 */
static int32_t
finish_signed_unquantize(int32_t value)
{
   const bool negative = value < 0;
   const int32_t magnitude = negative ? -value : value;
   const int32_t half_bits = magnitude * 31 / 32;

   return negative ? (half_bits | 0x8000) : half_bits;
}
949
950 static void
fetch_rgb_float_from_block(const uint8_t * block,float * result,int texel,bool is_signed)951 fetch_rgb_float_from_block(const uint8_t *block,
952 float *result,
953 int texel,
954 bool is_signed)
955 {
956 int mode_num;
957 const struct bptc_float_mode *mode;
958 int bit_offset;
959 int partition_num;
960 int subset_num;
961 int index_bits;
962 int index;
963 int anchors_before_texel;
964 int32_t endpoints[2 * 2][3];
965 uint32_t subsets;
966 int n_subsets;
967 int component;
968 int32_t value;
969
970 if (block[0] & 0x2) {
971 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
972 bit_offset = 5;
973 } else {
974 mode_num = block[0] & 3;
975 bit_offset = 2;
976 }
977
978 mode = bptc_float_modes + mode_num;
979
980 if (mode->reserved) {
981 memset(result, 0, sizeof result[0] * 3);
982 result[3] = 1.0f;
983 return;
984 }
985
986 bit_offset = extract_float_endpoints(mode, block, bit_offset,
987 endpoints, is_signed);
988
989 if (mode->n_partition_bits) {
990 partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
991 bit_offset += mode->n_partition_bits;
992
993 subsets = partition_table1[partition_num];
994 n_subsets = 2;
995 } else {
996 partition_num = 0;
997 subsets = 0;
998 n_subsets = 1;
999 }
1000
1001 anchors_before_texel =
1002 count_anchors_before_texel(n_subsets, partition_num, texel);
1003
1004 /* Calculate the offset to the primary index for this texel */
1005 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1006
1007 subset_num = (subsets >> (texel * 2)) & 3;
1008
1009 index_bits = mode->n_index_bits;
1010 if (is_anchor(n_subsets, partition_num, texel))
1011 index_bits--;
1012 index = extract_bits(block, bit_offset, index_bits);
1013
1014 for (component = 0; component < 3; component++) {
1015 value = interpolate(endpoints[subset_num * 2][component],
1016 endpoints[subset_num * 2 + 1][component],
1017 index,
1018 mode->n_index_bits);
1019
1020 if (is_signed)
1021 value = finish_signed_unquantize(value);
1022 else
1023 value = finish_unsigned_unquantize(value);
1024
1025 result[component] = _mesa_half_to_float(value);
1026 }
1027
1028 result[3] = 1.0f;
1029 }
1030
1031 static void
decompress_rgb_float_block(unsigned src_width,unsigned src_height,const uint8_t * block,float * dst_row,unsigned dst_rowstride,bool is_signed)1032 decompress_rgb_float_block(unsigned src_width, unsigned src_height,
1033 const uint8_t *block,
1034 float *dst_row, unsigned dst_rowstride,
1035 bool is_signed)
1036 {
1037 int mode_num;
1038 const struct bptc_float_mode *mode;
1039 int bit_offset_head, bit_offset;
1040 int partition_num;
1041 int subset_num;
1042 int index_bits;
1043 int index;
1044 int anchors_before_texel;
1045 int32_t endpoints[2 * 2][3];
1046 uint32_t subsets;
1047 int n_subsets;
1048 int component;
1049 int32_t value;
1050 unsigned x, y;
1051
1052 if (block[0] & 0x2) {
1053 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1054 bit_offset_head = 5;
1055 } else {
1056 mode_num = block[0] & 3;
1057 bit_offset_head = 2;
1058 }
1059
1060 mode = bptc_float_modes + mode_num;
1061
1062 if (mode->reserved) {
1063 for(y = 0; y < src_height; y += 1) {
1064 float *result = dst_row;
1065 memset(result, 0, sizeof result[0] * 4 * src_width);
1066 for(x = 0; x < src_width; x += 1) {
1067 result[3] = 1.0f;
1068 result += 4;
1069 }
1070 dst_row += dst_rowstride / sizeof dst_row[0];
1071 }
1072 return;
1073 }
1074
1075 bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1076 endpoints, is_signed);
1077
1078 if (mode->n_partition_bits) {
1079 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1080 bit_offset_head += mode->n_partition_bits;
1081
1082 subsets = partition_table1[partition_num];
1083 n_subsets = 2;
1084 } else {
1085 partition_num = 0;
1086 subsets = 0;
1087 n_subsets = 1;
1088 }
1089
1090 for(y = 0; y < src_height; y += 1) {
1091 float *result = dst_row;
1092 for(x = 0; x < src_width; x += 1) {
1093 int texel;
1094
1095 bit_offset = bit_offset_head;
1096
1097 texel = x + y * 4;
1098
1099 anchors_before_texel =
1100 count_anchors_before_texel(n_subsets, partition_num, texel);
1101
1102 /* Calculate the offset to the primary index for this texel */
1103 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1104
1105 subset_num = (subsets >> (texel * 2)) & 3;
1106
1107 index_bits = mode->n_index_bits;
1108 if (is_anchor(n_subsets, partition_num, texel))
1109 index_bits--;
1110 index = extract_bits(block, bit_offset, index_bits);
1111
1112 for (component = 0; component < 3; component++) {
1113 value = interpolate(endpoints[subset_num * 2][component],
1114 endpoints[subset_num * 2 + 1][component],
1115 index,
1116 mode->n_index_bits);
1117
1118 if (is_signed)
1119 value = finish_signed_unquantize(value);
1120 else
1121 value = finish_unsigned_unquantize(value);
1122
1123 result[component] = _mesa_half_to_float(value);
1124 }
1125
1126 result[3] = 1.0f;
1127 result += 4;
1128 }
1129 dst_row += dst_rowstride / sizeof dst_row[0];
1130 }
1131 }
1132
1133 static void
decompress_rgb_float(int width,int height,const uint8_t * src,int src_rowstride,float * dst,int dst_rowstride,bool is_signed)1134 decompress_rgb_float(int width, int height,
1135 const uint8_t *src, int src_rowstride,
1136 float *dst, int dst_rowstride, bool is_signed)
1137 {
1138 int src_row_diff;
1139 int y, x;
1140
1141 if (src_rowstride >= width * 4)
1142 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1143 else
1144 src_row_diff = 0;
1145
1146 for (y = 0; y < height; y += BLOCK_SIZE) {
1147 for (x = 0; x < width; x += BLOCK_SIZE) {
1148 decompress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1149 MIN2(height - y, BLOCK_SIZE),
1150 src,
1151 (dst + x * 4 +
1152 (y * dst_rowstride / sizeof dst[0])),
1153 dst_rowstride, is_signed);
1154 src += BLOCK_BYTES;
1155 }
1156 src += src_row_diff;
1157 }
1158 }
1159
1160 static void
decompress_rgb_fp16_block(unsigned src_width,unsigned src_height,const uint8_t * block,uint16_t * dst_row,unsigned dst_rowstride,bool is_signed)1161 decompress_rgb_fp16_block(unsigned src_width, unsigned src_height,
1162 const uint8_t *block,
1163 uint16_t *dst_row, unsigned dst_rowstride,
1164 bool is_signed)
1165 {
1166 int mode_num;
1167 const struct bptc_float_mode *mode;
1168 int bit_offset_head, bit_offset;
1169 int partition_num;
1170 int subset_num;
1171 int index_bits;
1172 int index;
1173 int anchors_before_texel;
1174 int32_t endpoints[2 * 2][3];
1175 uint32_t subsets;
1176 int n_subsets;
1177 int component;
1178 int32_t value;
1179 unsigned x, y;
1180
1181 if (block[0] & 0x2) {
1182 mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
1183 bit_offset_head = 5;
1184 } else {
1185 mode_num = block[0] & 3;
1186 bit_offset_head = 2;
1187 }
1188
1189 mode = bptc_float_modes + mode_num;
1190
1191 if (mode->reserved) {
1192 for(y = 0; y < src_height; y += 1) {
1193 uint16_t *result = dst_row;
1194 memset(result, 0, sizeof result[0] * 4 * src_width);
1195 for(x = 0; x < src_width; x += 1) {
1196 result[3] = 1.0f;
1197 result += 4;
1198 }
1199 dst_row += dst_rowstride / sizeof dst_row[0];
1200 }
1201 return;
1202 }
1203
1204 bit_offset_head = extract_float_endpoints(mode, block, bit_offset_head,
1205 endpoints, is_signed);
1206
1207 if (mode->n_partition_bits) {
1208 partition_num = extract_bits(block, bit_offset_head, mode->n_partition_bits);
1209 bit_offset_head += mode->n_partition_bits;
1210
1211 subsets = partition_table1[partition_num];
1212 n_subsets = 2;
1213 } else {
1214 partition_num = 0;
1215 subsets = 0;
1216 n_subsets = 1;
1217 }
1218
1219 for(y = 0; y < src_height; y += 1) {
1220 uint16_t *result = dst_row;
1221 for(x = 0; x < src_width; x += 1) {
1222 int texel;
1223
1224 bit_offset = bit_offset_head;
1225
1226 texel = x + y * 4;
1227
1228 anchors_before_texel =
1229 count_anchors_before_texel(n_subsets, partition_num, texel);
1230
1231 /* Calculate the offset to the primary index for this texel */
1232 bit_offset += mode->n_index_bits * texel - anchors_before_texel;
1233
1234 subset_num = (subsets >> (texel * 2)) & 3;
1235
1236 index_bits = mode->n_index_bits;
1237 if (is_anchor(n_subsets, partition_num, texel))
1238 index_bits--;
1239 index = extract_bits(block, bit_offset, index_bits);
1240
1241 for (component = 0; component < 3; component++) {
1242 value = interpolate(endpoints[subset_num * 2][component],
1243 endpoints[subset_num * 2 + 1][component],
1244 index,
1245 mode->n_index_bits);
1246
1247 if (is_signed)
1248 value = finish_signed_unquantize(value);
1249 else
1250 value = finish_unsigned_unquantize(value);
1251
1252 result[component] = (uint16_t)value;
1253 }
1254
1255 result[3] = FP16_ONE;
1256 result += 4;
1257 }
1258 dst_row += dst_rowstride / sizeof dst_row[0];
1259 }
1260 }
1261
1262 static void
decompress_rgb_fp16(int width,int height,const uint8_t * src,int src_rowstride,uint16_t * dst,int dst_rowstride,bool is_signed)1263 decompress_rgb_fp16(int width, int height,
1264 const uint8_t *src, int src_rowstride,
1265 uint16_t *dst, int dst_rowstride, bool is_signed)
1266 {
1267 int src_row_diff;
1268 int y, x;
1269
1270 if (src_rowstride >= width * 4)
1271 src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
1272 else
1273 src_row_diff = 0;
1274
1275 for (y = 0; y < height; y += BLOCK_SIZE) {
1276 for (x = 0; x < width; x += BLOCK_SIZE) {
1277 decompress_rgb_fp16_block(MIN2(width - x, BLOCK_SIZE),
1278 MIN2(height - y, BLOCK_SIZE),
1279 src,
1280 (dst + x * 4 +
1281 (y * dst_rowstride / sizeof dst[0])),
1282 dst_rowstride, is_signed);
1283 src += BLOCK_BYTES;
1284 }
1285 src += src_row_diff;
1286 }
1287 }
1288
1289 static void
write_bits(struct bit_writer * writer,int n_bits,int value)1290 write_bits(struct bit_writer *writer, int n_bits, int value)
1291 {
1292 do {
1293 if (n_bits + writer->pos >= 8) {
1294 *(writer->dst++) = writer->buf | (value << writer->pos);
1295 writer->buf = 0;
1296 value >>= (8 - writer->pos);
1297 n_bits -= (8 - writer->pos);
1298 writer->pos = 0;
1299 } else {
1300 writer->buf |= value << writer->pos;
1301 writer->pos += n_bits;
1302 break;
1303 }
1304 } while (n_bits > 0);
1305 }
1306
/*
 * Compute the average luminance and the average alpha of a rectangle of
 * 8-bit RGBA texels.
 *
 * The luminance of a texel is taken to be the plain sum of its first three
 * components.  src_rowstride is the distance between rows in bytes.  Both
 * averages use truncating integer division by width * height.
 */
static void
get_average_luminance_alpha_unorm(int width, int height,
                                  const uint8_t *src, int src_rowstride,
                                  int *average_luminance, int *average_alpha)
{
   const int n_texels = width * height;
   const int row_padding = src_rowstride - width * 4;
   int total_luminance = 0;
   int total_alpha = 0;
   int row, col;

   for (row = 0; row < height; row++, src += row_padding) {
      for (col = 0; col < width; col++, src += 4) {
         total_luminance += src[0] + src[1] + src[2];
         total_alpha += src[3];
      }
   }

   *average_luminance = total_luminance / n_texels;
   *average_alpha = total_alpha / n_texels;
}
1327
/*
 * Choose two RGBA endpoints for a rectangle of 8-bit RGBA texels.
 *
 * width/height:       size of the rectangle in texels
 * src:                first texel of the rectangle
 * src_rowstride:      distance between rows in bytes
 * average_luminance:  threshold used to split the texels for the color
 *                     endpoints (luminance is the sum of R, G and B)
 * average_alpha:      threshold used to split the texels for the alpha
 *                     endpoints
 * endpoints:          receives the two endpoints
 *
 * The color and alpha endpoints are determined independently.  Texels below
 * the threshold contribute to endpoint 0 and the rest to endpoint 1; each
 * endpoint is the average of the texels assigned to it.  If all of the
 * texels end up on the same side then both endpoints are set to the average
 * of the whole rectangle.
 */
static void
get_rgba_endpoints_unorm(int width, int height,
                         const uint8_t *src, int src_rowstride,
                         int average_luminance, int average_alpha,
                         uint8_t endpoints[][4])
{
   int endpoint_luminances[2];
   int midpoint;
   int sums[2][4];
   int endpoint;
   int luminance;
   uint8_t temp[3];
   const uint8_t *p = src;
   int rgb_left_endpoint_count = 0;
   int alpha_left_endpoint_count = 0;
   int y, x, i;

   memset(sums, 0, sizeof sums);

   for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
         luminance = p[0] + p[1] + p[2];
         if (luminance < average_luminance) {
            endpoint = 0;
            rgb_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         for (i = 0; i < 3; i++)
            sums[endpoint][i] += p[i];

         /* The alpha split must be decided by the alpha component (p[3]),
          * not by one of the color components */
         if (p[3] < average_alpha) {
            endpoint = 0;
            alpha_left_endpoint_count++;
         } else {
            endpoint = 1;
         }
         sums[endpoint][3] += p[3];

         p += 4;
      }

      p += src_rowstride - width * 4;
   }

   if (rgb_left_endpoint_count == 0 ||
       rgb_left_endpoint_count == width * height) {
      for (i = 0; i < 3; i++)
         endpoints[0][i] = endpoints[1][i] =
            (sums[0][i] + sums[1][i]) / (width * height);
   } else {
      for (i = 0; i < 3; i++) {
         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
         endpoints[1][i] = (sums[1][i] /
                            (width * height - rgb_left_endpoint_count));
      }
   }

   if (alpha_left_endpoint_count == 0 ||
       alpha_left_endpoint_count == width * height) {
      endpoints[0][3] = endpoints[1][3] =
         (sums[0][3] + sums[1][3]) / (width * height);
   } else {
      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
      endpoints[1][3] = (sums[1][3] /
                         (width * height - alpha_left_endpoint_count));
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (endpoint = 0; endpoint < 2; endpoint++) {
      endpoint_luminances[endpoint] =
         endpoints[endpoint][0] +
         endpoints[endpoint][1] +
         endpoints[endpoint][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;

   /* Swap when the first texel and endpoint 0 lie on different sides of the
    * midpoint */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], 3);
      memcpy(endpoints[0], endpoints[1], 3);
      memcpy(endpoints[1], temp, 3);
   }

   /* Same for the alpha endpoints */

   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;

   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
      temp[0] = endpoints[0][3];
      endpoints[0][3] = endpoints[1][3];
      endpoints[1][3] = temp[0];
   }
}
1424
1425 static void
write_rgb_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1426 write_rgb_indices_unorm(struct bit_writer *writer,
1427 int src_width, int src_height,
1428 const uint8_t *src, int src_rowstride,
1429 uint8_t endpoints[][4])
1430 {
1431 int luminance;
1432 int endpoint_luminances[2];
1433 int endpoint;
1434 int index;
1435 int y, x;
1436
1437 for (endpoint = 0; endpoint < 2; endpoint++) {
1438 endpoint_luminances[endpoint] =
1439 endpoints[endpoint][0] +
1440 endpoints[endpoint][1] +
1441 endpoints[endpoint][2];
1442 }
1443
1444 /* If the endpoints have the same luminance then we'll just use index 0 for
1445 * all of the texels */
1446 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1447 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
1448 return;
1449 }
1450
1451 for (y = 0; y < src_height; y++) {
1452 for (x = 0; x < src_width; x++) {
1453 luminance = src[0] + src[1] + src[2];
1454
1455 index = ((luminance - endpoint_luminances[0]) * 3 /
1456 (endpoint_luminances[1] - endpoint_luminances[0]));
1457 if (index < 0)
1458 index = 0;
1459 else if (index > 3)
1460 index = 3;
1461
1462 assert(x != 0 || y != 0 || index < 2);
1463
1464 write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
1465
1466 src += 4;
1467 }
1468
1469 /* Pad the indices out to the block size */
1470 if (src_width < BLOCK_SIZE)
1471 write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
1472
1473 src += src_rowstride - src_width * 4;
1474 }
1475
1476 /* Pad the indices out to the block size */
1477 if (src_height < BLOCK_SIZE)
1478 write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1479 }
1480
1481 static void
write_alpha_indices_unorm(struct bit_writer * writer,int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t endpoints[][4])1482 write_alpha_indices_unorm(struct bit_writer *writer,
1483 int src_width, int src_height,
1484 const uint8_t *src, int src_rowstride,
1485 uint8_t endpoints[][4])
1486 {
1487 int index;
1488 int y, x;
1489
1490 /* If the endpoints have the same alpha then we'll just use index 0 for
1491 * all of the texels */
1492 if (endpoints[0][3] == endpoints[1][3]) {
1493 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
1494 return;
1495 }
1496
1497 for (y = 0; y < src_height; y++) {
1498 for (x = 0; x < src_width; x++) {
1499 index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
1500 ((int) endpoints[1][3] - endpoints[0][3]));
1501 if (index < 0)
1502 index = 0;
1503 else if (index > 7)
1504 index = 7;
1505
1506 assert(x != 0 || y != 0 || index < 4);
1507
1508 /* The first index has one less bit */
1509 write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
1510
1511 src += 4;
1512 }
1513
1514 /* Pad the indices out to the block size */
1515 if (src_width < BLOCK_SIZE)
1516 write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
1517
1518 src += src_rowstride - src_width * 4;
1519 }
1520
1521 /* Pad the indices out to the block size */
1522 if (src_height < BLOCK_SIZE)
1523 write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1524 }
1525
1526 static void
compress_rgba_unorm_block(int src_width,int src_height,const uint8_t * src,int src_rowstride,uint8_t * dst)1527 compress_rgba_unorm_block(int src_width, int src_height,
1528 const uint8_t *src, int src_rowstride,
1529 uint8_t *dst)
1530 {
1531 int average_luminance, average_alpha;
1532 uint8_t endpoints[2][4];
1533 struct bit_writer writer;
1534 int component, endpoint;
1535
1536 get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
1537 &average_luminance, &average_alpha);
1538 get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
1539 average_luminance, average_alpha,
1540 endpoints);
1541
1542 writer.dst = dst;
1543 writer.pos = 0;
1544 writer.buf = 0;
1545
1546 write_bits(&writer, 5, 0x10); /* mode 4 */
1547 write_bits(&writer, 2, 0); /* rotation 0 */
1548 write_bits(&writer, 1, 0); /* index selection bit */
1549
1550 /* Write the color endpoints */
1551 for (component = 0; component < 3; component++)
1552 for (endpoint = 0; endpoint < 2; endpoint++)
1553 write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
1554
1555 /* Write the alpha endpoints */
1556 for (endpoint = 0; endpoint < 2; endpoint++)
1557 write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
1558
1559 write_rgb_indices_unorm(&writer,
1560 src_width, src_height,
1561 src, src_rowstride,
1562 endpoints);
1563 write_alpha_indices_unorm(&writer,
1564 src_width, src_height,
1565 src, src_rowstride,
1566 endpoints);
1567 }
1568
1569 static void
compress_rgba_unorm(int width,int height,const uint8_t * src,int src_rowstride,uint8_t * dst,int dst_rowstride)1570 compress_rgba_unorm(int width, int height,
1571 const uint8_t *src, int src_rowstride,
1572 uint8_t *dst, int dst_rowstride)
1573 {
1574 int dst_row_diff;
1575 int y, x;
1576
1577 if (dst_rowstride >= width * 4)
1578 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1579 else
1580 dst_row_diff = 0;
1581
1582 for (y = 0; y < height; y += BLOCK_SIZE) {
1583 for (x = 0; x < width; x += BLOCK_SIZE) {
1584 compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
1585 MIN2(height - y, BLOCK_SIZE),
1586 src + x * 4 + y * src_rowstride,
1587 src_rowstride,
1588 dst);
1589 dst += BLOCK_BYTES;
1590 }
1591 dst += dst_row_diff;
1592 }
1593 }
1594
/*
 * Return the average luminance of a rectangle of RGB float texels.
 *
 * The luminance of a texel is taken to be the plain sum of its three
 * components.  Texels are three floats each and src_rowstride is the
 * distance between rows in bytes.
 */
static float
get_average_luminance_float(int width, int height,
                            const float *src, int src_rowstride)
{
   const float *texel = src;
   float total = 0;
   int row, col;

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3)
         total += texel[0] + texel[1] + texel[2];

      /* Skip whatever lies between the end of this row and the next one */
      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   return total / (width * height);
}
1612
/*
 * Clamp a value to the range that is going to be encoded.
 *
 * The upper limit is always 65504.  The lower limit is -65504 when
 * is_signed is set and zero otherwise.
 */
static float
clamp_value(float value, bool is_signed)
{
   const float upper_limit = 65504.0f;
   const float lower_limit = is_signed ? -65504.0f : 0.0f;

   if (value > upper_limit)
      return upper_limit;

   if (value < lower_limit)
      return lower_limit;

   return value;
}
1631
/*
 * Choose two RGB endpoints for a rectangle of RGB float texels.
 *
 * width/height:       size of the rectangle in texels
 * src:                first texel of the rectangle, three floats per texel
 * src_rowstride:      distance between rows in bytes
 * average_luminance:  threshold used to split the texels (luminance is the
 *                     sum of the three components)
 * endpoints:          receives the two endpoints
 * is_signed:          passed on to clamp_value() when clamping the result
 *
 * Texels below the threshold contribute to endpoint 0 and the rest to
 * endpoint 1; each endpoint is the average of the texels assigned to it.
 * If all of the texels end up on the same side then both endpoints are set
 * to the average of the whole rectangle.
 */
static void
get_endpoints_float(int width, int height,
                    const float *src, int src_rowstride,
                    float average_luminance, float endpoints[][3],
                    bool is_signed)
{
   const int n_texels = width * height;
   const float *texel = src;
   float endpoint_luminances[2];
   float sums[2][3];
   float midpoint;
   float luminance;
   float temp[3];
   int n_dark = 0;
   int which, c;
   int row, col;

   memset(sums, 0, sizeof sums);

   for (row = 0; row < height; row++) {
      for (col = 0; col < width; col++, texel += 3) {
         luminance = texel[0] + texel[1] + texel[2];

         which = (luminance < average_luminance) ? 0 : 1;
         if (which == 0)
            n_dark++;

         for (c = 0; c < 3; c++)
            sums[which][c] += texel[c];
      }

      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
   }

   if (n_dark == 0 || n_dark == n_texels) {
      /* Everything landed on one side so use the overall average */
      for (c = 0; c < 3; c++)
         endpoints[0][c] = endpoints[1][c] =
            (sums[0][c] + sums[1][c]) / n_texels;
   } else {
      for (c = 0; c < 3; c++) {
         endpoints[0][c] = sums[0][c] / n_dark;
         endpoints[1][c] = sums[1][c] / (n_texels - n_dark);
      }
   }

   /* Clamp the endpoints to the range of a half float and strip out
    * infinities */
   for (which = 0; which < 2; which++) {
      for (c = 0; c < 3; c++)
         endpoints[which][c] = clamp_value(endpoints[which][c], is_signed);
   }

   /* We may need to swap the endpoints to ensure the most-significant bit of
    * the first index is zero */

   for (which = 0; which < 2; which++) {
      endpoint_luminances[which] =
         endpoints[which][0] +
         endpoints[which][1] +
         endpoints[which][2];
   }
   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;

   /* Swap when the first texel and endpoint 0 lie on different sides of the
    * midpoint */
   if ((src[0] + src[1] + src[2] <= midpoint) !=
       (endpoint_luminances[0] <= midpoint)) {
      memcpy(temp, endpoints[0], sizeof temp);
      memcpy(endpoints[0], endpoints[1], sizeof temp);
      memcpy(endpoints[1], temp, sizeof temp);
   }
}
1707
1708 static void
write_rgb_indices_float(struct bit_writer * writer,int src_width,int src_height,const float * src,int src_rowstride,float endpoints[][3])1709 write_rgb_indices_float(struct bit_writer *writer,
1710 int src_width, int src_height,
1711 const float *src, int src_rowstride,
1712 float endpoints[][3])
1713 {
1714 float luminance;
1715 float endpoint_luminances[2];
1716 int endpoint;
1717 int index;
1718 int y, x;
1719
1720 for (endpoint = 0; endpoint < 2; endpoint++) {
1721 endpoint_luminances[endpoint] =
1722 endpoints[endpoint][0] +
1723 endpoints[endpoint][1] +
1724 endpoints[endpoint][2];
1725 }
1726
1727 /* If the endpoints have the same luminance then we'll just use index 0 for
1728 * all of the texels */
1729 if (endpoint_luminances[0] == endpoint_luminances[1]) {
1730 write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
1731 return;
1732 }
1733
1734 for (y = 0; y < src_height; y++) {
1735 for (x = 0; x < src_width; x++) {
1736 luminance = src[0] + src[1] + src[2];
1737
1738 index = ((luminance - endpoint_luminances[0]) * 15 /
1739 (endpoint_luminances[1] - endpoint_luminances[0]));
1740 if (index < 0)
1741 index = 0;
1742 else if (index > 15)
1743 index = 15;
1744
1745 assert(x != 0 || y != 0 || index < 8);
1746
1747 write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
1748
1749 src += 3;
1750 }
1751
1752 /* Pad the indices out to the block size */
1753 if (src_width < BLOCK_SIZE)
1754 write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
1755
1756 src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
1757 }
1758
1759 /* Pad the indices out to the block size */
1760 if (src_height < BLOCK_SIZE)
1761 write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
1762 }
1763
/*
 * Convert a float endpoint component into the integer that gets written to
 * the block.
 *
 * The value is first converted to a half float with _mesa_float_to_half().
 * For the unsigned format, values less than or equal to zero become 0 and
 * everything else is scaled by 64/31 and shifted down by 6 bits.  For the
 * signed format the sign bit is stripped, the magnitude is scaled by 32/31
 * and shifted down by 6 bits, and negative values are then stored negated
 * and masked to 10 bits.
 */
static int
get_endpoint_value(float value, bool is_signed)
{
   bool negative;
   int half;

   if (!is_signed) {
      if (value <= 0.0f)
         return 0;

      half = _mesa_float_to_half(value);

      return (64 * half / 31) >> 6;
   }

   half = _mesa_float_to_half(value);

   /* Work on the magnitude and put the sign back at the end */
   negative = (half & 0x8000) != 0;
   if (negative)
      half &= 0x7fff;

   half = (32 * half / 31) >> 6;

   if (negative)
      half = -half & ((1 << 10) - 1);

   return half;
}
1793
1794 static void
compress_rgb_float_block(int src_width,int src_height,const float * src,int src_rowstride,uint8_t * dst,bool is_signed)1795 compress_rgb_float_block(int src_width, int src_height,
1796 const float *src, int src_rowstride,
1797 uint8_t *dst,
1798 bool is_signed)
1799 {
1800 float average_luminance;
1801 float endpoints[2][3];
1802 struct bit_writer writer;
1803 int component, endpoint;
1804 int endpoint_value;
1805
1806 average_luminance =
1807 get_average_luminance_float(src_width, src_height, src, src_rowstride);
1808 get_endpoints_float(src_width, src_height, src, src_rowstride,
1809 average_luminance, endpoints, is_signed);
1810
1811 writer.dst = dst;
1812 writer.pos = 0;
1813 writer.buf = 0;
1814
1815 write_bits(&writer, 5, 3); /* mode 3 */
1816
1817 /* Write the endpoints */
1818 for (endpoint = 0; endpoint < 2; endpoint++) {
1819 for (component = 0; component < 3; component++) {
1820 endpoint_value =
1821 get_endpoint_value(endpoints[endpoint][component], is_signed);
1822 write_bits(&writer, 10, endpoint_value);
1823 }
1824 }
1825
1826 write_rgb_indices_float(&writer,
1827 src_width, src_height,
1828 src, src_rowstride,
1829 endpoints);
1830 }
1831
1832 static void
compress_rgb_float(int width,int height,const float * src,int src_rowstride,uint8_t * dst,int dst_rowstride,bool is_signed)1833 compress_rgb_float(int width, int height,
1834 const float *src, int src_rowstride,
1835 uint8_t *dst, int dst_rowstride,
1836 bool is_signed)
1837 {
1838 int dst_row_diff;
1839 int y, x;
1840
1841 if (dst_rowstride >= width * 4)
1842 dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
1843 else
1844 dst_row_diff = 0;
1845
1846 for (y = 0; y < height; y += BLOCK_SIZE) {
1847 for (x = 0; x < width; x += BLOCK_SIZE) {
1848 compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
1849 MIN2(height - y, BLOCK_SIZE),
1850 src + x * 3 +
1851 y * src_rowstride / sizeof (float),
1852 src_rowstride,
1853 dst,
1854 is_signed);
1855 dst += BLOCK_BYTES;
1856 }
1857 dst += dst_row_diff;
1858 }
1859 }
1860
1861 #endif
1862