FFmpeg
texturedspenc.c
Go to the documentation of this file.
1 /*
2  * Texture block compression
3  * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
4  * Based on public domain code by Fabian Giesen, Sean Barrett and Yann Collet.
5  *
6  * This file is part of FFmpeg
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include <stddef.h>
27 #include <stdint.h>
28 
29 #include "libavutil/attributes.h"
30 #include "libavutil/common.h"
31 #include "libavutil/intreadwrite.h"
32 
33 #include "texturedsp.h"
34 
/* Expansion of a 5-bit colour component to 8 bits, indexed by the 5-bit value.
 * rgb5652rgb() uses it for the red and blue fields of an RGB565 colour.
 * The entries replicate the top bits into the low bits, (v << 3) | (v >> 2),
 * so 0 maps to 0 and 31 maps to 255. */
35 static const uint8_t expand5[32] = {
36  0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90,
37  99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189,
38  198, 206, 214, 222, 231, 239, 247, 255,
39 };
40 
/* Expansion of a 6-bit colour component to 8 bits, indexed by the 6-bit value.
 * rgb5652rgb() uses it for the green field of an RGB565 colour.
 * The entries replicate the top bits into the low bits, (v << 2) | (v >> 4),
 * so 0 maps to 0 and 63 maps to 255. */
41 static const uint8_t expand6[64] = {
42  0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44,
43  48, 52, 56, 60, 65, 69, 73, 77, 81, 85, 89, 93,
44  97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142,
45  146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190,
46  195, 199, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239,
47  243, 247, 251, 255,
48 };
49 
/* Single-colour endpoint table for 5-bit channels (red and blue), indexed by
 * an 8-bit component value. Entry [0] is placed in the "max" endpoint and
 * entry [1] in the "min" endpoint; compress_color() and refine_colors() use
 * it when the whole block is represented by one colour.
 * NOTE(review): the pairs are presumably precomputed so that the interpolated
 * colour selected for such blocks best approximates the input value, as in
 * the public domain code this file is based on -- confirm against the
 * generator, which is not part of this file. */
50 static const uint8_t match5[256][2] = {
51  { 0, 0 }, { 0, 0 }, { 0, 1 }, { 0, 1 }, { 1, 0 }, { 1, 0 },
52  { 1, 0 }, { 1, 1 }, { 1, 1 }, { 2, 0 }, { 2, 0 }, { 0, 4 },
53  { 2, 1 }, { 2, 1 }, { 2, 1 }, { 3, 0 }, { 3, 0 }, { 3, 0 },
54  { 3, 1 }, { 1, 5 }, { 3, 2 }, { 3, 2 }, { 4, 0 }, { 4, 0 },
55  { 4, 1 }, { 4, 1 }, { 4, 2 }, { 4, 2 }, { 4, 2 }, { 3, 5 },
56  { 5, 1 }, { 5, 1 }, { 5, 2 }, { 4, 4 }, { 5, 3 }, { 5, 3 },
57  { 5, 3 }, { 6, 2 }, { 6, 2 }, { 6, 2 }, { 6, 3 }, { 5, 5 },
58  { 6, 4 }, { 6, 4 }, { 4, 8 }, { 7, 3 }, { 7, 3 }, { 7, 3 },
59  { 7, 4 }, { 7, 4 }, { 7, 4 }, { 7, 5 }, { 5, 9 }, { 7, 6 },
60  { 7, 6 }, { 8, 4 }, { 8, 4 }, { 8, 5 }, { 8, 5 }, { 8, 6 },
61  { 8, 6 }, { 8, 6 }, { 7, 9 }, { 9, 5 }, { 9, 5 }, { 9, 6 },
62  { 8, 8 }, { 9, 7 }, { 9, 7 }, { 9, 7 }, { 10, 6 }, { 10, 6 },
63  { 10, 6 }, { 10, 7 }, { 9, 9 }, { 10, 8 }, { 10, 8 }, { 8, 12 },
64  { 11, 7 }, { 11, 7 }, { 11, 7 }, { 11, 8 }, { 11, 8 }, { 11, 8 },
65  { 11, 9 }, { 9, 13 }, { 11, 10 }, { 11, 10 }, { 12, 8 }, { 12, 8 },
66  { 12, 9 }, { 12, 9 }, { 12, 10 }, { 12, 10 }, { 12, 10 }, { 11, 13 },
67  { 13, 9 }, { 13, 9 }, { 13, 10 }, { 12, 12 }, { 13, 11 }, { 13, 11 },
68  { 13, 11 }, { 14, 10 }, { 14, 10 }, { 14, 10 }, { 14, 11 }, { 13, 13 },
69  { 14, 12 }, { 14, 12 }, { 12, 16 }, { 15, 11 }, { 15, 11 }, { 15, 11 },
70  { 15, 12 }, { 15, 12 }, { 15, 12 }, { 15, 13 }, { 13, 17 }, { 15, 14 },
71  { 15, 14 }, { 16, 12 }, { 16, 12 }, { 16, 13 }, { 16, 13 }, { 16, 14 },
72  { 16, 14 }, { 16, 14 }, { 15, 17 }, { 17, 13 }, { 17, 13 }, { 17, 14 },
73  { 16, 16 }, { 17, 15 }, { 17, 15 }, { 17, 15 }, { 18, 14 }, { 18, 14 },
74  { 18, 14 }, { 18, 15 }, { 17, 17 }, { 18, 16 }, { 18, 16 }, { 16, 20 },
75  { 19, 15 }, { 19, 15 }, { 19, 15 }, { 19, 16 }, { 19, 16 }, { 19, 16 },
76  { 19, 17 }, { 17, 21 }, { 19, 18 }, { 19, 18 }, { 20, 16 }, { 20, 16 },
77  { 20, 17 }, { 20, 17 }, { 20, 18 }, { 20, 18 }, { 20, 18 }, { 19, 21 },
78  { 21, 17 }, { 21, 17 }, { 21, 18 }, { 20, 20 }, { 21, 19 }, { 21, 19 },
79  { 21, 19 }, { 22, 18 }, { 22, 18 }, { 22, 18 }, { 22, 19 }, { 21, 21 },
80  { 22, 20 }, { 22, 20 }, { 20, 24 }, { 23, 19 }, { 23, 19 }, { 23, 19 },
81  { 23, 20 }, { 23, 20 }, { 23, 20 }, { 23, 21 }, { 21, 25 }, { 23, 22 },
82  { 23, 22 }, { 24, 20 }, { 24, 20 }, { 24, 21 }, { 24, 21 }, { 24, 22 },
83  { 24, 22 }, { 24, 22 }, { 23, 25 }, { 25, 21 }, { 25, 21 }, { 25, 22 },
84  { 24, 24 }, { 25, 23 }, { 25, 23 }, { 25, 23 }, { 26, 22 }, { 26, 22 },
85  { 26, 22 }, { 26, 23 }, { 25, 25 }, { 26, 24 }, { 26, 24 }, { 24, 28 },
86  { 27, 23 }, { 27, 23 }, { 27, 23 }, { 27, 24 }, { 27, 24 }, { 27, 24 },
87  { 27, 25 }, { 25, 29 }, { 27, 26 }, { 27, 26 }, { 28, 24 }, { 28, 24 },
88  { 28, 25 }, { 28, 25 }, { 28, 26 }, { 28, 26 }, { 28, 26 }, { 27, 29 },
89  { 29, 25 }, { 29, 25 }, { 29, 26 }, { 28, 28 }, { 29, 27 }, { 29, 27 },
90  { 29, 27 }, { 30, 26 }, { 30, 26 }, { 30, 26 }, { 30, 27 }, { 29, 29 },
91  { 30, 28 }, { 30, 28 }, { 30, 28 }, { 31, 27 }, { 31, 27 }, { 31, 27 },
92  { 31, 28 }, { 31, 28 }, { 31, 28 }, { 31, 29 }, { 31, 29 }, { 31, 30 },
93  { 31, 30 }, { 31, 30 }, { 31, 31 }, { 31, 31 },
94 };
95 
/* Single-colour endpoint table for the 6-bit green channel, indexed by an
 * 8-bit component value. Entry [0] is placed in the "max" endpoint and
 * entry [1] in the "min" endpoint; compress_color() and refine_colors() use
 * it when the whole block is represented by one colour.
 * NOTE(review): the pairs are presumably precomputed so that the interpolated
 * colour selected for such blocks best approximates the input value, as in
 * the public domain code this file is based on -- confirm against the
 * generator, which is not part of this file. */
96 static const uint8_t match6[256][2] = {
97  { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 0 }, { 1, 1 }, { 2, 0 },
98  { 2, 1 }, { 3, 0 }, { 3, 0 }, { 3, 1 }, { 4, 0 }, { 4, 0 },
99  { 4, 1 }, { 5, 0 }, { 5, 1 }, { 6, 0 }, { 6, 0 }, { 6, 1 },
100  { 7, 0 }, { 7, 0 }, { 7, 1 }, { 8, 0 }, { 8, 1 }, { 8, 1 },
101  { 8, 2 }, { 9, 1 }, { 9, 2 }, { 9, 2 }, { 9, 3 }, { 10, 2 },
102  { 10, 3 }, { 10, 3 }, { 10, 4 }, { 11, 3 }, { 11, 4 }, { 11, 4 },
103  { 11, 5 }, { 12, 4 }, { 12, 5 }, { 12, 5 }, { 12, 6 }, { 13, 5 },
104  { 13, 6 }, { 8, 16 }, { 13, 7 }, { 14, 6 }, { 14, 7 }, { 9, 17 },
105  { 14, 8 }, { 15, 7 }, { 15, 8 }, { 11, 16 }, { 15, 9 }, { 15, 10 },
106  { 16, 8 }, { 16, 9 }, { 16, 10 }, { 15, 13 }, { 17, 9 }, { 17, 10 },
107  { 17, 11 }, { 15, 16 }, { 18, 10 }, { 18, 11 }, { 18, 12 }, { 16, 16 },
108  { 19, 11 }, { 19, 12 }, { 19, 13 }, { 17, 17 }, { 20, 12 }, { 20, 13 },
109  { 20, 14 }, { 19, 16 }, { 21, 13 }, { 21, 14 }, { 21, 15 }, { 20, 17 },
110  { 22, 14 }, { 22, 15 }, { 25, 10 }, { 22, 16 }, { 23, 15 }, { 23, 16 },
111  { 26, 11 }, { 23, 17 }, { 24, 16 }, { 24, 17 }, { 27, 12 }, { 24, 18 },
112  { 25, 17 }, { 25, 18 }, { 28, 13 }, { 25, 19 }, { 26, 18 }, { 26, 19 },
113  { 29, 14 }, { 26, 20 }, { 27, 19 }, { 27, 20 }, { 30, 15 }, { 27, 21 },
114  { 28, 20 }, { 28, 21 }, { 28, 21 }, { 28, 22 }, { 29, 21 }, { 29, 22 },
115  { 24, 32 }, { 29, 23 }, { 30, 22 }, { 30, 23 }, { 25, 33 }, { 30, 24 },
116  { 31, 23 }, { 31, 24 }, { 27, 32 }, { 31, 25 }, { 31, 26 }, { 32, 24 },
117  { 32, 25 }, { 32, 26 }, { 31, 29 }, { 33, 25 }, { 33, 26 }, { 33, 27 },
118  { 31, 32 }, { 34, 26 }, { 34, 27 }, { 34, 28 }, { 32, 32 }, { 35, 27 },
119  { 35, 28 }, { 35, 29 }, { 33, 33 }, { 36, 28 }, { 36, 29 }, { 36, 30 },
120  { 35, 32 }, { 37, 29 }, { 37, 30 }, { 37, 31 }, { 36, 33 }, { 38, 30 },
121  { 38, 31 }, { 41, 26 }, { 38, 32 }, { 39, 31 }, { 39, 32 }, { 42, 27 },
122  { 39, 33 }, { 40, 32 }, { 40, 33 }, { 43, 28 }, { 40, 34 }, { 41, 33 },
123  { 41, 34 }, { 44, 29 }, { 41, 35 }, { 42, 34 }, { 42, 35 }, { 45, 30 },
124  { 42, 36 }, { 43, 35 }, { 43, 36 }, { 46, 31 }, { 43, 37 }, { 44, 36 },
125  { 44, 37 }, { 44, 37 }, { 44, 38 }, { 45, 37 }, { 45, 38 }, { 40, 48 },
126  { 45, 39 }, { 46, 38 }, { 46, 39 }, { 41, 49 }, { 46, 40 }, { 47, 39 },
127  { 47, 40 }, { 43, 48 }, { 47, 41 }, { 47, 42 }, { 48, 40 }, { 48, 41 },
128  { 48, 42 }, { 47, 45 }, { 49, 41 }, { 49, 42 }, { 49, 43 }, { 47, 48 },
129  { 50, 42 }, { 50, 43 }, { 50, 44 }, { 48, 48 }, { 51, 43 }, { 51, 44 },
130  { 51, 45 }, { 49, 49 }, { 52, 44 }, { 52, 45 }, { 52, 46 }, { 51, 48 },
131  { 53, 45 }, { 53, 46 }, { 53, 47 }, { 52, 49 }, { 54, 46 }, { 54, 47 },
132  { 57, 42 }, { 54, 48 }, { 55, 47 }, { 55, 48 }, { 58, 43 }, { 55, 49 },
133  { 56, 48 }, { 56, 49 }, { 59, 44 }, { 56, 50 }, { 57, 49 }, { 57, 50 },
134  { 60, 45 }, { 57, 51 }, { 58, 50 }, { 58, 51 }, { 61, 46 }, { 58, 52 },
135  { 59, 51 }, { 59, 52 }, { 62, 47 }, { 59, 53 }, { 60, 52 }, { 60, 53 },
136  { 60, 53 }, { 60, 54 }, { 61, 53 }, { 61, 54 }, { 61, 54 }, { 61, 55 },
137  { 62, 54 }, { 62, 55 }, { 62, 55 }, { 62, 56 }, { 63, 55 }, { 63, 56 },
138  { 63, 56 }, { 63, 57 }, { 63, 58 }, { 63, 59 }, { 63, 59 }, { 63, 60 },
139  { 63, 61 }, { 63, 62 }, { 63, 62 }, { 63, 63 },
140 };
141 
/* Rounded a * b / 255 computed with shifts only: the product is biased by
 * 128, then the bias-corrected high byte is folded back in before the final
 * shift. Both arguments are evaluated twice, so they must be free of side
 * effects. */
#define mul8(a, b) ((128 + (a) * (b) + ((128 + (a) * (b)) >> 8)) >> 8)

/* Pack 8-bit r, g, b into RGB565 (5 bits red, 6 bits green, 5 bits blue),
 * rescaling each component with mul8(). */
#define rgb2rgb565(r, g, b) \
    ((mul8(r, 31) << 11) | (mul8(g, 63) << 5) | mul8(b, 31))

/* Point one third of the way from a to b, i.e. a weighted 2/3 and b weighted
 * 1/3, using integer division. */
#define lerp13(a, b) (((a) * 2 + (b)) / 3)
151 
/**
 * Interpolate the three colour components of two pixels at the 1/3 point.
 *
 * Each of out[0..2] receives lerp13(p1[i], p2[i]), so p1 is weighted 2/3 and
 * p2 is weighted 1/3. Only three bytes are written; out[3] is left untouched.
 * The inputs are only read, hence the const qualifiers, matching the other
 * pixel readers in this file.
 *
 * @param out destination pixel, at least 3 bytes.
 * @param p1  pixel weighted 2/3, at least 3 readable bytes.
 * @param p2  pixel weighted 1/3, at least 3 readable bytes.
 */
static inline void lerp13rgb(uint8_t *out, const uint8_t *p1, const uint8_t *p2)
{
    int i;

    for (i = 0; i < 3; i++)
        out[i] = lerp13(p1[i], p2[i]);
}
159 
160 /* Conversion from rgb565 to rgb24 */
161 static inline void rgb5652rgb(uint8_t *out, uint16_t v)
162 {
163  int rv = (v & 0xf800) >> 11;
164  int gv = (v & 0x07e0) >> 5;
165  int bv = (v & 0x001f) >> 0;
166 
167  out[0] = expand5[rv];
168  out[1] = expand6[gv];
169  out[2] = expand5[bv];
170  out[3] = 0;
171 }
172 
/**
 * Assign a 2-bit palette selector to each of the 16 pixels of a block.
 *
 * The four palette colours are c0, c1 and the two 1/3 interpolations between
 * them. Pixels and palette entries are projected onto the c0 - c1 direction
 * and each pixel is classified by comparing its projection against the
 * midpoints between neighbouring palette projections. This 1-D approximation
 * is not always optimal in terms of Euclidean distance, but it is very close
 * and much faster; see
 * http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
 *
 * @param block  top-left pixel of the block, 4 bytes per pixel; only the
 *               first three bytes of each pixel are read.
 * @param stride distance in bytes between two rows of the block.
 * @param c0     first endpoint, RGB565.
 * @param c1     second endpoint, RGB565.
 * @return 32-bit selector mask; the first pixel (row 0, column 0) occupies
 *         the two lowest bits, the last pixel the two highest.
 */
static unsigned int match_colors(const uint8_t *block, ptrdiff_t stride,
                                 uint16_t c0, uint16_t c1)
{
    /* Selector for each combination of the three threshold tests, already
     * shifted to the top of the mask so it can be shifted down in place. */
    static const uint32_t selector[8] = {
        0U << 30, 2U << 30, 0U << 30, 2U << 30,
        3U << 30, 3U << 30, 1U << 30, 1U << 30,
    };
    uint8_t palette[16];
    int proj[16];
    int stop[4];
    int axis_r, axis_g, axis_b;
    int thr_c0, thr_half, thr_c3;
    uint32_t result = 0;
    int row, col, i;

    /* Palette layout: [0] = c0, [1] = c1, [2] and [3] = interpolated. */
    rgb5652rgb(palette + 0, c0);
    rgb5652rgb(palette + 4, c1);
    lerp13rgb(palette + 8,  palette + 0, palette + 4);
    lerp13rgb(palette + 12, palette + 4, palette + 0);

    axis_r = palette[0] - palette[4];
    axis_g = palette[1] - palette[5];
    axis_b = palette[2] - palette[6];

    /* Project the palette entries onto the axis. */
    for (i = 0; i < 4; i++) {
        const uint8_t *entry = palette + i * 4;

        stop[i] = entry[0] * axis_r +
                  entry[1] * axis_g +
                  entry[2] * axis_b;
    }

    /* Project every pixel onto the axis, in row-major order. */
    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            const uint8_t *px = block + col * 4 + row * stride;

            proj[row * 4 + col] = px[0] * axis_r +
                                  px[1] * axis_g +
                                  px[2] * axis_b;
        }
    }

    /* Crossover points between neighbouring palette entries on the line. */
    thr_c0   = (stop[1] + stop[3]) >> 1;
    thr_half = (stop[3] + stop[2]) >> 1;
    thr_c3   = (stop[2] + stop[0]) >> 1;

    for (i = 0; i < 16; i++) {
        const int d = proj[i];
        int key = 0;

        if (d < thr_half)
            key |= 4;
        if (d < thr_c0)
            key |= 2;
        if (d < thr_c3)
            key |= 1;

        result >>= 2;
        result  |= selector[key];
    }

    return result;
}
235 
/**
 * Choose the two RGB565 endpoint colours for a 4x4 block.
 *
 * The per-channel mean, minimum and maximum are gathered first. The
 * covariance matrix about the mean is then built and a few rounds of power
 * iteration, seeded with the (max - min) vector, estimate the principal
 * axis. The pixels with the smallest and largest projection onto that axis
 * are converted to RGB565 and returned.
 *
 * @param block  top-left pixel of the block, 4 bytes per pixel; only the
 *               first three bytes of each pixel are read.
 * @param stride distance in bytes between two rows of the block.
 * @param pmax16 receives the RGB565 colour of the pixel with the largest
 *               projection.
 * @param pmin16 receives the RGB565 colour of the pixel with the smallest
 *               projection.
 */
static void optimize_colors(const uint8_t *block, ptrdiff_t stride,
                            uint16_t *pmax16, uint16_t *pmin16)
{
    const uint8_t *minp;
    const uint8_t *maxp;
    const int iter_power = 4;
    double magn;
    int v_r, v_g, v_b;
    float covf[6], vfr, vfg, vfb;
    int mind, maxd;
    int cov[6] = { 0 };
    int mu[3], min[3], max[3];
    int ch, iter, x, y;

    /* Determine color distribution */
    for (ch = 0; ch < 3; ch++) {
        const uint8_t *bp = &block[ch];
        /* The sum starts at 0 because the loop below visits all 16 pixels,
         * including the first one; seeding it with bp[0] would count that
         * pixel twice and bias the mean. */
        int muv  = 0;
        int minv = bp[0];
        int maxv = bp[0];

        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const int v = bp[x * 4 + y * stride];

                muv += v;
                if (v < minv)
                    minv = v;
                else if (v > maxv)
                    maxv = v;
            }
        }

        /* Rounded division by the 16 pixels of the block */
        mu[ch]  = (muv + 8) >> 4;
        min[ch] = minv;
        max[ch] = maxv;
    }

    /* Determine covariance matrix (upper triangle: rr, rg, rb, gg, gb, bb) */
    for (y = 0; y < 4; y++) {
        for (x = 0; x < 4; x++) {
            int r = block[x * 4 + stride * y + 0] - mu[0];
            int g = block[x * 4 + stride * y + 1] - mu[1];
            int b = block[x * 4 + stride * y + 2] - mu[2];

            cov[0] += r * r;
            cov[1] += r * g;
            cov[2] += r * b;
            cov[3] += g * g;
            cov[4] += g * b;
            cov[5] += b * b;
        }
    }

    /* Convert covariance matrix to float, find principal axis via power iter */
    for (x = 0; x < 6; x++)
        covf[x] = cov[x] / 255.0f;

    vfr = (float) (max[0] - min[0]);
    vfg = (float) (max[1] - min[1]);
    vfb = (float) (max[2] - min[2]);

    for (iter = 0; iter < iter_power; iter++) {
        float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2];
        float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4];
        float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5];

        vfr = r;
        vfg = g;
        vfb = b;
    }

    /* Largest absolute component of the axis estimate */
    magn = fabs(vfr);
    if (fabs(vfg) > magn)
        magn = fabs(vfg);
    if (fabs(vfb) > magn)
        magn = fabs(vfb);

    /* if magnitude is too small, default to luminance */
    if (magn < 4.0f) {
        /* JPEG YCbCr luma coefs, scaled by 1000 */
        v_r = 299;
        v_g = 587;
        v_b = 114;
    } else {
        /* Rescale so the largest component has magnitude 512 */
        magn = 512.0 / magn;
        v_r  = (int) (vfr * magn);
        v_g  = (int) (vfg * magn);
        v_b  = (int) (vfb * magn);
    }

    /* Pick colors at extreme points */
    mind = maxd = block[0] * v_r + block[1] * v_g + block[2] * v_b;
    minp = maxp = block;
    for (y = 0; y < 4; y++) {
        for (x = 0; x < 4; x++) {
            const uint8_t *px = block + x * 4 + y * stride;
            int dot = px[0] * v_r + px[1] * v_g + px[2] * v_b;

            if (dot < mind) {
                mind = dot;
                minp = px;
            } else if (dot > maxd) {
                maxd = dot;
                maxp = px;
            }
        }
    }

    *pmax16 = rgb2rgb565(maxp[0], maxp[1], maxp[2]);
    *pmin16 = rgb2rgb565(minp[0], minp[1], minp[2]);
}
347 
348 /* Try to optimize colors to suit block contents better, by solving
349  * a least squares system via normal equations + Cramer's rule. */
350 static int refine_colors(const uint8_t *block, ptrdiff_t stride,
351  uint16_t *pmax16, uint16_t *pmin16, uint32_t mask)
352 {
353  uint32_t cm = mask;
354  uint16_t oldMin = *pmin16;
355  uint16_t oldMax = *pmax16;
356  uint16_t min16, max16;
357  int x, y;
358 
359  /* Additional magic to save a lot of multiplies in the accumulating loop.
360  * The tables contain precomputed products of weights for least squares
361  * system, accumulated inside one 32-bit register */
362  static const int w1tab[4] = { 3, 0, 2, 1 };
363  static const int prods[4] = { 0x090000, 0x000900, 0x040102, 0x010402 };
364 
365  /* Check if all pixels have the same index */
366  if ((mask ^ (mask << 2)) < 4) {
367  /* If so, linear system would be singular; solve using optimal
368  * single-color match on average color. */
369  int r = 8, g = 8, b = 8;
370  for (y = 0; y < 4; y++) {
371  for (x = 0; x < 4; x++) {
372  r += block[0 + x * 4 + y * stride];
373  g += block[1 + x * 4 + y * stride];
374  b += block[2 + x * 4 + y * stride];
375  }
376  }
377 
378  r >>= 4;
379  g >>= 4;
380  b >>= 4;
381 
382  max16 = (match5[r][0] << 11) | (match6[g][0] << 5) | match5[b][0];
383  min16 = (match5[r][1] << 11) | (match6[g][1] << 5) | match5[b][1];
384  } else {
385  float fr, fg, fb;
386  int at1_r = 0, at1_g = 0, at1_b = 0;
387  int at2_r = 0, at2_g = 0, at2_b = 0;
388  int akku = 0;
389  int xx, xy, yy;
390 
391  for (y = 0; y < 4; y++) {
392  for (x = 0; x < 4; x++) {
393  int step = cm & 3;
394  int w1 = w1tab[step];
395  int r = block[0 + x * 4 + y * stride];
396  int g = block[1 + x * 4 + y * stride];
397  int b = block[2 + x * 4 + y * stride];
398 
399  akku += prods[step];
400  at1_r += w1 * r;
401  at1_g += w1 * g;
402  at1_b += w1 * b;
403  at2_r += r;
404  at2_g += g;
405  at2_b += b;
406 
407  cm >>= 2;
408  }
409  }
410 
411  at2_r = 3 * at2_r - at1_r;
412  at2_g = 3 * at2_g - at1_g;
413  at2_b = 3 * at2_b - at1_b;
414 
415  /* Extract solutions and decide solvability */
416  xx = akku >> 16;
417  yy = (akku >> 8) & 0xFF;
418  xy = (akku >> 0) & 0xFF;
419 
420  fr = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy);
421  fg = fr * 63.0f / 31.0f;
422  fb = fr;
423 
424  /* Solve */
425  max16 = av_clip_uintp2((at1_r * yy - at2_r * xy) * fr + 0.5f, 5) << 11;
426  max16 |= av_clip_uintp2((at1_g * yy - at2_g * xy) * fg + 0.5f, 6) << 5;
427  max16 |= av_clip_uintp2((at1_b * yy - at2_b * xy) * fb + 0.5f, 5) << 0;
428 
429  min16 = av_clip_uintp2((at2_r * xx - at1_r * xy) * fr + 0.5f, 5) << 11;
430  min16 |= av_clip_uintp2((at2_g * xx - at1_g * xy) * fg + 0.5f, 6) << 5;
431  min16 |= av_clip_uintp2((at2_b * xx - at1_b * xy) * fb + 0.5f, 5) << 0;
432  }
433 
434  *pmin16 = min16;
435  *pmax16 = max16;
436  return oldMin != min16 || oldMax != max16;
437 }
438 
/**
 * Tell whether all 16 pixels of a block are identical.
 *
 * Pixels are compared as whole 32-bit values, so the fourth byte of each
 * pixel takes part in the comparison as well.
 *
 * @param block  top-left pixel of the block, 4 bytes per pixel.
 * @param stride distance in bytes between two rows of the block.
 * @return 1 if every pixel equals the first one, 0 otherwise.
 */
static int constant_color(const uint8_t *block, ptrdiff_t stride)
{
    const uint32_t reference = AV_RL32(block);
    int i;

    /* i enumerates the pixels in row-major order: column i & 3, row i >> 2 */
    for (i = 0; i < 16; i++) {
        const uint8_t *px = block + (i & 3) * 4 + (i >> 2) * stride;

        if (AV_RL32(px) != reference)
            return 0;
    }

    return 1;
}
451 
452 /* Main color compression function */
453 static void compress_color(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
454 {
455  uint32_t mask;
456  uint16_t max16, min16;
457  int constant = constant_color(block, stride);
458 
459  /* Constant color will load values from tables */
460  if (constant) {
461  int r = block[0];
462  int g = block[1];
463  int b = block[2];
464  mask = 0xAAAAAAAA;
465  max16 = (match5[r][0] << 11) | (match6[g][0] << 5) | match5[b][0];
466  min16 = (match5[r][1] << 11) | (match6[g][1] << 5) | match5[b][1];
467  } else {
468  int refine;
469 
470  /* Otherwise find pca and map along principal axis */
471  optimize_colors(block, stride, &max16, &min16);
472  if (max16 != min16)
473  mask = match_colors(block, stride, max16, min16);
474  else
475  mask = 0;
476 
477  /* One pass refinement */
478  refine = refine_colors(block, stride, &max16, &min16, mask);
479  if (refine) {
480  if (max16 != min16)
481  mask = match_colors(block, stride, max16, min16);
482  else
483  mask = 0;
484  }
485  }
486 
487  /* Finally write the color block */
488  if (max16 < min16) {
489  FFSWAP(uint16_t, min16, max16);
490  mask ^= 0x55555555;
491  }
492 
493  AV_WL16(dst + 0, max16);
494  AV_WL16(dst + 2, min16);
495  AV_WL32(dst + 4, mask);
496 }
497 
/**
 * Compress the fourth byte of every pixel of a 4x4 block into 8 bytes.
 *
 * Output layout: maximum value, minimum value, then sixteen 3-bit indices
 * packed least significant bit first, first pixel first. When all values
 * are equal only the two endpoint bytes are set and the indices stay 0.
 *
 * @param dst    output, 8 bytes are written.
 * @param stride distance in bytes between two rows of the block.
 * @param block  top-left pixel of the block, 4 bytes per pixel; only byte 3
 *               of each pixel is read.
 */
static void compress_alpha(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    int lo, hi;
    int range, range2, range4, offset;
    int acc = 0, nbits = 0;
    int row, col;

    memset(dst, 0, 8);

    /* Find the extreme values */
    lo = hi = block[3];
    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            const int a = block[3 + col * 4 + row * stride];

            if (a < lo)
                lo = a;
            else if (a > hi)
                hi = a;
        }
    }

    /* Endpoints come first */
    dst[0] = (uint8_t) hi;
    dst[1] = (uint8_t) lo;
    dst += 2;

    /* Constant value: the zeroed indices already select dst[0] */
    if (lo == hi)
        return;

    /* Determine bias and emit indices.
     * Given the choice of hi/lo, these indices are optimal:
     * fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination */
    range  = hi - lo;
    range4 = range * 4;
    range2 = range * 2;
    offset = (range < 8 ? range - 1 : range / 2 + 2) - lo * 7;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++) {
            int scaled = block[3 + col * 4 + row * stride] * 7 + offset;
            int idx = 0;

            /* Linear scale factor from 0 (value == lo) to 7 (value == hi),
             * built one bit at a time. */
            if (scaled >= range4) {
                idx     = 4;
                scaled -= range4;
            }
            if (scaled >= range2) {
                idx    += 2;
                scaled -= range2;
            }
            if (scaled >= range)
                idx += 1;

            /* Turn the linear scale into a DXT index (0/1 are the extreme
             * points). */
            idx = -idx & 7;
            if (idx < 2)
                idx ^= 1;

            /* Append the 3-bit index and flush complete bytes */
            acc   |= idx << nbits;
            nbits += 3;
            if (nbits >= 8) {
                *dst++  = acc;
                acc   >>= 8;
                nbits  -= 8;
            }
        }
    }
}
572 
/**
 * Convert one RGBA pixel to unscaled YCoCg.
 *
 * Scaling is usually introduced to avoid banding over a certain range of
 * colours; this variant leaves it out, which keeps the conversion simpler
 * and faster.
 *
 * @param dst   output pixel: Co, Cg, 0, Y (4 bytes written).
 * @param pixel input pixel; bytes 0, 1 and 2 are read as R, G and B, the
 *              fourth byte is ignored.
 */
static void rgba2ycocg(uint8_t *dst, const uint8_t *pixel)
{
    const int red        = pixel[0];
    const int blue       = pixel[2];
    const int half_green = (pixel[1] + 1) >> 1;     /* rounded G / 2       */
    const int quarter_rb = (2 + red + blue) >> 2;   /* rounded (R + B) / 4 */

    const int co   = 128 + ((red - blue + 1) >> 1);
    const int cg   = 128 + half_green - quarter_rb;
    const int luma = half_green + quarter_rb;

    dst[0] = av_clip_uint8(co);   /* Co */
    dst[1] = av_clip_uint8(cg);   /* Cg */
    dst[2] = 0;
    dst[3] = av_clip_uint8(luma); /* Y */
}
591 
/**
 * Compress one 4x4 block of RGBA pixels as DXT1.
 * Only the colour part is encoded, so alpha is not preserved.
 *
 * @param dst    output buffer, 8 bytes are written.
 * @param stride scanline size in bytes.
 * @param block  top-left pixel of the block to compress.
 * @return number of bytes of texture data written (always 8).
 */
static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    enum { DXT1_BLOCK_BYTES = 8 };

    compress_color(dst, stride, block);

    return DXT1_BLOCK_BYTES;
}
607 
/**
 * Compress one 4x4 block of RGBA pixels as DXT5.
 * The alpha part is stored in the first 8 bytes and the colour part in the
 * following 8 bytes, so alpha is preserved.
 *
 * @param dst    output buffer, 16 bytes are written.
 * @param stride scanline size in bytes.
 * @param block  top-left pixel of the block to compress.
 * @return number of bytes of texture data written (always 16).
 */
static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    enum { ALPHA_BYTES = 8, COLOR_BYTES = 8 };

    compress_alpha(dst, stride, block);
    compress_color(dst + ALPHA_BYTES, stride, block);

    return ALPHA_BYTES + COLOR_BYTES;
}
624 
/**
 * Compress one 4x4 block of RGBA pixels as DXT5-YCoCg.
 * Every pixel is first converted with rgba2ycocg() into a tightly packed
 * temporary block, which then goes through the regular DXT5 alpha and colour
 * compression. The input alpha is not preserved.
 *
 * @param dst    output buffer, 16 bytes are written.
 * @param stride scanline size in bytes.
 * @param block  top-left pixel of the block to compress.
 * @return number of bytes of texture data written (always 16).
 */
static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
{
    /* 4x4 pixels, 4 bytes each, rows 16 bytes apart */
    uint8_t ycocg[4 * 4 * 4];
    int i;

    /* Reorder the components, walking the pixels in row-major order
     * (column i & 3, row i >> 2). */
    for (i = 0; i < 16; i++)
        rgba2ycocg(ycocg + i * 4, block + (i & 3) * 4 + (i >> 2) * stride);

    /* Then run a normal DXT5 compression on the converted block */
    compress_alpha(dst + 0, 16, ycocg);
    compress_color(dst + 8, 16, ycocg);

    return 16;
}
649 
651 {
652  c->dxt1_block = dxt1_block;
653  c->dxt5_block = dxt5_block;
654  c->dxt5ys_block = dxt5ys_block;
655 }
656 
/* Configure and include the shared template: TEXTUREDSP_FUNC_NAME is set to
 * ff_texturedsp_exec_compress_threads and TEXTUREDSP_TEX_FUNC(a, b, c) expands
 * to tex_funct(c, b, a), i.e. with its arguments in reverse order.
 * NOTE(review): presumably the template defines a function with that name
 * which runs the block compressors over a frame -- confirm against
 * texturedsp_template.c, which is not visible here. */
657 #define TEXTUREDSP_FUNC_NAME ff_texturedsp_exec_compress_threads
658 #define TEXTUREDSP_TEX_FUNC(a, b, c) tex_funct(c, b, a)
659 #include "texturedsp_template.c"
expand5
static const uint8_t expand5[32]
Definition: texturedspenc.c:35
r
const char * r
Definition: vf_curves.c:126
AV_WL32
#define AV_WL32(p, v)
Definition: intreadwrite.h:424
lerp13
#define lerp13(a, b)
Definition: texturedspenc.c:150
expand6
static const uint8_t expand6[64]
Definition: texturedspenc.c:41
out
FILE * out
Definition: movenc.c:54
color
Definition: vf_paletteuse.c:511
av_clip_uintp2
#define av_clip_uintp2
Definition: common.h:122
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
TextureDSPEncContext
Definition: texturedsp.h:63
match5
static const uint8_t match5[256][2]
Definition: texturedspenc.c:50
b
#define b
Definition: input.c:41
compress_color
static void compress_color(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
Definition: texturedspenc.c:453
max
#define max(a, b)
Definition: cuda_runtime.h:33
dxt5_block
static int dxt5_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
Compress one block of RGBA pixels in a DXT5 texture and store the resulting bytes in 'dst'.
Definition: texturedspenc.c:617
c1
static const uint64_t c1
Definition: murmur3.c:52
rgb5652rgb
static void rgb5652rgb(uint8_t *out, uint16_t v)
Definition: texturedspenc.c:161
texturedsp.h
constant_color
static int constant_color(const uint8_t *block, ptrdiff_t stride)
Definition: texturedspenc.c:440
val
static double val(void *priv, double ch)
Definition: aeval.c:78
lerp13rgb
static void lerp13rgb(uint8_t *out, uint8_t *p1, uint8_t *p2)
Definition: texturedspenc.c:153
first
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
Definition: rate_distortion.txt:12
av_cold
#define av_cold
Definition: attributes.h:90
ff_texturedspenc_init
av_cold void ff_texturedspenc_init(TextureDSPEncContext *c)
Definition: texturedspenc.c:650
mask
static const uint16_t mask[17]
Definition: lzw.c:38
float
float
Definition: af_crystalizer.c:121
intreadwrite.h
refine_colors
static int refine_colors(const uint8_t *block, ptrdiff_t stride, uint16_t *pmax16, uint16_t *pmin16, uint32_t mask)
Definition: texturedspenc.c:350
g
const char * g
Definition: vf_curves.c:127
bits
uint8_t bits
Definition: vp3data.h:128
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
bias
static int bias(int x, int c)
Definition: vqcdec.c:114
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
rgba2ycocg
static void rgba2ycocg(uint8_t *dst, const uint8_t *pixel)
Convert a RGBA buffer to unscaled YCoCg.
Definition: texturedspenc.c:579
dxt1_block
static int dxt1_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
Compress one block of RGBA pixels in a DXT1 texture and store the resulting bytes in 'dst'.
Definition: texturedspenc.c:601
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
f
f
Definition: af_crystalizer.c:121
compress_alpha
static void compress_alpha(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
Definition: texturedspenc.c:499
AV_WL16
#define AV_WL16(p, v)
Definition: intreadwrite.h:410
attributes.h
rgb2rgb565
#define rgb2rgb565(r, g, b)
Definition: texturedspenc.c:146
match6
static const uint8_t match6[256][2]
Definition: texturedspenc.c:96
common.h
fb
#define fb(width, name)
Definition: cbs_av1.c:585
dxt5ys_block
static int dxt5ys_block(uint8_t *dst, ptrdiff_t stride, const uint8_t *block)
Compress one block of RGBA pixels in a DXT5-YCoCg texture and store the resulting bytes in 'dst'.
Definition: texturedspenc.c:634
texturedsp_template.c
stride
#define stride
Definition: h264pred_template.c:537
FFSWAP
#define FFSWAP(type, a, b)
Definition: macros.h:52
AV_RL32
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_RL32
Definition: bytestream.h:92
U
#define U(x)
Definition: vpx_arith.h:37
match_colors
static unsigned int match_colors(const uint8_t *block, ptrdiff_t stride, uint16_t c0, uint16_t c1)
Definition: texturedspenc.c:174
cm
#define cm
Definition: dvbsubdec.c:39
av_clip_uint8
#define av_clip_uint8
Definition: common.h:104
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
optimize_colors
static void optimize_colors(const uint8_t *block, ptrdiff_t stride, uint16_t *pmax16, uint16_t *pmin16)
Definition: texturedspenc.c:237
int
int
Definition: ffmpeg_filter.c:410
min
float min
Definition: vorbis_enc_data.h:429