FFmpeg
simple_idct_alpha.c
Go to the documentation of this file.
1 /*
2  * Simple IDCT (Alpha optimized)
3  *
4  * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * based upon some outcommented C code from mpeg2dec (idct_mmx.c
7  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
8  *
9  * Alpha optimizations by Måns Rullgård <mans@mansr.com>
10  * and Falk Hueffner <falk@debian.org>
11  *
12  * This file is part of FFmpeg.
13  *
14  * FFmpeg is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * FFmpeg is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with FFmpeg; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27  */
28 
29 #include "idctdsp_alpha.h"
30 #include "asm.h"
31 
32 // cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
33 // W4 is actually exactly 16384, but using 16383 works around
34 // accumulating rounding errors for some encoders
35 #define W1 22725
36 #define W2 21407
37 #define W3 19266
38 #define W4 16383
39 #define W5 12873
40 #define W6 8867
41 #define W7 4520
42 #define ROW_SHIFT 11
43 #define COL_SHIFT 20
44 
45 /* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */
46 static inline int idct_row(int16_t *row)
47 {
48  int a0, a1, a2, a3, b0, b1, b2, b3, t;
49  uint64_t l, r, t2;
50  l = ldq(row);
51  r = ldq(row + 4);
52 
53  if (l == 0 && r == 0)
54  return 0;
55 
56  a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
57 
58  if (((l & ~0xffffUL) | r) == 0) {
59  a0 >>= ROW_SHIFT;
60  t2 = (uint16_t) a0;
61  t2 |= t2 << 16;
62  t2 |= t2 << 32;
63 
64  stq(t2, row);
65  stq(t2, row + 4);
66  return 1;
67  }
68 
69  a1 = a0;
70  a2 = a0;
71  a3 = a0;
72 
73  t = extwl(l, 4); /* row[2] */
74  if (t != 0) {
75  t = sextw(t);
76  a0 += W2 * t;
77  a1 += W6 * t;
78  a2 -= W6 * t;
79  a3 -= W2 * t;
80  }
81 
82  t = extwl(r, 0); /* row[4] */
83  if (t != 0) {
84  t = sextw(t);
85  a0 += W4 * t;
86  a1 -= W4 * t;
87  a2 -= W4 * t;
88  a3 += W4 * t;
89  }
90 
91  t = extwl(r, 4); /* row[6] */
92  if (t != 0) {
93  t = sextw(t);
94  a0 += W6 * t;
95  a1 -= W2 * t;
96  a2 += W2 * t;
97  a3 -= W6 * t;
98  }
99 
100  t = extwl(l, 2); /* row[1] */
101  if (t != 0) {
102  t = sextw(t);
103  b0 = W1 * t;
104  b1 = W3 * t;
105  b2 = W5 * t;
106  b3 = W7 * t;
107  } else {
108  b0 = 0;
109  b1 = 0;
110  b2 = 0;
111  b3 = 0;
112  }
113 
114  t = extwl(l, 6); /* row[3] */
115  if (t) {
116  t = sextw(t);
117  b0 += W3 * t;
118  b1 -= W7 * t;
119  b2 -= W1 * t;
120  b3 -= W5 * t;
121  }
122 
123 
124  t = extwl(r, 2); /* row[5] */
125  if (t) {
126  t = sextw(t);
127  b0 += W5 * t;
128  b1 -= W1 * t;
129  b2 += W7 * t;
130  b3 += W3 * t;
131  }
132 
133  t = extwl(r, 6); /* row[7] */
134  if (t) {
135  t = sextw(t);
136  b0 += W7 * t;
137  b1 -= W5 * t;
138  b2 += W3 * t;
139  b3 -= W1 * t;
140  }
141 
142  row[0] = (a0 + b0) >> ROW_SHIFT;
143  row[1] = (a1 + b1) >> ROW_SHIFT;
144  row[2] = (a2 + b2) >> ROW_SHIFT;
145  row[3] = (a3 + b3) >> ROW_SHIFT;
146  row[4] = (a3 - b3) >> ROW_SHIFT;
147  row[5] = (a2 - b2) >> ROW_SHIFT;
148  row[6] = (a1 - b1) >> ROW_SHIFT;
149  row[7] = (a0 - b0) >> ROW_SHIFT;
150 
151  return 2;
152 }
153 
154 static inline void idct_col(int16_t *col)
155 {
156  int a0, a1, a2, a3, b0, b1, b2, b3;
157 
158  col[0] += (1 << (COL_SHIFT - 1)) / W4;
159 
160  a0 = W4 * col[8 * 0];
161  a1 = W4 * col[8 * 0];
162  a2 = W4 * col[8 * 0];
163  a3 = W4 * col[8 * 0];
164 
165  if (col[8 * 2]) {
166  a0 += W2 * col[8 * 2];
167  a1 += W6 * col[8 * 2];
168  a2 -= W6 * col[8 * 2];
169  a3 -= W2 * col[8 * 2];
170  }
171 
172  if (col[8 * 4]) {
173  a0 += W4 * col[8 * 4];
174  a1 -= W4 * col[8 * 4];
175  a2 -= W4 * col[8 * 4];
176  a3 += W4 * col[8 * 4];
177  }
178 
179  if (col[8 * 6]) {
180  a0 += W6 * col[8 * 6];
181  a1 -= W2 * col[8 * 6];
182  a2 += W2 * col[8 * 6];
183  a3 -= W6 * col[8 * 6];
184  }
185 
186  if (col[8 * 1]) {
187  b0 = W1 * col[8 * 1];
188  b1 = W3 * col[8 * 1];
189  b2 = W5 * col[8 * 1];
190  b3 = W7 * col[8 * 1];
191  } else {
192  b0 = 0;
193  b1 = 0;
194  b2 = 0;
195  b3 = 0;
196  }
197 
198  if (col[8 * 3]) {
199  b0 += W3 * col[8 * 3];
200  b1 -= W7 * col[8 * 3];
201  b2 -= W1 * col[8 * 3];
202  b3 -= W5 * col[8 * 3];
203  }
204 
205  if (col[8 * 5]) {
206  b0 += W5 * col[8 * 5];
207  b1 -= W1 * col[8 * 5];
208  b2 += W7 * col[8 * 5];
209  b3 += W3 * col[8 * 5];
210  }
211 
212  if (col[8 * 7]) {
213  b0 += W7 * col[8 * 7];
214  b1 -= W5 * col[8 * 7];
215  b2 += W3 * col[8 * 7];
216  b3 -= W1 * col[8 * 7];
217  }
218 
219  col[8 * 0] = (a0 + b0) >> COL_SHIFT;
220  col[8 * 7] = (a0 - b0) >> COL_SHIFT;
221  col[8 * 1] = (a1 + b1) >> COL_SHIFT;
222  col[8 * 6] = (a1 - b1) >> COL_SHIFT;
223  col[8 * 2] = (a2 + b2) >> COL_SHIFT;
224  col[8 * 5] = (a2 - b2) >> COL_SHIFT;
225  col[8 * 3] = (a3 + b3) >> COL_SHIFT;
226  col[8 * 4] = (a3 - b3) >> COL_SHIFT;
227 }
228 
229 /* If all rows but the first one are zero after row transformation,
230  all rows will be identical after column transformation. */
231 static inline void idct_col2(int16_t *col)
232 {
233  int i;
234  uint64_t l, r;
235 
236  for (i = 0; i < 8; ++i) {
237  int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
238 
239  a0 *= W4;
240  col[i] = a0 >> COL_SHIFT;
241  }
242 
243  l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
244  stq(l, col + 2 * 4); stq(r, col + 3 * 4);
245  stq(l, col + 4 * 4); stq(r, col + 5 * 4);
246  stq(l, col + 6 * 4); stq(r, col + 7 * 4);
247  stq(l, col + 8 * 4); stq(r, col + 9 * 4);
248  stq(l, col + 10 * 4); stq(r, col + 11 * 4);
249  stq(l, col + 12 * 4); stq(r, col + 13 * 4);
250  stq(l, col + 14 * 4); stq(r, col + 15 * 4);
251 }
252 
/**
 * In-place 2-D inverse DCT of an 8x8 block of 16-bit coefficients.
 *
 * The row pass reports how sparse each row was; that information selects
 * the cheapest column pass that still produces the full result.
 *
 * @param block  the 64 coefficients, row-major with a stride of 8;
 *               overwritten with the transformed values
 */
void ff_simple_idct_axp(int16_t *block)
{
    int only_first_row = 1; /* all rows except row 0 were zero */
    int flat_rows      = 1; /* every row consists of a constant value */
    int n;

    for (n = 0; n < 8; n++) {
        const int kind = idct_row(block + 8 * n);

        if (n > 0 && kind > 0)
            only_first_row = 0;
        if (kind == 2)
            flat_rows = 0;
    }

    if (only_first_row) {
        /* All output rows are identical. */
        idct_col2(block);
        return;
    }

    if (!flat_rows) {
        /* General case: transform each of the eight columns. */
        for (n = 0; n < 8; n++)
            idct_col(block + n);
        return;
    }

    /* Every row is constant, so all columns are equal: transform column 0
     * only, then spread each row's first value across that row. */
    idct_col(block);
    for (n = 0; n < 8; n++) {
        int16_t *line = block + 8 * n;
        uint64_t fill = (uint16_t) line[0];

        fill |= fill << 16;
        fill |= fill << 32;
        stq(fill, line);
        stq(fill, line + 4);
    }
}
292 
/**
 * Inverse-transform an 8x8 coefficient block and store the result to dest.
 *
 * The block is transformed in place by ff_simple_idct_axp() and then handed
 * to put_pixels_clamped_axp_p together with the destination.
 *
 * @param dest       destination pixel buffer
 * @param line_size  passed through to put_pixels_clamped_axp_p as the
 *                   destination line size
 * @param block      the 64 coefficients; modified by the transform
 */
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* The transform must run first: without it the raw coefficients, not
     * the reconstructed samples, would be written to dest. */
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
298 
/**
 * Inverse-transform an 8x8 coefficient block and add the result to dest.
 *
 * The block is transformed in place by ff_simple_idct_axp() and then handed
 * to add_pixels_clamped_axp_p together with the destination.
 *
 * @param dest       destination pixel buffer that the result is added to
 * @param line_size  passed through to add_pixels_clamped_axp_p as the
 *                   destination line size
 * @param block      the 64 coefficients; modified by the transform
 */
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* The transform must run first: without it the raw coefficients, not
     * the reconstructed residual, would be added to dest. */
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}
idct_col2
static void idct_col2(int16_t *col)
Definition: simple_idct_alpha.c:231
r
const char * r
Definition: vf_curves.c:127
W5
#define W5
Definition: simple_idct_alpha.c:39
w
uint8_t w
Definition: llviddspenc.c:38
W1
#define W1
Definition: simple_idct_alpha.c:35
ldq
#define ldq(p)
Definition: asm.h:59
W6
#define W6
Definition: simple_idct_alpha.c:40
asm.h
sextw
#define sextw(x)
Definition: asm.h:56
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:2035
ff_simple_idct_put_axp
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: simple_idct_alpha.c:293
W7
#define W7
Definition: simple_idct_alpha.c:41
a1
#define a1
Definition: regdef.h:47
ff_simple_idct_add_axp
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: simple_idct_alpha.c:299
extwl
#define extwl(a, b)
Definition: asm.h:109
ff_simple_idct_axp
void ff_simple_idct_axp(int16_t *block)
Definition: simple_idct_alpha.c:253
W2
#define W2
Definition: simple_idct_alpha.c:36
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:2037
ROW_SHIFT
#define ROW_SHIFT
Definition: simple_idct_alpha.c:42
W4
#define W4
Definition: simple_idct_alpha.c:38
W3
#define W3
Definition: simple_idct_alpha.c:37
idct_row
static int idct_row(int16_t *row)
Definition: simple_idct_alpha.c:46
b2
static double b2(void *priv, double x, double y)
Definition: vf_xfade.c:2036
a0
#define a0
Definition: regdef.h:46
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
add_pixels_clamped_axp_p
void(* add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:34
a2
#define a2
Definition: regdef.h:48
idct_col
static void idct_col(int16_t *col)
Definition: simple_idct_alpha.c:154
stq
#define stq(l, p)
Definition: asm.h:69
t2
#define t2
Definition: regdef.h:30
COL_SHIFT
#define COL_SHIFT
Definition: simple_idct_alpha.c:43
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
b0
static double b0(void *priv, double x, double y)
Definition: vf_xfade.c:2034
put_pixels_clamped_axp_p
void(* put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:32
a3
#define a3
Definition: regdef.h:49
idctdsp_alpha.h