FFmpeg
swscale_unscaled.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 #include "libswscale/swscale.h"
22 #include "libavutil/aarch64/cpu.h"
23 
/*
 * Comma-terminated list of the four chroma conversion coefficients, in the
 * order v2r, u2g, v2g, u2b.
 *
 * The values are read through an identifier named `c` that must be in scope
 * where the macro is expanded (the wrappers in this file expand it with their
 * SwsContext *c parameter). The list deliberately ends with a comma; it is
 * meant to be used as the whole content of an array initializer:
 *     const int16_t table[] = { YUV_TO_RGB_TABLE };
 *
 * The definition ends on the last coefficient line: there is no trailing line
 * continuation, so the line that follows the macro is never absorbed into it.
 */
#define YUV_TO_RGB_TABLE \
    c->yuv2rgb_v2r_coeff, \
    c->yuv2rgb_u2g_coeff, \
    c->yuv2rgb_v2g_coeff, \
    c->yuv2rgb_u2b_coeff,

/*
 * DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt)
 *
 * For an input format with three source planes (src[0..2]) and an output
 * format written to a single destination plane, this emits:
 *   - the prototype of ff_<ifmt>_to_<ofmt>_neon(), which is defined outside
 *     this file;
 *   - a static <ifmt>_to_<ofmt>_neon_wrapper() with the argument list used by
 *     the other unscaled wrappers in this file, which forwards to it.
 *
 * The wrapper passes c->srcW and srcSliceH as width/height, the destination
 * pointer advanced by srcSliceY rows, the three source planes with their
 * strides, the coefficient table built from YUV_TO_RGB_TABLE,
 * c->yuv2rgb_y_offset >> 6 and c->yuv2rgb_y_coeff. It returns whatever the
 * forwarded routine returns.
 *
 * The macro ends at the wrapper's closing brace; no line continuation follows
 * it, so the next source line is not pulled into the expansion.
 */
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcU, int linesizeU, \
                                 const uint8_t *srcV, int linesizeV, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
                                           int srcStride[], int srcSliceY, int srcSliceH, \
                                           uint8_t *dst[], int dstStride[]) { \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        dst[0] + srcSliceY * dstStride[0], dstStride[0], \
                                        src[0], srcStride[0], \
                                        src[1], srcStride[1], \
                                        src[2], srcStride[2], \
                                        yuv2rgb_table, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff); \
}

/*
 * DECLARE_FF_YUVX_TO_GBRP_FUNCS(ifmt, ofmt)
 *
 * Variant of the three-source-plane declaration macro for an output format
 * written to three destination planes (dst[0..2]). It emits:
 *   - the prototype of ff_<ifmt>_to_<ofmt>_neon(), defined outside this file,
 *     which takes two additional destination pointer/stride pairs after
 *     y_coeff;
 *   - a static <ifmt>_to_<ofmt>_neon_wrapper() that forwards to it.
 *
 * Each of the three destination pointers is advanced by srcSliceY rows of its
 * own stride before being passed on. The remaining arguments are c->srcW,
 * srcSliceH, the three source planes with their strides, the table built from
 * YUV_TO_RGB_TABLE, c->yuv2rgb_y_offset >> 6 and c->yuv2rgb_y_coeff. The
 * wrapper returns the forwarded routine's result.
 *
 * The macro ends at the wrapper's closing brace; no line continuation follows
 * it, so the next source line is not pulled into the expansion.
 */
#define DECLARE_FF_YUVX_TO_GBRP_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcU, int linesizeU, \
                                 const uint8_t *srcV, int linesizeV, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff, \
                                 uint8_t *dst1, int linesize1, \
                                 uint8_t *dst2, int linesize2); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
                                           int srcStride[], int srcSliceY, int srcSliceH, \
                                           uint8_t *dst[], int dstStride[]) { \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        dst[0] + srcSliceY * dstStride[0], dstStride[0], \
                                        src[0], srcStride[0], \
                                        src[1], srcStride[1], \
                                        src[2], srcStride[2], \
                                        yuv2rgb_table, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff, \
                                        dst[1] + srcSliceY * dstStride[1], dstStride[1], \
                                        dst[2] + srcSliceY * dstStride[2], dstStride[2]); \
}

/*
 * DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)
 *
 * Instantiates the prototype/wrapper pair for one three-source-plane input
 * format against every output format handled here: argb, rgba, abgr and bgra
 * through DECLARE_FF_YUVX_TO_RGBX_FUNCS, and gbrp through
 * DECLARE_FF_YUVX_TO_GBRP_FUNCS.
 *
 * The definition stops on the gbrp line; there is no trailing line
 * continuation, so the following source line is not absorbed into the macro.
 */
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx) \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb) \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba) \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr) \
DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra) \
DECLARE_FF_YUVX_TO_GBRP_FUNCS(yuvx, gbrp)

93 
/*
 * DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt)
 *
 * For an input format with two source planes (src[0] passed as srcY, src[1]
 * passed as srcC) and an output format written to a single destination plane,
 * this emits:
 *   - the prototype of ff_<ifmt>_to_<ofmt>_neon(), which is defined outside
 *     this file;
 *   - a static <ifmt>_to_<ofmt>_neon_wrapper() that forwards to it.
 *
 * The wrapper passes c->srcW and srcSliceH as width/height, the destination
 * pointer advanced by srcSliceY rows, both source planes with their strides,
 * the coefficient table built from YUV_TO_RGB_TABLE,
 * c->yuv2rgb_y_offset >> 6 and c->yuv2rgb_y_coeff. It returns whatever the
 * forwarded routine returns.
 *
 * The macro ends at the wrapper's closing brace; no line continuation follows
 * it, so the next source line is not pulled into the expansion.
 */
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcC, int linesizeC, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
                                           int srcStride[], int srcSliceY, int srcSliceH, \
                                           uint8_t *dst[], int dstStride[]) { \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        dst[0] + srcSliceY * dstStride[0], dstStride[0], \
                                        src[0], srcStride[0], src[1], srcStride[1], \
                                        yuv2rgb_table, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff); \
}

/*
 * DECLARE_FF_NVX_TO_GBRP_FUNCS(ifmt, ofmt)
 *
 * Variant of the two-source-plane declaration macro for an output format
 * written to three destination planes (dst[0..2]). It emits:
 *   - the prototype of ff_<ifmt>_to_<ofmt>_neon(), defined outside this file,
 *     which takes two additional destination pointer/stride pairs after
 *     y_coeff;
 *   - a static <ifmt>_to_<ofmt>_neon_wrapper() that forwards to it.
 *
 * Each of the three destination pointers is advanced by srcSliceY rows of its
 * own stride before being passed on. The remaining arguments are c->srcW,
 * srcSliceH, src[0]/src[1] with their strides, the table built from
 * YUV_TO_RGB_TABLE, c->yuv2rgb_y_offset >> 6 and c->yuv2rgb_y_coeff. The
 * wrapper returns the forwarded routine's result.
 *
 * The macro ends at the wrapper's closing brace; no line continuation follows
 * it, so the next source line is not pulled into the expansion.
 */
#define DECLARE_FF_NVX_TO_GBRP_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcC, int linesizeC, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff, \
                                 uint8_t *dst1, int linesize1, \
                                 uint8_t *dst2, int linesize2); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsContext *c, const uint8_t *src[], \
                                           int srcStride[], int srcSliceY, int srcSliceH, \
                                           uint8_t *dst[], int dstStride[]) { \
    const int16_t yuv2rgb_table[] = { YUV_TO_RGB_TABLE }; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        dst[0] + srcSliceY * dstStride[0], dstStride[0], \
                                        src[0], srcStride[0], src[1], srcStride[1], \
                                        yuv2rgb_table, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff, \
                                        dst[1] + srcSliceY * dstStride[1], dstStride[1], \
                                        dst[2] + srcSliceY * dstStride[2], dstStride[2]); \
}

142 void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1,
143  uint8_t *dst2, int dstStride2,
144  const uint8_t *src, int srcStride,
145  int w, int h);
146 
147 static int nv24_to_yuv420p_neon_wrapper(SwsContext *c, const uint8_t *src[],
148  int srcStride[], int srcSliceY, int srcSliceH,
149  uint8_t *dst[], int dstStride[])
150 {
151  uint8_t *dst1 = dst[1] + dstStride[1] * srcSliceY / 2;
152  uint8_t *dst2 = dst[2] + dstStride[2] * srcSliceY / 2;
153 
154  ff_copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
155  dst[0], dstStride[0]);
156 
157  if (c->srcFormat == AV_PIX_FMT_NV24)
158  ff_nv24_to_yuv420p_chroma_neon(dst1, dstStride[1], dst2, dstStride[2],
159  src[1], srcStride[1], c->srcW / 2, srcSliceH);
160  else
161  ff_nv24_to_yuv420p_chroma_neon(dst2, dstStride[2], dst1, dstStride[1],
162  src[1], srcStride[1], c->srcW / 2, srcSliceH);
163 
164  return srcSliceH;
165 }
166 
/*
 * DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)
 *
 * Instantiates the prototype/wrapper pair for one two-source-plane input
 * format against every output format handled here: argb, rgba, abgr and bgra
 * through DECLARE_FF_NVX_TO_RGBX_FUNCS, and gbrp through
 * DECLARE_FF_NVX_TO_GBRP_FUNCS.
 *
 * The definition stops on the gbrp line; there is no trailing line
 * continuation, so the following source line is not absorbed into the macro.
 */
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx) \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb) \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba) \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr) \
DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra) \
DECLARE_FF_NVX_TO_GBRP_FUNCS(nvx, gbrp)

176 
/*
 * SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd)
 *
 * Installs <ifmt>_to_<ofmt>_neon_wrapper as c->convert_unscaled when all of
 * the following hold for the context `c` in scope at the expansion site:
 *   - c->srcFormat is AV_PIX_FMT_<IFMT> and c->dstFormat is AV_PIX_FMT_<OFMT>;
 *   - c->srcH is even;
 *   - c->srcW is a multiple of 16;
 *   - accurate_rnd evaluates to zero.
 * Otherwise c->convert_unscaled is left untouched.
 *
 * We need a 16 pixel width alignment. This constraint can easily be removed
 * for input reading, but for the output, which is 4 bytes per pixel (RGBA),
 * the assembly might be writing as much as 4*15=60 extra bytes at the end of
 * the line, which won't fit the 32-byte buffer alignment.
 *
 * accurate_rnd is parenthesized before negation so that an expression
 * argument (for example `a | b`) is negated as a whole.
 */
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do { \
    if (c->srcFormat == AV_PIX_FMT_##IFMT \
        && c->dstFormat == AV_PIX_FMT_##OFMT \
        && !(c->srcH & 1) \
        && !(c->srcW & 15) \
        && !(accurate_rnd)) { \
        c->convert_unscaled = ifmt##_to_##ofmt##_neon_wrapper; \
    } \
} while (0)
189 
/*
 * SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)
 *
 * Runs SET_FF_NVX_TO_RGBX_FUNC once per supported output format (ARGB, RGBA,
 * ABGR, BGRA, GBRP) for the given input format. `nvx` is the lower-case
 * spelling used in wrapper names, `NVX` the upper-case spelling used in the
 * AV_PIX_FMT_ constant; accurate_rnd is forwarded unchanged to every check.
 * Despite the "NVX" in the name, this file also expands it for yuv420p and
 * yuv422p.
 */
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd) do {    \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd);    \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd);    \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd);    \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd);    \
    SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, gbrp, GBRP, accurate_rnd);    \
} while (0)
197 
199  int accurate_rnd = c->flags & SWS_ACCURATE_RND;
200 
201  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
202  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
203  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
204  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
205 
206  if (c->dstFormat == AV_PIX_FMT_YUV420P &&
207  (c->srcFormat == AV_PIX_FMT_NV24 || c->srcFormat == AV_PIX_FMT_NV42) &&
208  !(c->srcH & 1) && !(c->srcW & 15) && !accurate_rnd)
209  c->convert_unscaled = nv24_to_yuv420p_neon_wrapper;
210 }
211 
213 {
214  int cpu_flags = av_get_cpu_flags();
215  if (have_neon(cpu_flags))
217 }
w
uint8_t w
Definition: llviddspenc.c:38
SET_FF_NVX_TO_ALL_RGBX_FUNC
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)
Definition: swscale_unscaled.c:190
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
nv24_to_yuv420p_neon_wrapper
static int nv24_to_yuv420p_neon_wrapper(SwsContext *c, const uint8_t *src[], int srcStride[], int srcSliceY, int srcSliceH, uint8_t *dst[], int dstStride[])
Definition: swscale_unscaled.c:147
ff_get_unscaled_swscale_aarch64
void ff_get_unscaled_swscale_aarch64(SwsContext *c)
Definition: swscale_unscaled.c:212
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)
Definition: swscale_unscaled.c:84
get_unscaled_swscale_neon
static void get_unscaled_swscale_neon(SwsContext *c)
Definition: swscale_unscaled.c:198
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space. Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
have_neon
#define have_neon(flags)
Definition: cpu.h:26
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:114
ff_copyPlane
void ff_copyPlane(const uint8_t *src, int srcStride, int srcSliceY, int srcSliceH, int width, uint8_t *dst, int dstStride)
Definition: swscale_unscaled.c:125
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:371
swscale_internal.h
ff_nv24_to_yuv420p_chroma_neon
void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1, uint8_t *dst2, int dstStride2, const uint8_t *src, int srcStride, int w, int h)
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:372
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)
Definition: swscale_unscaled.c:167
h
h
Definition: vp9dsp_template.c:2070
cpu.h
SwsContext
Definition: swscale_internal.h:299
src
#define src
Definition: vp8dsp.c:248
swscale.h