FFmpeg
hevcdsp_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Reimar Döffinger
3  * Copyright (c) 2023 xu fulong <839789740@qq.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <stdint.h>
23 
24 #include "libavutil/attributes.h"
25 #include "libavutil/cpu.h"
26 #include "libavutil/aarch64/cpu.h"
28 #include "libavcodec/hevc/dsp.h"
29 
/*
 * Prototypes for the NEON routines that ff_hevc_dsp_init_aarch64() installs.
 * They are only declared here; the definitions live outside this file.
 * The number embedded in each name (8, 10 or 12) is the bit depth variant.
 */

/* Chroma loop filters: vertical (v) and horizontal (h) variants. */
void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
                                         const int *_tc, const uint8_t *_no_p,
                                         const uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p,
                                          const uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p,
                                          const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
                                         const int *_tc, const uint8_t *_no_p,
                                         const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p,
                                          const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p,
                                          const uint8_t *_no_q);

/* Luma loop filters: same layout as chroma plus a beta argument. */
void ff_hevc_v_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
                                       int beta, const int *_tc,
                                       const uint8_t *_no_p,
                                       const uint8_t *_no_q);
void ff_hevc_v_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
                                        int beta, const int *_tc,
                                        const uint8_t *_no_p,
                                        const uint8_t *_no_q);
void ff_hevc_v_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
                                        int beta, const int *_tc,
                                        const uint8_t *_no_p,
                                        const uint8_t *_no_q);
void ff_hevc_h_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
                                       int beta, const int *_tc,
                                       const uint8_t *_no_p,
                                       const uint8_t *_no_q);
void ff_hevc_h_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
                                        int beta, const int *_tc,
                                        const uint8_t *_no_p,
                                        const uint8_t *_no_q);
void ff_hevc_h_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
                                        int beta, const int *_tc,
                                        const uint8_t *_no_p,
                                        const uint8_t *_no_q);

/* Residual addition for 4x4, 8x8, 16x16 and 32x32 blocks. */
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                     ptrdiff_t stride);
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                     ptrdiff_t stride);
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                       ptrdiff_t stride);
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                       ptrdiff_t stride);
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);

/* Inverse transforms; only 8- and 10-bit variants are declared here. */
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit);

/* DC-only inverse transforms for 8-, 10- and 12-bit. */
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_4x4_dc_12_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_12_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_12_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_12_neon(int16_t *coeffs);

/* 4x4 luma transform (installed as transform_4x4_luma for 8-bit). */
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
99 
/*
 * Assign ff_hevc_put_hevc_<fn><W>_8_neon<ext> to member[idx][v][h] for every
 * idx in 1..9, where W is 4, 6, 8, 12, 16, 24, 32, 48, 64 respectively.
 * Index 0 is not written.
 *
 * member: table expression, e.g. c->put_hevc_epel
 * v, h:   second and third table subscripts (presumably the vertical and
 *         horizontal filter selectors -- confirm against HEVCDSPContext)
 * fn:     name fragment pasted before the width
 * ext:    optional name suffix (may be empty), e.g. _i8mm
 *
 * Wrapped in do { } while (0) so the expansion is one statement and stays
 * fully guarded under an unbraced if/else. Invoke with a trailing ';'.
 */
#define NEON8_FNASSIGN(member, v, h, fn, ext)                              \
    do {                                                                   \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;          \
        (member)[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;          \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;          \
        (member)[4][v][h] = ff_hevc_put_hevc_##fn##12_8_neon##ext;         \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext;         \
        (member)[6][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext;         \
        (member)[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext;         \
        (member)[8][v][h] = ff_hevc_put_hevc_##fn##48_8_neon##ext;         \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext;         \
    } while (0)
110 
/*
 * Like a full width table fill, except that indices 7, 8 and 9 all receive
 * the single ff_hevc_put_hevc_<fn>32_8_neon<ext> routine. Indices 1..6 get
 * the 4, 6, 8, 12, 16 and 24 variants; index 0 is not written.
 *
 * member: table expression, e.g. c->put_hevc_epel
 * v, h:   second and third table subscripts
 * fn:     name fragment pasted before the width
 * ext:    optional name suffix (may be empty)
 *
 * Wrapped in do { } while (0) so the expansion is one statement and stays
 * fully guarded under an unbraced if/else. Invoke with a trailing ';'.
 */
#define NEON8_FNASSIGN_SHARED_32(member, v, h, fn, ext)                    \
    do {                                                                   \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;          \
        (member)[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;          \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;          \
        (member)[4][v][h] = ff_hevc_put_hevc_##fn##12_8_neon##ext;         \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext;         \
        (member)[6][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext;         \
        (member)[7][v][h] =                                                \
        (member)[8][v][h] =                                                \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext;         \
    } while (0)
121 
/*
 * Partial table fill using four distinct routines: indices 1, 3 and 5 get
 * the 4, 8 and 16 variants of ff_hevc_put_hevc_<fn><W>_8_neon<ext>, and
 * indices 7, 8 and 9 all get the 64 variant. Indices 0, 2, 4 and 6 are left
 * untouched, so whatever was stored there before remains in effect.
 *
 * member: table expression, e.g. c->put_hevc_qpel_uni_w
 * v, h:   second and third table subscripts
 * fn:     name fragment pasted before the width
 * ext:    optional name suffix (may be empty)
 *
 * Wrapped in do { } while (0) so the expansion is one statement and stays
 * fully guarded under an unbraced if/else. Invoke with a trailing ';'.
 */
#define NEON8_FNASSIGN_PARTIAL_4(member, v, h, fn, ext)                    \
    do {                                                                   \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;          \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;          \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext;         \
        (member)[7][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext;         \
        (member)[8][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext;         \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext;         \
    } while (0)
129 
/*
 * Partial table fill using five routines: indices 1, 3, 5, 7 and 9 get the
 * 4, 8, 16, 32 and 64 variants of ff_hevc_put_hevc_<fn><W>_8_neon<ext>.
 * Indices 0, 2, 4, 6 and 8 are left untouched, so whatever was stored there
 * before remains in effect.
 *
 * member: table expression, e.g. c->put_hevc_qpel_uni_w
 * v, h:   second and third table subscripts
 * fn:     name fragment pasted before the width
 * ext:    optional name suffix (may be empty), e.g. _i8mm
 *
 * Wrapped in do { } while (0) so the expansion is one statement and stays
 * fully guarded under an unbraced if/else. Invoke with a trailing ';'.
 */
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext)                    \
    do {                                                                   \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;          \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;          \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext;         \
        (member)[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext;         \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext;         \
    } while (0)
136 
138 {
139  int cpu_flags = av_get_cpu_flags();
140  if (!have_neon(cpu_flags)) return;
141 
142  if (bit_depth == 8) {
143  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_neon;
144  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_neon;
145  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_neon;
146  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_neon;
147  c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
148  c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
149  c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
150  c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
151  c->idct[0] = ff_hevc_idct_4x4_8_neon;
152  c->idct[1] = ff_hevc_idct_8x8_8_neon;
153  c->idct[2] = ff_hevc_idct_16x16_8_neon;
154  c->idct[3] = ff_hevc_idct_32x32_8_neon;
155  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
156  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
157  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
158  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
159  c->transform_4x4_luma = ff_hevc_transform_luma_4x4_neon_8;
160  c->sao_band_filter[0] =
161  c->sao_band_filter[1] =
162  c->sao_band_filter[2] =
163  c->sao_band_filter[3] =
164  c->sao_band_filter[4] = ff_h26x_sao_band_filter_8x8_8_neon;
165  c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon;
166  c->sao_edge_filter[1] =
167  c->sao_edge_filter[2] =
168  c->sao_edge_filter[3] =
169  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon;
170  c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_neon;
171  c->put_hevc_qpel[2][0][1] = ff_hevc_put_hevc_qpel_h6_8_neon;
172  c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_neon;
173  c->put_hevc_qpel[4][0][1] =
174  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h12_8_neon;
175  c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_neon;
176  c->put_hevc_qpel[7][0][1] =
177  c->put_hevc_qpel[8][0][1] =
178  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h32_8_neon;
179  c->put_hevc_qpel_uni[1][0][1] = ff_hevc_put_hevc_qpel_uni_h4_8_neon;
180  c->put_hevc_qpel_uni[2][0][1] = ff_hevc_put_hevc_qpel_uni_h6_8_neon;
181  c->put_hevc_qpel_uni[3][0][1] = ff_hevc_put_hevc_qpel_uni_h8_8_neon;
182  c->put_hevc_qpel_uni[4][0][1] =
183  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_qpel_uni_h12_8_neon;
184  c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_qpel_uni_h16_8_neon;
185  c->put_hevc_qpel_uni[7][0][1] =
186  c->put_hevc_qpel_uni[8][0][1] =
187  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_qpel_uni_h32_8_neon;
188  c->put_hevc_qpel_bi[1][0][1] = ff_hevc_put_hevc_qpel_bi_h4_8_neon;
189  c->put_hevc_qpel_bi[2][0][1] = ff_hevc_put_hevc_qpel_bi_h6_8_neon;
190  c->put_hevc_qpel_bi[3][0][1] = ff_hevc_put_hevc_qpel_bi_h8_8_neon;
191  c->put_hevc_qpel_bi[4][0][1] =
192  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_qpel_bi_h12_8_neon;
193  c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_neon;
194  c->put_hevc_qpel_bi[7][0][1] =
195  c->put_hevc_qpel_bi[8][0][1] =
196  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h32_8_neon;
197 
198  NEON8_FNASSIGN(c->put_hevc_epel, 0, 0, pel_pixels,);
199  NEON8_FNASSIGN(c->put_hevc_epel, 1, 0, epel_v,);
200  NEON8_FNASSIGN(c->put_hevc_qpel, 0, 0, pel_pixels,);
201  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 0, qpel_v,);
202  NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 0, pel_bi_pixels,);
203  NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 1, epel_bi_h,);
204  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 0, epel_bi_v,);
205  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 0, 0, pel_bi_pixels,);
206  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 0, qpel_bi_v,);
207  NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,);
208  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,);
209  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,);
210  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 0, qpel_uni_v,);
211  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 0, pel_uni_w_pixels,);
212  NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 0, pel_uni_w_pixels,);
213  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 0, epel_uni_w_v,);
214  NEON8_FNASSIGN_PARTIAL_4(c->put_hevc_qpel_uni_w, 1, 0, qpel_uni_w_v,);
215 
216  NEON8_FNASSIGN_SHARED_32(c->put_hevc_epel, 0, 1, epel_h,);
217  NEON8_FNASSIGN_SHARED_32(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h,);
218 
219  NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv,);
220  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv,);
221  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv,);
222  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 1, epel_bi_hv,);
223 
224  NEON8_FNASSIGN_SHARED_32(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h,);
225 
226  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 1, qpel_hv,);
227  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 1, qpel_uni_hv,);
228  NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, qpel_uni_w_hv,);
229  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 1, qpel_bi_hv,);
230 
231  if (have_i8mm(cpu_flags)) {
232  NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
233  NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv, _i8mm);
234  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv, _i8mm);
235  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h ,_i8mm);
236  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv, _i8mm);
237  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 1, epel_bi_hv, _i8mm);
238  NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);
239  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 1, qpel_hv, _i8mm);
240  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 1, qpel_uni_hv, _i8mm);
241  NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h, _i8mm);
242  NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, qpel_uni_w_hv, _i8mm);
243  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 1, qpel_bi_hv, _i8mm);
244  }
245 
246  }
247  if (bit_depth == 10) {
248  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_neon;
249  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_neon;
250  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_neon;
251  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_neon;
252  c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
253  c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon;
254  c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon;
255  c->add_residual[3] = ff_hevc_add_residual_32x32_10_neon;
256  c->idct[0] = ff_hevc_idct_4x4_10_neon;
257  c->idct[1] = ff_hevc_idct_8x8_10_neon;
258  c->idct[2] = ff_hevc_idct_16x16_10_neon;
259  c->idct[3] = ff_hevc_idct_32x32_10_neon;
260  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
261  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
262  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
263  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
264  }
265  if (bit_depth == 12) {
266  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_neon;
267  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_neon;
268  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_neon;
269  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_neon;
270  c->add_residual[0] = ff_hevc_add_residual_4x4_12_neon;
271  c->add_residual[1] = ff_hevc_add_residual_8x8_12_neon;
272  c->add_residual[2] = ff_hevc_add_residual_16x16_12_neon;
273  c->add_residual[3] = ff_hevc_add_residual_32x32_12_neon;
274  c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_neon;
275  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_neon;
276  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_neon;
277  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_neon;
278  }
279 }
_dst
uint8_t * _dst
Definition: dsp.h:52
ff_hevc_v_loop_filter_chroma_10_neon
void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
NEON8_FNASSIGN
#define NEON8_FNASSIGN(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:100
ff_hevc_idct_4x4_dc_8_neon
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs)
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
ff_hevc_transform_luma_4x4_neon_8
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs)
ff_hevc_idct_4x4_dc_12_neon
void ff_hevc_idct_4x4_dc_12_neon(int16_t *coeffs)
ff_hevc_idct_16x16_dc_12_neon
void ff_hevc_idct_16x16_dc_12_neon(int16_t *coeffs)
ff_hevc_h_loop_filter_luma_10_neon
void ff_hevc_h_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
_stride
ptrdiff_t _stride
Definition: h264pred_template.c:411
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
ff_hevc_idct_16x16_8_neon
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
ff_hevc_v_loop_filter_luma_8_neon
void ff_hevc_v_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_hevc_add_residual_16x16_10_neon
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_32x32_8_neon
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_8x8_dc_12_neon
void ff_hevc_idct_8x8_dc_12_neon(int16_t *coeffs)
ff_hevc_add_residual_4x4_10_neon
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_8x8_8_neon
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit)
dsp.h
ff_hevc_idct_16x16_dc_10_neon
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs)
ff_hevc_h_loop_filter_luma_8_neon
void ff_hevc_h_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
av_cold
#define av_cold
Definition: attributes.h:90
ff_hevc_h_loop_filter_luma_12_neon
void ff_hevc_h_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_32x32_10_neon
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_32x32_12_neon
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_luma_10_neon
void ff_hevc_v_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_32x32_dc_12_neon
void ff_hevc_idct_32x32_dc_12_neon(int16_t *coeffs)
NEON8_FNASSIGN_PARTIAL_5
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:130
ff_hevc_idct_16x16_dc_8_neon
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_32x32_dc_10_neon
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
have_i8mm
#define have_i8mm(flags)
Definition: cpu.h:29
dsp.h
ff_hevc_add_residual_8x8_8_neon
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_chroma_12_neon
void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
cpu.h
ff_hevc_add_residual_16x16_12_neon
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_4x4_10_neon
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_8x8_12_neon
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_4x4_dc_10_neon
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
ff_h26x_sao_band_filter_8x8_8_neon
void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
ff_hevc_h_loop_filter_chroma_12_neon
void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_8x8_10_neon
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit)
HEVCDSPContext
Definition: dsp.h:47
attributes.h
ff_hevc_idct_4x4_8_neon
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_32x32_dc_8_neon
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_8x8_dc_8_neon
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_8x8_dc_10_neon
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs)
ff_hevc_dsp_init_aarch64
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init_aarch64.c:137
stride
#define stride
Definition: h264pred_template.c:536
NEON8_FNASSIGN_SHARED_32
#define NEON8_FNASSIGN_SHARED_32(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:111
ff_hevc_add_residual_32x32_8_neon
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_luma_12_neon
void ff_hevc_v_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_v_loop_filter_chroma_8_neon
void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_16x16_10_neon
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_h_loop_filter_chroma_10_neon
void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_add_residual_4x4_8_neon
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
cpu.h
ff_hevc_add_residual_32x32_10_neon
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_add_residual_8x8_10_neon
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_h_loop_filter_chroma_8_neon
void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_add_residual_16x16_8_neon
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
NEON8_FNASSIGN_PARTIAL_4
#define NEON8_FNASSIGN_PARTIAL_4(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:122
ff_hevc_add_residual_4x4_12_neon
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)