FFmpeg
hevcdsp_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Reimar Döffinger
3  * Copyright (c) 2023 xu fulong <839789740@qq.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <stdint.h>
23 
24 #include "libavutil/attributes.h"
25 #include "libavutil/avassert.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/aarch64/cpu.h"
29 #include "libavcodec/hevc/dsp.h"
30 
31 void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
32  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
33 void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
34  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
35 void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
36  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
37 void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
38  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
39 void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
40  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
41 void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
42  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
43 void ff_hevc_v_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int beta,
44  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
45 void ff_hevc_v_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride, int beta,
46  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
47 void ff_hevc_v_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride, int beta,
48  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
49 void ff_hevc_h_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int beta,
50  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
51 void ff_hevc_h_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride, int beta,
52  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
53 void ff_hevc_h_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride, int beta,
54  const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
55 void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs,
56  ptrdiff_t stride);
57 void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs,
58  ptrdiff_t stride);
59 void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs,
60  ptrdiff_t stride);
61 void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs,
62  ptrdiff_t stride);
63 void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs,
64  ptrdiff_t stride);
65 void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs,
66  ptrdiff_t stride);
67 void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs,
68  ptrdiff_t stride);
69 void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs,
70  ptrdiff_t stride);
71 void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs,
72  ptrdiff_t stride);
73 void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs,
74  ptrdiff_t stride);
75 void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs,
76  ptrdiff_t stride);
77 void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs,
78  ptrdiff_t stride);
79 void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
80 void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
81 void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
82 void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
83 void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
84 void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
85 void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit);
86 void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit);
87 void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
88 void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
89 void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
90 void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
91 void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
92 void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
93 void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
94 void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
95 void ff_hevc_idct_4x4_dc_12_neon(int16_t *coeffs);
96 void ff_hevc_idct_8x8_dc_12_neon(int16_t *coeffs);
97 void ff_hevc_idct_16x16_dc_12_neon(int16_t *coeffs);
98 void ff_hevc_idct_32x32_dc_12_neon(int16_t *coeffs);
99 void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
100 
101 void ff_hevc_dequant_4x4_8_neon(int16_t *coeffs);
102 void ff_hevc_dequant_8x8_8_neon(int16_t *coeffs);
103 void ff_hevc_dequant_16x16_8_neon(int16_t *coeffs);
104 void ff_hevc_dequant_32x32_8_neon(int16_t *coeffs);
105 
106 void ff_hevc_dequant_4x4_10_neon(int16_t *coeffs);
107 void ff_hevc_dequant_8x8_10_neon(int16_t *coeffs);
108 void ff_hevc_dequant_16x16_10_neon(int16_t *coeffs);
109 void ff_hevc_dequant_32x32_10_neon(int16_t *coeffs);
110 
111 void ff_hevc_dequant_4x4_12_neon(int16_t *coeffs);
112 void ff_hevc_dequant_8x8_12_neon(int16_t *coeffs);
113 void ff_hevc_dequant_16x16_12_neon(int16_t *coeffs);
114 void ff_hevc_dequant_32x32_12_neon(int16_t *coeffs);
115 
/**
 * Run the 8-bit NEON dequant routine that matches a transform size.
 *
 * @param coeffs    coefficient buffer, handed unchanged to the selected
 *                  size-specific routine
 * @param log2_size routine selector: 2 -> 4x4, 3 -> 8x8, 4 -> 16x16,
 *                  5 -> 32x32; every other value ends in av_unreachable()
 */
static void hevc_dequant_8_neon(int16_t *coeffs, int16_t log2_size)
{
    if (log2_size == 2) {
        ff_hevc_dequant_4x4_8_neon(coeffs);
    } else if (log2_size == 3) {
        ff_hevc_dequant_8x8_8_neon(coeffs);
    } else if (log2_size == 4) {
        ff_hevc_dequant_16x16_8_neon(coeffs);
    } else if (log2_size == 5) {
        ff_hevc_dequant_32x32_8_neon(coeffs);
    } else {
        /* Callers are expected to pass only the four sizes handled above. */
        av_unreachable("log2_size must be 2, 3, 4 or 5");
    }
}
126 
/**
 * Run the 10-bit NEON dequant routine that matches a transform size.
 *
 * @param coeffs    coefficient buffer, handed unchanged to the selected
 *                  size-specific routine
 * @param log2_size routine selector: 2 -> 4x4, 3 -> 8x8, 4 -> 16x16,
 *                  5 -> 32x32; every other value ends in av_unreachable()
 */
static void hevc_dequant_10_neon(int16_t *coeffs, int16_t log2_size)
{
    if (log2_size == 2) {
        ff_hevc_dequant_4x4_10_neon(coeffs);
    } else if (log2_size == 3) {
        ff_hevc_dequant_8x8_10_neon(coeffs);
    } else if (log2_size == 4) {
        ff_hevc_dequant_16x16_10_neon(coeffs);
    } else if (log2_size == 5) {
        ff_hevc_dequant_32x32_10_neon(coeffs);
    } else {
        /* Callers are expected to pass only the four sizes handled above. */
        av_unreachable("log2_size must be 2, 3, 4 or 5");
    }
}
137 
/**
 * Run the 12-bit NEON dequant routine that matches a transform size.
 *
 * @param coeffs    coefficient buffer, handed unchanged to the selected
 *                  size-specific routine
 * @param log2_size routine selector: 2 -> 4x4, 3 -> 8x8, 4 -> 16x16,
 *                  5 -> 32x32; every other value ends in av_unreachable()
 */
static void hevc_dequant_12_neon(int16_t *coeffs, int16_t log2_size)
{
    if (log2_size == 2) {
        ff_hevc_dequant_4x4_12_neon(coeffs);
    } else if (log2_size == 3) {
        ff_hevc_dequant_8x8_12_neon(coeffs);
    } else if (log2_size == 4) {
        ff_hevc_dequant_16x16_12_neon(coeffs);
    } else if (log2_size == 5) {
        ff_hevc_dequant_32x32_12_neon(coeffs);
    } else {
        /* Callers are expected to pass only the four sizes handled above. */
        av_unreachable("log2_size must be 2, 3, 4 or 5");
    }
}
148 
/*
 * Assign the [v][h] entry of slots 1..9 of a three-dimensional table.
 *
 * Slot k receives the symbol ff_hevc_put_hevc_<fn><W>_8_neon<ext>, where W
 * is 4, 6, 8, 12, 16, 24, 32, 48, 64 for k = 1..9. Slot 0 is not written.
 *
 *   member  table lvalue, indexed as member[slot][v][h]
 *   v, h    second and third index of the entry to fill in every slot
 *   fn      name fragment pasted in front of the width
 *   ext     name suffix pasted after "_neon"; may be empty
 *
 * The expansion is a single do { } while (0) statement, so the macro can be
 * used as the unbraced body of an if/else. Invoke it with a trailing ';'.
 */
#define NEON8_FNASSIGN(member, v, h, fn, ext) \
    do { \
        member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        member[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;  \
        member[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        member[4][v][h] = ff_hevc_put_hevc_##fn##12_8_neon##ext; \
        member[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        member[6][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext; \
        member[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
        member[8][v][h] = ff_hevc_put_hevc_##fn##48_8_neon##ext; \
        member[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
159 
/*
 * Variant of the nine-slot fill in which the three largest slots share one
 * symbol.
 *
 * Slots 1..6 receive ff_hevc_put_hevc_<fn><W>_8_neon<ext> with
 * W = 4, 6, 8, 12, 16, 24; slots 7, 8 and 9 all receive the <fn>32 symbol.
 * Slot 0 is not written.
 * NOTE(review): presumably the 32 kernel copes with the wider blocks that
 * slots 8 and 9 stand for -- confirm against the assembly.
 *
 *   member  table lvalue, indexed as member[slot][v][h]
 *   v, h    second and third index of the entry to fill in every slot
 *   fn      name fragment pasted in front of the width
 *   ext     name suffix pasted after "_neon"; may be empty
 *
 * The expansion is a single do { } while (0) statement, so the macro can be
 * used as the unbraced body of an if/else. Invoke it with a trailing ';'.
 */
#define NEON8_FNASSIGN_SHARED_32(member, v, h, fn, ext) \
    do { \
        member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        member[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;  \
        member[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        member[4][v][h] = ff_hevc_put_hevc_##fn##12_8_neon##ext; \
        member[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        member[6][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext; \
        member[7][v][h] = \
        member[8][v][h] = \
        member[9][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
    } while (0)
170 
/*
 * Partial fill that uses only four distinct symbols: <fn>4, <fn>8, <fn>16
 * and <fn>64 (each expanded to ff_hevc_put_hevc_<fn><W>_8_neon<ext>).
 *
 * Slots 1, 3 and 5 receive the 4, 8 and 16 symbols; slots 7, 8 and 9 all
 * receive the 64 symbol. Slots 0, 2, 4 and 6 are not written and keep
 * whatever value they held before.
 * NOTE(review): slot 7 gets the 64 symbol although the full-fill macro puts
 * a 32 symbol there; presumably the 64 kernel is width-generic -- confirm
 * against the assembly.
 *
 *   member  table lvalue, indexed as member[slot][v][h]
 *   v, h    second and third index of the entry to fill in every slot
 *   fn      name fragment pasted in front of the width
 *   ext     name suffix pasted after "_neon"; may be empty
 *
 * The expansion is a single do { } while (0) statement, so the macro can be
 * used as the unbraced body of an if/else. Invoke it with a trailing ';'.
 */
#define NEON8_FNASSIGN_PARTIAL_4(member, v, h, fn, ext) \
    do { \
        member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        member[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        member[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        member[7][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
        member[8][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
        member[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
178 
/*
 * Partial fill that writes five slots only.
 *
 * Slots 1, 3, 5, 7 and 9 receive ff_hevc_put_hevc_<fn><W>_8_neon<ext> with
 * W = 4, 8, 16, 32 and 64 respectively. Slots 0, 2, 4, 6 and 8 are not
 * written and keep whatever value they held before.
 *
 *   member  table lvalue, indexed as member[slot][v][h]
 *   v, h    second and third index of the entry to fill in every slot
 *   fn      name fragment pasted in front of the width
 *   ext     name suffix pasted after "_neon"; may be empty
 *
 * The expansion is a single do { } while (0) statement, so the macro can be
 * used as the unbraced body of an if/else. Invoke it with a trailing ';'.
 */
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext) \
    do { \
        member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        member[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        member[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        member[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
        member[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
185 
/*
 * Nine-slot fill in which the two largest slots reuse smaller symbols.
 *
 * Slots 1..7 receive ff_hevc_put_hevc_<fn><W>_8_neon<ext> with
 * W = 4, 6, 8, 12, 16, 24, 32. Slot 8 receives the <fn>24 symbol again and
 * slot 9 receives the <fn>32 symbol again. Slot 0 is not written.
 * NOTE(review): presumably the 24 and 32 kernels are written so that they
 * also cover the block widths that slots 8 and 9 stand for -- confirm
 * against the assembly.
 *
 *   member  table lvalue, indexed as member[slot][v][h]
 *   v, h    second and third index of the entry to fill in every slot
 *   fn      name fragment pasted in front of the width
 *   ext     name suffix pasted after "_neon"; may be empty
 *
 * The expansion is a single do { } while (0) statement, so the macro can be
 * used as the unbraced body of an if/else. Invoke it with a trailing ';'.
 */
#define NEON8_FNASSIGN_PARTIAL_6(member, v, h, fn, ext) \
    do { \
        member[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        member[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;  \
        member[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        member[4][v][h] = ff_hevc_put_hevc_##fn##12_8_neon##ext; \
        member[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        member[6][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext; \
        member[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
        member[8][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext; \
        member[9][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
    } while (0)
196 
/*
 * Body of ff_hevc_dsp_init_aarch64(). The signature line is absent from this
 * listing; the cross-reference section gives it as
 *   av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
 *
 * Stores NEON function pointers into the context `c`. Which members are
 * written depends on `bit_depth`: 8, 10 and 12 each have their own branch,
 * and no branch runs for any other value. If have_neon() rejects the CPU
 * flags the function returns before touching `c`.
 *
 * NOTE(review): prototypes for the ff_hevc_put_hevc_*, ff_h26x_sao_* and
 * ff_hevc_sao_* symbols used below are not visible in this listing;
 * presumably they come from a header whose #include line was dropped.
 */
198 {
199  int cpu_flags = av_get_cpu_flags();
/* Without NEON nothing is assigned; `c` is left exactly as it was passed in. */
200  if (!have_neon(cpu_flags)) return;
201 
202  if (bit_depth == 8) {
/* 8-bit: loop filters, residual add, IDCT, DC-only IDCT, 4x4 luma transform
 * and dequant. */
203  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_neon;
204  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_neon;
205  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_neon;
206  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_neon;
207  c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
208  c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
209  c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
210  c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
211  c->idct[0] = ff_hevc_idct_4x4_8_neon;
212  c->idct[1] = ff_hevc_idct_8x8_8_neon;
213  c->idct[2] = ff_hevc_idct_16x16_8_neon;
214  c->idct[3] = ff_hevc_idct_32x32_8_neon;
215  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
216  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
217  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
218  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
219  c->transform_4x4_luma = ff_hevc_transform_luma_4x4_neon_8;
220  c->dequant = hevc_dequant_8_neon;
/* SAO band and edge filters: slot 0 gets the 8x8 kernel, slots 1..4 all
 * share the 16x16 kernel. */
221  c->sao_band_filter[0] = ff_h26x_sao_band_filter_8x8_8_neon;
222  c->sao_band_filter[1] =
223  c->sao_band_filter[2] =
224  c->sao_band_filter[3] =
225  c->sao_band_filter[4] = ff_h26x_sao_band_filter_16x16_8_neon;
226  c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon;
227  c->sao_edge_filter[1] =
228  c->sao_edge_filter[2] =
229  c->sao_edge_filter[3] =
230  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon;
/* Hand-written [0][1] entries (the qpel *_h kernels) for the plain, uni and
 * bi tables: slots 4 and 6 share the h12 kernel, slots 7..9 share the h32
 * kernel. */
231  c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_neon;
232  c->put_hevc_qpel[2][0][1] = ff_hevc_put_hevc_qpel_h6_8_neon;
233  c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_neon;
234  c->put_hevc_qpel[4][0][1] =
235  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h12_8_neon;
236  c->put_hevc_qpel[5][0][1] = ff_hevc_put_hevc_qpel_h16_8_neon;
237  c->put_hevc_qpel[7][0][1] =
238  c->put_hevc_qpel[8][0][1] =
239  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h32_8_neon;
240  c->put_hevc_qpel_uni[1][0][1] = ff_hevc_put_hevc_qpel_uni_h4_8_neon;
241  c->put_hevc_qpel_uni[2][0][1] = ff_hevc_put_hevc_qpel_uni_h6_8_neon;
242  c->put_hevc_qpel_uni[3][0][1] = ff_hevc_put_hevc_qpel_uni_h8_8_neon;
243  c->put_hevc_qpel_uni[4][0][1] =
244  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_qpel_uni_h12_8_neon;
245  c->put_hevc_qpel_uni[5][0][1] = ff_hevc_put_hevc_qpel_uni_h16_8_neon;
246  c->put_hevc_qpel_uni[7][0][1] =
247  c->put_hevc_qpel_uni[8][0][1] =
248  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_qpel_uni_h32_8_neon;
249  c->put_hevc_qpel_bi[1][0][1] = ff_hevc_put_hevc_qpel_bi_h4_8_neon;
250  c->put_hevc_qpel_bi[2][0][1] = ff_hevc_put_hevc_qpel_bi_h6_8_neon;
251  c->put_hevc_qpel_bi[3][0][1] = ff_hevc_put_hevc_qpel_bi_h8_8_neon;
252  c->put_hevc_qpel_bi[4][0][1] =
253  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_qpel_bi_h12_8_neon;
254  c->put_hevc_qpel_bi[5][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_neon;
255  c->put_hevc_qpel_bi[7][0][1] =
256  c->put_hevc_qpel_bi[8][0][1] =
257  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h32_8_neon;
258 
/* Remaining table columns are filled through the NEON8_FNASSIGN* macros:
 * arguments two and three pick the [v][h] column, argument four is pasted
 * into the kernel name, and the empty fifth argument means "no suffix".
 * Both the qpel and epel [0][0] columns receive the same pel_*pixels
 * kernels. */
259  NEON8_FNASSIGN(c->put_hevc_epel, 0, 0, pel_pixels,);
260  NEON8_FNASSIGN(c->put_hevc_epel, 1, 0, epel_v,);
261  NEON8_FNASSIGN(c->put_hevc_qpel, 0, 0, pel_pixels,);
262  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 0, qpel_v,);
263  NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 0, pel_bi_pixels,);
264  NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 1, epel_bi_h,);
265  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 0, epel_bi_v,);
266  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 0, 0, pel_bi_pixels,);
267  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 0, qpel_bi_v,);
268  NEON8_FNASSIGN_PARTIAL_6(c->put_hevc_qpel_bi_w, 0, 0, pel_bi_w_pixels,);
269  NEON8_FNASSIGN_PARTIAL_6(c->put_hevc_epel_bi_w, 0, 0, pel_bi_w_pixels,);
270  NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,);
271  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,);
272  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,);
273  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 0, qpel_uni_v,);
274  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 0, pel_uni_w_pixels,);
275  NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 0, pel_uni_w_pixels,);
276  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 0, epel_uni_w_v,);
277  NEON8_FNASSIGN_PARTIAL_4(c->put_hevc_qpel_uni_w, 1, 0, qpel_uni_w_v,);
278 
279  NEON8_FNASSIGN_SHARED_32(c->put_hevc_epel, 0, 1, epel_h,);
280  NEON8_FNASSIGN_SHARED_32(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h,);
281 
282  NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv,);
283  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv,);
284  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv,);
285  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 1, epel_bi_hv,);
286 
287  NEON8_FNASSIGN_SHARED_32(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h,);
288 
289  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 1, qpel_hv,);
290  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 1, qpel_uni_hv,);
291  NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, qpel_uni_w_hv,);
292  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 1, qpel_bi_hv,);
293 
/* When have_i8mm() accepts the CPU flags, the entries below are assigned a
 * second time with "_i8mm"-suffixed kernels. Order matters: these must come
 * after the plain NEON assignments above. The put_hevc_qpel [0][1] fill
 * replaces all nine hand-written qpel_h entries, including the shared
 * h12/h32 slots, with per-width _i8mm kernels. */
294  if (have_i8mm(cpu_flags)) {
295  NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
296  NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv, _i8mm);
297  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv, _i8mm);
298  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h ,_i8mm);
299  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv, _i8mm);
300  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 1, epel_bi_hv, _i8mm);
301  NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);
302  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 1, qpel_hv, _i8mm);
303  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 1, qpel_uni_hv, _i8mm);
304  NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h, _i8mm);
305  NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, qpel_uni_w_hv, _i8mm);
306  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 1, qpel_bi_hv, _i8mm);
307  }
308 
309  }
/* 10-bit: this function sets only the loop filters, residual add, IDCT,
 * DC-only IDCT and dequant; no transform_4x4_luma, SAO or put_hevc_*
 * entries are written in this branch. */
310  if (bit_depth == 10) {
311  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_neon;
312  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_neon;
313  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_neon;
314  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_neon;
315  c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
316  c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon;
317  c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon;
318  c->add_residual[3] = ff_hevc_add_residual_32x32_10_neon;
319  c->idct[0] = ff_hevc_idct_4x4_10_neon;
320  c->idct[1] = ff_hevc_idct_8x8_10_neon;
321  c->idct[2] = ff_hevc_idct_16x16_10_neon;
322  c->idct[3] = ff_hevc_idct_32x32_10_neon;
323  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
324  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
325  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
326  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
327  c->dequant = hevc_dequant_10_neon;
328  }
/* 12-bit: same members as the 10-bit branch except that c->idct[] is not
 * assigned here (only the DC-only idct_dc[] entries are). */
329  if (bit_depth == 12) {
330  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_neon;
331  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_neon;
332  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_neon;
333  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_neon;
334  c->add_residual[0] = ff_hevc_add_residual_4x4_12_neon;
335  c->add_residual[1] = ff_hevc_add_residual_8x8_12_neon;
336  c->add_residual[2] = ff_hevc_add_residual_16x16_12_neon;
337  c->add_residual[3] = ff_hevc_add_residual_32x32_12_neon;
338  c->idct_dc[0] = ff_hevc_idct_4x4_dc_12_neon;
339  c->idct_dc[1] = ff_hevc_idct_8x8_dc_12_neon;
340  c->idct_dc[2] = ff_hevc_idct_16x16_dc_12_neon;
341  c->idct_dc[3] = ff_hevc_idct_32x32_dc_12_neon;
342  c->dequant = hevc_dequant_12_neon;
343  }
344 }
ff_hevc_dequant_32x32_8_neon
void ff_hevc_dequant_32x32_8_neon(int16_t *coeffs)
_dst
uint8_t * _dst
Definition: dsp.h:56
ff_hevc_v_loop_filter_chroma_10_neon
void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
NEON8_FNASSIGN
#define NEON8_FNASSIGN(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:149
ff_hevc_idct_4x4_dc_8_neon
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs)
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
hevc_dequant_12_neon
static void hevc_dequant_12_neon(int16_t *coeffs, int16_t log2_size)
Definition: hevcdsp_init_aarch64.c:138
ff_hevc_dequant_16x16_10_neon
void ff_hevc_dequant_16x16_10_neon(int16_t *coeffs)
ff_hevc_transform_luma_4x4_neon_8
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs)
ff_hevc_idct_4x4_dc_12_neon
void ff_hevc_idct_4x4_dc_12_neon(int16_t *coeffs)
ff_hevc_idct_16x16_dc_12_neon
void ff_hevc_idct_16x16_dc_12_neon(int16_t *coeffs)
ff_hevc_h_loop_filter_luma_10_neon
void ff_hevc_h_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
_stride
ptrdiff_t _stride
Definition: h264pred_template.c:411
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
ff_hevc_idct_16x16_8_neon
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
ff_hevc_v_loop_filter_luma_8_neon
void ff_hevc_v_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_hevc_dequant_32x32_12_neon
void ff_hevc_dequant_32x32_12_neon(int16_t *coeffs)
ff_hevc_add_residual_16x16_10_neon
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_32x32_8_neon
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_dequant_32x32_10_neon
void ff_hevc_dequant_32x32_10_neon(int16_t *coeffs)
ff_hevc_idct_8x8_dc_12_neon
void ff_hevc_idct_8x8_dc_12_neon(int16_t *coeffs)
hevc_dequant_8_neon
static void hevc_dequant_8_neon(int16_t *coeffs, int16_t log2_size)
Definition: hevcdsp_init_aarch64.c:116
ff_hevc_add_residual_4x4_10_neon
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_8x8_8_neon
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit)
dsp.h
hevc_dequant_10_neon
static void hevc_dequant_10_neon(int16_t *coeffs, int16_t log2_size)
Definition: hevcdsp_init_aarch64.c:127
ff_hevc_idct_16x16_dc_10_neon
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs)
ff_hevc_h_loop_filter_luma_8_neon
void ff_hevc_h_loop_filter_luma_8_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
avassert.h
av_cold
#define av_cold
Definition: attributes.h:106
ff_hevc_h_loop_filter_luma_12_neon
void ff_hevc_h_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_32x32_10_neon
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_32x32_12_neon
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_luma_10_neon
void ff_hevc_v_loop_filter_luma_10_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_h26x_sao_band_filter_16x16_8_neon
void ff_h26x_sao_band_filter_16x16_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
NEON8_FNASSIGN_PARTIAL_6
#define NEON8_FNASSIGN_PARTIAL_6(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:186
ff_hevc_idct_32x32_dc_12_neon
void ff_hevc_idct_32x32_dc_12_neon(int16_t *coeffs)
NEON8_FNASSIGN_PARTIAL_5
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:179
av_unreachable
#define av_unreachable(msg)
Asserts that are used as compiler optimization hints depending upon ASSERT_LEVEL and NDEBUG.
Definition: avassert.h:108
ff_hevc_idct_16x16_dc_8_neon
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_32x32_dc_10_neon
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
have_i8mm
#define have_i8mm(flags)
Definition: cpu.h:30
dsp.h
ff_hevc_add_residual_8x8_8_neon
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_chroma_12_neon
void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
cpu.h
ff_hevc_dequant_16x16_8_neon
void ff_hevc_dequant_16x16_8_neon(int16_t *coeffs)
ff_hevc_add_residual_16x16_12_neon
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_4x4_10_neon
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_8x8_12_neon
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_dequant_4x4_12_neon
void ff_hevc_dequant_4x4_12_neon(int16_t *coeffs)
ff_hevc_idct_4x4_dc_10_neon
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
ff_h26x_sao_band_filter_8x8_8_neon
void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
ff_hevc_h_loop_filter_chroma_12_neon
void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_8x8_10_neon
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit)
HEVCDSPContext
Definition: dsp.h:47
attributes.h
ff_hevc_dequant_8x8_10_neon
void ff_hevc_dequant_8x8_10_neon(int16_t *coeffs)
ff_hevc_idct_4x4_8_neon
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_32x32_dc_8_neon
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs)
ff_hevc_dequant_16x16_12_neon
void ff_hevc_dequant_16x16_12_neon(int16_t *coeffs)
ff_hevc_idct_8x8_dc_8_neon
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_8x8_dc_10_neon
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs)
ff_hevc_dsp_init_aarch64
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init_aarch64.c:197
NEON8_FNASSIGN_SHARED_32
#define NEON8_FNASSIGN_SHARED_32(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:160
ff_hevc_add_residual_32x32_8_neon
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_luma_12_neon
void ff_hevc_v_loop_filter_luma_12_neon(uint8_t *_pix, ptrdiff_t _stride, int beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_v_loop_filter_chroma_8_neon
void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_dequant_8x8_12_neon
void ff_hevc_dequant_8x8_12_neon(int16_t *coeffs)
ff_hevc_idct_16x16_10_neon
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_dequant_4x4_8_neon
void ff_hevc_dequant_4x4_8_neon(int16_t *coeffs)
ff_hevc_dequant_4x4_10_neon
void ff_hevc_dequant_4x4_10_neon(int16_t *coeffs)
ff_hevc_h_loop_filter_chroma_10_neon
void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_add_residual_4x4_8_neon
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
stride
#define stride
Definition: h264pred_template.c:536
cpu.h
ff_hevc_add_residual_32x32_10_neon
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_add_residual_8x8_10_neon
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_h_loop_filter_chroma_8_neon
void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_add_residual_16x16_8_neon
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
NEON8_FNASSIGN_PARTIAL_4
#define NEON8_FNASSIGN_PARTIAL_4(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:171
ff_hevc_dequant_8x8_8_neon
void ff_hevc_dequant_8x8_8_neon(int16_t *coeffs)
ff_hevc_add_residual_4x4_12_neon
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)