FFmpeg
aacenc.c
Go to the documentation of this file.
1 /*
2  * AAC encoder
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * AAC encoder
25  */
26 
27 /***********************************
28  * TODOs:
29  * add sane pulse detection
30  ***********************************/
31 #include <float.h>
32 
34 #include "libavutil/libm.h"
35 #include "libavutil/float_dsp.h"
36 #include "libavutil/mem.h"
37 #include "libavutil/opt.h"
38 #include "avcodec.h"
39 #include "codec_internal.h"
40 #include "encode.h"
41 #include "put_bits.h"
42 #include "mpeg4audio.h"
43 #include "sinewin.h"
44 #include "profiles.h"
45 #include "version.h"
46 
47 #include "aac.h"
48 #include "aactab.h"
49 #include "aacenc.h"
50 #include "aacenctab.h"
51 #include "aacenc_utils.h"
52 
53 #include "psymodel.h"
54 
55 /**
56  * List of PCE (Program Configuration Element) for the channel layouts listed
57  * in channel_layout.h
58  *
59  * For those wishing in the future to add other layouts:
60  *
61  * - num_ele: number of elements in each group of front, side, back, lfe channels
62  * (an element is of type SCE (single channel), CPE (channel pair) for
63  * the first 3 groups; and is LFE for LFE group).
64  *
65  * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group
66  *
67  * - index: there are three independent indices for SCE, CPE and LFE;
68  * they are incremented irrespective of the group to which the element belongs;
69  * they are not reset when going from one group to another
70  *
71  * Example: for 7.0 channel layout,
72  * .pairing = { { 1, 0 }, { 1 }, { 1 }, }, (3 CPE and 1 SCE in total: one CPE and the SCE in the front group, one CPE in each of the side and back groups)
73  * .index = { { 0, 0 }, { 1 }, { 2 }, },
74  * (index is 0 for the single SCE but goes from 0 to 2 for the CPEs)
75  *
76  * The index order impacts the channel ordering. But is otherwise arbitrary
77  * (the sequence could have been 2, 0, 1 instead of 0, 1, 2).
78  *
79  * Spec allows for discontinuous indices, e.g. if one has a total of two SCE,
80  * SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder
81  * which at this time requires that indices fully cover some range starting
82  * from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15).
83  *
84  * - config_map: total number of elements and their types. Beware, the way the
85  * types are ordered impacts the final channel ordering.
86  *
87  * - reorder_map: reorders the channels.
88  *
89  */
90 static const AACPCEInfo aac_pce_configs[] = {
91  {
93  .num_ele = { 1, 0, 0, 0 },
94  .pairing = { { 0 }, },
95  .index = { { 0 }, },
96  .config_map = { 1, TYPE_SCE, },
97  .reorder_map = { 0 },
98  },
99  {
100  .layout = AV_CHANNEL_LAYOUT_STEREO,
101  .num_ele = { 1, 0, 0, 0 },
102  .pairing = { { 1 }, },
103  .index = { { 0 }, },
104  .config_map = { 1, TYPE_CPE, },
105  .reorder_map = { 0, 1 },
106  },
107  {
108  .layout = AV_CHANNEL_LAYOUT_2POINT1,
109  .num_ele = { 1, 0, 0, 1 },
110  .pairing = { { 1 }, },
111  .index = { { 0 },{ 0 },{ 0 },{ 0 } },
112  .config_map = { 2, TYPE_CPE, TYPE_LFE },
113  .reorder_map = { 0, 1, 2 },
114  },
115  {
116  .layout = AV_CHANNEL_LAYOUT_2_1,
117  .num_ele = { 1, 0, 1, 0 },
118  .pairing = { { 1 },{ 0 },{ 0 } },
119  .index = { { 0 },{ 0 },{ 0 }, },
120  .config_map = { 2, TYPE_CPE, TYPE_SCE },
121  .reorder_map = { 0, 1, 2 },
122  },
123  {
124  .layout = AV_CHANNEL_LAYOUT_SURROUND,
125  .num_ele = { 2, 0, 0, 0 },
126  .pairing = { { 0, 1 }, },
127  .index = { { 0, 0 }, },
128  .config_map = { 2, TYPE_SCE, TYPE_CPE },
129  .reorder_map = { 2, 0, 1 },
130  },
131  {
132  .layout = AV_CHANNEL_LAYOUT_3POINT1,
133  .num_ele = { 2, 0, 0, 1 },
134  .pairing = { { 0, 1 }, },
135  .index = { { 0, 0 }, { 0 }, { 0 }, { 0 }, },
136  .config_map = { 3, TYPE_SCE, TYPE_CPE, TYPE_LFE },
137  .reorder_map = { 2, 0, 1, 3 },
138  },
139  {
140  .layout = AV_CHANNEL_LAYOUT_4POINT0,
141  .num_ele = { 2, 0, 1, 0 },
142  .pairing = { { 0, 1 }, { 0 }, { 0 }, },
143  .index = { { 0, 0 }, { 0 }, { 1 } },
144  .config_map = { 3, TYPE_SCE, TYPE_CPE, TYPE_SCE },
145  .reorder_map = { 2, 0, 1, 3 },
146  },
147  {
148  .layout = AV_CHANNEL_LAYOUT_4POINT1,
149  .num_ele = { 2, 0, 1, 1 },
150  .pairing = { { 0, 1 }, { 0 }, { 0 }, },
151  .index = { { 0, 0 }, { 0 }, { 1 }, { 0 } },
152  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_LFE },
153  .reorder_map = { 2, 0, 1, 4, 3 },
154  },
155  {
156  .layout = AV_CHANNEL_LAYOUT_2_2,
157  .num_ele = { 1, 1, 0, 0 },
158  .pairing = { { 1 }, { 1 }, },
159  .index = { { 0 }, { 1 }, },
160  .config_map = { 2, TYPE_CPE, TYPE_CPE },
161  .reorder_map = { 0, 1, 2, 3 },
162  },
163  {
164  .layout = AV_CHANNEL_LAYOUT_QUAD,
165  .num_ele = { 1, 0, 1, 0 },
166  .pairing = { { 1 }, { 0 }, { 1 }, },
167  .index = { { 0 }, { 0 }, { 1 } },
168  .config_map = { 2, TYPE_CPE, TYPE_CPE },
169  .reorder_map = { 0, 1, 2, 3 },
170  },
171  {
172  .layout = AV_CHANNEL_LAYOUT_5POINT0,
173  .num_ele = { 2, 1, 0, 0 },
174  .pairing = { { 0, 1 }, { 1 }, },
175  .index = { { 0, 0 }, { 1 } },
176  .config_map = { 3, TYPE_SCE, TYPE_CPE, TYPE_CPE },
177  .reorder_map = { 2, 0, 1, 3, 4 },
178  },
179  {
180  .layout = AV_CHANNEL_LAYOUT_5POINT1,
181  .num_ele = { 2, 1, 0, 1 },
182  .pairing = { { 0, 1 }, { 1 }, },
183  .index = { { 0, 0 }, { 1 }, { 0 }, { 0 }, },
184  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE },
185  .reorder_map = { 2, 0, 1, 4, 5, 3 },
186  },
187  {
189  .num_ele = { 2, 0, 1, 0 },
190  .pairing = { { 0, 1 }, { 0 }, { 1 } },
191  .index = { { 0, 0 }, { 0 }, { 1 } },
192  .config_map = { 3, TYPE_SCE, TYPE_CPE, TYPE_CPE },
193  .reorder_map = { 2, 0, 1, 3, 4 },
194  },
195  {
197  .num_ele = { 2, 0, 1, 1 },
198  .pairing = { { 0, 1 }, { 0 }, { 1 }, },
199  .index = { { 0, 0 }, { 0 }, { 1 }, { 0 } },
200  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE },
201  .reorder_map = { 2, 0, 1, 4, 5, 3 },
202  },
203  {
204  .layout = AV_CHANNEL_LAYOUT_6POINT0,
205  .num_ele = { 2, 1, 1, 0 },
206  .pairing = { { 0, 1 }, { 1 }, { 0 }, },
207  .index = { { 0, 0 }, { 1 }, { 1 } },
208  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
209  .reorder_map = { 2, 0, 1, 4, 5, 3 },
210  },
211  {
213  .num_ele = { 2, 1, 0, 0 },
214  .pairing = { { 1, 1 }, { 1 } },
215  .index = { { 0, 1 }, { 2 }, },
216  .config_map = { 3, TYPE_CPE, TYPE_CPE, TYPE_CPE, },
217  .reorder_map = { 0, 1, 2, 3, 4, 5 },
218  },
219  {
220  .layout = AV_CHANNEL_LAYOUT_HEXAGONAL,
221  .num_ele = { 2, 0, 2, 0 },
222  .pairing = { { 0, 1 }, { 0 }, { 1, 0 } },
223  .index = { { 0, 0 }, { 0 }, { 1, 1 } },
224  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
225  .reorder_map = { 2, 0, 1, 3, 4, 5 },
226  },
227  {
228  .layout = AV_CHANNEL_LAYOUT_6POINT1,
229  .num_ele = { 2, 1, 1, 1 },
230  .pairing = { { 0, 1 },{ 1 },{ 0 }, },
231  .index = { { 0, 0 },{ 1 },{ 1 },{ 0 } },
232  .config_map = { 5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_LFE },
233  .reorder_map = { 2, 0, 1, 5, 6, 4, 3 },
234  },
235  {
237  .num_ele = { 2, 0, 2, 1 },
238  .pairing = { { 0, 1 },{ 1, 0 },{ 0 }, },
239  .index = { { 0, 0 },{ 0 },{ 1, 1 },{ 0 } },
240  .config_map = { 5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_LFE },
241  .reorder_map = { 2, 0, 1, 4, 5, 6, 3 },
242  },
243  {
245  .num_ele = { 2, 1, 0, 1 },
246  .pairing = { { 1, 1 },{ 1 } },
247  .index = { { 0, 1 },{ 2 },{ 0 },{ 0 } },
248  .config_map = { 4, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_LFE },
249  .reorder_map = { 0, 1, 3, 4, 5, 6, 2 },
250  },
251  {
252  .layout = AV_CHANNEL_LAYOUT_7POINT0,
253  .num_ele = { 2, 0, 2, 0 },
254  .pairing = { { 0, 1 }, { 0 }, { 1, 1 }, },
255  .index = { { 0, 0 }, { 0 }, { 2, 1 }, },
256  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE },
257  .reorder_map = { 2, 0, 1, 3, 4, 5, 6 },
258  },
259  {
261  .num_ele = { 3, 1, 0, 0 },
262  .pairing = { { 0, 1, 1 }, { 1 }, },
263  .index = { { 0, 0, 1 }, { 2 }, },
264  .config_map = { 4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE },
265  .reorder_map = { 2, 0, 1, 3, 4, 5, 6 },
266  },
267  {
268  .layout = AV_CHANNEL_LAYOUT_7POINT1,
269  .num_ele = { 2, 0, 2, 1 },
270  .pairing = { { 0, 1 }, { 0 }, { 1, 1 }, },
271  .index = { { 0, 0 }, { 0 }, { 2, 1 }, { 0 } },
272  .config_map = { 5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_LFE },
273  .reorder_map = { 2, 0, 1, 4, 5, 6, 7, 3 },
274  },
275  {
277  .num_ele = { 3, 1, 0, 1 },
278  .pairing = { { 0, 1, 1 }, { 1 }, },
279  .index = { { 0, 0, 1 }, { 2 }, },
280  .config_map = { 5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_LFE },
281  .reorder_map = { 2, 0, 1, 4, 5, 6, 7, 3 },
282  },
283  {
285  .num_ele = { 3, 0, 1, 1 },
286  .pairing = { { 0, 1, 1 }, { 0 }, { 1 } },
287  .index = { { 0, 0, 2 }, { 0 }, { 1 }, { 0 } },
288  .config_map = { 5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_LFE },
289  .reorder_map = { 2, 0, 1, 4, 5, 6, 7, 3 },
290  },
291  {
292  .layout = AV_CHANNEL_LAYOUT_OCTAGONAL,
293  .num_ele = { 2, 0, 3, 0 },
294  .pairing = { { 0, 1 }, { 0 }, { 1, 1, 0 }, },
295  .index = { { 0, 0 }, { 0 }, { 1, 2, 1 }, },
296  .config_map = { 5, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
297  .reorder_map = { 2, 0, 1, 6, 7, 3, 4, 5 },
298  },
299 };
300 
301 static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
302 {
303  int i, j;
304  AACEncContext *s = avctx->priv_data;
305  AACPCEInfo *pce = &s->pce;
306  const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
307  const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
308 
309  put_bits(pb, 4, 0);
310 
311  put_bits(pb, 2, avctx->profile);
312  put_bits(pb, 4, s->samplerate_index);
313 
314  put_bits(pb, 4, pce->num_ele[0]); /* Front */
315  put_bits(pb, 4, pce->num_ele[1]); /* Side */
316  put_bits(pb, 4, pce->num_ele[2]); /* Back */
317  put_bits(pb, 2, pce->num_ele[3]); /* LFE */
318  put_bits(pb, 3, 0); /* Assoc data */
319  put_bits(pb, 4, 0); /* CCs */
320 
321  put_bits(pb, 1, 0); /* Stereo mixdown */
322  put_bits(pb, 1, 0); /* Mono mixdown */
323  put_bits(pb, 1, 0); /* Something else */
324 
325  for (i = 0; i < 4; i++) {
326  for (j = 0; j < pce->num_ele[i]; j++) {
327  if (i < 3)
328  put_bits(pb, 1, pce->pairing[i][j]);
329  put_bits(pb, 4, pce->index[i][j]);
330  }
331  }
332 
333  align_put_bits(pb);
334  put_bits(pb, 8, strlen(aux_data));
335  ff_put_string(pb, aux_data, 0);
336 }
337 
338 /**
339  * Make AAC audio config object.
340  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
341  */
342 static int put_audio_specific_config(AVCodecContext *avctx, int chcfg)
343 {
344  PutBitContext pb;
345  AACEncContext *s = avctx->priv_data;
346  const int max_size = 32;
347 
348  avctx->extradata = av_mallocz(max_size);
349  if (!avctx->extradata)
350  return AVERROR(ENOMEM);
351 
352  init_put_bits(&pb, avctx->extradata, max_size);
353  put_bits(&pb, 5, s->profile+1); //profile
354  put_bits(&pb, 4, s->samplerate_index); //sample rate index
355  put_bits(&pb, 4, chcfg);
356  //GASpecificConfig
357  put_bits(&pb, 1, 0); //frame length - 1024 samples
358  put_bits(&pb, 1, 0); //does not depend on core coder
359  put_bits(&pb, 1, 0); //is not extension
360  if (s->needs_pce)
361  put_pce(&pb, avctx);
362 
363  //Explicitly Mark SBR absent
364  put_bits(&pb, 11, 0x2b7); //sync extension
365  put_bits(&pb, 5, AOT_SBR);
366  put_bits(&pb, 1, 0);
367  flush_put_bits(&pb);
368  avctx->extradata_size = put_bytes_output(&pb);
369 
370  return 0;
371 }
372 
/*
 * Invalidate the quantize-band cost cache by bumping its generation counter.
 * NOTE(review): the function signature is missing from this listing; `s` is
 * the encoder context that owns the cache.
 */
374 {
375  ++s->quantize_band_cost_cache_generation;
 /* The counter wrapped around to 0: wipe the whole cache and restart at
  * generation 1, so that generation 0 is never the current one. */
376  if (s->quantize_band_cost_cache_generation == 0) {
377  memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
378  s->quantize_band_cost_cache_generation = 1;
379  }
380 }
381 
/*
 * Expands to the common signature of the apply_<type>_window() helpers:
 * static void apply_<type>_window(fdsp, sce, audio). Each windowing routine
 * is written as WINDOW_FUNC(<type>) followed by its body.
 */
382 #define WINDOW_FUNC(type) \
383 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
384  SingleChannelElement *sce, \
385  const float *audio)
386 
387 WINDOW_FUNC(only_long)
388 {
389  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
390  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
391  float *out = sce->ret_buf;
392 
393  fdsp->vector_fmul (out, audio, lwindow, 1024);
394  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
395 }
396 
397 WINDOW_FUNC(long_start)
398 {
399  const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
400  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
401  float *out = sce->ret_buf;
402 
403  fdsp->vector_fmul(out, audio, lwindow, 1024);
404  memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
405  fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
406  memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
407 }
408 
409 WINDOW_FUNC(long_stop)
410 {
411  const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
412  const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
413  float *out = sce->ret_buf;
414 
415  memset(out, 0, sizeof(out[0]) * 448);
416  fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
417  memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
418  fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
419 }
420 
421 WINDOW_FUNC(eight_short)
422 {
423  const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
424  const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
425  const float *in = audio + 448;
426  float *out = sce->ret_buf;
427  int w;
428 
429  for (w = 0; w < 8; w++) {
430  fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
431  out += 128;
432  in += 128;
433  fdsp->vector_fmul_reverse(out, in, swindow, 128);
434  out += 128;
435  }
436 }
437 
/*
 * Dispatch table mapping each of the four window sequence constants to the
 * windowing routine that handles it.
 */
438 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
440  const float *audio) = {
441  [ONLY_LONG_SEQUENCE] = apply_only_long_window,
442  [LONG_START_SEQUENCE] = apply_long_start_window,
443  [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
444  [LONG_STOP_SEQUENCE] = apply_long_stop_window
445 };
446 
448  float *audio)
449 {
 /*
  * Window the input into sce->ret_buf, transform it into sce->coeffs,
  * shift the input buffer down by 1024 samples and save a copy of the
  * coefficients in sce->pcoeffs.
  */
450  int i;
451  float *output = sce->ret_buf;
452 
 /* Pick the windowing routine from the current window sequence. */
453  apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
454 
 /*
  * NOTE(review): the condition selecting between the two branches below
  * is not visible in this listing. The first branch runs one 1024-size
  * transform; the else branch runs eight 128-size transforms, each
  * reading a 256-sample span of the windowed buffer (output + i*2).
  */
456  s->mdct1024_fn(s->mdct1024, sce->coeffs, output, sizeof(float));
457  else
458  for (i = 0; i < 1024; i += 128)
459  s->mdct128_fn(s->mdct128, &sce->coeffs[i], output + i*2, sizeof(float));
 /* Move the upper 1024 input samples to the start of the buffer. */
460  memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 /* Snapshot the freshly computed coefficients. */
461  memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
462 }
463 
464 /**
465  * Encode ics_info element.
466  * @see Table 4.6 (syntax of ics_info)
467  */
469 {
470  int w;
471 
 /* Common prefix: reserved bit, 2-bit window sequence, window shape flag. */
472  put_bits(&s->pb, 1, 0); // ics_reserved bit
473  put_bits(&s->pb, 2, info->window_sequence[0]);
474  put_bits(&s->pb, 1, info->use_kb_window[0]);
475  if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 /* Any non-short sequence: 6-bit max_sfb and a cleared predictor flag. */
476  put_bits(&s->pb, 6, info->max_sfb);
477  put_bits(&s->pb, 1, 0); /* No predictor present */
478  } else {
 /* Short sequence: 4-bit max_sfb, then one grouping bit for each of
  * windows 1..7; the bit is 1 when group_len[w] is zero. */
479  put_bits(&s->pb, 4, info->max_sfb);
480  for (w = 1; w < 8; w++)
481  put_bits(&s->pb, 1, !info->group_len[w]);
482  }
483 }
484 
485 /**
486  * Encode MS data.
487  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
488  */
490 {
491  int i, w;
492 
 /* The 2-bit mode is always written; the per-band mask follows only
  * when the mode is 1. */
493  put_bits(pb, 2, cpe->ms_mode);
494  if (cpe->ms_mode == 1)
 /* One bit per band below max_sfb for each window group of channel 0
  * (w advances by the group length). */
495  for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
496  for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
497  put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
498 }
499 
500 /**
501  * Produce integer coefficients from scalefactors provided by the model.
502  */
503 static void adjust_frame_information(ChannelElement *cpe, int chans)
504 {
505  int i, w, w2, g, ch;
506  int maxsfb, cmaxsfb;
507 
508  for (ch = 0; ch < chans; ch++) {
509  IndividualChannelStream *ics = &cpe->ch[ch].ics;
510  maxsfb = 0;
511  cpe->ch[ch].pulse.num_pulse = 0;
512  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
513  for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
514  ;
515  maxsfb = FFMAX(maxsfb, cmaxsfb);
516  }
517  ics->max_sfb = maxsfb;
518 
519  //adjust zero bands for window groups
520  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
521  for (g = 0; g < ics->max_sfb; g++) {
522  i = 1;
523  for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
524  if (!cpe->ch[ch].zeroes[w2*16 + g]) {
525  i = 0;
526  break;
527  }
528  }
529  cpe->ch[ch].zeroes[w*16 + g] = i;
530  }
531  }
532  }
533 
534  if (chans > 1 && cpe->common_window) {
535  IndividualChannelStream *ics0 = &cpe->ch[0].ics;
536  IndividualChannelStream *ics1 = &cpe->ch[1].ics;
537  int msc = 0;
538  ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
539  ics1->max_sfb = ics0->max_sfb;
540  for (w = 0; w < ics0->num_windows*16; w += 16)
541  for (i = 0; i < ics0->max_sfb; i++)
542  if (cpe->ms_mask[w+i])
543  msc++;
544  if (msc == 0 || ics0->max_sfb == 0)
545  cpe->ms_mode = 0;
546  else
547  cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
548  }
549 }
550 
552 {
 /*
  * For every band flagged in is_mask, merge the second channel into the
  * first and clear the second. Does nothing unless the pair uses a
  * common window.
  */
553  int w, w2, g, i;
554  IndividualChannelStream *ics = &cpe->ch[0].ics;
555  if (!cpe->common_window)
556  return;
557  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
558  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
 /* Coefficient offset of window w+w2; advanced band by band below. */
559  int start = (w+w2) * 128;
560  for (g = 0; g < ics->num_swb; g++) {
 /* Maps band type 14 to -1 and 15 to +1 (presumably the two
  * intensity band types - confirm against the band type enum). */
561  int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
562  float scale = cpe->ch[0].is_ener[w*16+g];
563  if (!cpe->is_mask[w*16 + g]) {
564  start += ics->swb_sizes[g];
565  continue;
566  }
 /* A set ms_mask entry on such a band flips the sign. */
567  if (cpe->ms_mask[w*16 + g])
568  p *= -1;
569  for (i = 0; i < ics->swb_sizes[g]; i++) {
 /* ch0 = (ch0 + p * ch1) * scale; ch1 is zeroed. */
570  float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
571  cpe->ch[0].coeffs[start+i] = sum;
572  cpe->ch[1].coeffs[start+i] = 0.0f;
573  }
574  start += ics->swb_sizes[g];
575  }
576  }
577  }
578 }
579 
581 {
 /*
  * Convert the flagged bands of a common-window pair in place:
  * ch0 becomes (ch0 + ch1) / 2 and ch1 becomes (ch0 - ch1) / 2.
  * Does nothing unless the pair uses a common window.
  */
582  int w, w2, g, i;
583  IndividualChannelStream *ics = &cpe->ch[0].ics;
584  if (!cpe->common_window)
585  return;
586  for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
587  for (w2 = 0; w2 < ics->group_len[w]; w2++) {
 /* Coefficient offset of window w+w2; advanced band by band below. */
588  int start = (w+w2) * 128;
589  for (g = 0; g < ics->num_swb; g++) {
590  /* ms_mask can be used for other purposes in PNS and I/S,
591  * so must not apply M/S if any band uses either, even if
592  * ms_mask is set.
593  */
594  if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
595  || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
596  || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
597  start += ics->swb_sizes[g];
598  continue;
599  }
600  for (i = 0; i < ics->swb_sizes[g]; i++) {
 /* L holds the half-sum; R = L - ch1 equals the
  * half-difference of the original pair. */
601  float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
602  float R = L - cpe->ch[1].coeffs[start+i];
603  cpe->ch[0].coeffs[start+i] = L;
604  cpe->ch[1].coeffs[start+i] = R;
605  }
606  start += ics->swb_sizes[g];
607  }
608  }
609  }
610 }
611 
612 /**
613  * Encode scalefactor band coding type.
614  */
616 {
617  int w;
618 
 /* Optional coder hook, called only when the coder provides it. */
619  if (s->coder->set_special_band_scalefactors)
620  s->coder->set_special_band_scalefactors(s, sce);
621 
 /* Delegate to the coder once per window group, passing the group's
  * first window index and its length. */
622  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
623  s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
624 }
625 
626 /**
627  * Encode scalefactors.
628  */
631 {
 /*
  * Three independent running references are kept, one per band class:
  * off_sf (regular bands), off_pns (noise bands) and off_is (intensity
  * bands). Each band's diff is taken against the reference of its own
  * class, which is then updated to the band's sf_idx.
  * NOTE(review): several statements of this function are missing from
  * this listing (see the notes below).
  */
632  int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
633  int off_is = 0, noise_flag = 1;
634  int i, w;
635 
636  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
637  for (i = 0; i < sce->ics.max_sfb; i++) {
 /* Bands flagged as zero are skipped entirely. */
638  if (!sce->zeroes[w*16 + i]) {
639  if (sce->band_type[w*16 + i] == NOISE_BT) {
640  diff = sce->sf_idx[w*16 + i] - off_pns;
641  off_pns = sce->sf_idx[w*16 + i];
 /* noise_flag is positive only for the first noise band.
  * NOTE(review): the statement executed for that band
  * before the continue is not visible in this listing. */
642  if (noise_flag-- > 0) {
644  continue;
645  }
646  } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
647  sce->band_type[w*16 + i] == INTENSITY_BT2) {
648  diff = sce->sf_idx[w*16 + i] - off_is;
649  off_is = sce->sf_idx[w*16 + i];
650  } else {
651  diff = sce->sf_idx[w*16 + i] - off_sf;
652  off_sf = sce->sf_idx[w*16 + i];
653  }
 /* NOTE(review): the lines directly before and after this
  * assertion are missing from this listing; as shown, diff is
  * only checked to lie in 0..120. */
655  av_assert0(diff >= 0 && diff <= 120);
657  }
658  }
659  }
660 }
661 
662 /**
663  * Encode pulse data.
664  */
665 static void encode_pulses(AACEncContext *s, Pulse *pulse)
666 {
667  int i;
668 
669  put_bits(&s->pb, 1, !!pulse->num_pulse);
670  if (!pulse->num_pulse)
671  return;
672 
673  put_bits(&s->pb, 2, pulse->num_pulse - 1);
674  put_bits(&s->pb, 6, pulse->start);
675  for (i = 0; i < pulse->num_pulse; i++) {
676  put_bits(&s->pb, 5, pulse->pos[i]);
677  put_bits(&s->pb, 4, pulse->amp[i]);
678  }
679 }
680 
681 /**
682  * Encode spectral coefficients processed by psychoacoustic model.
683  */
685 {
686  int start, i, w, w2;
687 
 /* Walk the window groups; w is the first window of each group. */
688  for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 /* start is the coefficient offset of band i within one window. */
689  start = 0;
690  for (i = 0; i < sce->ics.max_sfb; i++) {
 /* Bands flagged as zero emit nothing; only the offset advances. */
691  if (sce->zeroes[w*16 + i]) {
692  start += sce->ics.swb_sizes[i];
693  continue;
694  }
 /* Every window of the group is coded with the scalefactor index,
  * band type and clipping flag stored for the group's first window. */
695  for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
696  s->coder->quantize_and_encode_band(s, &s->pb,
697  &sce->coeffs[start + w2*128],
698  NULL, sce->ics.swb_sizes[i],
699  sce->sf_idx[w*16 + i],
700  sce->band_type[w*16 + i],
701  s->lambda,
702  sce->ics.window_clipping[w]);
703  }
704  start += sce->ics.swb_sizes[i];
705  }
706  }
707 }
708 
709 /**
710  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
711  */
713 {
714  int start, i, j, w;
715 
 /* A factor of 1.0 or more means no scaling is needed. */
716  if (sce->ics.clip_avoidance_factor < 1.0f) {
 /* Every window is scaled individually (no group stepping here). */
717  for (w = 0; w < sce->ics.num_windows; w++) {
718  start = 0;
 /* Only coefficients in bands below max_sfb are touched. */
719  for (i = 0; i < sce->ics.max_sfb; i++) {
720  float *swb_coeffs = &sce->coeffs[start + w*128];
721  for (j = 0; j < sce->ics.swb_sizes[i]; j++)
722  swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
723  start += sce->ics.swb_sizes[i];
724  }
725  }
726  }
727 }
728 
729 /**
730  * Encode one channel of audio data.
731  */
734  int common_window)
735 {
 /* 8-bit field holding the first scalefactor index. */
736  put_bits(&s->pb, 8, sce->sf_idx[0]);
 /* ics_info is written here only when the channel does not share a
  * common window. */
737  if (!common_window)
738  put_ics_info(s, &sce->ics);
739  encode_band_info(s, sce);
740  encode_scale_factors(avctx, s, sce);
741  encode_pulses(s, &sce->pulse);
 /* The TNS presence flag is always written; the TNS payload is left to
  * the coder hook when the coder provides one. */
742  put_bits(&s->pb, 1, !!sce->tns.present);
743  if (s->coder->encode_tns_info)
744  s->coder->encode_tns_info(s, sce);
745  put_bits(&s->pb, 1, 0); //ssr
 /* NOTE(review): one line between the ssr bit and the return is missing
  * from this listing. */
747  return 0;
748 }
749 
750 /**
751  * Write some auxiliary information about the created AAC file.
752  */
753 static void put_bitstream_info(AACEncContext *s, const char *name)
754 {
755  int i, namelen, padbits;
756 
757  namelen = strlen(name) + 2;
758  put_bits(&s->pb, 3, TYPE_FIL);
759  put_bits(&s->pb, 4, FFMIN(namelen, 15));
760  if (namelen >= 15)
761  put_bits(&s->pb, 8, namelen - 14);
762  put_bits(&s->pb, 4, 0); //extension type - filler
763  padbits = -put_bits_count(&s->pb) & 7;
764  align_put_bits(&s->pb);
765  for (i = 0; i < namelen - 2; i++)
766  put_bits(&s->pb, 8, name[i]);
767  put_bits(&s->pb, 12 - padbits, 0);
768 }
769 
770 /*
771  * Copy input samples.
772  * Channels are reordered from libavcodec's default order to AAC order.
773  */
775 {
776  int ch;
 /* Index one past the last valid sample in each channel buffer: 2048 plus
  * the new frame's sample count, or exactly 2048 when there is no frame. */
777  int end = 2048 + (frame ? frame->nb_samples : 0);
 /* channel_map[ch] is the input channel that feeds encoder channel ch. */
778  const uint8_t *channel_map = s->reorder_map;
779 
780  /* copy and remap input samples */
781  for (ch = 0; ch < s->channels; ch++) {
782  /* copy last 1024 samples of previous frame to the start of the current frame */
 /* i.e. samples [2048, 3072) are moved down to [1024, 2048) */
783  memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
784 
785  /* copy new samples and zero any remaining samples */
786  if (frame) {
787  memcpy(&s->planar_samples[ch][2048],
788  frame->extended_data[channel_map[ch]],
789  frame->nb_samples * sizeof(s->planar_samples[0][0]));
790  }
 /* Zero everything from end up to index 3072. */
791  memset(&s->planar_samples[ch][end], 0,
792  (3072 - end) * sizeof(s->planar_samples[0][0]));
793  }
794 }
795 
796 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
797  const AVFrame *frame, int *got_packet_ptr)
798 {
799  AACEncContext *s = avctx->priv_data;
800  float **samples = s->planar_samples, *samples2, *la, *overlap;
801  ChannelElement *cpe;
804  int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
805  int target_bits, rate_bits, too_many_bits, too_few_bits;
806  int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
807  int chan_el_counter[4];
809 
810  /* add current frame to queue */
811  if (frame) {
812  if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
813  return ret;
814  } else {
815  if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
816  return 0;
817  }
818 
820 
821  if (!avctx->frame_num)
822  return 0;
823 
824  start_ch = 0;
825  for (i = 0; i < s->chan_map[0]; i++) {
826  FFPsyWindowInfo* wi = windows + start_ch;
827  tag = s->chan_map[i+1];
828  chans = tag == TYPE_CPE ? 2 : 1;
829  cpe = &s->cpe[i];
830  for (ch = 0; ch < chans; ch++) {
831  int k;
832  float clip_avoidance_factor;
833  sce = &cpe->ch[ch];
834  ics = &sce->ics;
835  s->cur_channel = start_ch + ch;
836  overlap = &samples[s->cur_channel][0];
837  samples2 = overlap + 1024;
838  la = samples2 + (448+64);
839  if (!frame)
840  la = NULL;
841  if (tag == TYPE_LFE) {
842  wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
843  wi[ch].window_shape = 0;
844  wi[ch].num_windows = 1;
845  wi[ch].grouping[0] = 1;
846  wi[ch].clipping[0] = 0;
847 
848  /* Only the lowest 12 coefficients are used in a LFE channel.
849  * The expression below results in only the bottom 8 coefficients
850  * being used for 11.025kHz to 16kHz sample rates.
851  */
852  ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
853  } else {
854  wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
855  ics->window_sequence[0]);
856  }
857  ics->window_sequence[1] = ics->window_sequence[0];
858  ics->window_sequence[0] = wi[ch].window_type[0];
859  ics->use_kb_window[1] = ics->use_kb_window[0];
860  ics->use_kb_window[0] = wi[ch].window_shape;
861  ics->num_windows = wi[ch].num_windows;
862  ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
863  ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
864  ics->max_sfb = FFMIN(ics->max_sfb, ics->num_swb);
865  ics->swb_offset = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
866  ff_swb_offset_128 [s->samplerate_index]:
867  ff_swb_offset_1024[s->samplerate_index];
868  ics->tns_max_bands = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
869  ff_tns_max_bands_128 [s->samplerate_index]:
870  ff_tns_max_bands_1024[s->samplerate_index];
871 
872  for (w = 0; w < ics->num_windows; w++)
873  ics->group_len[w] = wi[ch].grouping[w];
874 
875  /* Calculate input sample maximums and evaluate clipping risk */
876  clip_avoidance_factor = 0.0f;
877  for (w = 0; w < ics->num_windows; w++) {
878  const float *wbuf = overlap + w * 128;
879  const int wlen = 2048 / ics->num_windows;
880  float max = 0;
881  int j;
882  /* mdct input is 2 * output */
883  for (j = 0; j < wlen; j++)
884  max = FFMAX(max, fabsf(wbuf[j]));
885  wi[ch].clipping[w] = max;
886  }
887  for (w = 0; w < ics->num_windows; w++) {
888  if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
889  ics->window_clipping[w] = 1;
890  clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
891  } else {
892  ics->window_clipping[w] = 0;
893  }
894  }
895  if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
896  ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
897  } else {
898  ics->clip_avoidance_factor = 1.0f;
899  }
900 
901  apply_window_and_mdct(s, sce, overlap);
902 
903  for (k = 0; k < 1024; k++) {
904  if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
905  av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
906  return AVERROR(EINVAL);
907  }
908  }
909  avoid_clipping(s, sce);
910  }
911  start_ch += chans;
912  }
913  if ((ret = ff_alloc_packet(avctx, avpkt, 8192 * s->channels)) < 0)
914  return ret;
915  frame_bits = its = 0;
916  do {
917  init_put_bits(&s->pb, avpkt->data, avpkt->size);
918 
919  if ((avctx->frame_num & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
921  start_ch = 0;
922  target_bits = 0;
923  memset(chan_el_counter, 0, sizeof(chan_el_counter));
924  for (i = 0; i < s->chan_map[0]; i++) {
925  FFPsyWindowInfo* wi = windows + start_ch;
926  const float *coeffs[2];
927  tag = s->chan_map[i+1];
928  chans = tag == TYPE_CPE ? 2 : 1;
929  cpe = &s->cpe[i];
930  cpe->common_window = 0;
931  memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
932  memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
933  put_bits(&s->pb, 3, tag);
934  put_bits(&s->pb, 4, chan_el_counter[tag]++);
935  for (ch = 0; ch < chans; ch++) {
936  sce = &cpe->ch[ch];
937  coeffs[ch] = sce->coeffs;
938  memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
939  for (w = 0; w < 128; w++)
940  if (sce->band_type[w] > RESERVED_BT)
941  sce->band_type[w] = 0;
942  }
943  s->psy.bitres.alloc = -1;
944  s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
945  s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
946  if (s->psy.bitres.alloc > 0) {
947  /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
948  target_bits += s->psy.bitres.alloc
949  * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
950  s->psy.bitres.alloc /= chans;
951  }
952  s->cur_type = tag;
953  for (ch = 0; ch < chans; ch++) {
954  s->cur_channel = start_ch + ch;
955  if (s->options.pns && s->coder->mark_pns)
956  s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
957  s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
958  }
959  if (chans > 1
960  && wi[0].window_type[0] == wi[1].window_type[0]
961  && wi[0].window_shape == wi[1].window_shape) {
962 
963  cpe->common_window = 1;
964  for (w = 0; w < wi[0].num_windows; w++) {
965  if (wi[0].grouping[w] != wi[1].grouping[w]) {
966  cpe->common_window = 0;
967  break;
968  }
969  }
970  }
971  for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
972  sce = &cpe->ch[ch];
973  s->cur_channel = start_ch + ch;
974  if (s->options.tns && s->coder->search_for_tns)
975  s->coder->search_for_tns(s, sce);
976  if (s->options.tns && s->coder->apply_tns_filt)
977  s->coder->apply_tns_filt(s, sce);
978  if (sce->tns.present)
979  tns_mode = 1;
980  if (s->options.pns && s->coder->search_for_pns)
981  s->coder->search_for_pns(s, avctx, sce);
982  }
983  s->cur_channel = start_ch;
984  if (s->options.intensity_stereo) { /* Intensity Stereo */
985  if (s->coder->search_for_is)
986  s->coder->search_for_is(s, avctx, cpe);
987  if (cpe->is_mode) is_mode = 1;
989  }
990  if (s->options.mid_side) { /* Mid/Side stereo */
991  if (s->options.mid_side == -1 && s->coder->search_for_ms)
992  s->coder->search_for_ms(s, cpe);
993  else if (cpe->common_window)
994  memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
996  }
997  adjust_frame_information(cpe, chans);
998  if (chans == 2) {
999  put_bits(&s->pb, 1, cpe->common_window);
1000  if (cpe->common_window) {
1001  put_ics_info(s, &cpe->ch[0].ics);
1002  encode_ms_info(&s->pb, cpe);
1003  if (cpe->ms_mode) ms_mode = 1;
1004  }
1005  }
1006  for (ch = 0; ch < chans; ch++) {
1007  s->cur_channel = start_ch + ch;
1008  encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
1009  }
1010  start_ch += chans;
1011  }
1012 
1013  if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
1014  /* When using a constant Q-scale, don't mess with lambda */
1015  break;
1016  }
1017 
1018  /* rate control stuff
1019  * allow between the nominal bitrate, and what psy's bit reservoir says to target
1020  * but drift towards the nominal bitrate always
1021  */
1022  frame_bits = put_bits_count(&s->pb);
1023  rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
1024  rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
1025  too_many_bits = FFMAX(target_bits, rate_bits);
1026  too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
1027  too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
1028 
1029  /* When strict bit-rate control is demanded */
1030  if (avctx->bit_rate_tolerance == 0) {
1031  if (rate_bits < frame_bits) {
1032  float ratio = ((float)rate_bits) / frame_bits;
1033  s->lambda *= FFMIN(0.9f, ratio);
1034  continue;
1035  }
1036  /* reset lambda when solution is found */
1037  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
1038  break;
1039  }
1040 
1041  /* When using ABR, be strict (but only for increasing) */
1042  too_few_bits = too_few_bits - too_few_bits/8;
1043  too_many_bits = too_many_bits + too_many_bits/2;
1044 
1045  if ( its == 0 /* for steady-state Q-scale tracking */
1046  || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
1047  || frame_bits >= 6144 * s->channels - 3 )
1048  {
1049  float ratio = ((float)rate_bits) / frame_bits;
1050 
1051  if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
1052  /*
1053  * This path is for steady-state Q-scale tracking
1054  * When frame bits fall within the stable range, we still need to adjust
1055  * lambda to maintain it like so in a stable fashion (large jumps in lambda
1056  * create artifacts and should be avoided), but slowly
1057  */
1058  ratio = sqrtf(sqrtf(ratio));
1059  ratio = av_clipf(ratio, 0.9f, 1.1f);
1060  } else {
1061  /* Not so fast though */
1062  ratio = sqrtf(ratio);
1063  }
1064  s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
1065 
1066  /* Keep iterating if we must reduce and lambda is in the sky */
1067  if (ratio > 0.9f && ratio < 1.1f) {
1068  break;
1069  } else {
1070  if (is_mode || ms_mode || tns_mode || pred_mode) {
1071  for (i = 0; i < s->chan_map[0]; i++) {
1072  // Must restore coeffs
1073  chans = tag == TYPE_CPE ? 2 : 1;
1074  cpe = &s->cpe[i];
1075  for (ch = 0; ch < chans; ch++)
1076  memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
1077  }
1078  }
1079  its++;
1080  }
1081  } else {
1082  break;
1083  }
1084  } while (1);
1085 
1086  put_bits(&s->pb, 3, TYPE_END);
1087  flush_put_bits(&s->pb);
1088 
1089  s->last_frame_pb_count = put_bits_count(&s->pb);
1090  avpkt->size = put_bytes_output(&s->pb);
1091 
1092  s->lambda_sum += s->lambda;
1093  s->lambda_count++;
1094 
1095  ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
1096  &avpkt->duration);
1097 
1098  avpkt->flags |= AV_PKT_FLAG_KEY;
1099 
1100  *got_packet_ptr = 1;
1101  return 0;
1102 }
1103 
1105 {
 /*
  * Encoder teardown (installed as the codec's .close callback).
  * Logs the average lambda over the run, then releases every resource
  * owned by the AACEncContext. Always returns 0.
  *
  * The codec sets FF_CODEC_CAP_INIT_CLEANUP, so this can also run after a
  * failed init, when some of these members were never set up.
  * NOTE(review): that relies on each release call below tolerating an
  * unset member -- confirm for ff_psy_end() and ff_lpc_end().
  */
1106  AACEncContext *s = avctx->priv_data;
1107 
 /* Report NAN instead of dividing by zero when no lambda was accumulated. */
1108  av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
1109 
 /* Both MDCT contexts (long 1024 and short 128 transforms). */
1110  av_tx_uninit(&s->mdct1024);
1111  av_tx_uninit(&s->mdct128);
1112  ff_psy_end(&s->psy);
1113  ff_lpc_end(&s->lpc);
 /* Buffers allocated in alloc_buffers() and the float DSP context. */
1114  av_freep(&s->buffer.samples);
1115  av_freep(&s->cpe);
1116  av_freep(&s->fdsp);
1117  ff_af_queue_close(&s->afq);
1118  return 0;
1119 }
1120 
1122 {
 /*
  * Set up the DSP state used by the encoder: the float DSP context and
  * two float MDCT contexts, one of length 1024 and one of length 128.
  *
  * Returns 0 on success, AVERROR(ENOMEM) if s->fdsp is NULL, or the
  * negative error code from av_tx_init().
  */
1123  int ret = 0;
 /* Scale factor handed to both av_tx_init() calls below. */
1124  float scale = 32768.0f;
1125 
 /*
  * NOTE(review): the statement that assigns s->fdsp (original line 1126)
  * is missing from this listing; presumably it allocates the float DSP
  * context -- confirm against the real file.
  */
1127  if (!s->fdsp)
1128  return AVERROR(ENOMEM);
1129 
 /* The 4th argument (inv) is 0 for both transforms. */
1130  if ((ret = av_tx_init(&s->mdct1024, &s->mdct1024_fn, AV_TX_FLOAT_MDCT, 0,
1131  1024, &scale, 0)) < 0)
1132  return ret;
1133  if ((ret = av_tx_init(&s->mdct128, &s->mdct128_fn, AV_TX_FLOAT_MDCT, 0,
1134  128, &scale, 0)) < 0)
1135  return ret;
1136 
1137  return 0;
1138 }
1139 
1141 {
 /*
  * Allocate the per-channel sample storage and the channel-element array.
  *
  * s->buffer.samples holds 3 * 1024 elements per channel, and s->cpe holds
  * s->chan_map[0] elements (chan_map[0] is the element count; the element
  * types follow it in chan_map[1..]).
  *
  * Returns 0 on success or AVERROR(ENOMEM). Nothing is freed here on
  * failure; aac_encode_end() releases both buffers with av_freep().
  */
1142  int ch;
1143  if (!FF_ALLOCZ_TYPED_ARRAY(s->buffer.samples, s->channels * 3 * 1024) ||
1144  !FF_ALLOCZ_TYPED_ARRAY(s->cpe, s->chan_map[0]))
1145  return AVERROR(ENOMEM);
1146 
 /* Give each channel its own contiguous 3 * 1024 slice of the buffer. */
1147  for(ch = 0; ch < s->channels; ch++)
1148  s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
1149 
1150  return 0;
1151 }
1152 
1154 {
 /*
  * Encoder init callback: derives the channel configuration, bitrate,
  * sample-rate index and profile from avctx, then initializes the DSP
  * state, buffers, extradata, psychoacoustic model and frame queue.
  *
  * Returns 0 on success or a negative AVERROR code. The early returns do
  * no cleanup of their own; the codec sets FF_CODEC_CAP_INIT_CLEANUP, so
  * aac_encode_end() is expected to release whatever was set up.
  */
1155  AACEncContext *s = avctx->priv_data;
1156  int i, ret = 0;
1157  int chcfg;
1158  const uint8_t *sizes[2];
1159  uint8_t grouping[AAC_MAX_CHANNELS];
1160  int lengths[2];
1161 
1162  /* Constants */
1163  s->last_frame_pb_count = 0;
1164  avctx->frame_size = 1024;
1165  avctx->initial_padding = 1024;
 /* Starting lambda: the requested global quality, or 120 when unset. */
1166  s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
1167 
1168  /* Channel map and unspecified bitrate guessing */
1169  s->channels = avctx->ch_layout.nb_channels;
1170 
 /*
  * Assume a PCE is needed unless the loop below finds a match, in which
  * case the user's aac_pce option decides.
  * NOTE(review): the match condition (original line 1173) is missing from
  * this listing; presumably it compares avctx->ch_layout against
  * aac_normal_chan_layouts[chcfg - 1] -- confirm against the real file.
  */
1171  s->needs_pce = 1;
1172  for (chcfg = 1; chcfg < FF_ARRAY_ELEMS(aac_normal_chan_layouts); chcfg++) {
1174  s->needs_pce = s->options.pce;
1175  break;
1176  }
1177  }
1178 
1179  if (s->needs_pce) {
1180  char buf[64];
 /*
  * Search aac_pce_configs for an entry to use; i ends up equal to the
  * array size when none is found.
  * NOTE(review): the match condition (original line 1182) is missing from
  * this listing.
  */
1181  for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
1183  break;
1184  av_channel_layout_describe(&avctx->ch_layout, buf, sizeof(buf));
1185  if (i == FF_ARRAY_ELEMS(aac_pce_configs)) {
1186  av_log(avctx, AV_LOG_ERROR, "Unsupported channel layout \"%s\"\n", buf);
1187  return AVERROR(EINVAL);
1188  }
1189  av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
1190  s->pce = aac_pce_configs[i];
1191  s->reorder_map = s->pce.reorder_map;
1192  s->chan_map = s->pce.config_map;
 /* With a PCE, channel configuration 0 is passed to put_audio_specific_config(). */
1193  chcfg = 0;
1194  } else {
1195  s->reorder_map = aac_chan_maps[chcfg - 1];
1196  s->chan_map = aac_chan_configs[chcfg - 1];
1197  }
1198 
 /* No bitrate requested: sum a default per element type. */
1199  if (!avctx->bit_rate) {
1200  for (i = 1; i <= s->chan_map[0]; i++) {
1201  avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
1202  s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */
1203  69000 ; /* SCE */
1204  }
1205  }
1206 
1207  /* Samplerate */
 /*
  * This inner 'i' shadows the function-level 'i'. The loop has no exit
  * other than a table match; av_assert1() is the only bound check.
  * NOTE(review): assumes the sample rate was already validated against
  * ff_mpeg4audio_sample_rates before init is called -- confirm.
  */
1208  for (int i = 0;; i++) {
1209  av_assert1(i < 13);
1210  if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i]) {
1211  s->samplerate_index = i;
1212  break;
1213  }
1214  }
1215 
1216  /* Bitrate limiting */
 /* Cap the bitrate at 6144 bits per channel for each 1024-sample frame. */
1217  WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
1218  "Too many bits %f > %d per frame requested, clamping to max\n",
1219  1024.0 * avctx->bit_rate / avctx->sample_rate,
1220  6144 * s->channels);
1221  avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
1222  avctx->bit_rate);
1223 
1224  /* Profile and option setting */
1225  avctx->profile = avctx->profile == AV_PROFILE_UNKNOWN ? AV_PROFILE_AAC_LOW :
1226  avctx->profile;
1227  for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
1228  if (avctx->profile == aacenc_profiles[i])
1229  break;
1230  ERROR_IF(i == FF_ARRAY_ELEMS(aacenc_profiles), "Profile not supported!\n");
 /* The MPEG-2 low profile is handled as AAC-LC with PNS forced off. */
1231  if (avctx->profile == AV_PROFILE_MPEG2_AAC_LOW) {
1232  avctx->profile = AV_PROFILE_AAC_LOW;
1233  WARN_IF(s->options.pns,
1234  "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1235  s->options.pns = 0;
1236  }
1237  s->profile = avctx->profile;
1238 
1239  /* Coder limitations */
1240  s->coder = &ff_aac_coders[s->options.coder];
1241 
1242  /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1243  if (s->channels > 3)
1244  s->options.mid_side = 0;
1245 
1246  // Initialize static tables
 /* NOTE(review): the statement for this step (original line 1247) is missing from this listing. */
1248 
1249  if ((ret = dsp_init(avctx, s)) < 0)
1250  return ret;
1251 
1252  if ((ret = alloc_buffers(avctx, s)) < 0)
1253  return ret;
1254 
1255  if ((ret = put_audio_specific_config(avctx, chcfg)))
1256  return ret;
1257 
 /* Scalefactor-band tables for the long (1024) and short (128) windows. */
1258  sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
1259  sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
1260  lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
1261  lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
 /* grouping[i] is 1 for channel-pair elements and 0 for all others. */
1262  for (i = 0; i < s->chan_map[0]; i++)
1263  grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
1264  if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
1265  s->chan_map[0], grouping)) < 0)
1266  return ret;
 /* Fixed initial value for the encoder's random state. */
1268  s->random_state = 0x1f2e3d4c;
1269 
1270  ff_aacenc_dsp_init(&s->aacdsp);
1271 
1272  ff_af_queue_init(avctx, &s->afq);
1273 
1274  return 0;
1275 }
1276 
/* Option flags shared by every entry of aacenc_options below. */
1277 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
/*
 * Private options of the encoder; each non-constant entry stores into a
 * field of AACEncContext.options.
 *
 * - aac_coder: index into ff_aac_coders, default AAC_CODER_TWOLOOP;
 *   "twoloop" and "fast" are its named constants.
 * - aac_ms: default -1. The encode path runs the coder's M/S search when
 *   the value is -1, forces the whole M/S mask on for any other non-zero
 *   value (if the pair shares a common window), and skips M/S when 0.
 * - aac_is, aac_pns, aac_tns: default 1.
 * - aac_pce: default 0; aac_encode_init copies it into needs_pce when the
 *   channel layout matches one of the normal layouts.
 *
 * NOTE(review): original line 1287, just before the {NULL} terminator, is
 * missing from this listing.
 */
1278 static const AVOption aacenc_options[] = {
1279  {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, .unit = "coder"},
1280  {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
1281  {"fast", "Fast search", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
1282  {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
1283  {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1284  {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1285  {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1286  {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1288  {NULL}
1289 };
1290 
/*
 * AVClass of the encoder's private context; it exposes aacenc_options and
 * is installed as the codec's .p.priv_class.
 */
1291 static const AVClass aacenc_class = {
1292  .class_name = "AAC encoder",
1293  .item_name = av_default_item_name,
1294  .option = aacenc_options,
1295  .version = LIBAVUTIL_VERSION_INT,
1296 };
1297 
/*
 * Codec-level default: option "b" is set to "0".
 * NOTE(review): presumably "b" maps to avctx->bit_rate, so that the
 * !avctx->bit_rate branch in aac_encode_init picks a per-element default
 * bitrate -- confirm.
 */
1299  { "b", "0" },
1300  { NULL }
1301 };
1302 
/*
 * Codec registration entry for the encoder.
 * NOTE(review): several initializer lines (original 1309, 1312, 1315, 1317)
 * are missing from this listing, including the continuation of
 * .p.capabilities.
 */
1304  .p.name = "aac",
1305  CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
1306  .p.type = AVMEDIA_TYPE_AUDIO,
1307  .p.id = AV_CODEC_ID_AAC,
1308  .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
1310  .priv_data_size = sizeof(AACEncContext),
1311  .init = aac_encode_init,
1313  .close = aac_encode_end,
1314  .defaults = aac_encode_defaults,
 /* Allows .close to be called for deallocation even when .init failed. */
1316  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
1318  .p.priv_class = &aacenc_class,
1319 };
FF_ALLOCZ_TYPED_ARRAY
#define FF_ALLOCZ_TYPED_ARRAY(p, nelem)
Definition: internal.h:78
AVCodecContext::frame_size
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1068
AV_SAMPLE_FMT_FLTP
@ AV_SAMPLE_FMT_FLTP
float, planar
Definition: samplefmt.h:66
name
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option name
Definition: writing_filters.txt:88
ff_tns_max_bands_128
const uint8_t ff_tns_max_bands_128[]
Definition: aactab.c:1990
AV_CHANNEL_LAYOUT_OCTAGONAL
#define AV_CHANNEL_LAYOUT_OCTAGONAL
Definition: channel_layout.h:422
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:43
aacenc_class
static const AVClass aacenc_class
Definition: aacenc.c:1291
aac_normal_chan_layouts
static const AVChannelLayout aac_normal_chan_layouts[15]
Definition: aacenctab.h:47
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
opt.h
LIBAVCODEC_IDENT
#define LIBAVCODEC_IDENT
Definition: version.h:43
put_bitstream_info
static void put_bitstream_info(AACEncContext *s, const char *name)
Write some auxiliary information about the created AAC file.
Definition: aacenc.c:753
ff_aac_kbd_short_128
float ff_aac_kbd_short_128[128]
libm.h
SingleChannelElement::pulse
Pulse pulse
Definition: aacenc.h:110
align_put_bits
static void align_put_bits(PutBitContext *s)
Pad the bitstream with zeros up to the next byte boundary.
Definition: put_bits.h:445
TYPE_FIL
@ TYPE_FIL
Definition: aac.h:50
ff_af_queue_remove
void ff_af_queue_remove(AudioFrameQueue *afq, int nb_samples, int64_t *pts, int64_t *duration)
Remove frame(s) from the queue.
Definition: audio_frame_queue.c:75
out
static FILE * out
Definition: movenc.c:55
AV_CHANNEL_LAYOUT_STEREO
#define AV_CHANNEL_LAYOUT_STEREO
Definition: channel_layout.h:395
put_bytes_output
static int put_bytes_output(const PutBitContext *s)
Definition: put_bits.h:99
AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:1040
AV_CHANNEL_LAYOUT_4POINT1
#define AV_CHANNEL_LAYOUT_4POINT1
Definition: channel_layout.h:401
aacenctab.h
AV_CHANNEL_LAYOUT_HEXAGONAL
#define AV_CHANNEL_LAYOUT_HEXAGONAL
Definition: channel_layout.h:411
copy_input_samples
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
Definition: aacenc.c:774
aac_encode_init
static av_cold int aac_encode_init(AVCodecContext *avctx)
Definition: aacenc.c:1153
aacenc_profiles
static const int aacenc_profiles[]
Definition: aacenctab.h:145
Pulse::num_pulse
int num_pulse
Definition: aac.h:104
AV_CODEC_FLAG_QSCALE
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
Definition: avcodec.h:213
int64_t
long long int64_t
Definition: coverity.c:34
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:226
SingleChannelElement::zeroes
uint8_t zeroes[128]
band is not coded
Definition: aacenc.h:114
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
ff_af_queue_init
av_cold void ff_af_queue_init(AVCodecContext *avctx, AudioFrameQueue *afq)
Initialize AudioFrameQueue.
Definition: audio_frame_queue.c:28
ff_lpc_init
av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order, enum FFLPCType lpc_type)
Initialize LPCContext.
Definition: lpc.c:340
AV_CHANNEL_LAYOUT_2_2
#define AV_CHANNEL_LAYOUT_2_2
Definition: channel_layout.h:402
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:459
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:154
WARN_IF
#define WARN_IF(cond,...)
Definition: aacenc_utils.h:250
AVPacket::data
uint8_t * data
Definition: packet.h:595
ff_aac_coders
const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB]
Definition: aaccoder.c:843
AVOption
AVOption.
Definition: opt.h:429
encode.h
R
#define R
Definition: huffyuv.h:44
encode_band_info
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
Encode scalefactor band coding type.
Definition: aacenc.c:615
AV_PROFILE_MPEG2_AAC_LOW
#define AV_PROFILE_MPEG2_AAC_LOW
Definition: defs.h:77
TemporalNoiseShaping::present
int present
Definition: aacdec.h:186
FFCodec
Definition: codec_internal.h:127
version.h
FFPsyWindowInfo::window_shape
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
float.h
AAC_CODER_NB
@ AAC_CODER_NB
Definition: aacenc.h:48
AVPacket::duration
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: packet.h:613
max
#define max(a, b)
Definition: cuda_runtime.h:33
FFMAX
#define FFMAX(a, b)
Definition: macros.h:47
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:329
ChannelElement::ch
SingleChannelElement ch[2]
Definition: aacdec.h:296
AV_PKT_FLAG_KEY
#define AV_PKT_FLAG_KEY
The packet contains a keyframe.
Definition: packet.h:650
ff_swb_offset_128
const uint16_t *const ff_swb_offset_128[]
Definition: aactab.c:1940
av_tx_init
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
Definition: tx.c:903
encode_spectral_coeffs
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
Encode spectral coefficients processed by psychoacoustic model.
Definition: aacenc.c:684
ff_tns_max_bands_1024
const uint8_t ff_tns_max_bands_1024[]
Definition: aactab.c:1974
AAC_CODER_FAST
@ AAC_CODER_FAST
Definition: aacenc.h:46
IndividualChannelStream::num_swb
int num_swb
number of scalefactor window bands
Definition: aacdec.h:172
AV_CHANNEL_LAYOUT_7POINT1_WIDE
#define AV_CHANNEL_LAYOUT_7POINT1_WIDE
Definition: channel_layout.h:418
WINDOW_FUNC
#define WINDOW_FUNC(type)
Definition: aacenc.c:382
SingleChannelElement::coeffs
float coeffs[1024]
coefficients for IMDCT, maybe processed
Definition: aacenc.h:119
avoid_clipping
static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
Downscale spectral coefficients for near-clipping windows to avoid artifacts.
Definition: aacenc.c:712
put_audio_specific_config
static int put_audio_specific_config(AVCodecContext *avctx, int chcfg)
Make AAC audio config object.
Definition: aacenc.c:342
FFCodecDefault
Definition: codec_internal.h:97
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
mpeg4audio.h
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:1055
SingleChannelElement::ret_buf
float ret_buf[2048]
PCM output buffer.
Definition: aacenc.h:120
apply_mid_side_stereo
static void apply_mid_side_stereo(ChannelElement *cpe)
Definition: aacenc.c:580
AV_CHANNEL_LAYOUT_2POINT1
#define AV_CHANNEL_LAYOUT_2POINT1
Definition: channel_layout.h:396
TYPE_CPE
@ TYPE_CPE
Definition: aac.h:45
ChannelElement::ms_mode
int ms_mode
Signals mid/side stereo flags coding mode.
Definition: aacenc.h:130
AVCodecContext::initial_padding
int initial_padding
Audio only.
Definition: avcodec.h:1114
IndividualChannelStream::window_clipping
uint8_t window_clipping[8]
set if a certain window is near clipping
Definition: aacdec.h:179
AVCodecContext::flags
int flags
AV_CODEC_FLAG_*.
Definition: avcodec.h:500
Pulse::amp
int amp[4]
Definition: aac.h:107
Pulse::pos
int pos[4]
Definition: aac.h:106
AVCodecContext::bit_rate_tolerance
int bit_rate_tolerance
number of bits the bitstream is allowed to diverge from the reference.
Definition: avcodec.h:1227
put_pce
static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
Definition: aacenc.c:301
ff_psy_end
av_cold void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:77
Pulse::start
int start
Definition: aac.h:105
FF_CODEC_ENCODE_CB
#define FF_CODEC_ENCODE_CB(func)
Definition: codec_internal.h:359
fabsf
static __device__ float fabsf(float a)
Definition: cuda_runtime.h:181
ff_af_queue_add
int ff_af_queue_add(AudioFrameQueue *afq, const AVFrame *f)
Add a frame to the queue.
Definition: audio_frame_queue.c:44
AV_CHANNEL_LAYOUT_6POINT1_FRONT
#define AV_CHANNEL_LAYOUT_6POINT1_FRONT
Definition: channel_layout.h:414
AACPCEInfo::num_ele
int num_ele[4]
front, side, back, lfe
Definition: aacenc.h:170
SingleChannelElement::ics
IndividualChannelStream ics
Definition: aacdec.h:212
AV_CHANNEL_LAYOUT_SURROUND
#define AV_CHANNEL_LAYOUT_SURROUND
Definition: channel_layout.h:398
FFPsyWindowInfo
windowing related information
Definition: psymodel.h:77
adjust_frame_information
static void adjust_frame_information(ChannelElement *cpe, int chans)
Produce integer coefficients from scalefactors provided by the model.
Definition: aacenc.c:503
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:119
AV_PROFILE_UNKNOWN
#define AV_PROFILE_UNKNOWN
Definition: defs.h:65
IndividualChannelStream::clip_avoidance_factor
float clip_avoidance_factor
set if any window is near clipping to the necessary atennuation factor to avoid it
Definition: aacenc.h:88
av_channel_layout_describe
int av_channel_layout_describe(const AVChannelLayout *channel_layout, char *buf, size_t buf_size)
Get a human-readable string describing the channel layout properties.
Definition: channel_layout.c:654
AV_CHANNEL_LAYOUT_4POINT0
#define AV_CHANNEL_LAYOUT_4POINT0
Definition: channel_layout.h:400
float
float
Definition: af_crystalizer.c:122
AVCodecContext::extradata_size
int extradata_size
Definition: avcodec.h:527
NOISE_BT
@ NOISE_BT
Spectral data are scaled white noise not coded in the bitstream.
Definition: aac.h:75
AV_TX_FLOAT_MDCT
@ AV_TX_FLOAT_MDCT
Standard MDCT with a sample data type of float, double or int32_t, respectively.
Definition: tx.h:68
AV_CHANNEL_LAYOUT_7POINT1
#define AV_CHANNEL_LAYOUT_7POINT1
Definition: channel_layout.h:417
s
#define s(width, name)
Definition: cbs_vp9.c:198
AVCodecContext::global_quality
int global_quality
Global quality for codecs which cannot change it per frame.
Definition: avcodec.h:1235
IndividualChannelStream::swb_sizes
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
Definition: aacenc.h:83
g
const char * g
Definition: vf_curves.c:128
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:201
EIGHT_SHORT_SEQUENCE
@ EIGHT_SHORT_SEQUENCE
Definition: aac.h:66
info
MIPS optimizations info
Definition: mips.txt:2
AV_CHANNEL_LAYOUT_5POINT0_BACK
#define AV_CHANNEL_LAYOUT_5POINT0_BACK
Definition: channel_layout.h:406
INTENSITY_BT2
@ INTENSITY_BT2
Scalefactor data are intensity stereo positions (out of phase).
Definition: aac.h:76
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:42
alloc_buffers
static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:1140
channel_map
static const uint8_t channel_map[8][8]
Definition: atrac3plusdec.c:52
ff_put_string
void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
Put the string string in the bitstream.
Definition: bitstream.c:39
IndividualChannelStream
Individual Channel Stream.
Definition: aacdec.h:163
av_mallocz
#define av_mallocz(s)
Definition: tableprint_vlc.h:31
SCALE_DIFF_ZERO
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
Definition: aac.h:95
NAN
#define NAN
Definition: mathematics.h:115
NOISE_PRE
#define NOISE_PRE
preamble for NOISE_BT, put in bitstream with the first noise band
Definition: aac.h:99
PutBitContext
Definition: put_bits.h:50
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:332
if
if(ret)
Definition: filter_design.txt:179
ff_af_queue_close
av_cold void ff_af_queue_close(AudioFrameQueue *afq)
Close AudioFrameQueue.
Definition: audio_frame_queue.c:36
AV_CHANNEL_LAYOUT_7POINT1_WIDE_BACK
#define AV_CHANNEL_LAYOUT_7POINT1_WIDE_BACK
Definition: channel_layout.h:419
INTENSITY_BT
@ INTENSITY_BT
Scalefactor data are intensity stereo positions (in phase).
Definition: aac.h:77
FFPsyWindowInfo::window_type
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
AAC_MAX_CHANNELS
#define AAC_MAX_CHANNELS
Definition: aacenctab.h:41
LIBAVUTIL_VERSION_INT
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:76
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
ChannelElement::is_mask
uint8_t is_mask[128]
Set if intensity stereo is used.
Definition: aacenc.h:133
NULL
#define NULL
Definition: coverity.c:32
sizes
static const int sizes[][2]
Definition: img2dec.c:62
encode_pulses
static void encode_pulses(AACEncContext *s, Pulse *pulse)
Encode pulse data.
Definition: aacenc.c:665
SingleChannelElement::is_ener
float is_ener[128]
Intensity stereo pos.
Definition: aacenc.h:116
IndividualChannelStream::use_kb_window
uint8_t use_kb_window[2]
If set, use Kaiser-Bessel window, otherwise use a sine window.
Definition: aacdec.h:166
ff_aac_num_swb_128
const uint8_t ff_aac_num_swb_128[]
Definition: aactab.c:169
AVCodecContext::bit_rate
int64_t bit_rate
the average bitrate
Definition: avcodec.h:493
av_default_item_name
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:242
profiles.h
ff_lpc_end
av_cold void ff_lpc_end(LPCContext *s)
Uninitialize LPCContext.
Definition: lpc.c:365
ChannelElement::ms_mask
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
Definition: aacdec.h:294
options
Definition: swscale.c:45
aac.h
aactab.h
sqrtf
static __device__ float sqrtf(float a)
Definition: cuda_runtime.h:184
FFPsyWindowInfo::grouping
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
av_clipf
av_clipf
Definition: af_crystalizer.c:122
TNS_MAX_ORDER
#define TNS_MAX_ORDER
Definition: aac.h:36
SingleChannelElement::sf_idx
int sf_idx[128]
scalefactor indices
Definition: aacenc.h:113
float_dsp.h
AV_CODEC_ID_AAC
@ AV_CODEC_ID_AAC
Definition: codec_id.h:463
aac_encode_frame
static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: aacenc.c:796
ff_aac_scalefactor_bits
const uint8_t ff_aac_scalefactor_bits[121]
Definition: aactab.c:200
AACPCEInfo
Definition: aacenc.h:168
FFPsyWindowInfo::clipping
float clipping[8]
maximum absolute normalized intensity in the given window for clip avoidance
Definition: psymodel.h:82
IndividualChannelStream::window_sequence
enum WindowSequence window_sequence[2]
Definition: aacdec.h:165
f
f
Definition: af_crystalizer.c:122
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:551
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
ff_swb_offset_1024
const uint16_t *const ff_swb_offset_1024[]
Definition: aactab.c:1900
AVPacket::size
int size
Definition: packet.h:596
codec_internal.h
ONLY_LONG_SEQUENCE
@ ONLY_LONG_SEQUENCE
Definition: aac.h:64
TYPE_END
@ TYPE_END
Definition: aac.h:51
ff_aac_float_common_init
void ff_aac_float_common_init(void)
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
encode_scale_factors
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
Encode scalefactors.
Definition: aacenc.c:629
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
apply_window_and_mdct
static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce, float *audio)
Definition: aacenc.c:447
AVFloatDSPContext
Definition: float_dsp.h:24
AAC_CODER_TWOLOOP
@ AAC_CODER_TWOLOOP
Definition: aacenc.h:45
aac_chan_configs
static const uint8_t aac_chan_configs[14][6]
default channel configurations
Definition: aacenctab.h:66
AV_CHANNEL_LAYOUT_6POINT0
#define AV_CHANNEL_LAYOUT_6POINT0
Definition: channel_layout.h:408
diff
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
Definition: vf_paletteuse.c:166
CLIP_AVOIDANCE_FACTOR
#define CLIP_AVOIDANCE_FACTOR
Definition: aacenc.h:42
ChannelElement::common_window
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
Definition: aacenc.h:129
sinewin.h
apply_intensity_stereo
static void apply_intensity_stereo(ChannelElement *cpe)
Definition: aacenc.c:551
AACPCEInfo::index
int index[4][8]
front, side, back, lfe
Definition: aacenc.h:172
AVPacket::flags
int flags
A combination of AV_PKT_FLAG values.
Definition: packet.h:601
CODEC_SAMPLEFMTS
#define CODEC_SAMPLEFMTS(...)
Definition: codec_internal.h:386
SingleChannelElement::band_type
enum BandType band_type[128]
band types
Definition: aacdec.h:215
av_tx_uninit
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
Definition: tx.c:295
av_channel_layout_compare
int av_channel_layout_compare(const AVChannelLayout *chl, const AVChannelLayout *chl1)
Check whether two channel layouts are semantically the same, i.e.
Definition: channel_layout.c:811
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:221
ff_psy_init
av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:28
AV_CHANNEL_LAYOUT_6POINT1_BACK
#define AV_CHANNEL_LAYOUT_6POINT1_BACK
Definition: channel_layout.h:413
aac_pce_configs
static const AACPCEInfo aac_pce_configs[]
List of PCE (Program Configuration Element) for the channel layouts listed in channel_layout....
Definition: aacenc.c:90
SingleChannelElement
Single Channel Element - used for both SCE and LFE elements.
Definition: aacdec.h:211
AVPacket::pts
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: packet.h:588
put_bits_count
static int put_bits_count(PutBitContext *s)
Definition: put_bits.h:90
IndividualChannelStream::num_windows
int num_windows
Definition: aacdec.h:173
AVCodecContext::extradata
uint8_t * extradata
Out-of-band global headers that may be used by some codecs.
Definition: avcodec.h:526
aacenc_options
static const AVOption aacenc_options[]
Definition: aacenc.c:1278
AV_CHANNEL_LAYOUT_QUAD
#define AV_CHANNEL_LAYOUT_QUAD
Definition: channel_layout.h:403
SingleChannelElement::pcoeffs
float pcoeffs[1024]
coefficients for IMDCT, pristine
Definition: aacenc.h:118
LONG_STOP_SEQUENCE
@ LONG_STOP_SEQUENCE
Definition: aac.h:67
ChannelElement
channel element - generic struct for SCE/CPE/CCE/LFE
Definition: aacdec.h:290
IndividualChannelStream::swb_offset
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular wind...
Definition: aacdec.h:171
AV_CHANNEL_LAYOUT_7POINT0_FRONT
#define AV_CHANNEL_LAYOUT_7POINT0_FRONT
Definition: channel_layout.h:416
apply_window
static void(*const apply_window[4])(AVFloatDSPContext *fdsp, SingleChannelElement *sce, const float *audio)
Definition: aacenc.c:438
AACPCEInfo::pairing
int pairing[3][8]
front, side, back
Definition: aacenc.h:171
av_assert1
#define av_assert1(cond)
assert() equivalent, that does not lie in speed critical code.
Definition: avassert.h:58
NOISE_PRE_BITS
#define NOISE_PRE_BITS
length of preamble
Definition: aac.h:100
AV_CHANNEL_LAYOUT_3POINT1
#define AV_CHANNEL_LAYOUT_3POINT1
Definition: channel_layout.h:399
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
TYPE_LFE
@ TYPE_LFE
Definition: aac.h:47
ff_aac_kbd_long_1024
float ff_aac_kbd_long_1024[1024]
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:179
TYPE_SCE
@ TYPE_SCE
Definition: aac.h:44
AACENC_FLAGS
#define AACENC_FLAGS
Definition: aacenc.c:1277
IndividualChannelStream::tns_max_bands
int tns_max_bands
Definition: aacdec.h:174
avcodec.h
AVCodecContext::frame_num
int64_t frame_num
Frame counter, set by libavcodec.
Definition: avcodec.h:1896
aac_encode_defaults
static const FFCodecDefault aac_encode_defaults[]
Definition: aacenc.c:1298
tag
uint32_t tag
Definition: movenc.c:2049
ret
ret
Definition: filter_design.txt:187
ff_aac_num_swb_1024
const uint8_t ff_aac_num_swb_1024[]
Definition: aactab.c:149
AVClass::class_name
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass is associated.
Definition: log.h:81
frame
[...] these buffered frames must be flushed immediately if a new input produces new output. If the input frame is not enough to produce output, the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input. Therefore, any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: For filters that do not use the activate() callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already ready, one of these frames should be pushed. If not, the filter should request a frame on one of its inputs, repeatedly until at least one frame has been pushed. Return values: if request_frame could produce a frame, or at least make progress towards producing a frame, it should return 0.
Definition: filter_design.txt:265
ff_aac_encoder
const FFCodec ff_aac_encoder
Definition: aacenc.c:1303
encode_ms_info
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
Encode MS data.
Definition: aacenc.c:489
AV_CHANNEL_LAYOUT_7POINT0
#define AV_CHANNEL_LAYOUT_7POINT0
Definition: channel_layout.h:415
RESERVED_BT
@ RESERVED_BT
Band types following are encoded differently from others.
Definition: aac.h:74
LONG_START_SEQUENCE
@ LONG_START_SEQUENCE
Definition: aac.h:65
SingleChannelElement::tns
TemporalNoiseShaping tns
Definition: aacdec.h:214
AACEncContext
AAC encoder context.
Definition: aacenc.h:180
AV_PROFILE_AAC_LOW
#define AV_PROFILE_AAC_LOW
Definition: defs.h:69
AV_CHANNEL_LAYOUT_2_1
#define AV_CHANNEL_LAYOUT_2_1
Definition: channel_layout.h:397
AVCodecContext
main external API structure.
Definition: avcodec.h:443
channel_layout.h
CODEC_SAMPLERATES_ARRAY
#define CODEC_SAMPLERATES_ARRAY(array)
Definition: codec_internal.h:384
encode_individual_channel
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
Encode one channel of audio data.
Definition: aacenc.c:732
NOISE_OFFSET
#define NOISE_OFFSET
subtracted from global gain, used as offset for the preamble
Definition: aac.h:101
ERROR_IF
#define ERROR_IF(cond,...)
Definition: aacenc_utils.h:244
ff_aac_swb_size_1024
const uint8_t *const ff_aac_swb_size_1024[]
Definition: aacenctab.c:97
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Underlying C type is int.
Definition: opt.h:259
TemporalNoiseShaping
Temporal Noise Shaping.
Definition: aacdec.h:185
AVCodecContext::profile
int profile
profile
Definition: avcodec.h:1636
AOT_SBR
@ AOT_SBR
Y Spectral Band Replication.
Definition: mpeg4audio.h:77
L
#define L(x)
Definition: vpx_arith.h:36
AV_CHANNEL_LAYOUT_6POINT0_FRONT
#define AV_CHANNEL_LAYOUT_6POINT0_FRONT
Definition: channel_layout.h:409
AV_CODEC_CAP_DELAY
#define AV_CODEC_CAP_DELAY
Encoder or decoder requires flushing with NULL input at the end in order to give the complete and correct output.
Definition: codec.h:76
samples
Filter design: in this document, the word “frame” indicates either a video frame or a group of audio samples.
Definition: filter_design.txt:8
ChannelElement::is_mode
uint8_t is_mode
Set if any bands have been encoded using intensity stereo.
Definition: aacenc.h:131
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
put_ics_info
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
Encode ics_info element.
Definition: aacenc.c:468
ff_mpeg4audio_sample_rates
const int ff_mpeg4audio_sample_rates[16]
Definition: mpeg4audio_sample_rates.h:30
ff_aac_swb_size_128
const uint8_t *const ff_aac_swb_size_128[]
Definition: aacenctab.c:89
mem.h
AV_CODEC_FLAG_BITEXACT
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
Definition: avcodec.h:322
aac_encode_end
static av_cold int aac_encode_end(AVCodecContext *avctx)
Definition: aacenc.c:1104
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:153
w
uint8_t w
Definition: llvidencdsp.c:39
AV_CHANNEL_LAYOUT_MONO
#define AV_CHANNEL_LAYOUT_MONO
Definition: channel_layout.h:394
scale
static void scale(int *out, const int *in, const int w, const int h, const int shift)
Definition: intra.c:278
FF_AAC_PROFILE_OPTS
#define FF_AAC_PROFILE_OPTS
Definition: profiles.h:29
AVPacket
This structure stores compressed data.
Definition: packet.h:572
AVCodecContext::priv_data
void * priv_data
Definition: avcodec.h:470
AV_OPT_TYPE_BOOL
@ AV_OPT_TYPE_BOOL
Underlying C type is int.
Definition: opt.h:327
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
avpriv_float_dsp_alloc
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
Definition: float_dsp.c:135
AV_CHANNEL_LAYOUT_5POINT1_BACK
#define AV_CHANNEL_LAYOUT_5POINT1_BACK
Definition: channel_layout.h:407
IndividualChannelStream::max_sfb
uint8_t max_sfb
number of scalefactor bands per group
Definition: aacdec.h:164
Pulse
Definition: aac.h:103
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AV_CHANNEL_LAYOUT_6POINT1
#define AV_CHANNEL_LAYOUT_6POINT1
Definition: channel_layout.h:412
dsp_init
static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
Definition: aacenc.c:1121
AV_CHANNEL_LAYOUT_5POINT0
#define AV_CHANNEL_LAYOUT_5POINT0
Definition: channel_layout.h:404
aacenc_utils.h
aac_chan_maps
static const uint8_t aac_chan_maps[14][AAC_MAX_CHANNELS]
Table to remap channels from libavcodec's default order to AAC order.
Definition: aacenctab.h:86
AV_CODEC_CAP_SMALL_LAST_FRAME
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:81
AV_CHANNEL_LAYOUT_5POINT1
#define AV_CHANNEL_LAYOUT_5POINT1
Definition: channel_layout.h:405
put_bits.h
IndividualChannelStream::group_len
uint8_t group_len[8]
Definition: aacdec.h:169
psymodel.h
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Special option type for declaring named constants.
Definition: opt.h:299
ff_aacenc_dsp_init
static void ff_aacenc_dsp_init(AACEncDSPContext *s)
Definition: aacencdsp.h:61
ff_alloc_packet
int ff_alloc_packet(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
Check AVPacket size and allocate data.
Definition: encode.c:61
FF_LPC_TYPE_LEVINSON
@ FF_LPC_TYPE_LEVINSON
Levinson-Durbin recursion.
Definition: lpc.h:46
FFPsyWindowInfo::num_windows
int num_windows
number of windows in a frame
Definition: psymodel.h:80
ff_aac_scalefactor_code
const uint32_t ff_aac_scalefactor_code[121]
Definition: aactab.c:181
ff_quantize_band_cost_cache_init
void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
Definition: aacenc.c:373
AACPCEInfo::layout
AVChannelLayout layout
Definition: aacenc.h:169
aacenc.h