FFmpeg
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
sonic.c
Go to the documentation of this file.
1 /*
2  * Simple free lossless/lossy audio codec
3  * Copyright (c) 2004 Alex Beregszaszi
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include "config_components.h"
23 
24 #include "libavutil/mem.h"
25 #include "avcodec.h"
26 #include "codec_internal.h"
27 #include "decode.h"
28 #include "encode.h"
29 #include "get_bits.h"
30 #include "golomb.h"
31 #include "put_golomb.h"
32 #include "rangecoder.h"
33 
34 
35 /**
36  * @file
37  * Simple free lossless/lossy audio codec
38  * Based on Paul Francis Harrison's Bonk (http://www.logarithmic.net/pfh/bonk)
39  * Written and designed by Alex Beregszaszi
40  *
41  * TODO:
42  * - CABAC put/get_symbol
43  * - independent quantizer for channels
44  * - >2 channels support
45  * - more decorrelation types
46  * - more tap_quant tests
47  * - selectable intlist writers/readers (bonk-style, golomb, cabac)
48  */
49 
// Largest channel count accepted by both the encoder and decoder init code.
50 #define MAX_CHANNELS 2
51 
// Stereo decorrelation modes handled by the switch statements in the
// frame encode/decode functions. Any other value (the encoder stores 3
// for mono input) matches no case, so no decorrelation is applied.
52 #define MID_SIDE 0
53 #define LEFT_SIDE 1
54 #define RIGHT_SIDE 2
55 
// Private codec state shared by the Sonic encoder and decoder.
// NOTE(review): this listing omits several member lines (the embedded
// numbering jumps, e.g. 57 -> 60); only the members visible here are
// documented.
56 typedef struct SonicContext {
 // bitstream version; the encoder writes 2 and the decoder rejects anything else
57  int version;
60 
 // lossy quantizer scale: encoder init sets 1.0 for lossy and 0.0 for lossless
62  double quantization;
63 
65 
 // per-tap quantization step, filled with ff_sqrt(i+1) by both init functions
66  int *tap_quant;
69 
70  // for encoding
 // last tail_size interleaved samples of the previous frame
71  int *tail;
72  int tail_size;
 // analysis buffer: tail + current frame + zero padding, plus scratch space
73  int *window;
75 
76  // for decoding
79 } SonicContext;
80 
// Lattice coefficients are fixed-point values with LATTICE_SHIFT fractional
// bits; products with them are scaled back via shift_down(..., LATTICE_SHIFT).
81 #define LATTICE_SHIFT 10
// In lossy mode samples are scaled up by SAMPLE_SHIFT bits before coding and
// scaled back down after decoding.
82 #define SAMPLE_SHIFT 4
83 #define LATTICE_FACTOR (1 << LATTICE_SHIFT)
84 #define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
85 
// Constants of the encoder's simple rate control (see sonic_encode_frame).
86 #define BASE_QUANT 0.6
87 #define RATE_VARIATION 3.0
88 
/**
 * Divide a by 2^b, adding half of the divisor first so the result is
 * rounded instead of truncated.
 *
 * @param a value to scale down
 * @param b shift amount, must be at least 1
 * @return (a + 2^(b-1)) >> b
 */
static inline int shift(int a, int b)
{
    const int half = 1 << (b - 1);

    return (a + half) >> b;
}
93 
/**
 * Shift a right by b bits and add one to the result when a is negative.
 *
 * @param a value to scale down
 * @param b shift amount
 * @return (a >> b) for non-negative a, (a >> b) + 1 for negative a
 */
static inline int shift_down(int a, int b)
{
    int shifted = a >> b;

    if (a < 0)
        shifted++;

    return shifted;
}
98 
99 
100 #if CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER
101 // Heavily modified Levinson-Durbin algorithm which
102 // copes better with quantization, and calculates the
103 // actual whitened result as it goes.
104 
/**
 * Derive quantized lattice coefficients from a window of samples and
 * whiten the window in place while doing so.
 *
 * @param window         interleaved samples; the buffer must hold at least
 *                       2 * window_entries ints because the second half is
 *                       used as scratch ("state"). Modified in place.
 * @param window_entries number of valid samples in window
 * @param out            receives out_entries quantized coefficients
 * @param out_entries    number of coefficients (taps) to compute
 * @param channels       interleaving stride; tap i uses a lag of (i+1)*channels
 * @param tap_quant      quantization step for each tap
 */
105 static void modified_levinson_durbin(int *window, int window_entries,
106  int *out, int out_entries, int channels, int *tap_quant)
107 {
108  int i;
 // the scratch copy lives directly behind the window samples
109  int *state = window + window_entries;
110 
111  memcpy(state, window, window_entries * sizeof(*state));
112 
113  for (i = 0; i < out_entries; i++)
114  {
115  int step = (i+1)*channels, k, j;
116  double xx = 0.0, xy = 0.0;
117  int *x_ptr = &(window[step]);
118  int *state_ptr = &(state[0]);
119  j = window_entries - step;
 // accumulate the state energy (xx) and the correlation between the
 // window at lag 'step' and the state (xy)
120  for (;j>0;j--,x_ptr++,state_ptr++)
121  {
122  double x_value = *x_ptr;
123  double state_value = *state_ptr;
124  xx += state_value*state_value;
125  xy += x_value*state_value;
126  }
 // coefficient = -xy/xx in LATTICE_FACTOR fixed point, expressed in units
 // of tap_quant[i] and rounded to nearest; 0 when the state is silent
127  if (xx == 0.0)
128  k = 0;
129  else
130  k = (int)(floor(-xy/xx * (double)LATTICE_FACTOR / (double)(tap_quant[i]) + 0.5));
131 
 // limit the quantized coefficient to +-LATTICE_FACTOR/tap_quant[i]
132  if (k > (LATTICE_FACTOR/tap_quant[i]))
133  k = LATTICE_FACTOR/tap_quant[i];
134  if (-k > (LATTICE_FACTOR/tap_quant[i]))
135  k = -(LATTICE_FACTOR/tap_quant[i]);
136 
 // store the quantized value, then continue with the dequantized one so
 // the update below uses the coefficient as reconstructed from out[i]
137  out[i] = k;
138  k *= tap_quant[i];
139 
140  x_ptr = &(window[step]);
141  state_ptr = &(state[0]);
142  j = window_entries - step;
 // apply this lattice stage to both the window and the state, in place
143  for (;j>0;j--,x_ptr++,state_ptr++)
144  {
145  int x_value = *x_ptr;
146  int state_value = *state_ptr;
147  *x_ptr = x_value + shift_down(k*state_value,LATTICE_SHIFT);
148  *state_ptr = state_value + shift_down(k*x_value, LATTICE_SHIFT);
149  }
150  }
151 }
152 
/**
 * Map a sample rate in Hz to the index written into the stream header.
 *
 * @param samplerate sample rate in Hz
 * @return index 0..8 for a supported rate, AVERROR(EINVAL) otherwise
 */
static inline int code_samplerate(int samplerate)
{
    // position in this table is the header code
    static const int known_rates[] = {
        44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000
    };
    const int nb_rates = sizeof(known_rates) / sizeof(known_rates[0]);
    int idx;

    for (idx = 0; idx < nb_rates; idx++)
        if (known_rates[idx] == samplerate)
            return idx;

    return AVERROR(EINVAL);
}
169 
170 static av_cold int sonic_encode_init(AVCodecContext *avctx)
171 {
172  SonicContext *s = avctx->priv_data;
173  int *coded_samples;
174  PutBitContext pb;
175  int i;
176 
177  s->version = 2;
178 
179  if (avctx->ch_layout.nb_channels > MAX_CHANNELS)
180  {
181  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
182  return AVERROR(EINVAL); /* only stereo or mono for now */
183  }
184 
185  if (avctx->ch_layout.nb_channels == 2)
186  s->decorrelation = MID_SIDE;
187  else
188  s->decorrelation = 3;
189 
190  if (avctx->codec->id == AV_CODEC_ID_SONIC_LS)
191  {
192  s->lossless = 1;
193  s->num_taps = 32;
194  s->downsampling = 1;
195  s->quantization = 0.0;
196  }
197  else
198  {
199  s->num_taps = 128;
200  s->downsampling = 2;
201  s->quantization = 1.0;
202  }
203 
204  // max tap 2048
205  if (s->num_taps < 32 || s->num_taps > 1024 || s->num_taps % 32) {
206  av_log(avctx, AV_LOG_ERROR, "Invalid number of taps\n");
207  return AVERROR_INVALIDDATA;
208  }
209 
210  // generate taps
211  s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
212  if (!s->tap_quant)
213  return AVERROR(ENOMEM);
214 
215  for (i = 0; i < s->num_taps; i++)
216  s->tap_quant[i] = ff_sqrt(i+1);
217 
218  s->channels = avctx->ch_layout.nb_channels;
219  s->samplerate = avctx->sample_rate;
220 
221  s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
222  s->frame_size = s->channels*s->block_align*s->downsampling;
223 
224  s->tail_size = s->num_taps*s->channels;
225  s->tail = av_calloc(s->tail_size, sizeof(*s->tail));
226  if (!s->tail)
227  return AVERROR(ENOMEM);
228 
229  s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k) );
230  if (!s->predictor_k)
231  return AVERROR(ENOMEM);
232 
233  coded_samples = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
234  if (!coded_samples)
235  return AVERROR(ENOMEM);
236  for (i = 0; i < s->channels; i++, coded_samples += s->block_align)
237  s->coded_samples[i] = coded_samples;
238 
239  s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
240 
241  s->window_size = ((2*s->tail_size)+s->frame_size);
242  s->window = av_calloc(s->window_size, 2 * sizeof(*s->window));
243  if (!s->window || !s->int_samples)
244  return AVERROR(ENOMEM);
245 
246  avctx->extradata = av_mallocz(16);
247  if (!avctx->extradata)
248  return AVERROR(ENOMEM);
249  init_put_bits(&pb, avctx->extradata, 16*8);
250 
251  put_bits(&pb, 2, s->version); // version
252  if (s->version >= 1)
253  {
254  if (s->version >= 2) {
255  put_bits(&pb, 8, s->version);
256  put_bits(&pb, 8, s->minor_version);
257  }
258  put_bits(&pb, 2, s->channels);
259  put_bits(&pb, 4, code_samplerate(s->samplerate));
260  }
261  put_bits(&pb, 1, s->lossless);
262  if (!s->lossless)
263  put_bits(&pb, 3, SAMPLE_SHIFT); // XXX FIXME: sample precision
264  put_bits(&pb, 2, s->decorrelation);
265  put_bits(&pb, 2, s->downsampling);
266  put_bits(&pb, 5, (s->num_taps >> 5)-1); // 32..1024
267  put_bits(&pb, 1, 0); // XXX FIXME: no custom tap quant table
268 
269  flush_put_bits(&pb);
270  avctx->extradata_size = put_bytes_output(&pb);
271 
272  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
273  s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
274 
275  avctx->frame_size = s->block_align*s->downsampling;
276 
277  return 0;
278 }
279 
280 static av_cold int sonic_encode_close(AVCodecContext *avctx)
281 {
282  SonicContext *s = avctx->priv_data;
283 
284  av_freep(&s->coded_samples[0]);
285  av_freep(&s->predictor_k);
286  av_freep(&s->tail);
287  av_freep(&s->tap_quant);
288  av_freep(&s->window);
289  av_freep(&s->int_samples);
290 
291  return 0;
292 }
293 
/**
 * Range-code one integer using adaptive binary states.
 *
 * Layout of the 32-entry state array, as used below:
 *   state[0]       zero flag (bit 1 means v == 0)
 *   state[1..10]   unary-coded exponent e = av_log2(|v|)
 *   state[11..21]  sign bit
 *   state[22..31]  the e bits of |v| below its leading one
 *
 * @param c         range coder to write to
 * @param state     adaptive states, 32 entries
 * @param v         value to code
 * @param is_signed nonzero to also code the sign of v
 * @param rc_stat   optional per-state-value bit counters, may be NULL
 * @param rc_stat2  optional per-state-index bit counters; only touched when
 *                  rc_stat is non-NULL
 */
294 static av_always_inline av_flatten void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed, uint64_t rc_stat[256][2], uint64_t rc_stat2[32][2]){
295  int i;
296 
 // Local wrapper around the real put_rac() that also updates the optional
 // statistics. The put_rac inside the macro body is not expanded again by
 // the preprocessor, so it resolves to the real function.
297 #define put_rac(C,S,B) \
298 do{\
299  if(rc_stat){\
300  rc_stat[*(S)][B]++;\
301  rc_stat2[(S)-state][B]++;\
302  }\
303  put_rac(C,S,B);\
304 }while(0)
305 
306  if(v){
307  const int a= FFABS(v);
308  const int e= av_log2(a);
309  put_rac(c, state+0, 0);
 // small exponents: every unary position and mantissa bit has its own state
310  if(e<=9){
311  for(i=0; i<e; i++){
312  put_rac(c, state+1+i, 1); //1..10
313  }
314  put_rac(c, state+1+i, 0);
315 
316  for(i=e-1; i>=0; i--){
317  put_rac(c, state+22+i, (a>>i)&1); //22..31
318  }
319 
320  if(is_signed)
321  put_rac(c, state+11 + e, v < 0); //11..21
322  }else{
 // large exponents: positions beyond the ninth share the last state of each group
323  for(i=0; i<e; i++){
324  put_rac(c, state+1+FFMIN(i,9), 1); //1..10
325  }
326  put_rac(c, state+1+9, 0);
327 
328  for(i=e-1; i>=0; i--){
329  put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
330  }
331 
332  if(is_signed)
333  put_rac(c, state+11 + 10, v < 0); //11..21
334  }
335  }else{
336  put_rac(c, state+0, 1);
337  }
338 #undef put_rac
339 }
340 
341 static inline int intlist_write(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
342 {
343  int i;
344 
345  for (i = 0; i < entries; i++)
346  put_symbol(c, state, buf[i], 1, NULL, NULL);
347 
348  return 1;
349 }
350 
/**
 * Encode one frame of interleaved 16-bit samples into a packet.
 *
 * Steps: convert and (lossy only) scale the input, decorrelate the channels,
 * compute and write the lattice coefficients, downsample the whitened
 * signal, (lossy only) choose and write a quantizer, then write the coded
 * samples of every channel.
 *
 * @param avctx          codec context whose priv_data is the SonicContext
 * @param avpkt          packet to fill
 * @param frame          input frame; data[0] is read as int16_t samples
 * @param got_packet_ptr set to 1 on success
 * @return 0 on success, a negative error code if the packet cannot be allocated
 */
351 static int sonic_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
352  const AVFrame *frame, int *got_packet_ptr)
353 {
354  SonicContext *s = avctx->priv_data;
355  RangeCoder c;
356  int i, j, ch, quant = 0, x = 0;
357  int ret;
358  const short *samples = (const int16_t*)frame->data[0];
359  uint8_t state[32];
360 
361  if ((ret = ff_alloc_packet(avctx, avpkt, s->frame_size * 5 + 1000)) < 0)
362  return ret;
363 
 // fresh range coder and adaptive states for every packet
364  ff_init_range_encoder(&c, avpkt->data, avpkt->size);
365  ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
366  memset(state, 128, sizeof(state));
367 
368  // short -> internal
369  for (i = 0; i < s->frame_size; i++)
370  s->int_samples[i] = samples[i];
371 
 // lossy mode works on samples scaled up by SAMPLE_SHIFT bits
372  if (!s->lossless)
373  for (i = 0; i < s->frame_size; i++)
374  s->int_samples[i] = s->int_samples[i] << SAMPLE_SHIFT;
375 
 // inter-channel decorrelation; no case matches the value stored for mono
376  switch(s->decorrelation)
377  {
378  case MID_SIDE:
379  for (i = 0; i < s->frame_size; i += s->channels)
380  {
381  s->int_samples[i] += s->int_samples[i+1];
382  s->int_samples[i+1] -= shift(s->int_samples[i], 1);
383  }
384  break;
385  case LEFT_SIDE:
386  for (i = 0; i < s->frame_size; i += s->channels)
387  s->int_samples[i+1] -= s->int_samples[i];
388  break;
389  case RIGHT_SIDE:
390  for (i = 0; i < s->frame_size; i += s->channels)
391  s->int_samples[i] -= s->int_samples[i+1];
392  break;
393  }
394 
 // build the analysis window: previous tail, current frame, zero padding
395  memset(s->window, 0, s->window_size * sizeof(*s->window));
396 
397  for (i = 0; i < s->tail_size; i++)
398  s->window[x++] = s->tail[i];
399 
400  for (i = 0; i < s->frame_size; i++)
401  s->window[x++] = s->int_samples[i];
402 
403  for (i = 0; i < s->tail_size; i++)
404  s->window[x++] = 0;
405 
 // remember the end of this frame as the tail for the next call
406  for (i = 0; i < s->tail_size; i++)
407  s->tail[i] = s->int_samples[s->frame_size - s->tail_size + i];
408 
409  // generate taps
410  modified_levinson_durbin(s->window, s->window_size,
411  s->predictor_k, s->num_taps, s->channels, s->tap_quant);
412 
413  if ((ret = intlist_write(&c, state, s->predictor_k, s->num_taps, 0)) < 0)
414  return ret;
415 
 // the window now holds the whitened signal; every coded sample is the sum
 // of 'downsampling' consecutive values of one channel, skipping the tail
416  for (ch = 0; ch < s->channels; ch++)
417  {
418  x = s->tail_size+ch;
419  for (i = 0; i < s->block_align; i++)
420  {
421  int sum = 0;
422  for (j = 0; j < s->downsampling; j++, x += s->channels)
423  sum += s->window[x];
424  s->coded_samples[ch][i] = sum;
425  }
426  }
427 
428  // simple rate control code
429  if (!s->lossless)
430  {
 // energy2 becomes the RMS and energy1 sqrt(2) times the mean absolute
 // value of the coded samples
431  double energy1 = 0.0, energy2 = 0.0;
432  for (ch = 0; ch < s->channels; ch++)
433  {
434  for (i = 0; i < s->block_align; i++)
435  {
436  double sample = s->coded_samples[ch][i];
437  energy2 += sample*sample;
438  energy1 += fabs(sample);
439  }
440  }
441 
442  energy2 = sqrt(energy2/(s->channels*s->block_align));
443  energy1 = M_SQRT2*energy1/(s->channels*s->block_align);
444 
445  // increase bitrate when samples are like a gaussian distribution
446  // reduce bitrate when samples are like a two-tailed exponential distribution
447 
448  if (energy2 > energy1)
449  energy2 += (energy2-energy1)*RATE_VARIATION;
450 
451  quant = (int)(BASE_QUANT*s->quantization*energy2/SAMPLE_FACTOR);
452 // av_log(avctx, AV_LOG_DEBUG, "quant: %d energy: %f / %f\n", quant, energy1, energy2);
453 
454  quant = av_clip(quant, 1, 65534);
455 
 // the quantizer is transmitted unscaled and without a sign
456  put_symbol(&c, state, quant, 0, NULL, NULL);
457 
458  quant *= SAMPLE_FACTOR;
459  }
460 
461  // write out coded samples
462  for (ch = 0; ch < s->channels; ch++)
463  {
464  if (!s->lossless)
465  for (i = 0; i < s->block_align; i++)
466  s->coded_samples[ch][i] = ROUNDED_DIV(s->coded_samples[ch][i], quant);
467 
468  if ((ret = intlist_write(&c, state, s->coded_samples[ch], s->block_align, 1)) < 0)
469  return ret;
470  }
471 
 // finish the range coder; its return value becomes the packet size
472  avpkt->size = ff_rac_terminate(&c, 0);
473  *got_packet_ptr = 1;
474  return 0;
475 
476 }
477 #endif /* CONFIG_SONIC_ENCODER || CONFIG_SONIC_LS_ENCODER */
478 
479 #if CONFIG_SONIC_DECODER
// Sample rates in Hz, indexed by the 4-bit sample rate code read from the
// stream header in sonic_decode_init().
480 static const int samplerate_table[] =
481  { 44100, 22050, 11025, 96000, 48000, 32000, 24000, 16000, 8000 };
482 
483 static av_cold int sonic_decode_init(AVCodecContext *avctx)
484 {
485  SonicContext *s = avctx->priv_data;
486  int *tmp;
487  GetBitContext gb;
488  int i;
489  int ret;
490 
491  s->channels = avctx->ch_layout.nb_channels;
492  s->samplerate = avctx->sample_rate;
493 
494  if (!avctx->extradata)
495  {
496  av_log(avctx, AV_LOG_ERROR, "No mandatory headers present\n");
497  return AVERROR_INVALIDDATA;
498  }
499 
500  ret = init_get_bits8(&gb, avctx->extradata, avctx->extradata_size);
501  if (ret < 0)
502  return ret;
503 
504  s->version = get_bits(&gb, 2);
505  if (s->version >= 2) {
506  s->version = get_bits(&gb, 8);
507  s->minor_version = get_bits(&gb, 8);
508  }
509  if (s->version != 2)
510  {
511  av_log(avctx, AV_LOG_ERROR, "Unsupported Sonic version, please report\n");
512  return AVERROR_INVALIDDATA;
513  }
514 
515  if (s->version >= 1)
516  {
517  int sample_rate_index;
518  s->channels = get_bits(&gb, 2);
519  sample_rate_index = get_bits(&gb, 4);
520  if (sample_rate_index >= FF_ARRAY_ELEMS(samplerate_table)) {
521  av_log(avctx, AV_LOG_ERROR, "Invalid sample_rate_index %d\n", sample_rate_index);
522  return AVERROR_INVALIDDATA;
523  }
524  s->samplerate = samplerate_table[sample_rate_index];
525  av_log(avctx, AV_LOG_INFO, "Sonicv2 chans: %d samprate: %d\n",
526  s->channels, s->samplerate);
527  }
528 
529  if (s->channels > MAX_CHANNELS || s->channels < 1)
530  {
531  av_log(avctx, AV_LOG_ERROR, "Only mono and stereo streams are supported by now\n");
532  return AVERROR_INVALIDDATA;
533  }
536  avctx->ch_layout.nb_channels = s->channels;
537 
538  s->lossless = get_bits1(&gb);
539  if (!s->lossless)
540  skip_bits(&gb, 3); // XXX FIXME
541  s->decorrelation = get_bits(&gb, 2);
542  if (s->decorrelation != 3 && s->channels != 2) {
543  av_log(avctx, AV_LOG_ERROR, "invalid decorrelation %d\n", s->decorrelation);
544  return AVERROR_INVALIDDATA;
545  }
546 
547  s->downsampling = get_bits(&gb, 2);
548  if (!s->downsampling) {
549  av_log(avctx, AV_LOG_ERROR, "invalid downsampling value\n");
550  return AVERROR_INVALIDDATA;
551  }
552 
553  s->num_taps = (get_bits(&gb, 5)+1)<<5;
554  if (get_bits1(&gb)) // XXX FIXME
555  av_log(avctx, AV_LOG_INFO, "Custom quant table\n");
556 
557  if (s->num_taps > 128)
558  return AVERROR_INVALIDDATA;
559 
560  s->block_align = 2048LL*s->samplerate/(44100*s->downsampling);
561  s->frame_size = s->channels*s->block_align*s->downsampling;
562 // avctx->frame_size = s->block_align;
563 
564  if (s->num_taps * s->channels > s->frame_size) {
565  av_log(avctx, AV_LOG_ERROR,
566  "number of taps times channels (%d * %d) larger than frame size %d\n",
567  s->num_taps, s->channels, s->frame_size);
568  return AVERROR_INVALIDDATA;
569  }
570 
571  av_log(avctx, AV_LOG_INFO, "Sonic: ver: %d.%d ls: %d dr: %d taps: %d block: %d frame: %d downsamp: %d\n",
572  s->version, s->minor_version, s->lossless, s->decorrelation, s->num_taps, s->block_align, s->frame_size, s->downsampling);
573 
574  // generate taps
575  s->tap_quant = av_calloc(s->num_taps, sizeof(*s->tap_quant));
576  if (!s->tap_quant)
577  return AVERROR(ENOMEM);
578 
579  for (i = 0; i < s->num_taps; i++)
580  s->tap_quant[i] = ff_sqrt(i+1);
581 
582  s->predictor_k = av_calloc(s->num_taps, sizeof(*s->predictor_k));
583 
584  tmp = av_calloc(s->num_taps, s->channels * sizeof(**s->predictor_state));
585  if (!tmp)
586  return AVERROR(ENOMEM);
587  for (i = 0; i < s->channels; i++, tmp += s->num_taps)
588  s->predictor_state[i] = tmp;
589 
590  tmp = av_calloc(s->block_align, s->channels * sizeof(**s->coded_samples));
591  if (!tmp)
592  return AVERROR(ENOMEM);
593  for (i = 0; i < s->channels; i++, tmp += s->block_align)
594  s->coded_samples[i] = tmp;
595 
596  s->int_samples = av_calloc(s->frame_size, sizeof(*s->int_samples));
597  if (!s->int_samples)
598  return AVERROR(ENOMEM);
599 
600  avctx->sample_fmt = AV_SAMPLE_FMT_S16;
601  return 0;
602 }
603 
604 static av_cold int sonic_decode_close(AVCodecContext *avctx)
605 {
606  SonicContext *s = avctx->priv_data;
607 
608  av_freep(&s->int_samples);
609  av_freep(&s->tap_quant);
610  av_freep(&s->predictor_k);
611  av_freep(&s->predictor_state[0]);
612  av_freep(&s->coded_samples[0]);
613 
614  return 0;
615 }
616 
617 static inline av_flatten int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
618  if(get_rac(c, state+0))
619  return 0;
620  else{
621  int i, e;
622  unsigned a;
623  e= 0;
624  while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
625  e++;
626  if (e > 31)
627  return AVERROR_INVALIDDATA;
628  }
629 
630  a= 1;
631  for(i=e-1; i>=0; i--){
632  a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
633  }
634 
635  e= -(is_signed && get_rac(c, state+11 + FFMIN(e, 10))); //11..21
636  return (a^e)-e;
637  }
638 }
639 
640 static inline int intlist_read(RangeCoder *c, uint8_t *state, int *buf, int entries, int base_2_part)
641 {
642  int i;
643 
644  for (i = 0; i < entries; i++)
645  buf[i] = get_symbol(c, state, 1);
646 
647  return 1;
648 }
649 
650 static void predictor_init_state(int *k, int *state, int order)
651 {
652  int i;
653 
654  for (i = order-2; i >= 0; i--)
655  {
656  int j, p, x = state[i];
657 
658  for (j = 0, p = i+1; p < order; j++,p++)
659  {
660  int tmp = x + shift_down(k[j] * (unsigned)state[p], LATTICE_SHIFT);
661  state[p] += shift_down(k[j]* (unsigned)x, LATTICE_SHIFT);
662  x = tmp;
663  }
664  }
665 }
666 
667 static int predictor_calc_error(int *k, int *state, int order, int error)
668 {
669  int i, x = error - (unsigned)shift_down(k[order-1] * (unsigned)state[order-1], LATTICE_SHIFT);
670 
671  int *k_ptr = &(k[order-2]),
672  *state_ptr = &(state[order-2]);
673  for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--)
674  {
675  int k_value = *k_ptr, state_value = *state_ptr;
676  x -= (unsigned)shift_down(k_value * (unsigned)state_value, LATTICE_SHIFT);
677  state_ptr[1] = state_value + shift_down(k_value * (unsigned)x, LATTICE_SHIFT);
678  }
679 
680  // don't drift too far, to avoid overflows
681  if (x > (SAMPLE_FACTOR<<16)) x = (SAMPLE_FACTOR<<16);
682  if (x < -(SAMPLE_FACTOR<<16)) x = -(SAMPLE_FACTOR<<16);
683 
684  state[0] = x;
685 
686  return x;
687 }
688 
/**
 * Decode one packet into a frame of interleaved 16-bit samples.
 *
 * Steps: read and dequantize the lattice coefficients, read the quantizer
 * (lossy only), reconstruct every channel through the lattice predictor,
 * undo the channel decorrelation, scale back (lossy only) and clip to 16 bit.
 *
 * @param avctx         codec context whose priv_data is the SonicContext
 * @param frame         output frame; its buffer is obtained via ff_get_buffer()
 * @param got_frame_ptr set to 1 when a frame was produced
 * @param avpkt         input packet
 * @return the packet size on success, 0 for an empty packet, a negative
 *         error code on failure
 */
689 static int sonic_decode_frame(AVCodecContext *avctx, AVFrame *frame,
690  int *got_frame_ptr, AVPacket *avpkt)
691 {
692  const uint8_t *buf = avpkt->data;
693  int buf_size = avpkt->size;
694  SonicContext *s = avctx->priv_data;
695  RangeCoder c;
696  uint8_t state[32];
697  int i, quant, ch, j, ret;
698  int16_t *samples;
699 
700  if (buf_size == 0) return 0;
701 
 // frame_size counts interleaved samples, nb_samples is per channel
702  frame->nb_samples = s->frame_size / avctx->ch_layout.nb_channels;
703  if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
704  return ret;
705  samples = (int16_t *)frame->data[0];
706 
707 // av_log(NULL, AV_LOG_INFO, "buf_size: %d\n", buf_size);
708 
 // fresh adaptive states and range decoder for every packet
709  memset(state, 128, sizeof(state));
710  ff_init_range_decoder(&c, buf, buf_size);
711  ff_build_rac_states(&c, 0.05*(1LL<<32), 256-8);
712 
713  intlist_read(&c, state, s->predictor_k, s->num_taps, 0);
714 
715  // dequantize
716  for (i = 0; i < s->num_taps; i++)
717  s->predictor_k[i] *= (unsigned) s->tap_quant[i];
718 
 // lossy streams carry the quantizer unscaled and without a sign
719  if (s->lossless)
720  quant = 1;
721  else
722  quant = get_symbol(&c, state, 0) * (unsigned)SAMPLE_FACTOR;
723 
724 // av_log(NULL, AV_LOG_INFO, "quant: %d\n", quant);
725 
726  for (ch = 0; ch < s->channels; ch++)
727  {
 // x walks this channel's positions in the interleaved output
728  int x = ch;
729 
 // stop when the range decoder has read too far past the packet end
730  if (c.overread > MAX_OVERREAD)
731  return AVERROR_INVALIDDATA;
732 
733  predictor_init_state(s->predictor_k, s->predictor_state[ch], s->num_taps);
734 
735  intlist_read(&c, state, s->coded_samples[ch], s->block_align, 1);
736 
737  for (i = 0; i < s->block_align; i++)
738  {
 // samples dropped by downsampling are predicted with a zero error
739  for (j = 0; j < s->downsampling - 1; j++)
740  {
741  s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, 0);
742  x += s->channels;
743  }
744 
745  s->int_samples[x] = predictor_calc_error(s->predictor_k, s->predictor_state[ch], s->num_taps, s->coded_samples[ch][i] * (unsigned)quant);
746  x += s->channels;
747  }
748 
 // reload the state with this channel's last num_taps samples, newest first
749  for (i = 0; i < s->num_taps; i++)
750  s->predictor_state[ch][i] = s->int_samples[s->frame_size - s->channels + ch - i*s->channels];
751  }
752 
 // undo the inter-channel decorrelation; no case matches mode 3
753  switch(s->decorrelation)
754  {
755  case MID_SIDE:
756  for (i = 0; i < s->frame_size; i += s->channels)
757  {
758  s->int_samples[i+1] += shift(s->int_samples[i], 1);
759  s->int_samples[i] -= s->int_samples[i+1];
760  }
761  break;
762  case LEFT_SIDE:
763  for (i = 0; i < s->frame_size; i += s->channels)
764  s->int_samples[i+1] += s->int_samples[i];
765  break;
766  case RIGHT_SIDE:
767  for (i = 0; i < s->frame_size; i += s->channels)
768  s->int_samples[i] += s->int_samples[i+1];
769  break;
770  }
771 
 // lossy mode: scale the samples back down by SAMPLE_SHIFT bits with rounding
772  if (!s->lossless)
773  for (i = 0; i < s->frame_size; i++)
774  s->int_samples[i] = shift(s->int_samples[i], SAMPLE_SHIFT);
775 
776  // internal -> short
777  for (i = 0; i < s->frame_size; i++)
778  samples[i] = av_clip_int16(s->int_samples[i]);
779 
780  *got_frame_ptr = 1;
781 
782  return buf_size;
783 }
784 
// Codec table entry registering the Sonic decoder callbacks.
// NOTE(review): the embedded numbering skips line 794, so one initializer
// line is missing from this listing.
785 const FFCodec ff_sonic_decoder = {
786  .p.name = "sonic",
787  CODEC_LONG_NAME("Sonic"),
788  .p.type = AVMEDIA_TYPE_AUDIO,
789  .p.id = AV_CODEC_ID_SONIC,
790  .priv_data_size = sizeof(SonicContext),
791  .init = sonic_decode_init,
792  .close = sonic_decode_close,
793  FF_CODEC_DECODE_CB(sonic_decode_frame),
795  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
796 };
797 #endif /* CONFIG_SONIC_DECODER */
798 
799 #if CONFIG_SONIC_ENCODER
// Codec table entry registering the lossy Sonic encoder callbacks.
// NOTE(review): the embedded numbering skips lines 806 and 810, so the
// capabilities expression is cut off and another initializer line is
// missing from this listing.
800 const FFCodec ff_sonic_encoder = {
801  .p.name = "sonic",
802  CODEC_LONG_NAME("Sonic"),
803  .p.type = AVMEDIA_TYPE_AUDIO,
804  .p.id = AV_CODEC_ID_SONIC,
805  .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL |
807  .priv_data_size = sizeof(SonicContext),
808  .init = sonic_encode_init,
809  FF_CODEC_ENCODE_CB(sonic_encode_frame),
811  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
812  .close = sonic_encode_close,
813 };
814 #endif
815 
816 #if CONFIG_SONIC_LS_ENCODER
818  .p.name = "sonicls",
819  CODEC_LONG_NAME("Sonic lossless"),
820  .p.type = AVMEDIA_TYPE_AUDIO,
821  .p.id = AV_CODEC_ID_SONIC_LS,
822  .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_EXPERIMENTAL |
824  .priv_data_size = sizeof(SonicContext),
825  .init = sonic_encode_init,
826  FF_CODEC_ENCODE_CB(sonic_encode_frame),
828  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
829  .close = sonic_encode_close,
830 };
831 #endif
error
static void error(const char *err)
Definition: target_bsf_fuzzer.c:32
SonicContext::tail_size
int tail_size
Definition: sonic.c:72
AVCodecContext::frame_size
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1051
av_clip
#define av_clip
Definition: common.h:100
FF_CODEC_CAP_INIT_CLEANUP
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: codec_internal.h:42
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure. Format: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel sample. They are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
out
FILE * out
Definition: movenc.c:55
put_bytes_output
static int put_bytes_output(const PutBitContext *s)
Definition: put_bits.h:99
AVCodecContext::sample_rate
int sample_rate
samples per second
Definition: avcodec.h:1024
BASE_QUANT
#define BASE_QUANT
Definition: sonic.c:86
MAX_OVERREAD
#define MAX_OVERREAD
Definition: lagarithrac.h:49
init_put_bits
static void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
Initialize the PutBitContext s.
Definition: put_bits.h:62
SonicContext::downsampling
int downsampling
Definition: sonic.c:61
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:421
put_bits
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
put n times val bit
Definition: j2kenc.c:223
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
AVPacket::data
uint8_t * data
Definition: packet.h:552
encode.h
b
#define b
Definition: input.c:42
SonicContext::window_size
int window_size
Definition: sonic.c:74
rangecoder.h
FFCodec
Definition: codec_internal.h:127
AVChannelLayout::order
enum AVChannelOrder order
Channel order used in this layout.
Definition: channel_layout.h:324
AVChannelLayout::nb_channels
int nb_channels
Number of channels in this layout.
Definition: channel_layout.h:329
SonicContext::quantization
double quantization
Definition: sonic.c:62
put_symbol
static av_noinline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed)
Definition: ffv1enc.c:233
SAMPLE_FACTOR
#define SAMPLE_FACTOR
Definition: sonic.c:84
skip_bits
static void skip_bits(GetBitContext *s, int n)
Definition: get_bits.h:379
golomb.h
exp golomb vlc stuff
close
static av_cold void close(AVCodecParserContext *s)
Definition: apv_parser.c:135
get_bits
static unsigned int get_bits(GetBitContext *s, int n)
Read 1-25 bits.
Definition: get_bits.h:333
SonicContext::block_align
int block_align
Definition: sonic.c:64
window
static SDL_Window * window
Definition: ffplay.c:361
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
AVCodecContext::codec
const struct AVCodec * codec
Definition: avcodec.h:440
SonicContext
Definition: sonic.c:56
AVCodecContext::ch_layout
AVChannelLayout ch_layout
Audio channel layout.
Definition: avcodec.h:1039
ff_init_range_encoder
av_cold void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size)
Definition: rangecoder.c:42
predictor_calc_error
static int predictor_calc_error(int *k, int *state, int order, int error)
Definition: bonk.c:270
GetBitContext
Definition: get_bits.h:109
LEFT_SIDE
#define LEFT_SIDE
Definition: sonic.c:53
FF_CODEC_ENCODE_CB
#define FF_CODEC_ENCODE_CB(func)
Definition: codec_internal.h:353
ff_sqrt
#define ff_sqrt
Definition: mathops.h:217
LATTICE_FACTOR
#define LATTICE_FACTOR
Definition: sonic.c:83
quant
static const uint8_t quant[64]
Definition: vmixdec.c:71
put_golomb.h
exp golomb vlc writing stuff
SonicContext::decorrelation
int decorrelation
Definition: sonic.c:59
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:210
AV_CODEC_CAP_EXPERIMENTAL
#define AV_CODEC_CAP_EXPERIMENTAL
Codec is experimental and is thus avoided in favor of non experimental encoders.
Definition: codec.h:87
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
av_cold
#define av_cold
Definition: attributes.h:90
init_get_bits8
static int init_get_bits8(GetBitContext *s, const uint8_t *buffer, int byte_size)
Initialize GetBitContext.
Definition: get_bits.h:539
SonicContext::tail
int * tail
Definition: sonic.c:71
AVCodecContext::extradata_size
int extradata_size
Definition: avcodec.h:515
FF_CODEC_DECODE_CB
#define FF_CODEC_DECODE_CB(func)
Definition: codec_internal.h:341
s
#define s(width, name)
Definition: cbs_vp9.c:198
SonicContext::samplerate
int samplerate
Definition: sonic.c:64
floor
static __device__ float floor(float a)
Definition: cuda_runtime.h:173
AVMEDIA_TYPE_AUDIO
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:201
AV_CHANNEL_ORDER_UNSPEC
@ AV_CHANNEL_ORDER_UNSPEC
Only the channel count is specified, without any further information about the channel order.
Definition: channel_layout.h:119
AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE
#define AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE
This encoder can reorder user opaque values from input AVFrames and return them with corresponding ou...
Definition: codec.h:144
SonicContext::coded_samples
int * coded_samples[MAX_CHANNELS]
Definition: sonic.c:68
RATE_VARIATION
#define RATE_VARIATION
Definition: sonic.c:87
channels
channels
Definition: aptx.h:31
decode.h
get_bits.h
SonicContext::frame_size
int frame_size
Definition: sonic.c:64
AV_CODEC_ID_SONIC_LS
@ AV_CODEC_ID_SONIC_LS
Definition: codec_id.h:520
SonicContext::minor_version
int minor_version
Definition: sonic.c:58
PutBitContext
Definition: put_bits.h:50
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:326
ff_sonic_encoder
const FFCodec ff_sonic_encoder
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
if
if(ret)
Definition: filter_design.txt:179
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
av_clip_int16
#define av_clip_int16
Definition: common.h:115
NULL
#define NULL
Definition: coverity.c:32
tmp
static uint8_t tmp[20]
Definition: aes_ctr.c:47
ff_rac_terminate
int ff_rac_terminate(RangeCoder *c, int version)
Terminates the range coder.
Definition: rangecoder.c:109
ROUNDED_DIV
#define ROUNDED_DIV(a, b)
Definition: common.h:58
MID_SIDE
#define MID_SIDE
Definition: sonic.c:52
get_bits1
static unsigned int get_bits1(GetBitContext *s)
Definition: get_bits.h:386
get_symbol
static int get_symbol(RangeCoder *c, uint8_t *state, int is_signed)
Definition: snowdec.c:34
double
double
Definition: af_crystalizer.c:132
SonicContext::window
int * window
Definition: sonic.c:73
SonicContext::channels
int channels
Definition: sonic.c:64
MAX_CHANNELS
#define MAX_CHANNELS
Definition: sonic.c:50
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
SonicContext::predictor_k
int * predictor_k
Definition: sonic.c:77
AV_CODEC_CAP_CHANNEL_CONF
#define AV_CODEC_CAP_CHANNEL_CONF
Codec should fill in channel configuration and samplerate instead of container.
Definition: codec.h:91
ff_sonic_decoder
const FFCodec ff_sonic_decoder
ff_init_range_decoder
av_cold void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size)
Definition: rangecoder.c:53
ff_get_buffer
int ff_get_buffer(AVCodecContext *avctx, AVFrame *frame, int flags)
Get a buffer for a frame.
Definition: decode.c:1635
init
int(* init)(AVBSFContext *ctx)
Definition: dts2pts.c:368
AV_CODEC_CAP_DR1
#define AV_CODEC_CAP_DR1
Codec uses get_buffer() or get_encode_buffer() for allocating buffers and supports custom allocators.
Definition: codec.h:52
SonicContext::lossless
int lossless
Definition: sonic.c:59
AVPacket::size
int size
Definition: packet.h:553
LATTICE_SHIFT
#define LATTICE_SHIFT
Definition: sonic.c:81
codec_internal.h
for
for(k=2;k<=8;++k)
Definition: h264pred_template.c:424
AVCodecContext::sample_fmt
enum AVSampleFormat sample_fmt
audio sample format
Definition: avcodec.h:1031
sample
#define sample
Definition: flacdsp_template.c:44
ff_build_rac_states
void ff_build_rac_states(RangeCoder *c, int factor, int max_p)
Definition: rangecoder.c:68
SonicContext::num_taps
int num_taps
Definition: sonic.c:61
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
SonicContext::version
int version
Definition: sonic.c:57
CODEC_SAMPLEFMTS
#define CODEC_SAMPLEFMTS(...)
Definition: codec_internal.h:380
AV_LOG_INFO
#define AV_LOG_INFO
Standard information.
Definition: log.h:221
AVCodec::id
enum AVCodecID id
Definition: codec.h:186
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
AVCodecContext::extradata
uint8_t * extradata
Out-of-band global headers that may be used by some codecs.
Definition: avcodec.h:514
av_flatten
#define av_flatten
Definition: attributes.h:96
SonicContext::int_samples
int * int_samples
Definition: sonic.c:67
av_always_inline
#define av_always_inline
Definition: attributes.h:49
FFMIN
#define FFMIN(a, b)
Definition: macros.h:49
ff_sonic_ls_encoder
const FFCodec ff_sonic_ls_encoder
AV_SAMPLE_FMT_S16
@ AV_SAMPLE_FMT_S16
signed 16 bits
Definition: samplefmt.h:58
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:256
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:179
get_rac
static int get_rac(RangeCoder *c, uint8_t *const state)
Definition: rangecoder.h:118
av_calloc
void * av_calloc(size_t nmemb, size_t size)
Definition: mem.c:264
avcodec.h
SAMPLE_SHIFT
#define SAMPLE_SHIFT
Definition: sonic.c:82
ret
ret
Definition: filter_design.txt:187
SonicContext::predictor_state
int * predictor_state[MAX_CHANNELS]
Definition: sonic.c:78
frame
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a it should directly call filter_frame on the corresponding output For a if there are queued frames already one of these frames should be pushed If the filter should request a frame on one of its repeatedly until at least one frame has been pushed Return or at least make progress towards producing a frame
Definition: filter_design.txt:265
put_rac
#define put_rac(C, S, B)
AVCodecContext
main external API structure.
Definition: avcodec.h:431
av_channel_layout_uninit
void av_channel_layout_uninit(AVChannelLayout *channel_layout)
Free any allocated data in the channel layout and reset the channel count to 0.
Definition: channel_layout.c:442
SonicContext::tap_quant
int * tap_quant
Definition: sonic.c:66
intlist_read
static int intlist_read(BonkContext *s, int *buf, int entries, int base_2_part)
Definition: bonk.c:151
samples
Filter the word “frame” indicates either a video frame or a group of audio samples
Definition: filter_design.txt:8
shift
static int shift(int a, int b)
Definition: sonic.c:89
AV_CODEC_ID_SONIC
@ AV_CODEC_ID_SONIC
Definition: codec_id.h:519
mem.h
RIGHT_SIDE
#define RIGHT_SIDE
Definition: sonic.c:54
flush_put_bits
static void flush_put_bits(PutBitContext *s)
Pad the end of the output stream with zeros.
Definition: put_bits.h:153
M_SQRT2
#define M_SQRT2
Definition: mathematics.h:109
AVPacket
This structure stores compressed data.
Definition: packet.h:529
AVCodecContext::priv_data
void * priv_data
Definition: avcodec.h:458
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
state
static struct @511 state
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVERROR_INVALIDDATA
#define AVERROR_INVALIDDATA
Invalid data found when processing input.
Definition: error.h:61
RangeCoder
Definition: mss3.c:63
shift_down
static int shift_down(int a, int b)
Definition: sonic.c:94
av_log2
int av_log2(unsigned v)
Definition: intmath.c:26
ff_alloc_packet
int ff_alloc_packet(AVCodecContext *avctx, AVPacket *avpkt, int64_t size)
Check AVPacket size and allocate data.
Definition: encode.c:62
predictor_init_state
static void predictor_init_state(int *k, unsigned *state, int order)
Definition: bonk.c:291