/**
 * @file
 * G.722 ADPCM audio encoder
 */

#include "libavutil/avassert.h"
#include "avcodec.h"
#include "internal.h"
#include "g722.h"
#include "libavutil/common.h"

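/*
 * G.722 is a sub-band ADPCM codec: a QMF pair splits every two input
 * samples into one low-band and one high-band sample, which are coded
 * with 6 and 2 bits respectively and packed into a single output byte
 * (ihigh << 6 | ilow). Two encoding modes are implemented below: a
 * greedy per-sample encoder, and an optional trellis search (enabled via
 * avctx->trellis) that keeps several candidate quantizer states per band
 * and picks the path with the smallest reconstruction error.
 */
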
/* The trellis search backtracks and commits its output every
 * FREEZE_INTERVAL sample pairs. */
#define FREEZE_INTERVAL 128

/* Arbitrary upper bound on the frame size accepted from the caller. */
#define MAX_FRAME_SIZE 32768

/* avctx->trellis is clamped to this range; the search buffers grow with
 * 1 << trellis, so large values get very expensive. */
#define MIN_TRELLIS 0
#define MAX_TRELLIS 16

static av_cold int g722_encode_close(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int i;
    for (i = 0; i < 2; i++) {
        av_freep(&c->paths[i]);
        av_freep(&c->node_buf[i]);
        av_freep(&c->nodep_buf[i]);
    }
#if FF_API_OLD_ENCODE_AUDIO
    av_freep(&avctx->coded_frame);
#endif
    return 0;
}

static av_cold int g722_encode_init(AVCodecContext *avctx)
{
    G722Context *c = avctx->priv_data;
    int ret;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n");
        return AVERROR_INVALIDDATA;
    }

    c->band[0].scale_factor = 8;
    c->band[1].scale_factor = 2;
    c->prev_samples_pos = 22;

    if (avctx->trellis) {
        int frontier = 1 << avctx->trellis;
        int max_paths = frontier * FREEZE_INTERVAL;
        int i;
        for (i = 0; i < 2; i++) {
            c->paths[i]     = av_mallocz(max_paths * sizeof(**c->paths));
            c->node_buf[i]  = av_mallocz(2 * frontier * sizeof(**c->node_buf));
            c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf));
            if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i]) {
                ret = AVERROR(ENOMEM);
                goto error;
            }
        }
    }

    if (avctx->frame_size) {
        /* validate frame size */
        if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
            int new_frame_size;

            if (avctx->frame_size == 1)
                new_frame_size = 2;
            else if (avctx->frame_size > MAX_FRAME_SIZE)
                new_frame_size = MAX_FRAME_SIZE;
            else
                new_frame_size = avctx->frame_size - 1;

            av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
                   "allowed. Using %d instead of %d\n", new_frame_size,
                   avctx->frame_size);
            avctx->frame_size = new_frame_size;
        }
    } else {
        /* Arbitrary default: 320 samples is 20 ms at G.722's 16 kHz
         * sampling rate, a common packetization for VoIP. */
        avctx->frame_size = 320;
    }
    avctx->delay = 22;

    if (avctx->trellis) {
        /* validate trellis */
        if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
            int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
            av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
                   "allowed. Using %d instead of %d\n", new_trellis,
                   avctx->trellis);
            avctx->trellis = new_trellis;
        }
    }

#if FF_API_OLD_ENCODE_AUDIO
    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame) {
        ret = AVERROR(ENOMEM);
        goto error;
    }
#endif

    return 0;
error:
    g722_encode_close(avctx);
    return ret;
}

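/* Decision thresholds for the 6-bit low-band quantizer; encode_low()
 * compares the prediction error against these values scaled by the
 * adaptive scale factor. */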
static const int16_t low_quant[33] = {
      35,   72,  110,  150,  190,  233,  276,  323,
     370,  422,  473,  530,  587,  650,  714,  786,
     858,  940, 1023, 1121, 1219, 1339, 1458, 1612,
    1765, 1980, 2195, 2557, 2919
};

static inline void filter_samples(G722Context *c, const int16_t *samples,
                                  int *xlow, int *xhigh)
{
    int xout1, xout2;
    c->prev_samples[c->prev_samples_pos++] = samples[0];
    c->prev_samples[c->prev_samples_pos++] = samples[1];
    ff_g722_apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2);
    *xlow  = (xout1 + xout2) >> 14;
    *xhigh = (xout1 - xout2) >> 14;
    /* Once the buffer fills up, keep only the last 22 samples; together
     * with the next two input samples they form the 24-sample QMF window. */
    if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
        memmove(c->prev_samples,
                c->prev_samples + c->prev_samples_pos - 22,
                22 * sizeof(c->prev_samples[0]));
        c->prev_samples_pos = 22;
    }
}

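/* Quantize the high-band prediction error to a 2-bit code: bit 1 is the
 * sign of the error, bit 0 indicates whether its magnitude is below
 * 141/256 of the current scale factor. */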
static inline int encode_high(const struct G722Band *state, int xhigh)
{
    int diff = av_clip_int16(xhigh - state->s_predictor);
    int pred = 141 * state->scale_factor >> 8;

    return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
}

static inline int encode_low(const struct G722Band *state, int xlow)
{
    int diff  = av_clip_int16(xlow - state->s_predictor);
    /* fold the sign so that limit is roughly |diff| */
    int limit = diff ^ (diff >> (sizeof(diff)*8-1));
    int i = 0;
    limit = (limit + 1) << 10;
    if (limit > low_quant[8] * state->scale_factor)
        i = 9;
    while (i < 29 && limit > low_quant[i] * state->scale_factor)
        i++;
    return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
}

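/*
 * Trellis (Viterbi-like) search: for each band, keep up to
 * frontier = 1 << trellis candidate quantizer states ("nodes") in a
 * min-heap ordered by the accumulated squared error (ssd) of the signal
 * they reconstruct, each with a back pointer into the paths array. Every
 * FREEZE_INTERVAL sample pairs the best path is traced back, its codes
 * are written to dst, and the path buffers are reset.
 */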
static void g722_encode_trellis(G722Context *c, int trellis,
                                uint8_t *dst, int nb_samples,
                                const int16_t *samples)
{
    int i, j, k;
    int frontier = 1 << trellis;
    struct TrellisNode **nodes[2];
    struct TrellisNode **nodes_next[2];
    int pathn[2] = {0, 0}, froze = -1;
    struct TrellisPath *p[2];

    for (i = 0; i < 2; i++) {
        nodes[i] = c->nodep_buf[i];
        nodes_next[i] = c->nodep_buf[i] + frontier;
        memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf));
        nodes[i][0] = c->node_buf[i] + frontier;
        nodes[i][0]->ssd = 0;
        nodes[i][0]->path = 0;
        nodes[i][0]->state = c->band[i];
    }

    for (i = 0; i < nb_samples >> 1; i++) {
        int xlow, xhigh;
        struct TrellisNode *next[2];
        int heap_pos[2] = {0, 0};

        for (j = 0; j < 2; j++) {
            next[j] = c->node_buf[j] + frontier*(i & 1);
            memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
        }

        filter_samples(c, &samples[2*i], &xlow, &xhigh);

        for (j = 0; j < frontier && nodes[0][j]; j++) {
            /* Instead of testing all 64 low-band codes for every node, only
             * test the code a greedy encoder would pick (ilow) and, for the
             * nodes in the first half of the heap (those with the lower
             * accumulated errors), the neighbouring codes ilow +/- 4 too. */
            int range = j < frontier/2 ? 4 : 0;
            struct TrellisNode *cur_node = nodes[0][j];

            int ilow = encode_low(&cur_node->state, xlow);

            for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
                int decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                if (k < 0)
                    continue;

                decoded = av_clip((cur_node->state.scale_factor *
                                   ff_g722_low_inv_quant6[k] >> 10)
                                  + cur_node->state.s_predictor, -16384, 16383);
                dec_diff = xlow - decoded;

#define STORE_NODE(index, UPDATE, VALUE)\
            ssd = cur_node->ssd + dec_diff*dec_diff;\
            /* Check for wraparound of the 32-bit cumulative error; skip\
             * candidates whose error would overflow. */\
            if (ssd < cur_node->ssd)\
                continue;\
            if (heap_pos[index] < frontier) {\
                pos = heap_pos[index]++;\
                av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
                node = nodes_next[index][pos] = next[index]++;\
                node->path = pathn[index]++;\
            } else {\
                /* The heap is full: try to replace one of the leaf nodes\
                 * (second half of the array) with the new node, rotating\
                 * through the leaf slots. */\
                pos = (frontier >> 1) + (heap_pos[index] & ((frontier >> 1) - 1));\
                if (ssd >= nodes_next[index][pos]->ssd)\
                    continue;\
                heap_pos[index]++;\
                node = nodes_next[index][pos];\
            }\
            node->ssd = ssd;\
            node->state = cur_node->state;\
            UPDATE;\
            c->paths[index][node->path].value = VALUE;\
            c->paths[index][node->path].prev = cur_node->path;\
            /* Sift the newly inserted node up in the min-heap so that the\
             * node with the smallest ssd stays at the root. */\
            while (pos > 0) {\
                int parent = (pos - 1) >> 1;\
                if (nodes_next[index][parent]->ssd <= ssd)\
                    break;\
                FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
                                            nodes_next[index][pos]);\
                pos = parent;\
            }
                STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
            }
        }

        for (j = 0; j < frontier && nodes[1][j]; j++) {
            int ihigh;
            struct TrellisNode *cur_node = nodes[1][j];

            /* The high band only has 4 possible codes, so no initial guess
             * is needed: simply test all of them for every node. */
            for (ihigh = 0; ihigh < 4; ihigh++) {
                int dhigh, decoded, dec_diff, pos;
                uint32_t ssd;
                struct TrellisNode* node;

                dhigh = cur_node->state.scale_factor *
                        ff_g722_high_inv_quant[ihigh] >> 10;
                decoded = av_clip(dhigh + cur_node->state.s_predictor,
                                  -16384, 16383);
                dec_diff = xhigh - decoded;

                STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
            }
        }

        for (j = 0; j < 2; j++) {
            FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);

            /* Keep the accumulated errors from overflowing by subtracting
             * the best node's ssd from every node in the band. */
            if (nodes[j][0]->ssd > (1 << 16)) {
                for (k = 1; k < frontier && nodes[j][k]; k++)
                    nodes[j][k]->ssd -= nodes[j][0]->ssd;
                nodes[j][0]->ssd = 0;
            }
        }

        if (i == froze + FREEZE_INTERVAL) {
            /* Backtrack along the best path in each band, emit the output
             * bytes for this interval, and restart the path buffers. */
            p[0] = &c->paths[0][nodes[0][0]->path];
            p[1] = &c->paths[1][nodes[1][0]->path];
            for (j = i; j > froze; j--) {
                dst[j] = p[1]->value << 6 | p[0]->value;
                p[0] = &c->paths[0][p[0]->prev];
                p[1] = &c->paths[1][p[1]->prev];
            }
            froze = i;
            pathn[0] = pathn[1] = 0;
            memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
            memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
        }
    }

    /* i is now one past the last encoded sample pair, so the final
     * backtrack starts at i - 1. */
    p[0] = &c->paths[0][nodes[0][0]->path];
    p[1] = &c->paths[1][nodes[1][0]->path];
    for (j = i - 1; j > froze; j--) {
        dst[j] = p[1]->value << 6 | p[0]->value;
        p[0] = &c->paths[0][p[0]->prev];
        p[1] = &c->paths[1][p[1]->prev];
    }
    c->band[0] = nodes[0][0]->state;
    c->band[1] = nodes[1][0]->state;
}

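/* Greedy encoder: quantize one low/high sample pair, pack the two codes
 * into a single byte and update the band predictors in place. */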
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst,
                                         const int16_t *samples)
{
    int xlow, xhigh, ilow, ihigh;
    filter_samples(c, samples, &xlow, &xhigh);
    ihigh = encode_high(&c->band[1], xhigh);
    ilow  = encode_low (&c->band[0], xlow);
    ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
                                  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
    ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
    *dst = ihigh << 6 | ilow;
}

static void g722_encode_no_trellis(G722Context *c,
                                   uint8_t *dst, int nb_samples,
                                   const int16_t *samples)
{
    int i;
    for (i = 0; i < nb_samples; i += 2)
        encode_byte(c, dst++, &samples[i]);
}

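/* Encode one frame: every pair of input samples produces one output byte,
 * so the packet needs (nb_samples + 1) / 2 bytes; a trailing odd sample
 * is duplicated to fill the last byte. */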
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                             const AVFrame *frame, int *got_packet_ptr)
{
    G722Context *c = avctx->priv_data;
    const int16_t *samples = (const int16_t *)frame->data[0];
    int nb_samples, out_size, ret;

    out_size = (frame->nb_samples + 1) / 2;
    if ((ret = ff_alloc_packet2(avctx, avpkt, out_size)) < 0)
        return ret;

    nb_samples = frame->nb_samples - (frame->nb_samples & 1);

    if (avctx->trellis)
        g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
    else
        g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);

    /* Handle an odd trailing sample by duplicating it to form a full
     * sample pair for the last output byte. */
    if (nb_samples < frame->nb_samples) {
        int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
        encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
    }

    if (frame->pts != AV_NOPTS_VALUE)
        avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
    *got_packet_ptr = 1;
    return 0;
}

AVCodec ff_adpcm_g722_encoder = {
    .name           = "g722",
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_ADPCM_G722,
    .priv_data_size = sizeof(G722Context),
    .init           = g722_encode_init,
    .close          = g722_encode_close,
    .encode2        = g722_encode_frame,
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME,
    .long_name      = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
                                                     AV_SAMPLE_FMT_NONE },
};