FFmpeg: libavcodec/wmavoice.c Source File

00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "avfft.h"
00040 #include "fft.h"
00041 
00042 #define MAX_BLOCKS           8   /* largest n_blocks value listed in frame_descs[] */
00043 #define MAX_LSPS             16  /* largest LSP count set by wmavoice_decode_init() (10 or 16) */
00044 #define MAX_LSPS_ALIGN16     16  /* history prefix reserved in front of synth_filter_out_buf[]; name suggests MAX_LSPS rounded up to a multiple of 16 */
00045 
00046 #define MAX_FRAMES           3   /* only used for MAX_SFRAMESIZE here; presumably frames per superframe - TODO confirm */
00047 #define MAX_FRAMESIZE        160 /* sizes denoise_filter_cache[]; postfilter() handles at most half of this per call */
00048 #define MAX_SIGNAL_HISTORY   416 /* upper bound enforced on history_nsamples in wmavoice_decode_init() */
00049 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES)
00051 #define SFRAME_CACHE_MAXSIZE 256 /* size of sframe_cache[], excluding input padding */
00052 
00053 #define VLC_NBITS            6   /* bit-count argument passed to INIT_VLC_STATIC() for frame_type_vlc */
00054 
00055 
/* VLC for frame types; initialized by decode_vbmtree(). */
00058 static VLC frame_type_vlc;
00059 
/* Values stored in frame_type_desc.acb_type and WMAVoiceContext.last_acb_type.
 * ACB presumably stands for "adaptive codebook" - TODO confirm; the code that
 * interprets these values is outside the visible part of the file. */
00063 enum {
00064     ACB_TYPE_NONE       = 0, /* used by frame_descs[0..1]; initial value of last_acb_type */
00065     ACB_TYPE_ASYMMETRIC = 1, /* used by frame_descs[2..7] */
00066 
00067 
00068 
00069 
00070     ACB_TYPE_HAMMING    = 2  /* used by frame_descs[8..16] */
00071 
00072 
00073 };
00074 
/* Values stored in frame_type_desc.fcb_type and passed as fcb_type to the
 * postfilter functions. FCB presumably stands for "fixed codebook" - TODO
 * confirm. The numeric order matters: postfilter() tests
 * fcb_type >= FCB_TYPE_AW_PULSES. */
00078 enum {
00079     FCB_TYPE_SILENCE    = 0, /* wiener_denoise() skips the spectral filtering for this type */
00080 
00081 
00082     FCB_TYPE_HARDCODED  = 1, /* calc_input_response() uses gain factor 5/13 instead of 5/14.7 for this type */
00083 
00084     FCB_TYPE_AW_PULSES  = 2, /* first type for which postfilter() attempts kalman_smoothen() */
00085 
00086     FCB_TYPE_EXC_PULSES = 3, /* used by all frame_descs[] entries from index 3 on */
00087 
00088 
00089 };
00090 
/* Static description of the 17 frame types; decode_vbmtree() maps each index
 * (0..16) of this table into the VBM tree. */
00094 static const struct frame_type_desc {
00095     uint8_t n_blocks;     /* 1, 2, 4 or 8 in the table below */
00096 
00097     uint8_t log_n_blocks; /* log2(n_blocks) for every table entry */
00098     uint8_t acb_type;     /* one of ACB_TYPE_* */
00099     uint8_t fcb_type;     /* one of FCB_TYPE_* */
00100     uint8_t dbl_pulses;   /* 0, 2 or 5 in the table; not used in the visible code - presumably a pulse count, TODO confirm */
00101 
00102 
00103     uint16_t frame_size;  /* 0..256 in the table; not used in the visible code - presumably a size in bits, TODO confirm */
00104 
00105 } frame_descs[17] = {
00106     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00107     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00108     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00109     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00110     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00111     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00112     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00113     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00114     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00115     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00116     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00117     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00118     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00119     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00120     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00121     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00122     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00123 };
00124 
/* Private decoder state (ctx->priv_data). Fields described as "not referenced
 * in the visible part of the file" are used by code outside this excerpt. */
00128 typedef struct {
00135     GetBitContext gb;             /* bit reader; wmavoice_decode_init() points it at the extradata (from byte 22) to read the VBM tree */
00136 
00137 
00138 
00139     int8_t vbm_tree[25];          /* filled by decode_vbmtree(): slot res * 3 + k holds a frame_descs[] index; intended filler for unused slots is 0xff (-1) */
00140 
00141     int spillover_bitsize;        /* 3 + av_ceil_log2(ctx->block_align) */
00142 
00143 
00144     int history_nsamples;         /* max_pitch_val + 8; must not exceed MAX_SIGNAL_HISTORY */
00145 
00146 
00147     /* postfilter specific values */
00148     int do_apf;                   /* extradata flags bit 0; when set, init creates the RDFT/DCT contexts and sin/cos tables below */
00149 
00150     int denoise_strength;         /* extradata flags bits 2-5, valid range 0..11; row index into wmavoice_denoise_power_table */
00151 
00152     int denoise_tilt_corr;        /* extradata flag 0x40; enables tilt compensation in calc_input_response() */
00153 
00154     int dc_level;                 /* extradata flags bits 7-10; postfilter() applies its high-pass filter when this is > 8 */
00155 
00156 
00157     int lsps;                     /* number of LSPs: 16 if extradata flag 0x1000 is set, else 10 */
00158     int lsp_q_mode;               /* extradata flag 0x2000 */
00159     int lsp_def_mode;             /* extradata flag 0x4000 */
00160 
00161     int frame_lsp_bitsize;        /* 34 (16 LSPs) or 24 (10 LSPs) */
00162 
00163     int sframe_lsp_bitsize;       /* 60 (16 LSPs) or 48 (10 LSPs) */
00164 
00165 
00166     int min_pitch_val;            /* about sample_rate / 400, computed in fixed point by init; must be >= 1 */
00167     int max_pitch_val;            /* about sample_rate * 37 / 2000, computed in fixed point by init */
00168     int pitch_nbits;              /* av_ceil_log2(max_pitch_val - min_pitch_val) */
00169 
00170     int block_pitch_nbits;        /* av_ceil_log2(block_pitch_range) */
00171 
00172     int block_pitch_range;        /* derived from block_conv_table[] and min_pitch_val in init */
00173     int block_delta_pitch_nbits;  /* 1 + av_ceil_log2(block_delta_pitch_hrange) */
00174 
00175 
00176 
00177     int block_delta_pitch_hrange; /* (pitch_range >> 3) rounded down to a multiple of 16 */
00178 
00179     uint16_t block_conv_table[4]; /* { min_pitch_val, pitch_range * 25 / 64, pitch_range * 44 / 64, max_pitch_val - 1 } */
00180 
00181 
00190     int spillover_nbits;          /* not referenced in the visible part of the file */
00191 
00192 
00193 
00194     int has_residual_lsps;        /* not referenced in the visible part of the file */
00195 
00196 
00197 
00198 
00199     int skip_bits_next;           /* not referenced in the visible part of the file */
00200 
00201 
00202 
00203     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00206     int sframe_cache_size;        /* not referenced in the visible part of the file; presumably the fill level of sframe_cache[] - TODO confirm */
00207 
00208 
00209 
00210 
00211     PutBitContext pb;             /* not referenced in the visible part of the file */
00212 
00221     double prev_lsps[MAX_LSPS];   /* initialized to M_PI * (n + 1) / (lsps + 1), i.e. evenly spaced inside (0, pi) */
00222 
00223     int last_pitch_val;           /* initialized to 40 */
00224     int last_acb_type;            /* initialized to ACB_TYPE_NONE */
00225     int pitch_diff_sh16;          /* not referenced in the visible part of the file */
00226 
00227     float silence_gain;           /* not referenced in the visible part of the file */
00228 
00229     int aw_idx_is_ext;            /* set by aw_parse_coords(): 1 if the 6-bit pulse index was >= 54 and 2 extra bits were read */
00230 
00231     int aw_pulse_range;           /* set by aw_parse_coords(): 24 if both pitch values are > 32, else 16 */
00232 
00233 
00234 
00235 
00236 
00237     int aw_n_pulses[2];           /* pulse count per half-frame block, set by aw_parse_coords() */
00238 
00239 
00240     int aw_first_pulse_off[2];    /* first pulse offset per block, set by aw_parse_coords() */
00241 
00242     int aw_next_pulse_off_cache;  /* read by aw_pulse_set2() for block 1; written outside the visible part of the file */
00243 
00244 
00245 
00246 
00247 
00248     int frame_cntr;               /* not referenced in the visible part of the file */
00249 
00250     float gain_pred_err[6];       /* not referenced in the visible part of the file */
00251     float excitation_history[MAX_SIGNAL_HISTORY];
00255     float synth_history[MAX_LSPS]; /* not referenced in the visible part of the file */
00256 
00263     RDFTContext rdft, irdft;      /* 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) transforms; set up only if do_apf */
00264 
00265     DCTContext dct, dst;          /* 6-bit DCT_I and DST_I transforms; set up only if do_apf */
00266 
00267     float sin[511], cos[511];     /* lookup tables built in init from ff_sine_window_init(); indexed with 255 + clipped value in calc_input_response() */
00268 
00269     float postfilter_agc;         /* gain memory carried between adaptive_gain_control() calls */
00270 
00271     float dcf_mem[2];             /* filter memory of the high-pass filter applied in postfilter() */
00272     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00275     float denoise_filter_cache[MAX_FRAMESIZE];
00276     int   denoise_filter_cache_size; /* number of valid samples in denoise_filter_cache[] (see wiener_denoise()) */
00277     DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80];
00279     DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80];
00281     DECLARE_ALIGNED(16, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00284 
00287 } WMAVoiceContext;
00288 
00298 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00299 {
00300     static const uint8_t bits[] = {
00301          2,  2,  2,  4,  4,  4,
00302          6,  6,  6,  8,  8,  8,
00303         10, 10, 10, 12, 12, 12,
00304         14, 14, 14, 14
00305     };
00306     static const uint16_t codes[] = {
00307           0x0000, 0x0001, 0x0002,        //              00/01/10
00308           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00309           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00310           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00311           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00312           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00313           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00314     };
00315     int cntr[8], n, res;
00316 
00317     memset(vbm_tree, 0xff, sizeof(vbm_tree));
00318     memset(cntr,     0,    sizeof(cntr));
00319     for (n = 0; n < 17; n++) {
00320         res = get_bits(gb, 3);
00321         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00322             return -1;
00323         vbm_tree[res * 3 + cntr[res]++] = n;
00324     }
00325     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00326                     bits, 1, 1, codes, 2, 2, 132);
00327     return 0;
00328 }
00329 
00333 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00334 {
00335     int n, flags, pitch_range, lsp16_flag;
00336     WMAVoiceContext *s = ctx->priv_data;
00337 
00346     if (ctx->extradata_size != 46) {
00347         av_log(ctx, AV_LOG_ERROR,
00348                "Invalid extradata size %d (should be 46)\n",
00349                ctx->extradata_size);
00350         return -1;
00351     }
00352     flags                = AV_RL32(ctx->extradata + 18);
00353     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00354     s->do_apf            =    flags & 0x1;
00355     if (s->do_apf) {
00356         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00357         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00358         ff_dct_init(&s->dct,  6, DCT_I);
00359         ff_dct_init(&s->dst,  6, DST_I);
00360 
00361         ff_sine_window_init(s->cos, 256);
00362         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00363         for (n = 0; n < 255; n++) {
00364             s->sin[n]       = -s->sin[510 - n];
00365             s->cos[510 - n] =  s->cos[n];
00366         }
00367     }
00368     s->denoise_strength  =   (flags >> 2) & 0xF;
00369     if (s->denoise_strength >= 12) {
00370         av_log(ctx, AV_LOG_ERROR,
00371                "Invalid denoise filter strength %d (max=11)\n",
00372                s->denoise_strength);
00373         return -1;
00374     }
00375     s->denoise_tilt_corr = !!(flags & 0x40);
00376     s->dc_level          =   (flags >> 7) & 0xF;
00377     s->lsp_q_mode        = !!(flags & 0x2000);
00378     s->lsp_def_mode      = !!(flags & 0x4000);
00379     lsp16_flag           =    flags & 0x1000;
00380     if (lsp16_flag) {
00381         s->lsps               = 16;
00382         s->frame_lsp_bitsize  = 34;
00383         s->sframe_lsp_bitsize = 60;
00384     } else {
00385         s->lsps               = 10;
00386         s->frame_lsp_bitsize  = 24;
00387         s->sframe_lsp_bitsize = 48;
00388     }
00389     for (n = 0; n < s->lsps; n++)
00390         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00391 
00392     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00393     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00394         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00395         return -1;
00396     }
00397 
00398     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00399     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00400     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00401     s->pitch_nbits      = av_ceil_log2(pitch_range);
00402     s->last_pitch_val   = 40;
00403     s->last_acb_type    = ACB_TYPE_NONE;
00404     s->history_nsamples = s->max_pitch_val + 8;
00405 
00406     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00407         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00408             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00409 
00410         av_log(ctx, AV_LOG_ERROR,
00411                "Unsupported samplerate %d (min=%d, max=%d)\n",
00412                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00413 
00414         return -1;
00415     }
00416 
00417     s->block_conv_table[0]      = s->min_pitch_val;
00418     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00419     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00420     s->block_conv_table[3]      = s->max_pitch_val - 1;
00421     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00422     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00423     s->block_pitch_range        = s->block_conv_table[2] +
00424                                   s->block_conv_table[3] + 1 +
00425                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00426     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00427 
00428     ctx->sample_fmt             = SAMPLE_FMT_FLT;
00429 
00430     return 0;
00431 }
00432 
/**
 * Adaptive gain control: scale the postfilter output so that its level
 * follows that of the unfiltered speech synthesis.
 *
 * Levels are measured as the sum of absolute sample values. The gain is
 * smoothed per sample with a first-order recursion whose state is kept in
 * *gain_mem between calls.
 *
 * @param out          output buffer, size samples
 * @param in           postfiltered signal, size samples
 * @param speech_synth reference (unfiltered) signal, size samples
 * @param size         number of samples
 * @param alpha        smoothing coefficient of the gain recursion
 * @param gain_mem     in/out: smoothed gain carried over between calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* An all-zero (or empty) input would turn the division into 0/0 or x/0;
     * the resulting NaN/inf would be written to *gain_mem and corrupt every
     * later call. Let the gain simply decay in that case. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00475 
00494 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00495                            const float *in, float *out, int size)
00496 {
00497     int n;
00498     float optimal_gain = 0, dot;
00499     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00500                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00501                 *best_hist_ptr;
00502 
00503     /* find best fitting point in history */
00504     do {
00505         dot = ff_dot_productf(in, ptr, size);
00506         if (dot > optimal_gain) {
00507             optimal_gain  = dot;
00508             best_hist_ptr = ptr;
00509         }
00510     } while (--ptr >= end);
00511 
00512     if (optimal_gain <= 0)
00513         return -1;
00514     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00515     if (dot <= 0) // would be 1.0
00516         return -1;
00517 
00518     if (optimal_gain <= dot) {
00519         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00520     } else
00521         dot = 0.625;
00522 
00523     /* actual smoothing */
00524     for (n = 0; n < size; n++)
00525         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00526 
00527     return 0;
00528 }
00529 
/**
 * Compute the tilt factor of a set of LPCs.
 *
 * The coefficients are treated as a filter with an implicit leading 1.0:
 * the result is the lag-1 correlation of that sequence divided by its
 * energy.
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return lag-1 correlation divided by energy
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float energy    = 1.0     + ff_dot_productf(lpcs, lpcs,     n_lpcs);
    const float lag1_corr = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1_corr / energy;
}
00549 
00553 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00554                                 int fcb_type, float *coeffs, int remainder)
00555 {
00556     float last_coeff, min = 15.0, max = -15.0;
00557     float irange, angle_mul, gain_mul, range, sq;
00558     int n, idx;
00559 
00560     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00561     ff_rdft_calc(&s->rdft, lpcs);
00562 #define log_range(var, assign) do { \
00563         float tmp = log10f(assign);  var = tmp; \
00564         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00565     } while (0)
00566     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00567     for (n = 1; n < 64; n++)
00568         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00569                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00570     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00571 #undef log_range
00572     range    = max - min;
00573     lpcs[64] = last_coeff;
00574 
00575     /* Now, use this spectrum to pick out these frequencies with higher
00576      * (relative) power/energy (which we then take to be "not noise"),
00577      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00578      * These frequencies will be maintained, while others ("noise") will be
00579      * decreased in the filter output. */
00580     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00581     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00582                                                           (5.0 / 14.7));
00583     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00584     for (n = 0; n <= 64; n++) {
00585         float pow;
00586 
00587         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00588         pow = wmavoice_denoise_power_table[s->denoise_strength][idx];
00589         lpcs[n] = angle_mul * pow;
00590 
00591         /* 70.57 =~ 1/log10(1.0331663) */
00592         idx = (pow * gain_mul - 0.0295) * 70.570526123;
00593         if (idx > 127) { // fallback if index falls outside table range
00594             coeffs[n] = wmavoice_energy_table[127] *
00595                         powf(1.0331663, idx - 127);
00596         } else
00597             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00598     }
00599 
00600     /* calculate the Hilbert transform of the gains, which we do (since this
00601      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00602      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00603      * "moment" of the LPCs in this filter. */
00604     ff_dct_calc(&s->dct, lpcs);
00605     ff_dct_calc(&s->dst, lpcs);
00606 
00607     /* Split out the coefficient indexes into phase/magnitude pairs */
00608     idx = 255 + av_clip(lpcs[64],               -255, 255);
00609     coeffs[0]  = coeffs[0]  * s->cos[idx];
00610     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00611     last_coeff = coeffs[64] * s->cos[idx];
00612     for (n = 63;; n--) {
00613         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00614         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00615         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00616 
00617         if (!--n) break;
00618 
00619         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00620         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00621         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00622     }
00623     coeffs[1] = last_coeff;
00624 
00625     /* move into real domain */
00626     ff_rdft_calc(&s->irdft, coeffs);
00627 
00628     /* tilt correction and normalize scale */
00629     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00630     if (s->denoise_tilt_corr) {
00631         float tilt_mem = 0;
00632 
00633         coeffs[remainder - 1] = 0;
00634         ff_tilt_compensation(&tilt_mem,
00635                              -1.8 * tilt_factor(coeffs, remainder - 1),
00636                              coeffs, remainder);
00637     }
00638     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00639     for (n = 0; n < remainder; n++)
00640         coeffs[n] *= sq;
00641 }
00642 
00669 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00670                            float *synth_pf, int size,
00671                            const float *lpcs)
00672 {
00673     int remainder, lim, n;
00674 
00675     if (fcb_type != FCB_TYPE_SILENCE) {
00676         float *tilted_lpcs = s->tilted_lpcs_pf,
00677               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00678 
00679         tilted_lpcs[0]           = 1.0;
00680         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00681         memset(&tilted_lpcs[s->lsps + 1], 0,
00682                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00683         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00684                              tilted_lpcs, s->lsps + 2);
00685 
00686         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00687          * size is applied to the next frame. All input beyond this is zero,
00688          * and thus all output beyond this will go towards zero, hence we can
00689          * limit to min(size-1, 127-size) as a performance consideration. */
00690         remainder = FFMIN(127 - size, size - 1);
00691         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00692 
00693         /* apply coefficients (in frequency spectrum domain), i.e. complex
00694          * number multiplication */
00695         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00696         ff_rdft_calc(&s->rdft, synth_pf);
00697         ff_rdft_calc(&s->rdft, coeffs);
00698         synth_pf[0] *= coeffs[0];
00699         synth_pf[1] *= coeffs[1];
00700         for (n = 1; n < 64; n++) {
00701             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00702             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00703             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00704         }
00705         ff_rdft_calc(&s->irdft, synth_pf);
00706     }
00707 
00708     /* merge filter output with the history of previous runs */
00709     if (s->denoise_filter_cache_size) {
00710         lim = FFMIN(s->denoise_filter_cache_size, size);
00711         for (n = 0; n < lim; n++)
00712             synth_pf[n] += s->denoise_filter_cache[n];
00713         s->denoise_filter_cache_size -= lim;
00714         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00715                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00716     }
00717 
00718     /* move remainder of filter output into a cache for future runs */
00719     if (fcb_type != FCB_TYPE_SILENCE) {
00720         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00721         for (n = 0; n < lim; n++)
00722             s->denoise_filter_cache[n] += synth_pf[size + n];
00723         if (lim < remainder) {
00724             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00725                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00726             s->denoise_filter_cache_size = remainder;
00727         }
00728     }
00729 }
00730 
00750 static void postfilter(WMAVoiceContext *s, const float *synth,
00751                        float *samples,    int size,
00752                        const float *lpcs, float *zero_exc_pf,
00753                        int fcb_type,      int pitch)
00754 {
00755     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00756           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00757           *synth_filter_in = zero_exc_pf;
00758 
00759     assert(size <= MAX_FRAMESIZE / 2);
00760 
00761     /* generate excitation from input signal */
00762     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00763 
00764     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00765         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00766         synth_filter_in = synth_filter_in_buf;
00767 
00768     /* re-synthesize speech after smoothening, and keep history */
00769     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00770                                  synth_filter_in, size, s->lsps);
00771     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00772            sizeof(synth_pf[0]) * s->lsps);
00773 
00774     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00775 
00776     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00777                           &s->postfilter_agc);
00778 
00779     if (s->dc_level > 8) {
00780         /* remove ultra-low frequency DC noise / highpass filter;
00781          * coefficients are identical to those used in SIPR decoding,
00782          * and very closely resemble those used in AMR-NB decoding. */
00783         ff_acelp_apply_order_2_transfer_function(samples, samples,
00784             (const float[2]) { -1.99997,      1.0 },
00785             (const float[2]) { -1.9330735188, 0.93589198496 },
00786             0.93980580475, s->dcf_mem, size);
00787     }
00788 }
/**
 * Dequantize LSPs with a multi-stage codebook.
 *
 * Each stage adds base_q[stage] + mul_q[stage] * entry[m] to every output,
 * where entry is row values[stage] (num bytes wide) of that stage's
 * codebook. The codebooks of all stages are stored back to back in table;
 * stage n holds sizes[n] rows.
 *
 * @param lsps     output array, num entries (cleared first)
 * @param num      number of LSPs per codebook row
 * @param values   row index per stage
 * @param sizes    number of rows per stage codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    multiplier per stage
 * @param base_q   base offset per stage
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage, i;

    memset(lsps, 0, num * sizeof(*lsps));
    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *entry = codebook + values[stage] * num;
        const double offset  = base_q[stage];
        const double scale   = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * entry[i];

        /* skip to the next stage's codebook */
        codebook += sizes[stage] * num;
    }
}
00824 
00836 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00837 {
00838     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00839     static const double mul_lsf[4] = {
00840         5.2187144800e-3,    1.4626986422e-3,
00841         9.6179549166e-4,    1.1325736225e-3
00842     };
00843     static const double base_lsf[4] = {
00844         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00845         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00846     };
00847     uint16_t v[4];
00848 
00849     v[0] = get_bits(gb, 8);
00850     v[1] = get_bits(gb, 6);
00851     v[2] = get_bits(gb, 5);
00852     v[3] = get_bits(gb, 5);
00853 
00854     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00855                  mul_lsf, base_lsf);
00856 }
00857 
00862 static void dequant_lsp10r(GetBitContext *gb,
00863                            double *i_lsps, const double *old,
00864                            double *a1, double *a2, int q_mode)
00865 {
00866     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00867     static const double mul_lsf[3] = {
00868         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00869     };
00870     static const double base_lsf[3] = {
00871         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00872     };
00873     const float (*ipol_tab)[2][10] = q_mode ?
00874         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00875     uint16_t interpol, v[3];
00876     int n;
00877 
00878     dequant_lsp10i(gb, i_lsps);
00879 
00880     interpol = get_bits(gb, 5);
00881     v[0]     = get_bits(gb, 7);
00882     v[1]     = get_bits(gb, 6);
00883     v[2]     = get_bits(gb, 6);
00884 
00885     for (n = 0; n < 10; n++) {
00886         double delta = old[n] - i_lsps[n];
00887         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00888         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00889     }
00890 
00891     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00892                  mul_lsf, base_lsf);
00893 }
00894 
00898 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00899 {
00900     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00901     static const double mul_lsf[5] = {
00902         3.3439586280e-3,    6.9908173703e-4,
00903         3.3216608306e-3,    1.0334960326e-3,
00904         3.1899104283e-3
00905     };
00906     static const double base_lsf[5] = {
00907         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00908         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00909         M_PI * -1.29816e-1
00910     };
00911     uint16_t v[5];
00912 
00913     v[0] = get_bits(gb, 8);
00914     v[1] = get_bits(gb, 6);
00915     v[2] = get_bits(gb, 7);
00916     v[3] = get_bits(gb, 6);
00917     v[4] = get_bits(gb, 7);
00918 
00919     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00920                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00921     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00922                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00923     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00924                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00925 }
00926 
00931 static void dequant_lsp16r(GetBitContext *gb,
00932                            double *i_lsps, const double *old,
00933                            double *a1, double *a2, int q_mode)
00934 {
00935     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00936     static const double mul_lsf[3] = {
00937         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00938     };
00939     static const double base_lsf[3] = {
00940         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00941     };
00942     const float (*ipol_tab)[2][16] = q_mode ?
00943         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00944     uint16_t interpol, v[3];
00945     int n;
00946 
00947     dequant_lsp16i(gb, i_lsps);
00948 
00949     interpol = get_bits(gb, 5);
00950     v[0]     = get_bits(gb, 7);
00951     v[1]     = get_bits(gb, 7);
00952     v[2]     = get_bits(gb, 7);
00953 
00954     for (n = 0; n < 16; n++) {
00955         double delta = old[n] - i_lsps[n];
00956         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00957         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00958     }
00959 
00960     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00961                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00962     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00963                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00964     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00965                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00966 }
00967 
00981 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00982                             const int *pitch)
00983 {
00984     static const int16_t start_offset[94] = {
00985         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
00986          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
00987          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
00988          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
00989          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
00990          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
00991         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
00992         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
00993     };
00994     int bits, offset;
00995 
00996     /* position of pulse */
00997     s->aw_idx_is_ext = 0;
00998     if ((bits = get_bits(gb, 6)) >= 54) {
00999         s->aw_idx_is_ext = 1;
01000         bits += (bits - 54) * 3 + get_bits(gb, 2);
01001     }
01002 
01003     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01004      * the distribution of the pulses in each block contained in this frame. */
01005     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01006     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01007     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01008     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01009     offset                  += s->aw_n_pulses[0] * pitch[0];
01010     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01011     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01012 
01013     /* if continuing from a position before the block, reset position to
01014      * start of block (when corrected for the range over which it can be
01015      * spread in aw_pulse_set1()). */
01016     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01017         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01018             s->aw_first_pulse_off[1] -= pitch[1];
01019         if (start_offset[bits] < 0)
01020             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01021                 s->aw_first_pulse_off[0] -= pitch[0];
01022     }
01023 }
01024 
01032 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01033                           int block_idx, AMRFixed *fcb)
01034 {
01035     uint16_t use_mask[7]; // only 5 are used, rest is padding
01036     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01037      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01038      * of idx are the position of the bit within a particular item in the
01039      * array (0 being the most significant bit, and 15 being the least
01040      * significant bit), and the remainder (>> 4) is the index in the
01041      * use_mask[]-array. This is faster and uses less memory than using a
01042      * 80-byte/80-int array. */
01043     int pulse_off = s->aw_first_pulse_off[block_idx],
01044         pulse_start, n, idx, range, aidx, start_off = 0;
01045 
01046     /* set offset of first pulse to within this block */
01047     if (s->aw_n_pulses[block_idx] > 0)
01048         while (pulse_off + s->aw_pulse_range < 1)
01049             pulse_off += fcb->pitch_lag;
01050 
01051     /* find range per pulse */
01052     if (s->aw_n_pulses[0] > 0) {
01053         if (block_idx == 0) {
01054             range = 32;
01055         } else /* block_idx = 1 */ {
01056             range = 8;
01057             if (s->aw_n_pulses[block_idx] > 0)
01058                 pulse_off = s->aw_next_pulse_off_cache;
01059         }
01060     } else
01061         range = 16;
01062     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01063 
01064     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01065      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01066      * we exclude that range from being pulsed again in this function. */
01067     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01068     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01069     if (s->aw_n_pulses[block_idx] > 0)
01070         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01071             int excl_range         = s->aw_pulse_range; // always 16 or 24
01072             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01073             int first_sh           = 16 - (idx & 15);
01074             *use_mask_ptr++       &= 0xFFFF << first_sh;
01075             excl_range            -= first_sh;
01076             if (excl_range >= 16) {
01077                 *use_mask_ptr++    = 0;
01078                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01079             } else
01080                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01081         }
01082 
01083     /* find the 'aidx'th offset that is not excluded */
01084     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01085     for (n = 0; n <= aidx; pulse_start++) {
01086         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01087         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01088             if (use_mask[0])      idx = 0x0F;
01089             else if (use_mask[1]) idx = 0x1F;
01090             else if (use_mask[2]) idx = 0x2F;
01091             else if (use_mask[3]) idx = 0x3F;
01092             else if (use_mask[4]) idx = 0x4F;
01093             else                  return;
01094             idx -= av_log2_16bit(use_mask[idx >> 4]);
01095         }
01096         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01097             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01098             n++;
01099             start_off = idx;
01100         }
01101     }
01102 
01103     fcb->x[fcb->n] = start_off;
01104     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01105     fcb->n++;
01106 
01107     /* set offset for next block, relative to start of that block */
01108     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01109     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01110 }
01111 
01119 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01120                           int block_idx, AMRFixed *fcb)
01121 {
01122     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01123     float v;
01124 
01125     if (s->aw_n_pulses[block_idx] > 0) {
01126         int n, v_mask, i_mask, sh, n_pulses;
01127 
01128         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01129             n_pulses = 3;
01130             v_mask   = 8;
01131             i_mask   = 7;
01132             sh       = 4;
01133         } else { // 4 pulses, 1:sign + 2:index each
01134             n_pulses = 4;
01135             v_mask   = 4;
01136             i_mask   = 3;
01137             sh       = 3;
01138         }
01139 
01140         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01141             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01142             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01143                                  s->aw_first_pulse_off[block_idx];
01144             while (fcb->x[fcb->n] < 0)
01145                 fcb->x[fcb->n] += fcb->pitch_lag;
01146             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01147                 fcb->n++;
01148         }
01149     } else {
01150         int num2 = (val & 0x1FF) >> 1, delta, idx;
01151 
01152         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01153         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01154         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01155         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01156         v = (val & 0x200) ? -1.0 : 1.0;
01157 
01158         fcb->no_repeat_mask |= 3 << fcb->n;
01159         fcb->x[fcb->n]       = idx - delta;
01160         fcb->y[fcb->n]       = v;
01161         fcb->x[fcb->n + 1]   = idx;
01162         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01163         fcb->n              += 2;
01164     }
01165 }
01166 
/**
 * Generate a deterministic pseudo-random number from the frame counter and
 * the block number.
 *
 * @param frame_cntr current frame number
 * @param block_num current block index
 * @param block_size size of a block
 * @return value in the range [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* array to simplify the calculation of z:
     * y = (x % 9) * 5 + 6;
     * z = (49995 * x) / y;
     * Since y only has 9 values, we can remove the division by using a
     * LUT and using FASTDIV-style divisions. For each of the 9 values
     * of y, we can rewrite z as:
     * z = x * (49995 / y) + x * ((49995 % y) / y)
     * In this table, each row represents one possible value of y, the
     * first number is 49995 / y, and the second is the FASTDIV variant
     * of 49995 % y / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    const unsigned int *row;
    unsigned int seed, rem9, val;

    seed = MUL16(block_num, 1877) + frame_cntr;
    /* max value of seed is 8*1877+0xFFFE=0x13AA6, so a single subtraction
     * is effectively a modulo (%) */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    rem9 = seed - 9 * MULH(477218589, seed); // seed % 9
    row  = div_tbl[rem9];
    /* val = seed * 49995 / (rem9 * 5 + 6), truncated to 16 bits */
    val  = (uint16_t) (seed * row[0] + UMULH(seed, row[1]));

    return val % (1000 - block_size);
}
01211 
01216 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01217                                  int block_idx, int size,
01218                                  const struct frame_type_desc *frame_desc,
01219                                  float *excitation)
01220 {
01221     float gain;
01222     int n, r_idx;
01223 
01224     assert(size <= MAX_FRAMESIZE);
01225 
01226     /* Set the offset from which we start reading wmavoice_std_codebook */
01227     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01228         r_idx = pRNG(s->frame_cntr, block_idx, size);
01229         gain  = s->silence_gain;
01230     } else /* FCB_TYPE_HARDCODED */ {
01231         r_idx = get_bits(gb, 8);
01232         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01233     }
01234 
01235     /* Clear gain prediction parameters */
01236     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01237 
01238     /* Apply gain to hardcoded codebook and use that as excitation signal */
01239     for (n = 0; n < size; n++)
01240         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01241 }
01242 
01247 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01248                                 int block_idx, int size,
01249                                 int block_pitch_sh2,
01250                                 const struct frame_type_desc *frame_desc,
01251                                 float *excitation)
01252 {
01253     static const float gain_coeff[6] = {
01254         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01255     };
01256     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01257     int n, idx, gain_weight;
01258     AMRFixed fcb;
01259 
01260     assert(size <= MAX_FRAMESIZE / 2);
01261     memset(pulses, 0, sizeof(*pulses) * size);
01262 
01263     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01264     fcb.pitch_fac      = 1.0;
01265     fcb.no_repeat_mask = 0;
01266     fcb.n              = 0;
01267 
01268     /* For the other frame types, this is where we apply the innovation
01269      * (fixed) codebook pulses of the speech signal. */
01270     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01271         aw_pulse_set1(s, gb, block_idx, &fcb);
01272         aw_pulse_set2(s, gb, block_idx, &fcb);
01273     } else /* FCB_TYPE_EXC_PULSES */ {
01274         int offset_nbits = 5 - frame_desc->log_n_blocks;
01275 
01276         fcb.no_repeat_mask = -1;
01277         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01278          * (instead of double) for a subset of pulses */
01279         for (n = 0; n < 5; n++) {
01280             float sign;
01281             int pos1, pos2;
01282 
01283             sign           = get_bits1(gb) ? 1.0 : -1.0;
01284             pos1           = get_bits(gb, offset_nbits);
01285             fcb.x[fcb.n]   = n + 5 * pos1;
01286             fcb.y[fcb.n++] = sign;
01287             if (n < frame_desc->dbl_pulses) {
01288                 pos2           = get_bits(gb, offset_nbits);
01289                 fcb.x[fcb.n]   = n + 5 * pos2;
01290                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01291             }
01292         }
01293     }
01294     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01295 
01296     /* Calculate gain for adaptive & fixed codebook signal.
01297      * see ff_amr_set_fixed_gain(). */
01298     idx = get_bits(gb, 7);
01299     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01300                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01301     acb_gain = wmavoice_gain_codebook_acb[idx];
01302     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01303                         -2.9957322736 /* log(0.05) */,
01304                          1.6094379124 /* log(5.0)  */);
01305 
01306     gain_weight = 8 >> frame_desc->log_n_blocks;
01307     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01308             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01309     for (n = 0; n < gain_weight; n++)
01310         s->gain_pred_err[n] = pred_err;
01311 
01312     /* Calculation of adaptive codebook */
01313     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01314         int len;
01315         for (n = 0; n < size; n += len) {
01316             int next_idx_sh16;
01317             int abs_idx    = block_idx * size + n;
01318             int pitch_sh16 = (s->last_pitch_val << 16) +
01319                              s->pitch_diff_sh16 * abs_idx;
01320             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01321             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01322             idx            = idx_sh16 >> 16;
01323             if (s->pitch_diff_sh16) {
01324                 if (s->pitch_diff_sh16 > 0) {
01325                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01326                 } else
01327                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01328                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01329                               1, size - n);
01330             } else
01331                 len = size;
01332 
01333             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01334                                   wmavoice_ipol1_coeffs, 17,
01335                                   idx, 9, len);
01336         }
01337     } else /* ACB_TYPE_HAMMING */ {
01338         int block_pitch = block_pitch_sh2 >> 2;
01339         idx             = block_pitch_sh2 & 3;
01340         if (idx) {
01341             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01342                                   wmavoice_ipol2_coeffs, 4,
01343                                   idx, 8, size);
01344         } else
01345             av_memcpy_backptr(excitation, sizeof(float) * block_pitch,
01346                               sizeof(float) * size);
01347     }
01348 
01349     /* Interpolate ACB/FCB and use as excitation signal */
01350     ff_weighted_vector_sumf(excitation, excitation, pulses,
01351                             acb_gain, fcb_gain, size);
01352 }
01353 
01370 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01371                         int block_idx, int size,
01372                         int block_pitch_sh2,
01373                         const double *lsps, const double *prev_lsps,
01374                         const struct frame_type_desc *frame_desc,
01375                         float *excitation, float *synth)
01376 {
01377     double i_lsps[MAX_LSPS];
01378     float lpcs[MAX_LSPS];
01379     float fac;
01380     int n;
01381 
01382     if (frame_desc->acb_type == ACB_TYPE_NONE)
01383         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01384     else
01385         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01386                             frame_desc, excitation);
01387 
01388     /* convert interpolated LSPs to LPCs */
01389     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01390     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01391         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01392     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01393 
01394     /* Speech synthesis */
01395     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01396 }
01397 
01413 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01414                        float *samples,
01415                        const double *lsps, const double *prev_lsps,
01416                        float *excitation, float *synth)
01417 {
01418     WMAVoiceContext *s = ctx->priv_data;
01419     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01420     int pitch[MAX_BLOCKS], last_block_pitch;
01421 
01422     /* Parse frame type ("frame header"), see frame_descs */
01423     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01424         block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01425 
01426     if (bd_idx < 0) {
01427         av_log(ctx, AV_LOG_ERROR,
01428                "Invalid frame type VLC code, skipping\n");
01429         return -1;
01430     }
01431 
01432     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01433     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01434         /* Pitch is provided per frame, which is interpreted as the pitch of
01435          * the last sample of the last block of this frame. We can interpolate
01436          * the pitch of other blocks (and even pitch-per-sample) by gradually
01437          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01438         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01439         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01440         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01441         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01442         if (s->last_acb_type == ACB_TYPE_NONE ||
01443             20 * abs(cur_pitch_val - s->last_pitch_val) >
01444                 (cur_pitch_val + s->last_pitch_val))
01445             s->last_pitch_val = cur_pitch_val;
01446 
01447         /* pitch per block */
01448         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01449             int fac = n * 2 + 1;
01450 
01451             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01452                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01453                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01454         }
01455 
01456         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01457         s->pitch_diff_sh16 =
01458             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01459     }
01460 
01461     /* Global gain (if silence) and pitch-adaptive window coordinates */
01462     switch (frame_descs[bd_idx].fcb_type) {
01463     case FCB_TYPE_SILENCE:
01464         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01465         break;
01466     case FCB_TYPE_AW_PULSES:
01467         aw_parse_coords(s, gb, pitch);
01468         break;
01469     }
01470 
01471     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01472         int bl_pitch_sh2;
01473 
01474         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01475         switch (frame_descs[bd_idx].acb_type) {
01476         case ACB_TYPE_HAMMING: {
01477             /* Pitch is given per block. Per-block pitches are encoded as an
01478              * absolute value for the first block, and then delta values
01479              * relative to this value) for all subsequent blocks. The scale of
01480              * this pitch value is semi-logaritmic compared to its use in the
01481              * decoder, so we convert it to normal scale also. */
01482             int block_pitch,
01483                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01484                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01485                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01486 
01487             if (n == 0) {
01488                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01489             } else
01490                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01491                                  get_bits(gb, s->block_delta_pitch_nbits);
01492             /* Convert last_ so that any next delta is within _range */
01493             last_block_pitch = av_clip(block_pitch,
01494                                        s->block_delta_pitch_hrange,
01495                                        s->block_pitch_range -
01496                                            s->block_delta_pitch_hrange);
01497 
01498             /* Convert semi-log-style scale back to normal scale */
01499             if (block_pitch < t1) {
01500                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01501             } else {
01502                 block_pitch -= t1;
01503                 if (block_pitch < t2) {
01504                     bl_pitch_sh2 =
01505                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01506                 } else {
01507                     block_pitch -= t2;
01508                     if (block_pitch < t3) {
01509                         bl_pitch_sh2 =
01510                             (s->block_conv_table[2] + block_pitch) << 2;
01511                     } else
01512                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01513                 }
01514             }
01515             pitch[n] = bl_pitch_sh2 >> 2;
01516             break;
01517         }
01518 
01519         case ACB_TYPE_ASYMMETRIC: {
01520             bl_pitch_sh2 = pitch[n] << 2;
01521             break;
01522         }
01523 
01524         default: // ACB_TYPE_NONE has no pitch
01525             bl_pitch_sh2 = 0;
01526             break;
01527         }
01528 
01529         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01530                     lsps, prev_lsps, &frame_descs[bd_idx],
01531                     &excitation[n * block_nsamples],
01532                     &synth[n * block_nsamples]);
01533     }
01534 
01535     /* Averaging projection filter, if applicable. Else, just copy samples
01536      * from synthesis buffer */
01537     if (s->do_apf) {
01538         double i_lsps[MAX_LSPS];
01539         float lpcs[MAX_LSPS];
01540 
01541         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01542             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01543         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01544         postfilter(s, synth, samples, 80, lpcs,
01545                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01546                    frame_descs[bd_idx].fcb_type, pitch[0]);
01547 
01548         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01549             i_lsps[n] = cos(lsps[n]);
01550         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01551         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01552                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01553                    frame_descs[bd_idx].fcb_type, pitch[0]);
01554     } else
01555         memcpy(samples, synth, 160 * sizeof(synth[0]));
01556 
01557     /* Cache values for next frame */
01558     s->frame_cntr++;
01559     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01560     s->last_acb_type = frame_descs[bd_idx].acb_type;
01561     switch (frame_descs[bd_idx].acb_type) {
01562     case ACB_TYPE_NONE:
01563         s->last_pitch_val = 0;
01564         break;
01565     case ACB_TYPE_ASYMMETRIC:
01566         s->last_pitch_val = cur_pitch_val;
01567         break;
01568     case ACB_TYPE_HAMMING:
01569         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01570         break;
01571     }
01572 
01573     return 0;
01574 }
01575 
01588 static void stabilize_lsps(double *lsps, int num)
01589 {
01590     int n, m, l;
01591 
01592     /* set minimum value for first, maximum value for last and minimum
01593      * spacing between LSF values.
01594      * Very similar to ff_set_min_dist_lsf(), but in double. */
01595     lsps[0]       = FFMAX(lsps[0],       0.0015 * M_PI);
01596     for (n = 1; n < num; n++)
01597         lsps[n]   = FFMAX(lsps[n],       lsps[n - 1] + 0.0125 * M_PI);
01598     lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01599 
01600     /* reorder (looks like one-time / non-recursed bubblesort).
01601      * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
01602     for (n = 1; n < num; n++) {
01603         if (lsps[n] < lsps[n - 1]) {
01604             for (m = 1; m < num; m++) {
01605                 double tmp = lsps[m];
01606                 for (l = m - 1; l >= 0; l--) {
01607                     if (lsps[l] <= tmp) break;
01608                     lsps[l + 1] = lsps[l];
01609                 }
01610                 lsps[l + 1] = tmp;
01611             }
01612             break;
01613         }
01614     }
01615 }
01616 
01626 static int check_bits_for_superframe(GetBitContext *orig_gb,
01627                                      WMAVoiceContext *s)
01628 {
01629     GetBitContext s_gb, *gb = &s_gb;
01630     int n, need_bits, bd_idx;
01631     const struct frame_type_desc *frame_desc;
01632 
01633     /* initialize a copy */
01634     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01635     skip_bits_long(gb, get_bits_count(orig_gb));
01636     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01637 
01638     /* superframe header */
01639     if (get_bits_left(gb) < 14)
01640         return 1;
01641     if (!get_bits1(gb))
01642         return -1;                        // WMAPro-in-WMAVoice superframe
01643     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01644     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01645         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01646             return 1;
01647         skip_bits_long(gb, s->sframe_lsp_bitsize);
01648     }
01649 
01650     /* frames */
01651     for (n = 0; n < MAX_FRAMES; n++) {
01652         int aw_idx_is_ext = 0;
01653 
01654         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01655            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01656            skip_bits_long(gb, s->frame_lsp_bitsize);
01657         }
01658         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01659         if (bd_idx < 0)
01660             return -1;                   // invalid frame type VLC code
01661         frame_desc = &frame_descs[bd_idx];
01662         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01663             if (get_bits_left(gb) < s->pitch_nbits)
01664                 return 1;
01665             skip_bits_long(gb, s->pitch_nbits);
01666         }
01667         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01668             skip_bits(gb, 8);
01669         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01670             int tmp = get_bits(gb, 6);
01671             if (tmp >= 0x36) {
01672                 skip_bits(gb, 2);
01673                 aw_idx_is_ext = 1;
01674             }
01675         }
01676 
01677         /* blocks */
01678         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01679             need_bits = s->block_pitch_nbits +
01680                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01681         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01682             need_bits = 2 * !aw_idx_is_ext;
01683         } else
01684             need_bits = 0;
01685         need_bits += frame_desc->frame_size;
01686         if (get_bits_left(gb) < need_bits)
01687             return 1;
01688         skip_bits_long(gb, need_bits);
01689     }
01690 
01691     return 0;
01692 }
01693 
01714 static int synth_superframe(AVCodecContext *ctx,
01715                             float *samples, int *data_size)
01716 {
01717     WMAVoiceContext *s = ctx->priv_data;
01718     GetBitContext *gb = &s->gb, s_gb;
01719     int n, res, n_samples = 480;
01720     double lsps[MAX_FRAMES][MAX_LSPS];
01721     const double *mean_lsf = s->lsps == 16 ?
01722         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01723     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01724     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01725 
01726     memcpy(synth,      s->synth_history,
01727            s->lsps             * sizeof(*synth));
01728     memcpy(excitation, s->excitation_history,
01729            s->history_nsamples * sizeof(*excitation));
01730 
01731     if (s->sframe_cache_size > 0) {
01732         gb = &s_gb;
01733         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01734         s->sframe_cache_size = 0;
01735     }
01736 
01737     if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01738 
01739     /* First bit is speech/music bit, it differentiates between WMAVoice
01740      * speech samples (the actual codec) and WMAVoice music samples, which
01741      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01742      * the wild yet. */
01743     if (!get_bits1(gb)) {
01744         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01745         return -1;
01746     }
01747 
01748     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01749     if (get_bits1(gb)) {
01750         if ((n_samples = get_bits(gb, 12)) > 480) {
01751             av_log(ctx, AV_LOG_ERROR,
01752                    "Superframe encodes >480 samples (%d), not allowed\n",
01753                    n_samples);
01754             return -1;
01755         }
01756     }
01757     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01758     if (s->has_residual_lsps) {
01759         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01760 
01761         for (n = 0; n < s->lsps; n++)
01762             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01763 
01764         if (s->lsps == 10) {
01765             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01766         } else /* s->lsps == 16 */
01767             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01768 
01769         for (n = 0; n < s->lsps; n++) {
01770             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01771             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01772             lsps[2][n] += mean_lsf[n];
01773         }
01774         for (n = 0; n < 3; n++)
01775             stabilize_lsps(lsps[n], s->lsps);
01776     }
01777 
01778     /* Parse frames, optionally preceeded by per-frame (independent) LSPs. */
01779     for (n = 0; n < 3; n++) {
01780         if (!s->has_residual_lsps) {
01781             int m;
01782 
01783             if (s->lsps == 10) {
01784                 dequant_lsp10i(gb, lsps[n]);
01785             } else /* s->lsps == 16 */
01786                 dequant_lsp16i(gb, lsps[n]);
01787 
01788             for (m = 0; m < s->lsps; m++)
01789                 lsps[n][m] += mean_lsf[m];
01790             stabilize_lsps(lsps[n], s->lsps);
01791         }
01792 
01793         if ((res = synth_frame(ctx, gb, n,
01794                                &samples[n * MAX_FRAMESIZE],
01795                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01796                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01797                                &synth[s->lsps + n * MAX_FRAMESIZE])))
01798             return res;
01799     }
01800 
01801     /* Statistics? FIXME - we don't check for length, a slight overrun
01802      * will be caught by internal buffer padding, and anything else
01803      * will be skipped, not read. */
01804     if (get_bits1(gb)) {
01805         res = get_bits(gb, 4);
01806         skip_bits(gb, 10 * (res + 1));
01807     }
01808 
01809     /* Specify nr. of output samples */
01810     *data_size = n_samples * sizeof(float);
01811 
01812     /* Update history */
01813     memcpy(s->prev_lsps,           lsps[2],
01814            s->lsps             * sizeof(*s->prev_lsps));
01815     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01816            s->lsps             * sizeof(*synth));
01817     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01818            s->history_nsamples * sizeof(*excitation));
01819     if (s->do_apf)
01820         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01821                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01822 
01823     return 0;
01824 }
01825 
01833 static int parse_packet_header(WMAVoiceContext *s)
01834 {
01835     GetBitContext *gb = &s->gb;
01836     unsigned int res;
01837 
01838     if (get_bits_left(gb) < 11)
01839         return 1;
01840     skip_bits(gb, 4);          // packet sequence number
01841     s->has_residual_lsps = get_bits1(gb);
01842     do {
01843         res = get_bits(gb, 6); // number of superframes per packet
01844                                // (minus first one if there is spillover)
01845         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01846             return 1;
01847     } while (res == 0x3F);
01848     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01849 
01850     return 0;
01851 }
01852 
01868 static void copy_bits(PutBitContext *pb,
01869                       const uint8_t *data, int size,
01870                       GetBitContext *gb, int nbits)
01871 {
01872     int rmn_bytes, rmn_bits;
01873 
01874     rmn_bits = rmn_bytes = get_bits_left(gb);
01875     if (rmn_bits < nbits)
01876         return;
01877     rmn_bits &= 7; rmn_bytes >>= 3;
01878     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01879         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01880     ff_copy_bits(pb, data + size - rmn_bytes,
01881                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01882 }
01883 
01895 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01896                                   int *data_size, AVPacket *avpkt)
01897 {
01898     WMAVoiceContext *s = ctx->priv_data;
01899     GetBitContext *gb = &s->gb;
01900     int size, res, pos;
01901 
01902     if (*data_size < 480 * sizeof(float)) {
01903         av_log(ctx, AV_LOG_ERROR,
01904                "Output buffer too small (%d given - %lu needed)\n",
01905                *data_size, 480 * sizeof(float));
01906         return -1;
01907     }
01908     *data_size = 0;
01909 
01910     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01911      * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
01912      * feeds us ASF packets, which may concatenate multiple "codec" packets
01913      * in a single "muxer" packet, so we artificially emulate that by
01914      * capping the packet size at ctx->block_align. */
01915     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01916     if (!size)
01917         return 0;
01918     init_get_bits(&s->gb, avpkt->data, size << 3);
01919 
01920     /* size == ctx->block_align is used to indicate whether we are dealing with
01921      * a new packet or a packet of which we already read the packet header
01922      * previously. */
01923     if (size == ctx->block_align) { // new packet header
01924         if ((res = parse_packet_header(s)) < 0)
01925             return res;
01926 
01927         /* If the packet header specifies a s->spillover_nbits, then we want
01928          * to push out all data of the previous packet (+ spillover) before
01929          * continuing to parse new superframes in the current packet. */
01930         if (s->spillover_nbits > 0) {
01931             if (s->sframe_cache_size > 0) {
01932                 int cnt = get_bits_count(gb);
01933                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01934                 flush_put_bits(&s->pb);
01935                 s->sframe_cache_size += s->spillover_nbits;
01936                 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01937                     *data_size > 0) {
01938                     cnt += s->spillover_nbits;
01939                     s->skip_bits_next = cnt & 7;
01940                     return cnt >> 3;
01941                 } else
01942                     skip_bits_long (gb, s->spillover_nbits - cnt +
01943                                     get_bits_count(gb)); // resync
01944             } else
01945                 skip_bits_long(gb, s->spillover_nbits);  // resync
01946         }
01947     } else if (s->skip_bits_next)
01948         skip_bits(gb, s->skip_bits_next);
01949 
01950     /* Try parsing superframes in current packet */
01951     s->sframe_cache_size = 0;
01952     s->skip_bits_next = 0;
01953     pos = get_bits_left(gb);
01954     if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01955         return res;
01956     } else if (*data_size > 0) {
01957         int cnt = get_bits_count(gb);
01958         s->skip_bits_next = cnt & 7;
01959         return cnt >> 3;
01960     } else if ((s->sframe_cache_size = pos) > 0) {
01961         /* rewind bit reader to start of last (incomplete) superframe... */
01962         init_get_bits(gb, avpkt->data, size << 3);
01963         skip_bits_long(gb, (size << 3) - pos);
01964         assert(get_bits_left(gb) == pos);
01965 
01966         /* ...and cache it for spillover in next packet */
01967         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01968         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01969         // FIXME bad - just copy bytes as whole and add use the
01970         // skip_bits_next field
01971     }
01972 
01973     return size;
01974 }
01975 
01976 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01977 {
01978     WMAVoiceContext *s = ctx->priv_data;
01979 
01980     if (s->do_apf) {
01981         ff_rdft_end(&s->rdft);
01982         ff_rdft_end(&s->irdft);
01983         ff_dct_end(&s->dct);
01984         ff_dct_end(&s->dst);
01985     }
01986 
01987     return 0;
01988 }
01989 
01990 static av_cold void wmavoice_flush(AVCodecContext *ctx)
01991 {
01992     WMAVoiceContext *s = ctx->priv_data;
01993     int n;
01994 
01995     s->postfilter_agc    = 0;
01996     s->sframe_cache_size = 0;
01997     s->skip_bits_next    = 0;
01998     for (n = 0; n < s->lsps; n++)
01999         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02000     memset(s->excitation_history, 0,
02001            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02002     memset(s->synth_history,      0,
02003            sizeof(*s->synth_history)      * MAX_LSPS);
02004     memset(s->gain_pred_err,      0,
02005            sizeof(s->gain_pred_err));
02006 
02007     if (s->do_apf) {
02008         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02009                sizeof(*s->synth_filter_out_buf) * s->lsps);
02010         memset(s->dcf_mem,              0,
02011                sizeof(*s->dcf_mem)              * 2);
02012         memset(s->zero_exc_pf,          0,
02013                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02014         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02015     }
02016 }
02017 
02018 AVCodec wmavoice_decoder = {
02019     "wmavoice",
02020     AVMEDIA_TYPE_AUDIO,
02021     CODEC_ID_WMAVOICE,
02022     sizeof(WMAVoiceContext),
02023     wmavoice_decode_init,
02024     NULL,
02025     wmavoice_decode_end,
02026     wmavoice_decode_packet,
02027     CODEC_CAP_SUBFRAMES,
02028     .flush     = wmavoice_flush,
02029     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02030 };