FFmpeg: libavcodec/wmavoice.c Source File

00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of FFmpeg.
00006  *
00007  * FFmpeg is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * FFmpeg is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with FFmpeg; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "dct.h"
00040 #include "rdft.h"
00041 #include "sinewin.h"
00042 
/* Size limits shared by the decoder tables and buffers below. */
00043 #define MAX_BLOCKS           8   ///< upper bound on blocks per frame; the largest n_blocks in frame_descs[] is 8
00044 #define MAX_LSPS             16  ///< largest LSP count; wmavoice_decode_init() selects either 10 or 16
00045 #define MAX_LSPS_ALIGN16     16  ///< headroom kept in front of synth_filter_out_buf; postfilter() stores s->lsps history samples there
00046 
00047 #define MAX_FRAMES           3   ///< multiplier used for MAX_SFRAMESIZE; presumably frames per superframe -- TODO confirm
00048 #define MAX_FRAMESIZE        160 ///< samples per frame; postfilter() handles at most MAX_FRAMESIZE / 2 samples per call
00049 #define MAX_SIGNAL_HISTORY   416 ///< upper bound on history_nsamples, enforced in wmavoice_decode_init()
00050 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES)
00052 #define SFRAME_CACHE_MAXSIZE 256 ///< size in bytes of WMAVoiceContext.sframe_cache (excluding input padding)
00053 
00054 #define VLC_NBITS            6   ///< table size in bits handed to INIT_VLC_STATIC() for frame_type_vlc
00055 
00056 
/* Frame type VLC; (re)initialised by decode_vbmtree(). */
00059 static VLC frame_type_vlc;
00060 
/**
 * Values stored in frame_type_desc.acb_type and WMAVoiceContext.last_acb_type.
 * "ACB" presumably abbreviates "adaptive codebook" -- TODO confirm.
 */
00064 enum {
00065     ACB_TYPE_NONE       = 0, ///< used by the first two frame_descs[] entries; also the initial last_acb_type
00066     ACB_TYPE_ASYMMETRIC = 1, ///< used by frame_descs[] entries 2-7
00067 
00068 
00069 
00070 
00071     ACB_TYPE_HAMMING    = 2  ///< used by frame_descs[] entries 8-16
00072 
00073 
00074 };
00075 
/**
 * Values stored in frame_type_desc.fcb_type. The numeric order matters:
 * postfilter() tests "fcb_type >= FCB_TYPE_AW_PULSES".
 * "FCB" presumably abbreviates "fixed codebook" -- TODO confirm.
 */
00079 enum {
00080     FCB_TYPE_SILENCE    = 0, ///< wiener_denoise() skips the filter itself for this type and only merges cached output
00081 
00082 
00083     FCB_TYPE_HARDCODED  = 1, ///< calc_input_response() uses gain factor 5/13 instead of 5/14.7 for this type
00084 
00085     FCB_TYPE_AW_PULSES  = 2, ///< from this value upwards postfilter() attempts kalman_smoothen()
00086 
00087     FCB_TYPE_EXC_PULSES = 3, ///< used by all frame_descs[] entries from index 3 on
00088 
00089 
00090 };
00091 
/**
 * Static description of each of the 17 frame types. The index into
 * frame_descs[] is the frame type number that decode_vbmtree() places
 * into the VBM tree (its loop also runs over 17 types).
 */
00095 static const struct frame_type_desc {
00096     uint8_t n_blocks;     ///< number of blocks in the frame (1, 2, 4 or 8 in the table below)
00097 
00098     uint8_t log_n_blocks; ///< log2(n_blocks), as every table row shows
00099     uint8_t acb_type;     ///< one of the ACB_TYPE_* values
00100     uint8_t fcb_type;     ///< one of the FCB_TYPE_* values
00101     uint8_t dbl_pulses;   ///< 0, 2 or 5 in the table; meaning is not visible in this part of the file -- TODO confirm
00102 
00103 
00104     uint16_t frame_size;  ///< presumably the coded size of the frame type in bits -- TODO confirm against the frame parser
00105 
00106 } frame_descs[17] = {
00107     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00108     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00109     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00110     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00111     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00112     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00113     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00114     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00115     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00116     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00117     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00118     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00119     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00120     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00121     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00122     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00123     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00124 };
00125 
/**
 * Decoder private context (ctx->priv_data). Fields are commented only
 * where the code visible in this part of the file shows how they are set
 * or used; the remaining fields are handled further down in the file.
 */
00129 typedef struct {
00134     GetBitContext gb;             ///< bit reader; wmavoice_decode_init() points it at extradata + 22 to parse the VBM tree
00135 
00136 
00137 
00138     int8_t vbm_tree[25];          ///< filled by decode_vbmtree(); -1 (0xff) marks slots no frame type was assigned to
00139 
00140     int spillover_bitsize;        ///< 3 + ceil(log2(block_align)), set in wmavoice_decode_init()
00141 
00142 
00143     int history_nsamples;         ///< max_pitch_val + 8; rejected at init if above MAX_SIGNAL_HISTORY
00144 
00145 
00146     /* postfilter specific values */
00147     int do_apf;                   ///< bit 0 of the extradata flags; when set, init creates the RDFT/DCT contexts and sin/cos tables
00148 
00149     int denoise_strength;         ///< extradata flags bits 2-5, accepted range 0-11; row index into wmavoice_denoise_power_table
00150 
00151     int denoise_tilt_corr;        ///< extradata flag 0x40; enables the tilt compensation step in calc_input_response()
00152 
00153     int dc_level;                 ///< extradata flags bits 7-10; postfilter() applies its high-pass filter when this is > 8
00154 
00155 
00156     int lsps;                     ///< number of LSPs: 16 if extradata flag 0x1000 is set, else 10
00157     int lsp_q_mode;               ///< extradata flag 0x2000
00158     int lsp_def_mode;             ///< extradata flag 0x4000
00159 
00160     int frame_lsp_bitsize;        ///< 34 for 16 LSPs, 24 for 10 LSPs
00161 
00162     int sframe_lsp_bitsize;       ///< 60 for 16 LSPs, 48 for 10 LSPs
00163 
00164 
00165     int min_pitch_val;            ///< lower pitch bound derived from the sample rate at init; must be >= 1
00166     int max_pitch_val;            ///< upper pitch bound derived from the sample rate at init
00167     int pitch_nbits;              ///< ceil(log2(max_pitch_val - min_pitch_val))
00168 
00169     int block_pitch_nbits;        ///< ceil(log2(block_pitch_range))
00170 
00171     int block_pitch_range;        ///< computed at init from block_conv_table[] and min_pitch_val
00172     int block_delta_pitch_nbits;  ///< 1 + ceil(log2(block_delta_pitch_hrange))
00173 
00174 
00175 
00176     int block_delta_pitch_hrange; ///< (pitch range >> 3) & ~0xF; must be > 0
00177 
00178     uint16_t block_conv_table[4]; ///< { min_pitch_val, range * 25 / 64, range * 44 / 64, max_pitch_val - 1 }
00179 
00180 
00190     int spillover_nbits;          
00191 
00192 
00193 
00194     int has_residual_lsps;        
00195 
00196 
00197 
00198 
00199     int skip_bits_next;           
00200 
00201 
00202 
00203     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00206     int sframe_cache_size;        
00207 
00208 
00209 
00210 
00211     PutBitContext pb;             
00212 
00222     double prev_lsps[MAX_LSPS];   ///< initialised to evenly spaced values pi * (n + 1) / (lsps + 1)
00223 
00224     int last_pitch_val;           ///< initialised to 40
00225     int last_acb_type;            ///< initialised to ACB_TYPE_NONE
00226     int pitch_diff_sh16;          
00227 
00228     float silence_gain;           
00229 
00230     int aw_idx_is_ext;            ///< set by aw_parse_coords() when the 6-bit pulse index was >= 54 and 2 extra bits were read
00231 
00232     int aw_pulse_range;           ///< 24 if both block pitches exceed 32, else 16 (aw_parse_coords())
00233 
00234 
00235 
00236 
00237 
00238     int aw_n_pulses[2];           ///< per-block pulse count computed by aw_parse_coords()
00239 
00240 
00241     int aw_first_pulse_off[2];    ///< per-block offset of the first pulse computed by aw_parse_coords()
00242 
00243     int aw_next_pulse_off_cache;  ///< read by aw_pulse_set2() for the second block
00244 
00245 
00246 
00247 
00248 
00249     int frame_cntr;               
00250 
00251     float gain_pred_err[6];       
00252     float excitation_history[MAX_SIGNAL_HISTORY];
00256     float synth_history[MAX_LSPS]; 
00257 
00266     RDFTContext rdft, irdft;      ///< 7-bit forward (DFT_R2C) and inverse (IDFT_C2R) transforms, created only if do_apf
00267 
00268     DCTContext dct, dst;          ///< 6-bit DCT_I and DST_I transforms, created only if do_apf
00269 
00270     float sin[511], cos[511];     ///< tables built at init; calc_input_response() indexes them with 255 + a value clipped to [-255, 255]
00271 
00272     float postfilter_agc;         ///< gain memory handed to adaptive_gain_control()
00273 
00274     float dcf_mem[2];             ///< filter memory of the high-pass filter in postfilter()
00275     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00278     float denoise_filter_cache[MAX_FRAMESIZE]; ///< filter output that spilled past a block, added to later blocks by wiener_denoise()
00279     int   denoise_filter_cache_size; ///< number of valid entries in denoise_filter_cache
00280     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; ///< scratch buffer of wiener_denoise()
00282     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; ///< scratch buffer receiving the filter coefficients in wiener_denoise()
00284     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter() output, preceded by MAX_LSPS_ALIGN16 floats of history
00287 
00290 } WMAVoiceContext;
00291 
00301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00302 {
00303     static const uint8_t bits[] = {
00304          2,  2,  2,  4,  4,  4,
00305          6,  6,  6,  8,  8,  8,
00306         10, 10, 10, 12, 12, 12,
00307         14, 14, 14, 14
00308     };
00309     static const uint16_t codes[] = {
00310           0x0000, 0x0001, 0x0002,        //              00/01/10
00311           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00312           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00313           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00314           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00315           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00316           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00317     };
00318     int cntr[8], n, res;
00319 
00320     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00321     memset(cntr,     0,    sizeof(cntr));
00322     for (n = 0; n < 17; n++) {
00323         res = get_bits(gb, 3);
00324         if (cntr[res] > 3) // should be >= 3 + (res == 7))
00325             return -1;
00326         vbm_tree[res * 3 + cntr[res]++] = n;
00327     }
00328     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00329                     bits, 1, 1, codes, 2, 2, 132);
00330     return 0;
00331 }
00332 
00336 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00337 {
00338     int n, flags, pitch_range, lsp16_flag;
00339     WMAVoiceContext *s = ctx->priv_data;
00340 
00349     if (ctx->extradata_size != 46) {
00350         av_log(ctx, AV_LOG_ERROR,
00351                "Invalid extradata size %d (should be 46)\n",
00352                ctx->extradata_size);
00353         return -1;
00354     }
00355     flags                = AV_RL32(ctx->extradata + 18);
00356     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00357     s->do_apf            =    flags & 0x1;
00358     if (s->do_apf) {
00359         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00360         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00361         ff_dct_init(&s->dct,  6, DCT_I);
00362         ff_dct_init(&s->dst,  6, DST_I);
00363 
00364         ff_sine_window_init(s->cos, 256);
00365         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00366         for (n = 0; n < 255; n++) {
00367             s->sin[n]       = -s->sin[510 - n];
00368             s->cos[510 - n] =  s->cos[n];
00369         }
00370     }
00371     s->denoise_strength  =   (flags >> 2) & 0xF;
00372     if (s->denoise_strength >= 12) {
00373         av_log(ctx, AV_LOG_ERROR,
00374                "Invalid denoise filter strength %d (max=11)\n",
00375                s->denoise_strength);
00376         return -1;
00377     }
00378     s->denoise_tilt_corr = !!(flags & 0x40);
00379     s->dc_level          =   (flags >> 7) & 0xF;
00380     s->lsp_q_mode        = !!(flags & 0x2000);
00381     s->lsp_def_mode      = !!(flags & 0x4000);
00382     lsp16_flag           =    flags & 0x1000;
00383     if (lsp16_flag) {
00384         s->lsps               = 16;
00385         s->frame_lsp_bitsize  = 34;
00386         s->sframe_lsp_bitsize = 60;
00387     } else {
00388         s->lsps               = 10;
00389         s->frame_lsp_bitsize  = 24;
00390         s->sframe_lsp_bitsize = 48;
00391     }
00392     for (n = 0; n < s->lsps; n++)
00393         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00394 
00395     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00396     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00397         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00398         return -1;
00399     }
00400 
00401     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00402     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00403     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00404     if (pitch_range <= 0) {
00405         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00406         return -1;
00407     }
00408     s->pitch_nbits      = av_ceil_log2(pitch_range);
00409     s->last_pitch_val   = 40;
00410     s->last_acb_type    = ACB_TYPE_NONE;
00411     s->history_nsamples = s->max_pitch_val + 8;
00412 
00413     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00414         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00415             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00416 
00417         av_log(ctx, AV_LOG_ERROR,
00418                "Unsupported samplerate %d (min=%d, max=%d)\n",
00419                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00420 
00421         return -1;
00422     }
00423 
00424     s->block_conv_table[0]      = s->min_pitch_val;
00425     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00426     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00427     s->block_conv_table[3]      = s->max_pitch_val - 1;
00428     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00429     if (s->block_delta_pitch_hrange <= 0) {
00430         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00431         return -1;
00432     }
00433     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00434     s->block_pitch_range        = s->block_conv_table[2] +
00435                                   s->block_conv_table[3] + 1 +
00436                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00437     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00438 
00439     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00440 
00441     return 0;
00442 }
00443 
/**
 * Adaptive gain control: scale the post-filtered signal so that its level
 * follows that of the unfiltered synthesized speech.
 *
 * The per-sample gain is a first-order recursive average,
 * gain = alpha * gain + (1 - alpha) * sum|speech_synth| / sum|in|,
 * carried across calls through gain_mem.
 *
 * @param out          output buffer for the gain-corrected samples
 * @param in           post-filtered input samples
 * @param speech_synth reference (unfiltered) speech samples
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the recursive average
 * @param gain_mem     gain state; read on entry, updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* An all-zero (or empty) input block would otherwise divide by zero
     * and store Inf/NaN in the gain memory, corrupting every later block.
     * With no input energy there is nothing to scale, so add no gain. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00486 
00505 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00506                            const float *in, float *out, int size)
00507 {
00508     int n;
00509     float optimal_gain = 0, dot;
00510     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00511                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00512                 *best_hist_ptr;
00513 
00514     /* find best fitting point in history */
00515     do {
00516         dot = ff_dot_productf(in, ptr, size);
00517         if (dot > optimal_gain) {
00518             optimal_gain  = dot;
00519             best_hist_ptr = ptr;
00520         }
00521     } while (--ptr >= end);
00522 
00523     if (optimal_gain <= 0)
00524         return -1;
00525     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00526     if (dot <= 0) // would be 1.0
00527         return -1;
00528 
00529     if (optimal_gain <= dot) {
00530         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00531     } else
00532         dot = 0.625;
00533 
00534     /* actual smoothing */
00535     for (n = 0; n < size; n++)
00536         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00537 
00538     return 0;
00539 }
00540 
/**
 * Compute the tilt of a set of LPCs as the ratio of two sums: the lag-1
 * product sum (plus lpcs[0]) over the lag-0 product sum (plus 1).
 *
 * @param lpcs   LPC coefficients
 * @param n_lpcs number of coefficients in lpcs
 * @return the tilt factor
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0     + ff_dot_productf(lpcs, lpcs,     n_lpcs);
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / lag0;
}
00560 
/**
 * Derive the denoise filter coefficients from a set of (tilted) LPCs.
 *
 * @param s         decoder context; uses the rdft/irdft/dct/dst contexts,
 *                  the sin/cos tables, denoise_strength and denoise_tilt_corr
 * @param lpcs      input LPCs; transformed in place and overwritten, entries
 *                  0..64 are used as scratch after the forward RDFT
 * @param fcb_type  FCB_TYPE_* of the current frame; only selects the gain
 *                  factor (5/13 for FCB_TYPE_HARDCODED, 5/14.7 otherwise)
 * @param coeffs    output buffer; holds "remainder" coefficients on return,
 *                  entries remainder..127 are zeroed
 * @param remainder number of filter coefficients to keep
 */
00564 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00565                                 int fcb_type, float *coeffs, int remainder)
00566 {
00567     float last_coeff, min = 15.0, max = -15.0;
00568     float irange, angle_mul, gain_mul, range, sq;
00569     int n, idx;
00570 
00571     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00572     s->rdft.rdft_calc(&s->rdft, lpcs);
    /* log_range() stores log10 of a power value in var and widens the
     * running [min, max] interval to include it. */
00573 #define log_range(var, assign) do { \
00574         float tmp = log10f(assign);  var = tmp; \
00575         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00576     } while (0)
    /* lpcs[1] is handled separately from the (re, im) pairs; its log power
     * is parked in last_coeff and stored as lpcs[64] afterwards. lpcs[0] is
     * converted last because the loop still reads lpcs[2..127] while it
     * overwrites lpcs[1..63]. */
00577     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00578     for (n = 1; n < 64; n++)
00579         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00580                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00581     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00582 #undef log_range
00583     range    = max - min;
00584     lpcs[64] = last_coeff;
00585 
00586     /* Now, use this spectrum to pick out these frequencies with higher
00587      * (relative) power/energy (which we then take to be "not noise"),
00588      * and set up a table (still in lpc[]) of (relative) gains per frequency.
00589      * These frequencies will be maintained, while others ("noise") will be
00590      * decreased in the filter output. */
    /* NOTE(review): a flat spectrum (range == 0) or a zero power value
     * (log10f(0) == -inf) makes irange or its product non-finite before it
     * reaches lrint(); nothing visible here guards against that -- confirm
     * that the inputs cannot produce it. */
00591     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00592     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00593                                                           (5.0 / 14.7));
00594     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00595     for (n = 0; n <= 64; n++) {
00596         float pwr;
00597 
00598         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00599         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00600         lpcs[n] = angle_mul * pwr;
00601 
00602         /* 70.57 =~ 1/log10(1.0331663) */
00603         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00604         if (idx > 127) { // fallback if index falls outside table range
00605             coeffs[n] = wmavoice_energy_table[127] *
00606                         powf(1.0331663, idx - 127);
00607         } else
00608             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00609     }
00610 
00611     /* calculate the Hilbert transform of the gains, which we do (since this
00612      * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
00613      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00614      * "moment" of the LPCs in this filter. */
00615     s->dct.dct_calc(&s->dct, lpcs);
00616     s->dst.dct_calc(&s->dst, lpcs);
00617 
00618     /* Split out the coefficient indexes into phase/magnitude pairs */
    /* The sin/cos tables hold 511 entries, hence the "255 + clip to
     * [-255, 255]" indexing. The loop walks n downwards from 63 to 1,
     * alternating the sign applied to lpcs[64], and expands coeffs[n] in
     * place into the pair coeffs[2n], coeffs[2n + 1]. */
00619     idx = 255 + av_clip(lpcs[64],               -255, 255);
00620     coeffs[0]  = coeffs[0]  * s->cos[idx];
00621     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00622     last_coeff = coeffs[64] * s->cos[idx];
00623     for (n = 63;; n--) {
00624         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00625         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00626         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00627 
00628         if (!--n) break;
00629 
00630         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00633     }
00634     coeffs[1] = last_coeff;
00635 
00636     /* move into real domain */
00637     s->irdft.rdft_calc(&s->irdft, coeffs);
00638 
00639     /* tilt correction and normalize scale */
00640     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00641     if (s->denoise_tilt_corr) {
00642         float tilt_mem = 0;
00643 
00644         coeffs[remainder - 1] = 0;
00645         ff_tilt_compensation(&tilt_mem,
00646                              -1.8 * tilt_factor(coeffs, remainder - 1),
00647                              coeffs, remainder);
00648     }
    /* scale so that the sum of squared coefficients becomes 1 / 64^2 */
00649     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00650     for (n = 0; n < remainder; n++)
00651         coeffs[n] *= sq;
00652 }
00653 
/**
 * Apply the denoise filter to one block of re-synthesized speech.
 *
 * For non-silence frames the filter coefficients are derived from the LPCs
 * (calc_input_response()) and applied by multiplying the spectra of signal
 * and coefficients. Filter output that extends past the block is kept in
 * s->denoise_filter_cache and added to the following block(s).
 *
 * @param s        decoder context (scratch buffers, transforms, cache)
 * @param fcb_type FCB_TYPE_* of the frame; for FCB_TYPE_SILENCE only the
 *                 cached output of earlier blocks is merged in
 * @param synth_pf in/out signal buffer; size samples are valid on entry and
 *                 the buffer is used as a 128-float transform buffer
 * @param size     number of samples in the block
 * @param lpcs     LPCs of the block (s->lsps values)
 */
00680 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00681                            float *synth_pf, int size,
00682                            const float *lpcs)
00683 {
    /* remainder is assigned and read only on the non-silence paths */
00684     int remainder, lim, n;
00685 
00686     if (fcb_type != FCB_TYPE_SILENCE) {
00687         float *tilted_lpcs = s->tilted_lpcs_pf,
00688               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00689 
        /* build { 1, lpcs[0..lsps-1], 0, ... } (128 floats) and tilt it */
00690         tilted_lpcs[0]           = 1.0;
00691         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00692         memset(&tilted_lpcs[s->lsps + 1], 0,
00693                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00694         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00695                              tilted_lpcs, s->lsps + 2);
00696 
00697         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00698          * size is applied to the next frame. All input beyond this is zero,
00699          * and thus all output beyond this will go towards zero, hence we can
00700          * limit to min(size-1, 127-size) as a performance consideration. */
00701         remainder = FFMIN(127 - size, size - 1);
00702         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00703 
00704         /* apply coefficients (in frequency spectrum domain), i.e. complex
00705          * number multiplication */
00706         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00707         s->rdft.rdft_calc(&s->rdft, synth_pf);
00708         s->rdft.rdft_calc(&s->rdft, coeffs);
        /* entries 0 and 1 are multiplied as plain real values; the rest
         * are (re, im) pairs */
00709         synth_pf[0] *= coeffs[0];
00710         synth_pf[1] *= coeffs[1];
00711         for (n = 1; n < 64; n++) {
00712             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00713             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00714             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00715         }
00716         s->irdft.rdft_calc(&s->irdft, synth_pf);
00717     }
00718 
00719     /* merge filter output with the history of previous runs */
00720     if (s->denoise_filter_cache_size) {
00721         lim = FFMIN(s->denoise_filter_cache_size, size);
00722         for (n = 0; n < lim; n++)
00723             synth_pf[n] += s->denoise_filter_cache[n];
00724         s->denoise_filter_cache_size -= lim;
        /* when lim < size the cache is now empty and this moves 0 bytes;
         * otherwise lim == size and the unused tail moves to the front */
00725         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00726                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00727     }
00728 
00729     /* move remainder of filter output into a cache for future runs */
00730     if (fcb_type != FCB_TYPE_SILENCE) {
        /* add onto whatever is still cached, then append the rest */
00731         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00732         for (n = 0; n < lim; n++)
00733             s->denoise_filter_cache[n] += synth_pf[size + n];
00734         if (lim < remainder) {
00735             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00736                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00737             s->denoise_filter_cache_size = remainder;
00738         }
00739     }
00740 }
00741 
00762 static void postfilter(WMAVoiceContext *s, const float *synth,
00763                        float *samples,    int size,
00764                        const float *lpcs, float *zero_exc_pf,
00765                        int fcb_type,      int pitch)
00766 {
00767     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00768           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00769           *synth_filter_in = zero_exc_pf;
00770 
00771     assert(size <= MAX_FRAMESIZE / 2);
00772 
00773     /* generate excitation from input signal */
00774     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00775 
00776     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00777         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00778         synth_filter_in = synth_filter_in_buf;
00779 
00780     /* re-synthesize speech after smoothening, and keep history */
00781     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00782                                  synth_filter_in, size, s->lsps);
00783     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00784            sizeof(synth_pf[0]) * s->lsps);
00785 
00786     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00787 
00788     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00789                           &s->postfilter_agc);
00790 
00791     if (s->dc_level > 8) {
00792         /* remove ultra-low frequency DC noise / highpass filter;
00793          * coefficients are identical to those used in SIPR decoding,
00794          * and very closely resemble those used in AMR-NB decoding. */
00795         ff_acelp_apply_order_2_transfer_function(samples, samples,
00796             (const float[2]) { -1.99997,      1.0 },
00797             (const float[2]) { -1.9330735188, 0.93589198496 },
00798             0.93980580475, s->dcf_mem, size);
00799     }
00800 }
/**
 * Dequantize LSPs as the sum of several codebook stages.
 *
 * For every stage n the codebook row values[n] is scaled by mul_q[n],
 * offset by base_q[n] and accumulated into the output. The codebooks of
 * the stages are stored back to back in table; stage n holds sizes[n] rows
 * of num bytes each.
 *
 * @param lsps     output, num values (overwritten)
 * @param num      number of values per codebook row
 * @param values   selected row index per stage
 * @param sizes    number of rows in each stage's codebook
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage scale factor
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *row  = codebook + values[stage] * num;
        const double offset = base_q[stage];
        const double scale  = mul_q[stage];
        int i;

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * row[i];

        /* step over this stage's codebook to reach the next one */
        codebook += sizes[stage] * num;
    }
}
00836 
00848 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00849 {
00850     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00851     static const double mul_lsf[4] = {
00852         5.2187144800e-3,    1.4626986422e-3,
00853         9.6179549166e-4,    1.1325736225e-3
00854     };
00855     static const double base_lsf[4] = {
00856         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00857         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00858     };
00859     uint16_t v[4];
00860 
00861     v[0] = get_bits(gb, 8);
00862     v[1] = get_bits(gb, 6);
00863     v[2] = get_bits(gb, 5);
00864     v[3] = get_bits(gb, 5);
00865 
00866     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00867                  mul_lsf, base_lsf);
00868 }
00869 
00874 static void dequant_lsp10r(GetBitContext *gb,
00875                            double *i_lsps, const double *old,
00876                            double *a1, double *a2, int q_mode)
00877 {
00878     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00879     static const double mul_lsf[3] = {
00880         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00881     };
00882     static const double base_lsf[3] = {
00883         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00884     };
00885     const float (*ipol_tab)[2][10] = q_mode ?
00886         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00887     uint16_t interpol, v[3];
00888     int n;
00889 
00890     dequant_lsp10i(gb, i_lsps);
00891 
00892     interpol = get_bits(gb, 5);
00893     v[0]     = get_bits(gb, 7);
00894     v[1]     = get_bits(gb, 6);
00895     v[2]     = get_bits(gb, 6);
00896 
00897     for (n = 0; n < 10; n++) {
00898         double delta = old[n] - i_lsps[n];
00899         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00900         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00901     }
00902 
00903     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00904                  mul_lsf, base_lsf);
00905 }
00906 
00910 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00911 {
00912     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00913     static const double mul_lsf[5] = {
00914         3.3439586280e-3,    6.9908173703e-4,
00915         3.3216608306e-3,    1.0334960326e-3,
00916         3.1899104283e-3
00917     };
00918     static const double base_lsf[5] = {
00919         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00920         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00921         M_PI * -1.29816e-1
00922     };
00923     uint16_t v[5];
00924 
00925     v[0] = get_bits(gb, 8);
00926     v[1] = get_bits(gb, 6);
00927     v[2] = get_bits(gb, 7);
00928     v[3] = get_bits(gb, 6);
00929     v[4] = get_bits(gb, 7);
00930 
00931     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00932                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00933     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00934                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00935     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00936                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00937 }
00938 
00943 static void dequant_lsp16r(GetBitContext *gb,
00944                            double *i_lsps, const double *old,
00945                            double *a1, double *a2, int q_mode)
00946 {
00947     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00948     static const double mul_lsf[3] = {
00949         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00950     };
00951     static const double base_lsf[3] = {
00952         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00953     };
00954     const float (*ipol_tab)[2][16] = q_mode ?
00955         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00956     uint16_t interpol, v[3];
00957     int n;
00958 
00959     dequant_lsp16i(gb, i_lsps);
00960 
00961     interpol = get_bits(gb, 5);
00962     v[0]     = get_bits(gb, 7);
00963     v[1]     = get_bits(gb, 7);
00964     v[2]     = get_bits(gb, 7);
00965 
00966     for (n = 0; n < 16; n++) {
00967         double delta = old[n] - i_lsps[n];
00968         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00969         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00970     }
00971 
00972     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00973                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00974     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00975                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00976     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00977                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00978 }
00979 
/**
 * Read the pulse position index from the bitstream and derive, for both
 * blocks of the frame, the number of pulses and the offset of the first
 * pulse. Results are stored in s->aw_idx_is_ext, s->aw_pulse_range,
 * s->aw_n_pulses[] and s->aw_first_pulse_off[].
 *
 * @param s     decoder context, receives the results
 * @param gb    bit reader; 6 bits are read, plus 2 more if the value is >= 54
 * @param pitch pitch lag of block 0 and block 1; both are used as loop step
 *              and divisor, so this assumes they are > 0 -- confirm against
 *              the caller
 */
00993 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00994                             const int *pitch)
00995 {
00996     static const int16_t start_offset[94] = {
00997         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
00998          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
00999          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01000          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01001          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01002          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01003         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01004         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01005     };
01006     int bits, offset;
01007 
01008     /* position of pulse */
    /* Values 54-63 are extended with 2 more bits: 54 + (v - 54) * 4 + x,
     * giving indices 54-93, which matches the 94 table entries. */
01009     s->aw_idx_is_ext = 0;
01010     if ((bits = get_bits(gb, 6)) >= 54) {
01011         s->aw_idx_is_ext = 1;
01012         bits += (bits - 54) * 3 + get_bits(gb, 2);
01013     }
01014 
01015     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01016      * the distribution of the pulses in each block contained in this frame. */
01017     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
    /* advance a negative start offset in steps of pitch[0] until >= 0 */
01018     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
    /* ceil((MAX_FRAMESIZE / 2 - offset) / pitch[0]) for a non-negative numerator */
01019     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01020     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01021     offset                  += s->aw_n_pulses[0] * pitch[0];
01022     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
    /* block 1 offsets are relative to MAX_FRAMESIZE / 2 */
01023     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01024 
01025     /* if continuing from a position before the block, reset position to
01026      * start of block (when corrected for the range over which it can be
01027      * spread in aw_pulse_set1()). */
01028     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01029         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01030             s->aw_first_pulse_off[1] -= pitch[1];
01031         if (start_offset[bits] < 0)
01032             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01033                 s->aw_first_pulse_off[0] -= pitch[0];
01034     }
01035 }
01036 
01044 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01045                           int block_idx, AMRFixed *fcb)
01046 {
01047     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01048     uint16_t *use_mask = use_mask_mem + 2;
01049     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01050      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01051      * of idx are the position of the bit within a particular item in the
01052      * array (0 being the most significant bit, and 15 being the least
01053      * significant bit), and the remainder (>> 4) is the index in the
01054      * use_mask[]-array. This is faster and uses less memory than using a
01055      * 80-byte/80-int array. */
01056     int pulse_off = s->aw_first_pulse_off[block_idx],
01057         pulse_start, n, idx, range, aidx, start_off = 0;
01058 
01059     /* set offset of first pulse to within this block */
01060     if (s->aw_n_pulses[block_idx] > 0)
01061         while (pulse_off + s->aw_pulse_range < 1)
01062             pulse_off += fcb->pitch_lag;
01063 
01064     /* find range per pulse */
01065     if (s->aw_n_pulses[0] > 0) {
01066         if (block_idx == 0) {
01067             range = 32;
01068         } else /* block_idx = 1 */ {
01069             range = 8;
01070             if (s->aw_n_pulses[block_idx] > 0)
01071                 pulse_off = s->aw_next_pulse_off_cache;
01072         }
01073     } else
01074         range = 16;
01075     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01076 
01077     /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,
01078      * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus
01079      * we exclude that range from being pulsed again in this function. */
01080     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01081     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01082     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01083     if (s->aw_n_pulses[block_idx] > 0)
01084         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01085             int excl_range         = s->aw_pulse_range; // always 16 or 24
01086             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01087             int first_sh           = 16 - (idx & 15);
01088             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01089             excl_range            -= first_sh;
01090             if (excl_range >= 16) {
01091                 *use_mask_ptr++    = 0;
01092                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01093             } else
01094                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01095         }
01096 
01097     /* find the 'aidx'th offset that is not excluded */
01098     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01099     for (n = 0; n <= aidx; pulse_start++) {
01100         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01101         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01102             if (use_mask[0])      idx = 0x0F;
01103             else if (use_mask[1]) idx = 0x1F;
01104             else if (use_mask[2]) idx = 0x2F;
01105             else if (use_mask[3]) idx = 0x3F;
01106             else if (use_mask[4]) idx = 0x4F;
01107             else                  return;
01108             idx -= av_log2_16bit(use_mask[idx >> 4]);
01109         }
01110         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01111             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01112             n++;
01113             start_off = idx;
01114         }
01115     }
01116 
01117     fcb->x[fcb->n] = start_off;
01118     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01119     fcb->n++;
01120 
01121     /* set offset for next block, relative to start of that block */
01122     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01123     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01124 }
01125 
01133 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01134                           int block_idx, AMRFixed *fcb)
01135 {
01136     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01137     float v;
01138 
01139     if (s->aw_n_pulses[block_idx] > 0) {
01140         int n, v_mask, i_mask, sh, n_pulses;
01141 
01142         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01143             n_pulses = 3;
01144             v_mask   = 8;
01145             i_mask   = 7;
01146             sh       = 4;
01147         } else { // 4 pulses, 1:sign + 2:index each
01148             n_pulses = 4;
01149             v_mask   = 4;
01150             i_mask   = 3;
01151             sh       = 3;
01152         }
01153 
01154         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01155             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01156             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01157                                  s->aw_first_pulse_off[block_idx];
01158             while (fcb->x[fcb->n] < 0)
01159                 fcb->x[fcb->n] += fcb->pitch_lag;
01160             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01161                 fcb->n++;
01162         }
01163     } else {
01164         int num2 = (val & 0x1FF) >> 1, delta, idx;
01165 
01166         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01167         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01168         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01169         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01170         v = (val & 0x200) ? -1.0 : 1.0;
01171 
01172         fcb->no_repeat_mask |= 3 << fcb->n;
01173         fcb->x[fcb->n]       = idx - delta;
01174         fcb->y[fcb->n]       = v;
01175         fcb->x[fcb->n + 1]   = idx;
01176         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01177         fcb->n              += 2;
01178     }
01179 }
01180 
/**
 * Generate a deterministic pseudo-random offset from the frame counter and
 * block number, in the range [0, 1000 - block_size).
 *
 * With x = (block_num * 1877 + frame_cntr) reduced once by 0xFFFF, the
 * value computed is
 *     y = (x % 9) * 5 + 6;
 *     z = (49995 * x) / y;       (truncated to 16 bits)
 * and the result is z % (1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block inside the frame
 * @param block_size number of samples in the block
 * @return offset in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* Since y only takes 9 different values, the division is replaced by a
     * lookup table and FASTDIV-style multiplications:
     *     z = x * (49995 / y) + x * ((49995 % y) / y)
     * Each row belongs to one value of y; the first entry is 49995 / y and
     * the second is (49995 % y) times the 32-bit fixed-point reciprocal
     * of y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int seed = block_num * 1877 + frame_cntr;
    unsigned int row, frac_part, value;
    int ninth;

    /* max value of seed is 8*1877+0xFFFE=0x13AA6, so a single subtraction
     * is effectively a modulo (%) */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* seed / 9 as the high 32 bits of seed * (2^32 / 9), then seed % 9 */
    ninth = (int) (((int64_t) 477218589 * (int64_t) seed) >> 32);
    row   = seed - 9 * ninth;

    /* high 32 bits of the unsigned 64-bit product: the fractional part of
     * the division */
    frac_part = (unsigned int) (((uint64_t) seed *
                                 (uint64_t) div_tbl[row][1]) >> 32);

    /* value = seed * 49995 / (row * 5 + 6), truncated to 16 bits */
    value = (uint16_t) (seed * div_tbl[row][0] + frac_part);

    return value % (1000 - block_size);
}
01225 
01230 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01231                                  int block_idx, int size,
01232                                  const struct frame_type_desc *frame_desc,
01233                                  float *excitation)
01234 {
01235     float gain;
01236     int n, r_idx;
01237 
01238     assert(size <= MAX_FRAMESIZE);
01239 
01240     /* Set the offset from which we start reading wmavoice_std_codebook */
01241     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01242         r_idx = pRNG(s->frame_cntr, block_idx, size);
01243         gain  = s->silence_gain;
01244     } else /* FCB_TYPE_HARDCODED */ {
01245         r_idx = get_bits(gb, 8);
01246         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01247     }
01248 
01249     /* Clear gain prediction parameters */
01250     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01251 
01252     /* Apply gain to hardcoded codebook and use that as excitation signal */
01253     for (n = 0; n < size; n++)
01254         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01255 }
01256 
01261 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01262                                 int block_idx, int size,
01263                                 int block_pitch_sh2,
01264                                 const struct frame_type_desc *frame_desc,
01265                                 float *excitation)
01266 {
01267     static const float gain_coeff[6] = {
01268         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01269     };
01270     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01271     int n, idx, gain_weight;
01272     AMRFixed fcb;
01273 
01274     assert(size <= MAX_FRAMESIZE / 2);
01275     memset(pulses, 0, sizeof(*pulses) * size);
01276 
01277     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01278     fcb.pitch_fac      = 1.0;
01279     fcb.no_repeat_mask = 0;
01280     fcb.n              = 0;
01281 
01282     /* For the other frame types, this is where we apply the innovation
01283      * (fixed) codebook pulses of the speech signal. */
01284     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01285         aw_pulse_set1(s, gb, block_idx, &fcb);
01286         aw_pulse_set2(s, gb, block_idx, &fcb);
01287     } else /* FCB_TYPE_EXC_PULSES */ {
01288         int offset_nbits = 5 - frame_desc->log_n_blocks;
01289 
01290         fcb.no_repeat_mask = -1;
01291         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01292          * (instead of double) for a subset of pulses */
01293         for (n = 0; n < 5; n++) {
01294             float sign;
01295             int pos1, pos2;
01296 
01297             sign           = get_bits1(gb) ? 1.0 : -1.0;
01298             pos1           = get_bits(gb, offset_nbits);
01299             fcb.x[fcb.n]   = n + 5 * pos1;
01300             fcb.y[fcb.n++] = sign;
01301             if (n < frame_desc->dbl_pulses) {
01302                 pos2           = get_bits(gb, offset_nbits);
01303                 fcb.x[fcb.n]   = n + 5 * pos2;
01304                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01305             }
01306         }
01307     }
01308     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01309 
01310     /* Calculate gain for adaptive & fixed codebook signal.
01311      * see ff_amr_set_fixed_gain(). */
01312     idx = get_bits(gb, 7);
01313     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01314                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01315     acb_gain = wmavoice_gain_codebook_acb[idx];
01316     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01317                         -2.9957322736 /* log(0.05) */,
01318                          1.6094379124 /* log(5.0)  */);
01319 
01320     gain_weight = 8 >> frame_desc->log_n_blocks;
01321     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01322             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01323     for (n = 0; n < gain_weight; n++)
01324         s->gain_pred_err[n] = pred_err;
01325 
01326     /* Calculation of adaptive codebook */
01327     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01328         int len;
01329         for (n = 0; n < size; n += len) {
01330             int next_idx_sh16;
01331             int abs_idx    = block_idx * size + n;
01332             int pitch_sh16 = (s->last_pitch_val << 16) +
01333                              s->pitch_diff_sh16 * abs_idx;
01334             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01335             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01336             idx            = idx_sh16 >> 16;
01337             if (s->pitch_diff_sh16) {
01338                 if (s->pitch_diff_sh16 > 0) {
01339                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01340                 } else
01341                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01342                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01343                               1, size - n);
01344             } else
01345                 len = size;
01346 
01347             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01348                                   wmavoice_ipol1_coeffs, 17,
01349                                   idx, 9, len);
01350         }
01351     } else /* ACB_TYPE_HAMMING */ {
01352         int block_pitch = block_pitch_sh2 >> 2;
01353         idx             = block_pitch_sh2 & 3;
01354         if (idx) {
01355             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01356                                   wmavoice_ipol2_coeffs, 4,
01357                                   idx, 8, size);
01358         } else
01359             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01360                               sizeof(float) * size);
01361     }
01362 
01363     /* Interpolate ACB/FCB and use as excitation signal */
01364     ff_weighted_vector_sumf(excitation, excitation, pulses,
01365                             acb_gain, fcb_gain, size);
01366 }
01367 
01384 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01385                         int block_idx, int size,
01386                         int block_pitch_sh2,
01387                         const double *lsps, const double *prev_lsps,
01388                         const struct frame_type_desc *frame_desc,
01389                         float *excitation, float *synth)
01390 {
01391     double i_lsps[MAX_LSPS];
01392     float lpcs[MAX_LSPS];
01393     float fac;
01394     int n;
01395 
01396     if (frame_desc->acb_type == ACB_TYPE_NONE)
01397         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01398     else
01399         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01400                             frame_desc, excitation);
01401 
01402     /* convert interpolated LSPs to LPCs */
01403     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01404     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01405         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01406     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01407 
01408     /* Speech synthesis */
01409     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01410 }
01411 
01427 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01428                        float *samples,
01429                        const double *lsps, const double *prev_lsps,
01430                        float *excitation, float *synth)
01431 {
01432     WMAVoiceContext *s = ctx->priv_data;
01433     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01434     int pitch[MAX_BLOCKS], last_block_pitch;
01435 
01436     /* Parse frame type ("frame header"), see frame_descs */
01437     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01438         block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01439 
01440     if (bd_idx < 0) {
01441         av_log(ctx, AV_LOG_ERROR,
01442                "Invalid frame type VLC code, skipping\n");
01443         return -1;
01444     }
01445 
01446     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01447     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01448         /* Pitch is provided per frame, which is interpreted as the pitch of
01449          * the last sample of the last block of this frame. We can interpolate
01450          * the pitch of other blocks (and even pitch-per-sample) by gradually
01451          * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
01452         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01453         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01454         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01455         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01456         if (s->last_acb_type == ACB_TYPE_NONE ||
01457             20 * abs(cur_pitch_val - s->last_pitch_val) >
01458                 (cur_pitch_val + s->last_pitch_val))
01459             s->last_pitch_val = cur_pitch_val;
01460 
01461         /* pitch per block */
01462         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01463             int fac = n * 2 + 1;
01464 
01465             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01466                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01467                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01468         }
01469 
01470         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01471         s->pitch_diff_sh16 =
01472             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01473     }
01474 
01475     /* Global gain (if silence) and pitch-adaptive window coordinates */
01476     switch (frame_descs[bd_idx].fcb_type) {
01477     case FCB_TYPE_SILENCE:
01478         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01479         break;
01480     case FCB_TYPE_AW_PULSES:
01481         aw_parse_coords(s, gb, pitch);
01482         break;
01483     }
01484 
01485     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01486         int bl_pitch_sh2;
01487 
01488         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01489         switch (frame_descs[bd_idx].acb_type) {
01490         case ACB_TYPE_HAMMING: {
01491             /* Pitch is given per block. Per-block pitches are encoded as an
01492              * absolute value for the first block, and then delta values
01493              * relative to this value) for all subsequent blocks. The scale of
01494              * this pitch value is semi-logaritmic compared to its use in the
01495              * decoder, so we convert it to normal scale also. */
01496             int block_pitch,
01497                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01498                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01499                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01500 
01501             if (n == 0) {
01502                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01503             } else
01504                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01505                                  get_bits(gb, s->block_delta_pitch_nbits);
01506             /* Convert last_ so that any next delta is within _range */
01507             last_block_pitch = av_clip(block_pitch,
01508                                        s->block_delta_pitch_hrange,
01509                                        s->block_pitch_range -
01510                                            s->block_delta_pitch_hrange);
01511 
01512             /* Convert semi-log-style scale back to normal scale */
01513             if (block_pitch < t1) {
01514                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01515             } else {
01516                 block_pitch -= t1;
01517                 if (block_pitch < t2) {
01518                     bl_pitch_sh2 =
01519                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01520                 } else {
01521                     block_pitch -= t2;
01522                     if (block_pitch < t3) {
01523                         bl_pitch_sh2 =
01524                             (s->block_conv_table[2] + block_pitch) << 2;
01525                     } else
01526                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01527                 }
01528             }
01529             pitch[n] = bl_pitch_sh2 >> 2;
01530             break;
01531         }
01532 
01533         case ACB_TYPE_ASYMMETRIC: {
01534             bl_pitch_sh2 = pitch[n] << 2;
01535             break;
01536         }
01537 
01538         default: // ACB_TYPE_NONE has no pitch
01539             bl_pitch_sh2 = 0;
01540             break;
01541         }
01542 
01543         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01544                     lsps, prev_lsps, &frame_descs[bd_idx],
01545                     &excitation[n * block_nsamples],
01546                     &synth[n * block_nsamples]);
01547     }
01548 
01549     /* Averaging projection filter, if applicable. Else, just copy samples
01550      * from synthesis buffer */
01551     if (s->do_apf) {
01552         double i_lsps[MAX_LSPS];
01553         float lpcs[MAX_LSPS];
01554 
01555         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01556             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01557         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01558         postfilter(s, synth, samples, 80, lpcs,
01559                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01560                    frame_descs[bd_idx].fcb_type, pitch[0]);
01561 
01562         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01563             i_lsps[n] = cos(lsps[n]);
01564         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01566                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01567                    frame_descs[bd_idx].fcb_type, pitch[0]);
01568     } else
01569         memcpy(samples, synth, 160 * sizeof(synth[0]));
01570 
01571     /* Cache values for next frame */
01572     s->frame_cntr++;
01573     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01574     s->last_acb_type = frame_descs[bd_idx].acb_type;
01575     switch (frame_descs[bd_idx].acb_type) {
01576     case ACB_TYPE_NONE:
01577         s->last_pitch_val = 0;
01578         break;
01579     case ACB_TYPE_ASYMMETRIC:
01580         s->last_pitch_val = cur_pitch_val;
01581         break;
01582     case ACB_TYPE_HAMMING:
01583         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01584         break;
01585     }
01586 
01587     return 0;
01588 }
01589 
/**
 * Constrain a set of LSF values so that they form a stable filter.
 *
 * The first value is raised to at least 0.0015 * pi, every following value
 * is raised to at least its predecessor plus 0.0125 * pi, and the last
 * value is lowered to at most 0.9985 * pi. If the result is not in
 * ascending order, the array is sorted.
 *
 * @param lsps array of num LSF values, modified in place
 * @param num  number of values in lsps (must be at least 1)
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double lowest  = 0.0015 * M_PI;
    const double min_gap = 0.0125 * M_PI;
    const double highest = 0.9985 * M_PI;
    int i, j, out_of_order = 0;

    /* minimum value for the first, minimum spacing between neighbours and
     * maximum value for the last LSF.
     * Very similar to ff_set_min_dist_lsf(), but in double. */
    if (!(lsps[0] > lowest))
        lsps[0] = lowest;
    for (i = 1; i < num; i++) {
        const double bound = lsps[i - 1] + min_gap;

        if (!(lsps[i] > bound))
            lsps[i] = bound;
    }
    if (lsps[num - 1] > highest)
        lsps[num - 1] = highest;

    /* only sort when at least one neighbouring pair is out of order */
    for (i = 1; i < num && !out_of_order; i++)
        out_of_order = lsps[i] < lsps[i - 1];
    if (!out_of_order)
        return;

    /* insertion sort.
     * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
    for (i = 1; i < num; i++) {
        const double key = lsps[i];

        for (j = i - 1; j >= 0 && !(lsps[j] <= key); j--)
            lsps[j + 1] = lsps[j];
        lsps[j + 1] = key;
    }
}
01630 
01640 static int check_bits_for_superframe(GetBitContext *orig_gb,
01641                                      WMAVoiceContext *s)
01642 {
01643     GetBitContext s_gb, *gb = &s_gb;
01644     int n, need_bits, bd_idx;
01645     const struct frame_type_desc *frame_desc;
01646 
01647     /* initialize a copy */
01648     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01649     skip_bits_long(gb, get_bits_count(orig_gb));
01650     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01651 
01652     /* superframe header */
01653     if (get_bits_left(gb) < 14)
01654         return 1;
01655     if (!get_bits1(gb))
01656         return -1;                        // WMAPro-in-WMAVoice superframe
01657     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01658     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01659         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01660             return 1;
01661         skip_bits_long(gb, s->sframe_lsp_bitsize);
01662     }
01663 
01664     /* frames */
01665     for (n = 0; n < MAX_FRAMES; n++) {
01666         int aw_idx_is_ext = 0;
01667 
01668         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01669            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01670            skip_bits_long(gb, s->frame_lsp_bitsize);
01671         }
01672         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01673         if (bd_idx < 0)
01674             return -1;                   // invalid frame type VLC code
01675         frame_desc = &frame_descs[bd_idx];
01676         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01677             if (get_bits_left(gb) < s->pitch_nbits)
01678                 return 1;
01679             skip_bits_long(gb, s->pitch_nbits);
01680         }
01681         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01682             skip_bits(gb, 8);
01683         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01684             int tmp = get_bits(gb, 6);
01685             if (tmp >= 0x36) {
01686                 skip_bits(gb, 2);
01687                 aw_idx_is_ext = 1;
01688             }
01689         }
01690 
01691         /* blocks */
01692         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01693             need_bits = s->block_pitch_nbits +
01694                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01695         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01696             need_bits = 2 * !aw_idx_is_ext;
01697         } else
01698             need_bits = 0;
01699         need_bits += frame_desc->frame_size;
01700         if (get_bits_left(gb) < need_bits)
01701             return 1;
01702         skip_bits_long(gb, need_bits);
01703     }
01704 
01705     return 0;
01706 }
01707 
01728 static int synth_superframe(AVCodecContext *ctx,
01729                             float *samples, int *data_size)
01730 {
01731     WMAVoiceContext *s = ctx->priv_data;
01732     GetBitContext *gb = &s->gb, s_gb;
01733     int n, res, n_samples = 480;
01734     double lsps[MAX_FRAMES][MAX_LSPS];
01735     const double *mean_lsf = s->lsps == 16 ?
01736         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01737     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01738     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01739 
01740     memcpy(synth,      s->synth_history,
01741            s->lsps             * sizeof(*synth));
01742     memcpy(excitation, s->excitation_history,
01743            s->history_nsamples * sizeof(*excitation));
01744 
01745     if (s->sframe_cache_size > 0) {
01746         gb = &s_gb;
01747         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01748         s->sframe_cache_size = 0;
01749     }
01750 
01751     if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01752 
01753     /* First bit is speech/music bit, it differentiates between WMAVoice
01754      * speech samples (the actual codec) and WMAVoice music samples, which
01755      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01756      * the wild yet. */
01757     if (!get_bits1(gb)) {
01758         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01759         return -1;
01760     }
01761 
01762     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01763     if (get_bits1(gb)) {
01764         if ((n_samples = get_bits(gb, 12)) > 480) {
01765             av_log(ctx, AV_LOG_ERROR,
01766                    "Superframe encodes >480 samples (%d), not allowed\n",
01767                    n_samples);
01768             return -1;
01769         }
01770     }
01771     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01772     if (s->has_residual_lsps) {
01773         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01774 
01775         for (n = 0; n < s->lsps; n++)
01776             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01777 
01778         if (s->lsps == 10) {
01779             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01780         } else /* s->lsps == 16 */
01781             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01782 
01783         for (n = 0; n < s->lsps; n++) {
01784             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01785             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01786             lsps[2][n] += mean_lsf[n];
01787         }
01788         for (n = 0; n < 3; n++)
01789             stabilize_lsps(lsps[n], s->lsps);
01790     }
01791 
01792     /* Parse frames, optionally preceeded by per-frame (independent) LSPs. */
01793     for (n = 0; n < 3; n++) {
01794         if (!s->has_residual_lsps) {
01795             int m;
01796 
01797             if (s->lsps == 10) {
01798                 dequant_lsp10i(gb, lsps[n]);
01799             } else /* s->lsps == 16 */
01800                 dequant_lsp16i(gb, lsps[n]);
01801 
01802             for (m = 0; m < s->lsps; m++)
01803                 lsps[n][m] += mean_lsf[m];
01804             stabilize_lsps(lsps[n], s->lsps);
01805         }
01806 
01807         if ((res = synth_frame(ctx, gb, n,
01808                                &samples[n * MAX_FRAMESIZE],
01809                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01810                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01811                                &synth[s->lsps + n * MAX_FRAMESIZE])))
01812             return res;
01813     }
01814 
01815     /* Statistics? FIXME - we don't check for length, a slight overrun
01816      * will be caught by internal buffer padding, and anything else
01817      * will be skipped, not read. */
01818     if (get_bits1(gb)) {
01819         res = get_bits(gb, 4);
01820         skip_bits(gb, 10 * (res + 1));
01821     }
01822 
01823     /* Specify nr. of output samples */
01824     *data_size = n_samples * sizeof(float);
01825 
01826     /* Update history */
01827     memcpy(s->prev_lsps,           lsps[2],
01828            s->lsps             * sizeof(*s->prev_lsps));
01829     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01830            s->lsps             * sizeof(*synth));
01831     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01832            s->history_nsamples * sizeof(*excitation));
01833     if (s->do_apf)
01834         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01835                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01836 
01837     return 0;
01838 }
01839 
01847 static int parse_packet_header(WMAVoiceContext *s)
01848 {
01849     GetBitContext *gb = &s->gb;
01850     unsigned int res;
01851 
01852     if (get_bits_left(gb) < 11)
01853         return 1;
01854     skip_bits(gb, 4);          // packet sequence number
01855     s->has_residual_lsps = get_bits1(gb);
01856     do {
01857         res = get_bits(gb, 6); // number of superframes per packet
01858                                // (minus first one if there is spillover)
01859         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01860             return 1;
01861     } while (res == 0x3F);
01862     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01863 
01864     return 0;
01865 }
01866 
01882 static void copy_bits(PutBitContext *pb,
01883                       const uint8_t *data, int size,
01884                       GetBitContext *gb, int nbits)
01885 {
01886     int rmn_bytes, rmn_bits;
01887 
01888     rmn_bits = rmn_bytes = get_bits_left(gb);
01889     if (rmn_bits < nbits)
01890         return;
01891     if (nbits > pb->size_in_bits - put_bits_count(pb))
01892         return;
01893     rmn_bits &= 7; rmn_bytes >>= 3;
01894     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01895         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01896     ff_copy_bits(pb, data + size - rmn_bytes,
01897                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01898 }
01899 
01911 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01912                                   int *data_size, AVPacket *avpkt)
01913 {
01914     WMAVoiceContext *s = ctx->priv_data;
01915     GetBitContext *gb = &s->gb;
01916     int size, res, pos;
01917 
01918     if (*data_size < 480 * sizeof(float)) {
01919         av_log(ctx, AV_LOG_ERROR,
01920                "Output buffer too small (%d given - %zu needed)\n",
01921                *data_size, 480 * sizeof(float));
01922         return -1;
01923     }
01924     *data_size = 0;
01925 
01926     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01927      * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer
01928      * feeds us ASF packets, which may concatenate multiple "codec" packets
01929      * in a single "muxer" packet, so we artificially emulate that by
01930      * capping the packet size at ctx->block_align. */
01931     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01932     if (!size)
01933         return 0;
01934     init_get_bits(&s->gb, avpkt->data, size << 3);
01935 
01936     /* size == ctx->block_align is used to indicate whether we are dealing with
01937      * a new packet or a packet of which we already read the packet header
01938      * previously. */
01939     if (size == ctx->block_align) { // new packet header
01940         if ((res = parse_packet_header(s)) < 0)
01941             return res;
01942 
01943         /* If the packet header specifies a s->spillover_nbits, then we want
01944          * to push out all data of the previous packet (+ spillover) before
01945          * continuing to parse new superframes in the current packet. */
01946         if (s->spillover_nbits > 0) {
01947             if (s->sframe_cache_size > 0) {
01948                 int cnt = get_bits_count(gb);
01949                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01950                 flush_put_bits(&s->pb);
01951                 s->sframe_cache_size += s->spillover_nbits;
01952                 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01953                     *data_size > 0) {
01954                     cnt += s->spillover_nbits;
01955                     s->skip_bits_next = cnt & 7;
01956                     return cnt >> 3;
01957                 } else
01958                     skip_bits_long (gb, s->spillover_nbits - cnt +
01959                                     get_bits_count(gb)); // resync
01960             } else
01961                 skip_bits_long(gb, s->spillover_nbits);  // resync
01962         }
01963     } else if (s->skip_bits_next)
01964         skip_bits(gb, s->skip_bits_next);
01965 
01966     /* Try parsing superframes in current packet */
01967     s->sframe_cache_size = 0;
01968     s->skip_bits_next = 0;
01969     pos = get_bits_left(gb);
01970     if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01971         return res;
01972     } else if (*data_size > 0) {
01973         int cnt = get_bits_count(gb);
01974         s->skip_bits_next = cnt & 7;
01975         return cnt >> 3;
01976     } else if ((s->sframe_cache_size = pos) > 0) {
01977         /* rewind bit reader to start of last (incomplete) superframe... */
01978         init_get_bits(gb, avpkt->data, size << 3);
01979         skip_bits_long(gb, (size << 3) - pos);
01980         assert(get_bits_left(gb) == pos);
01981 
01982         /* ...and cache it for spillover in next packet */
01983         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01984         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01985         // FIXME bad - just copy bytes as whole and add use the
01986         // skip_bits_next field
01987     }
01988 
01989     return size;
01990 }
01991 
01992 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01993 {
01994     WMAVoiceContext *s = ctx->priv_data;
01995 
01996     if (s->do_apf) {
01997         ff_rdft_end(&s->rdft);
01998         ff_rdft_end(&s->irdft);
01999         ff_dct_end(&s->dct);
02000         ff_dct_end(&s->dst);
02001     }
02002 
02003     return 0;
02004 }
02005 
02006 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02007 {
02008     WMAVoiceContext *s = ctx->priv_data;
02009     int n;
02010 
02011     s->postfilter_agc    = 0;
02012     s->sframe_cache_size = 0;
02013     s->skip_bits_next    = 0;
02014     for (n = 0; n < s->lsps; n++)
02015         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02016     memset(s->excitation_history, 0,
02017            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02018     memset(s->synth_history,      0,
02019            sizeof(*s->synth_history)      * MAX_LSPS);
02020     memset(s->gain_pred_err,      0,
02021            sizeof(s->gain_pred_err));
02022 
02023     if (s->do_apf) {
02024         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02025                sizeof(*s->synth_filter_out_buf) * s->lsps);
02026         memset(s->dcf_mem,              0,
02027                sizeof(*s->dcf_mem)              * 2);
02028         memset(s->zero_exc_pf,          0,
02029                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02030         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02031     }
02032 }
02033 
02034 AVCodec ff_wmavoice_decoder = {
02035     "wmavoice",
02036     AVMEDIA_TYPE_AUDIO,
02037     CODEC_ID_WMAVOICE,
02038     sizeof(WMAVoiceContext),
02039     wmavoice_decode_init,
02040     NULL,
02041     wmavoice_decode_end,
02042     wmavoice_decode_packet,
02043     CODEC_CAP_SUBFRAMES,
02044     .flush     = wmavoice_flush,
02045     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02046 };