00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "dct.h"
00040 #include "rdft.h"
00041 #include "sinewin.h"
00042 
/* Compile-time limits shared by the tables, the decoder context and the
 * helper functions below. */
00043 #define MAX_BLOCKS           8   /* largest n_blocks value found in frame_descs[] */
00044 #define MAX_LSPS             16  /* largest LSP count selected at init (10 or 16) */
00045 #define MAX_LSPS_ALIGN16     16  /* head-room kept in front of synth_filter_out_buf; postfilter() writes its output after it */
00046 
00047 #define MAX_FRAMES           3   /* multiplier used to derive MAX_SFRAMESIZE */
00048 #define MAX_FRAMESIZE        160 /* maximum frame length; postfilter() handles at most half of it per call */
00049 #define MAX_SIGNAL_HISTORY   416 /* upper limit enforced on history_nsamples at init */
00050 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) /* MAX_FRAMES frames of MAX_FRAMESIZE each */
00052 #define SFRAME_CACHE_MAXSIZE 256 /* size of sframe_cache[] in bytes, not counting the input padding */
00053 
00054 #define VLC_NBITS            6   /* lookup width passed to INIT_VLC_STATIC for frame_type_vlc */
00055 
00056 
00059 static VLC frame_type_vlc; /* frame-type VLC, built by decode_vbmtree() */
00060 
/* Values stored in frame_type_desc.acb_type (see frame_descs[]).
 * NOTE(review): "ACB" presumably stands for adaptive codebook - confirm. */
00064 enum {
00065     ACB_TYPE_NONE       = 0, /* used by frame types 0 and 1 in frame_descs[] */
00066     ACB_TYPE_ASYMMETRIC = 1, /* used by frame types 2-7 in frame_descs[] */
00067 
00068 
00069 
00070 
00071     ACB_TYPE_HAMMING    = 2  /* used by frame types 8-16 in frame_descs[] */
00072 
00073 
00074 };
00075 
/* Values stored in frame_type_desc.fcb_type (see frame_descs[]).
 * The numeric order matters: postfilter() tests
 * "fcb_type >= FCB_TYPE_AW_PULSES".
 * NOTE(review): "FCB" presumably stands for fixed codebook - confirm. */
00079 enum {
00080     FCB_TYPE_SILENCE    = 0, /* frame type 0; wiener_denoise() skips its filtering stage for it */
00081 
00082 
00083     FCB_TYPE_HARDCODED  = 1, /* frame type 1; selects the 5/13 gain ratio in calc_input_response() */
00084 
00085     FCB_TYPE_AW_PULSES  = 2, /* frame type 2; first type for which postfilter() tries kalman_smoothen() */
00086 
00087     FCB_TYPE_EXC_PULSES = 3, /* frame types 3-16 */
00088 
00089 
00090 };
00091 
/* Static description of the 17 frame types; the row index is the frame type
 * number that decode_vbmtree() distributes over the VBM tree. */
00095 static const struct frame_type_desc {
00096     uint8_t n_blocks;     /* number of blocks per frame: 1, 2, 4 or 8 in the rows below */
00097 
00098     uint8_t log_n_blocks; /* log2(n_blocks) in every row below */
00099     uint8_t acb_type;     /* one of the ACB_TYPE_* values */
00100     uint8_t fcb_type;     /* one of the FCB_TYPE_* values */
00101     uint8_t dbl_pulses;   /* 0, 2 or 5 in the rows below; NOTE(review): exact meaning not visible in this chunk */
00102 
00103 
00104     uint16_t frame_size;  /* NOTE(review): presumably the coded size of the frame in bits - confirm against the frame parser */
00105 
00106 } frame_descs[17] = {
00107     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00108     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00109     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00110     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00111     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00112     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00113     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00114     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00115     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00116     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00117     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00118     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00119     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00120     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00121     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00122     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00123     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00124 };
00125 
/* Per-stream decoder state. Comments describe only what the code visible in
 * this part of the file establishes; fields without a comment are used
 * exclusively by code outside this chunk. */
00129 typedef struct {
00134     GetBitContext gb;             /* bit reader; at init it parses the VBM tree out of the extradata */
00135 
00136 
00137 
00138     int8_t vbm_tree[25];          /* filled by decode_vbmtree(); 0xff marks an unused slot */
00139 
00140     int spillover_bitsize;        /* 3 + ceil(log2(block_align)), set at init */
00141 
00142 
00143     int history_nsamples;         /* max_pitch_val + 8; init rejects values above MAX_SIGNAL_HISTORY */
00144 
00145 
00146     
00147     int do_apf;                   /* extradata flag bit 0; when set, init prepares the postfilter transforms */
00148 
00149     int denoise_strength;         /* extradata flag bits 2-5, must be < 12; row index into wmavoice_denoise_power_table */
00150 
00151     int denoise_tilt_corr;        /* extradata flag bit 6; enables the tilt step in calc_input_response() */
00152 
00153     int dc_level;                 /* extradata flag bits 7-10; values > 8 enable the order-2 filter in postfilter() */
00154 
00155 
00156     int lsps;                     /* number of LSPs: 16 if extradata flag bit 12 is set, else 10 */
00157     int lsp_q_mode;               /* extradata flag bit 13 */
00158     int lsp_def_mode;             /* extradata flag bit 14 */
00159 
00160     int frame_lsp_bitsize;        /* 34 for 16 LSPs, 24 for 10 LSPs */
00161 
00162     int sframe_lsp_bitsize;       /* 60 for 16 LSPs, 48 for 10 LSPs */
00163 
00164 
00165     int min_pitch_val;            /* derived from the sample rate at init; must be >= 1 */
00166     int max_pitch_val;            /* derived from the sample rate at init */
00167     int pitch_nbits;              /* ceil(log2(max_pitch_val - min_pitch_val)) */
00168 
00169     int block_pitch_nbits;        /* ceil(log2(block_pitch_range)) */
00170 
00171     int block_pitch_range;        /* computed at init from block_conv_table[] and min_pitch_val */
00172     int block_delta_pitch_nbits;  /* 1 + ceil(log2(block_delta_pitch_hrange)) */
00173 
00174 
00175 
00176     int block_delta_pitch_hrange; /* (pitch range / 8) rounded down to a multiple of 16; must be > 0 */
00177 
00178     uint16_t block_conv_table[4]; /* pitch boundaries computed at init from min/max pitch and the pitch range */
00179 
00180 
00190     int spillover_nbits;          
00191 
00192 
00193 
00194     int has_residual_lsps;        
00195 
00196 
00197 
00198 
00199     int skip_bits_next;           
00200 
00201 
00202 
00203     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00206     int sframe_cache_size;        
00207 
00208 
00209 
00210 
00211     PutBitContext pb;             
00212 
00222     double prev_lsps[MAX_LSPS];   /* initialised at init to pi * (n + 1) / (lsps + 1) */
00223 
00224     int last_pitch_val;           /* starts at 40 */
00225     int last_acb_type;            /* starts at ACB_TYPE_NONE */
00226     int pitch_diff_sh16;          
00227 
00228     float silence_gain;           
00229 
00230     int aw_idx_is_ext;            /* set by aw_parse_coords() when the 6-bit start index is >= 54 */
00231 
00232     int aw_pulse_range;           /* 24 if both block pitches exceed 32, else 16 (aw_parse_coords()) */
00233 
00234 
00235 
00236 
00237 
00238     int aw_n_pulses[2];           /* per-block pulse count computed by aw_parse_coords() */
00239 
00240 
00241     int aw_first_pulse_off[2];    /* per-block first pulse offset computed by aw_parse_coords(); may be negative */
00242 
00243     int aw_next_pulse_off_cache;  /* written at the end of aw_pulse_set2(), read back for block 1 */
00244 
00245 
00246 
00247 
00248 
00249     int frame_cntr;               
00250 
00251     float gain_pred_err[6];       
00252     float excitation_history[MAX_SIGNAL_HISTORY];
00256     float synth_history[MAX_LSPS]; 
00257 
00266     RDFTContext rdft, irdft;      /* forward / inverse real DFT, initialised with 7 bits when do_apf is set */
00267 
00268     DCTContext dct, dst;          /* DCT-I / DST-I, initialised with 6 bits when do_apf is set */
00269 
00270     float sin[511], cos[511];     /* lookup tables built at init from ff_sine_window_init(); indexed 0..510 */
00271 
00272     float postfilter_agc;         /* gain memory handed to adaptive_gain_control() */
00273 
00274     float dcf_mem[2];             /* filter state for the order-2 transfer function in postfilter() */
00275     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00278     float denoise_filter_cache[MAX_FRAMESIZE]; /* filter tail carried over between wiener_denoise() calls */
00279     int   denoise_filter_cache_size; /* number of valid entries in denoise_filter_cache */
00280     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80]; /* 128-entry scratch buffer used by wiener_denoise() */
00282     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80]; /* 128-entry filter buffer used by wiener_denoise() */
00284     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; /* postfilter() output; the first MAX_LSPS_ALIGN16 entries hold filter history */
00287 
00290 } WMAVoiceContext;
00291 
00301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00302 {
00303     static const uint8_t bits[] = {
00304          2,  2,  2,  4,  4,  4,
00305          6,  6,  6,  8,  8,  8,
00306         10, 10, 10, 12, 12, 12,
00307         14, 14, 14, 14
00308     };
00309     static const uint16_t codes[] = {
00310           0x0000, 0x0001, 0x0002,        
00311           0x000c, 0x000d, 0x000e,        
00312           0x003c, 0x003d, 0x003e,        
00313           0x00fc, 0x00fd, 0x00fe,        
00314           0x03fc, 0x03fd, 0x03fe,        
00315           0x0ffc, 0x0ffd, 0x0ffe,        
00316           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff 
00317     };
00318     int cntr[8], n, res;
00319 
00320     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00321     memset(cntr,     0,    sizeof(cntr));
00322     for (n = 0; n < 17; n++) {
00323         res = get_bits(gb, 3);
00324         if (cntr[res] > 3) 
00325             return -1;
00326         vbm_tree[res * 3 + cntr[res]++] = n;
00327     }
00328     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00329                     bits, 1, 1, codes, 2, 2, 132);
00330     return 0;
00331 }
00332 
00336 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00337 {
00338     int n, flags, pitch_range, lsp16_flag;
00339     WMAVoiceContext *s = ctx->priv_data;
00340 
00349     if (ctx->extradata_size != 46) {
00350         av_log(ctx, AV_LOG_ERROR,
00351                "Invalid extradata size %d (should be 46)\n",
00352                ctx->extradata_size);
00353         return -1;
00354     }
00355     flags                = AV_RL32(ctx->extradata + 18);
00356     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00357     s->do_apf            =    flags & 0x1;
00358     if (s->do_apf) {
00359         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00360         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00361         ff_dct_init(&s->dct,  6, DCT_I);
00362         ff_dct_init(&s->dst,  6, DST_I);
00363 
00364         ff_sine_window_init(s->cos, 256);
00365         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00366         for (n = 0; n < 255; n++) {
00367             s->sin[n]       = -s->sin[510 - n];
00368             s->cos[510 - n] =  s->cos[n];
00369         }
00370     }
00371     s->denoise_strength  =   (flags >> 2) & 0xF;
00372     if (s->denoise_strength >= 12) {
00373         av_log(ctx, AV_LOG_ERROR,
00374                "Invalid denoise filter strength %d (max=11)\n",
00375                s->denoise_strength);
00376         return -1;
00377     }
00378     s->denoise_tilt_corr = !!(flags & 0x40);
00379     s->dc_level          =   (flags >> 7) & 0xF;
00380     s->lsp_q_mode        = !!(flags & 0x2000);
00381     s->lsp_def_mode      = !!(flags & 0x4000);
00382     lsp16_flag           =    flags & 0x1000;
00383     if (lsp16_flag) {
00384         s->lsps               = 16;
00385         s->frame_lsp_bitsize  = 34;
00386         s->sframe_lsp_bitsize = 60;
00387     } else {
00388         s->lsps               = 10;
00389         s->frame_lsp_bitsize  = 24;
00390         s->sframe_lsp_bitsize = 48;
00391     }
00392     for (n = 0; n < s->lsps; n++)
00393         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00394 
00395     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00396     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00397         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00398         return -1;
00399     }
00400 
00401     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00402     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00403     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00404     if (pitch_range <= 0) {
00405         av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00406         return -1;
00407     }
00408     s->pitch_nbits      = av_ceil_log2(pitch_range);
00409     s->last_pitch_val   = 40;
00410     s->last_acb_type    = ACB_TYPE_NONE;
00411     s->history_nsamples = s->max_pitch_val + 8;
00412 
00413     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00414         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00415             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00416 
00417         av_log(ctx, AV_LOG_ERROR,
00418                "Unsupported samplerate %d (min=%d, max=%d)\n",
00419                ctx->sample_rate, min_sr, max_sr); 
00420 
00421         return -1;
00422     }
00423 
00424     s->block_conv_table[0]      = s->min_pitch_val;
00425     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00426     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00427     s->block_conv_table[3]      = s->max_pitch_val - 1;
00428     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00429     if (s->block_delta_pitch_hrange <= 0) {
00430         av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00431         return -1;
00432     }
00433     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00434     s->block_pitch_range        = s->block_conv_table[2] +
00435                                   s->block_conv_table[3] + 1 +
00436                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00437     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00438 
00439     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00440 
00441     return 0;
00442 }
00443 
/**
 * Scale the post-filtered signal so that its level follows the level of the
 * unfiltered synthesis, using a first-order smoothed gain.
 *
 * The gain target is (1 - alpha) * sum|speech_synth| / sum|in|; for each
 * sample the running gain is updated as mem = alpha * mem + target and
 * applied to the input sample.
 *
 * A block whose input is all zeros has no defined energy ratio. The target
 * is then taken as 0, so the gain memory just decays by alpha instead of
 * becoming Inf/NaN and poisoning every following block.
 *
 * @param out          output samples (size entries)
 * @param in           post-filtered input samples (size entries)
 * @param speech_synth reference signal (size entries)
 * @param size         number of samples to process
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     running gain; read on entry and updated on exit
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* guard the division: silent input must not produce Inf or NaN */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00486 
00505 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00506                            const float *in, float *out, int size)
00507 {
00508     int n;
00509     float optimal_gain = 0, dot;
00510     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00511                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00512                 *best_hist_ptr;
00513 
00514     
00515     do {
00516         dot = ff_dot_productf(in, ptr, size);
00517         if (dot > optimal_gain) {
00518             optimal_gain  = dot;
00519             best_hist_ptr = ptr;
00520         }
00521     } while (--ptr >= end);
00522 
00523     if (optimal_gain <= 0)
00524         return -1;
00525     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00526     if (dot <= 0) 
00527         return -1;
00528 
00529     if (optimal_gain <= dot) {
00530         dot = dot / (dot + 0.6 * optimal_gain); 
00531     } else
00532         dot = 0.625;
00533 
00534     
00535     for (n = 0; n < size; n++)
00536         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00537 
00538     return 0;
00539 }
00540 
/**
 * Ratio of the lag-1 to the lag-0 autocorrelation of the filter whose
 * coefficients are an implicit leading 1.0 followed by lpcs[0..n_lpcs-1].
 *
 * @param lpcs   filter coefficients (n_lpcs entries)
 * @param n_lpcs number of coefficients
 * @return (lpcs[0] + sum lpcs[i] * lpcs[i + 1]) / (1 + sum lpcs[i]^2)
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0     + ff_dot_productf(lpcs, lpcs,     n_lpcs);
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / lag0;
}
00560 
/*
 * Derive the denoising filter for one block.
 *
 * lpcs holds 128 floats and is transformed in place (it is scratch space
 * after the call); coeffs receives the resulting filter, of which only the
 * first "remainder" entries are non-zero on return. fcb_type only selects
 * the gain ratio (5/13 for FCB_TYPE_HARDCODED, 5/14.7 otherwise).
 * The code is order-sensitive: several steps overwrite lpcs/coeffs entries
 * that later steps read.
 */
00564 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00565                                 int fcb_type, float *coeffs, int remainder)
00566 {
00567     float last_coeff, min = 15.0, max = -15.0;
00568     float irange, angle_mul, gain_mul, range, sq;
00569     int n, idx;
00570 
00571     /* forward real DFT of lpcs, in place */
00572     s->rdft.rdft_calc(&s->rdft, lpcs);
00573 #define log_range(var, assign) do { \
00574         float tmp = log10f(assign);  var = tmp; \
00575         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00576     } while (0)
00577     log_range(last_coeff,  lpcs[1]         * lpcs[1]); /* lpcs[1] is squared on its own; its log is stored in lpcs[64] below */
00578     for (n = 1; n < 64; n++) /* bins 1..63: log10(re^2 + im^2), written to lpcs[n]; safe because n <= 2n */
00579         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00580                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00581     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00582 #undef log_range
00583     range    = max - min;
00584     lpcs[64] = last_coeff;
00585 
00586     /* Map each of the 65 log-power values onto an entry of the
00587      * denoise power table row selected by denoise_strength: the
00588      * distance from the maximum is scaled so that the full range spans
00589      * 64 steps. The same value also yields a gain looked up in
00590      * wmavoice_energy_table (extrapolated with powf beyond entry 127). */
00591     irange    = 64.0 / range; 
00592     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00593                                                           (5.0 / 14.7));
00594     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00595     for (n = 0; n <= 64; n++) {
00596         float pwr;
00597 
00598         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00599         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00600         lpcs[n] = angle_mul * pwr;
00601 
00602         /* float-to-int conversion truncates; negative results are clamped to 0 below */
00603         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00604         if (idx > 127) { 
00605             coeffs[n] = wmavoice_energy_table[127] *
00606                         powf(1.0331663, idx - 127);
00607         } else
00608             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00609     }
00610 
00611     /* Run the DCT-I context and then the DST-I context over lpcs, in
00612      * place. The results are used as table indices below.
00613      * NOTE(review): the original comment here was lost; the signal
00614      * processing rationale is not recoverable from this chunk. */
00615     s->dct.dct_calc(&s->dct, lpcs);
00616     s->dst.dct_calc(&s->dst, lpcs);
00617 
00618     /* Build the packed spectrum in coeffs: each gain coeffs[n] is split into a cos part (slot 2n) and a sin part (slot 2n+1), using indices clipped to the 511-entry tables */
00619     idx = 255 + av_clip(lpcs[64],               -255, 255);
00620     coeffs[0]  = coeffs[0]  * s->cos[idx];
00621     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00622     last_coeff = coeffs[64] * s->cos[idx];
00623     for (n = 63;; n--) { /* n drops by two per pass (once in the for, once in the break test); runs from 63 down to 1 so slots 2n, 2n+1 never overwrite an unread coeffs[n] */
00624         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00625         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00626         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00627 
00628         if (!--n) break;
00629 
00630         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00633     }
00634     coeffs[1] = last_coeff; /* slot 1 carries the value computed from coeffs[64] */
00635 
00636     /* back to the time domain, in place */
00637     s->irdft.rdft_calc(&s->irdft, coeffs);
00638 
00639     /* keep only the first "remainder" taps, optionally tilt-compensate them, then scale by 1 / (64 * sqrt(energy)) */
00640     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00641     if (s->denoise_tilt_corr) {
00642         float tilt_mem = 0;
00643 
00644         coeffs[remainder - 1] = 0;
00645         ff_tilt_compensation(&tilt_mem,
00646                              -1.8 * tilt_factor(coeffs, remainder - 1),
00647                              coeffs, remainder);
00648     }
00649     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00650     for (n = 0; n < remainder; n++)
00651         coeffs[n] *= sq;
00652 }
00653 
/*
 * Filter one block of synthesized samples in the frequency domain and handle
 * the overlap between consecutive blocks.
 *
 * synth_pf must have room for 128 floats; the first "size" entries are the
 * input and are replaced by the output. For FCB_TYPE_SILENCE no filtering is
 * done, but the tail cached by an earlier call is still added in.
 * "remainder" is only assigned and only read when fcb_type is not
 * FCB_TYPE_SILENCE.
 */
00680 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00681                            float *synth_pf, int size,
00682                            const float *lpcs)
00683 {
00684     int remainder, lim, n;
00685 
00686     if (fcb_type != FCB_TYPE_SILENCE) {
00687         float *tilted_lpcs = s->tilted_lpcs_pf,
00688               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00689 
00690         tilted_lpcs[0]           = 1.0; /* leading 1.0 followed by the LPCs, zero-padded to 128 entries */
00691         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00692         memset(&tilted_lpcs[s->lsps + 1], 0,
00693                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00694         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00695                              tilted_lpcs, s->lsps + 2);
00696 
00697         /* Number of filter taps to compute: bounded both by the
00698          * space left after the block in the 128-entry buffer
00699          * (127 - size) and by the block length itself (size - 1).
00700          * calc_input_response() overwrites tilted_lpcs. */
00701         remainder = FFMIN(127 - size, size - 1);
00702         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00703 
00704         /* zero-pad the block to 128 samples, transform block and filter, multiply them bin by bin
00705          * (slots 0 and 1 are multiplied as plain reals, bins 1..63 as complex pairs) and transform back */
00706         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00707         s->rdft.rdft_calc(&s->rdft, synth_pf);
00708         s->rdft.rdft_calc(&s->rdft, coeffs);
00709         synth_pf[0] *= coeffs[0];
00710         synth_pf[1] *= coeffs[1];
00711         for (n = 1; n < 64; n++) {
00712             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00713             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00714             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00715         }
00716         s->irdft.rdft_calc(&s->irdft, synth_pf);
00717     }
00718 
00719     /* add the tail cached by the previous call to the start of this block and drop the consumed part of the cache */
00720     if (s->denoise_filter_cache_size) {
00721         lim = FFMIN(s->denoise_filter_cache_size, size);
00722         for (n = 0; n < lim; n++)
00723             synth_pf[n] += s->denoise_filter_cache[n];
00724         s->denoise_filter_cache_size -= lim;
00725         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00726                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00727     }
00728 
00729     /* merge this block's tail (samples size .. size + remainder - 1) into the cache: add where cached data remains, copy beyond it */
00730     if (fcb_type != FCB_TYPE_SILENCE) {
00731         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00732         for (n = 0; n < lim; n++)
00733             s->denoise_filter_cache[n] += synth_pf[size + n];
00734         if (lim < remainder) {
00735             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00736                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00737             s->denoise_filter_cache_size = remainder;
00738         }
00739     }
00740 }
00741 
00762 static void postfilter(WMAVoiceContext *s, const float *synth,
00763                        float *samples,    int size,
00764                        const float *lpcs, float *zero_exc_pf,
00765                        int fcb_type,      int pitch)
00766 {
00767     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00768           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00769           *synth_filter_in = zero_exc_pf;
00770 
00771     assert(size <= MAX_FRAMESIZE / 2);
00772 
00773     
00774     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00775 
00776     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00777         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00778         synth_filter_in = synth_filter_in_buf;
00779 
00780     
00781     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00782                                  synth_filter_in, size, s->lsps);
00783     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00784            sizeof(synth_pf[0]) * s->lsps);
00785 
00786     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00787 
00788     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00789                           &s->postfilter_agc);
00790 
00791     if (s->dc_level > 8) {
00792         
00793 
00794 
00795         ff_acelp_apply_order_2_transfer_function(samples, samples,
00796             (const float[2]) { -1.99997,      1.0 },
00797             (const float[2]) { -1.9330735188, 0.93589198496 },
00798             0.93980580475, s->dcf_mem, size);
00799     }
00800 }
/**
 * Multi-stage vector dequantization.
 *
 * The output is cleared and then, for every stage, the selected table
 * vector is scaled and offset and added to it:
 * lsps[i] += base_q[stage] + mul_q[stage] * vector[i].
 * The tables of all stages are stored back to back; stage k holds sizes[k]
 * vectors of num bytes each.
 *
 * @param lsps     output vector (num entries)
 * @param num      vector length
 * @param values   index of the vector to use in each stage
 * @param sizes    number of vectors stored for each stage
 * @param n_stages number of stages
 * @param table    concatenated per-stage vector tables
 * @param mul_q    per-stage multiplier
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *stage_table = table;
    int stage, i;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *vector = stage_table + values[stage] * num;
        const double offset   = base_q[stage];
        const double scale    = mul_q[stage];

        for (i = 0; i < num; i++)
            lsps[i] += offset + scale * vector[i];

        /* skip over this stage's vectors to reach the next stage */
        stage_table += sizes[stage] * num;
    }
}
00836 
00848 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00849 {
00850     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00851     static const double mul_lsf[4] = {
00852         5.2187144800e-3,    1.4626986422e-3,
00853         9.6179549166e-4,    1.1325736225e-3
00854     };
00855     static const double base_lsf[4] = {
00856         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00857         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00858     };
00859     uint16_t v[4];
00860 
00861     v[0] = get_bits(gb, 8);
00862     v[1] = get_bits(gb, 6);
00863     v[2] = get_bits(gb, 5);
00864     v[3] = get_bits(gb, 5);
00865 
00866     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00867                  mul_lsf, base_lsf);
00868 }
00869 
00874 static void dequant_lsp10r(GetBitContext *gb,
00875                            double *i_lsps, const double *old,
00876                            double *a1, double *a2, int q_mode)
00877 {
00878     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00879     static const double mul_lsf[3] = {
00880         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00881     };
00882     static const double base_lsf[3] = {
00883         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00884     };
00885     const float (*ipol_tab)[2][10] = q_mode ?
00886         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00887     uint16_t interpol, v[3];
00888     int n;
00889 
00890     dequant_lsp10i(gb, i_lsps);
00891 
00892     interpol = get_bits(gb, 5);
00893     v[0]     = get_bits(gb, 7);
00894     v[1]     = get_bits(gb, 6);
00895     v[2]     = get_bits(gb, 6);
00896 
00897     for (n = 0; n < 10; n++) {
00898         double delta = old[n] - i_lsps[n];
00899         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00900         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00901     }
00902 
00903     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00904                  mul_lsf, base_lsf);
00905 }
00906 
00910 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00911 {
00912     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00913     static const double mul_lsf[5] = {
00914         3.3439586280e-3,    6.9908173703e-4,
00915         3.3216608306e-3,    1.0334960326e-3,
00916         3.1899104283e-3
00917     };
00918     static const double base_lsf[5] = {
00919         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00920         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00921         M_PI * -1.29816e-1
00922     };
00923     uint16_t v[5];
00924 
00925     v[0] = get_bits(gb, 8);
00926     v[1] = get_bits(gb, 6);
00927     v[2] = get_bits(gb, 7);
00928     v[3] = get_bits(gb, 6);
00929     v[4] = get_bits(gb, 7);
00930 
00931     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00932                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00933     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00934                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00935     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00936                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00937 }
00938 
00943 static void dequant_lsp16r(GetBitContext *gb,
00944                            double *i_lsps, const double *old,
00945                            double *a1, double *a2, int q_mode)
00946 {
00947     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00948     static const double mul_lsf[3] = {
00949         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00950     };
00951     static const double base_lsf[3] = {
00952         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00953     };
00954     const float (*ipol_tab)[2][16] = q_mode ?
00955         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00956     uint16_t interpol, v[3];
00957     int n;
00958 
00959     dequant_lsp16i(gb, i_lsps);
00960 
00961     interpol = get_bits(gb, 5);
00962     v[0]     = get_bits(gb, 7);
00963     v[1]     = get_bits(gb, 7);
00964     v[2]     = get_bits(gb, 7);
00965 
00966     for (n = 0; n < 16; n++) {
00967         double delta = old[n] - i_lsps[n];
00968         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00969         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00970     }
00971 
00972     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00973                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00974     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00975                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00976     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00977                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00978 }
00979 
/*
 * Read the pulse start index from the bitstream and derive, for each of the
 * two blocks of the frame, how many repeated pulses fall into it
 * (aw_n_pulses[]) and where the first one starts (aw_first_pulse_off[]).
 * Also sets aw_idx_is_ext and aw_pulse_range.
 *
 * pitch[0] / pitch[1] are the pitch values of the first / second block.
 * They are used as loop increments and divisors, so both must be positive;
 * NOTE(review): that guarantee comes from the caller, which is not visible
 * here.
 */
00993 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00994                             const int *pitch)
00995 {
00996     static const int16_t start_offset[94] = {
00997         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
00998          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
00999          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
01000          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
01001          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
01002          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
01003         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01004         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01005     };
01006     int bits, offset;
01007 
01008     /* 6-bit table index; values 54..63 are extended by 2 more bits to 54 + 4 * (bits - 54) + extra, i.e. at most 93 */
01009     s->aw_idx_is_ext = 0;
01010     if ((bits = get_bits(gb, 6)) >= 54) {
01011         s->aw_idx_is_ext = 1;
01012         bits += (bits - 54) * 3 + get_bits(gb, 2);
01013     }
01014 
01015     /* pulse range is 24 when both pitches exceed 32, else 16; a negative start offset is first advanced
01016      * by pitch[0] until it is non-negative, then the pulses are counted per block (block length MAX_FRAMESIZE / 2) */
01017     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01018     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01019     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01020     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01021     offset                  += s->aw_n_pulses[0] * pitch[0];
01022     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01023     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01024 
01025     /* when the start offset lies in the first block, step the first-pulse offsets
01026      * back by whole pitch periods for as long as the preceding pulse's
01027      * range would still end after position 0 of that block */
01028     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01029         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01030             s->aw_first_pulse_off[1] -= pitch[1];
01031         if (start_offset[bits] < 0)
01032             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01033                 s->aw_first_pulse_off[0] -= pitch[0];
01034     }
01035 }
01036 
/*
 * Decode one extra pulse for block block_idx and append it to fcb.
 *
 * A bit mask with one bit per sample of the block (80 bits, most
 * significant bit first within each 16-bit word) marks the positions that
 * are still free; the ranges covered by the repeated pulses are cleared
 * from it. An index read from the bitstream then selects the n-th free
 * position. If the mask runs empty while searching, the function returns
 * without adding a pulse and without updating aw_next_pulse_off_cache.
 */
01044 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01045                           int block_idx, AMRFixed *fcb)
01046 {
01047     uint16_t use_mask_mem[9]; 
01048     uint16_t *use_mask = use_mask_mem + 2;
01049     /* use_mask[-2..-1] and use_mask[5..6] are zeroed guard words
01050      * around the five real mask words use_mask[0..4], so that the
01051      * exclusion loop below, which may touch up to three consecutive
01052      * words starting at idx >> 4, stays inside use_mask_mem.
01053      * NOTE(review): the leading guard words would only be reached
01054      * through a negative idx; whether that can happen depends on
01055      * values set outside this function. */
01056     int pulse_off = s->aw_first_pulse_off[block_idx],
01057         pulse_start, n, idx, range, aidx, start_off = 0;
01058 
01059     /* advance by whole pitch periods until the pulse range reaches into the block */
01060     if (s->aw_n_pulses[block_idx] > 0)
01061         while (pulse_off + s->aw_pulse_range < 1)
01062             pulse_off += fcb->pitch_lag;
01063 
01064     /* search range: 32 for block 0 and 8 for block 1 when block 0 has pulses (block 1 then reuses the cached offset), 16 when block 0 has none */
01065     if (s->aw_n_pulses[0] > 0) {
01066         if (block_idx == 0) {
01067             range = 32;
01068         } else  {
01069             range = 8;
01070             if (s->aw_n_pulses[block_idx] > 0)
01071                 pulse_off = s->aw_next_pulse_off_cache;
01072         }
01073     } else
01074         range = 16;
01075     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01076 
01077     /* start with all 80 positions free, then clear the aw_pulse_range
01078      * positions that follow each repeated pulse; the range may span
01079      * up to three mask words */
01080     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01081     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01082     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01083     if (s->aw_n_pulses[block_idx] > 0)
01084         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01085             int excl_range         = s->aw_pulse_range; 
01086             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01087             int first_sh           = 16 - (idx & 15);
01088             *use_mask_ptr++       &= 0xFFFFu << first_sh;
01089             excl_range            -= first_sh;
01090             if (excl_range >= 16) {
01091                 *use_mask_ptr++    = 0;
01092                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01093             } else
01094                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01095         }
01096 
01097     /* index width: 5 bits (block 0) or 3 bits (block 1) when block 0 has pulses, else 4 bits; the loop consumes aidx + 1 free positions and keeps the last one */
01098     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01099     for (n = 0; n <= aidx; pulse_start++) {
01100         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01101         if (idx >= MAX_FRAMESIZE / 2) { /* ran past the block: continue at the first free position left in the mask, or give up if there is none */
01102             if (use_mask[0])      idx = 0x0F;
01103             else if (use_mask[1]) idx = 0x1F;
01104             else if (use_mask[2]) idx = 0x2F;
01105             else if (use_mask[3]) idx = 0x3F;
01106             else if (use_mask[4]) idx = 0x4F;
01107             else                  return;
01108             idx -= av_log2_16bit(use_mask[idx >> 4]);
01109         }
01110         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01111             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01112             n++;
01113             start_off = idx;
01114         }
01115     }
01116 
01117     fcb->x[fcb->n] = start_off;
01118     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0; /* one sign bit: set means negative */
01119     fcb->n++;
01120 
01121     /* remember where the next pitch-period repetition of this pulse would fall, relative to the end of this block */
01122     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01123     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01124 }
01125 
/*
 * Decode the main pulse set for block block_idx and append it to fcb.
 *
 * One value is read from the bitstream: 12 bits, or 10 bits for block 0
 * when aw_idx_is_ext is set. If the block contains repeated pulses, the
 * value is unpacked into 3 (pulse range 24) or 4 (pulse range 16) pulses;
 * otherwise it encodes a pair of pulses that is flagged in no_repeat_mask.
 */
01133 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01134                           int block_idx, AMRFixed *fcb)
01135 {
01136     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01137     float v;
01138 
01139     if (s->aw_n_pulses[block_idx] > 0) {
01140         int n, v_mask, i_mask, sh, n_pulses;
01141 
01142         if (s->aw_pulse_range == 24) { /* 3 pulses, 4 bits each: 1 sign bit + 3 index bits */
01143             n_pulses = 3;
01144             v_mask   = 8;
01145             i_mask   = 7;
01146             sh       = 4;
01147         } else { /* 4 pulses, 3 bits each: 1 sign bit + 2 index bits */
01148             n_pulses = 4;
01149             v_mask   = 4;
01150             i_mask   = 3;
01151             sh       = 3;
01152         }
01153 
01154         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) { /* the lowest bit field belongs to the highest pulse number */
01155             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01156             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01157                                  s->aw_first_pulse_off[block_idx];
01158             while (fcb->x[fcb->n] < 0) /* move a negative position forward by whole pitch periods */
01159                 fcb->x[fcb->n] += fcb->pitch_lag;
01160             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2) /* pulses beyond the block are dropped (the slot is reused) */
01161                 fcb->n++;
01162         }
01163     } else {
01164         int num2 = (val & 0x1FF) >> 1, delta, idx; /* bits 1-8 select position and spacing of the pair */
01165 
01166         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01167         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01168         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01169         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01170         v = (val & 0x200) ? -1.0 : 1.0; /* bit 9: sign of the first pulse */
01171 
01172         fcb->no_repeat_mask |= 3 << fcb->n; /* flag both new pulses */
01173         fcb->x[fcb->n]       = idx - delta;
01174         fcb->y[fcb->n]       = v;
01175         fcb->x[fcb->n + 1]   = idx;
01176         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v; /* bit 0: second pulse has the opposite sign */
01177         fcb->n              += 2;
01178     }
01179 }
01180 
/**
 * Deterministic pseudo-random index generator.
 *
 * @param frame_cntr current frame counter
 * @param block_num  index of the block inside the frame
 * @param block_size number of samples in the block; must be below 1000
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* Division-free lookup. The nine rows match the divisors
     * d = 6, 11, 16, ..., 46: the first column equals 49995 / d and the
     * second column is (49995 % d) times a rounded-up 2^32 / d, so that
     * UMULH() with it yields the fractional part of the quotient. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, /* d =  6 */
        { 4545,  0 * 390451573U }, /* d = 11 */
        { 3124, 11 * 268435456U }, /* d = 16 */
        { 2380, 15 * 204522253U }, /* d = 21 */
        { 1922, 23 * 165191050U }, /* d = 26 */
        { 1612, 23 * 138547333U }, /* d = 31 */
        { 1388, 27 * 119304648U }, /* d = 36 */
        { 1219, 16 * 104755300U }, /* d = 41 */
        { 1086, 39 *  93368855U }  /* d = 46 */
    };
    unsigned int seed, row, mixed;

    seed = MUL16(block_num, 1877) + frame_cntr;
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* 477218589 is 2^32 / 9 rounded up, so this is seed % 9 without a
     * division instruction */
    row   = seed - 9 * MULH(477218589, seed);
    mixed = (uint16_t) (seed * div_tbl[row][0] +
                        UMULH(seed, div_tbl[row][1]));

    return mixed % (1000 - block_size);
}
01225 
01230 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01231                                  int block_idx, int size,
01232                                  const struct frame_type_desc *frame_desc,
01233                                  float *excitation)
01234 {
01235     float gain;
01236     int n, r_idx;
01237 
01238     assert(size <= MAX_FRAMESIZE);
01239 
01240     
01241     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01242         r_idx = pRNG(s->frame_cntr, block_idx, size);
01243         gain  = s->silence_gain;
01244     } else  {
01245         r_idx = get_bits(gb, 8);
01246         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01247     }
01248 
01249     
01250     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01251 
01252     
01253     for (n = 0; n < size; n++)
01254         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01255 }
01256 
01261 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01262                                 int block_idx, int size,
01263                                 int block_pitch_sh2,
01264                                 const struct frame_type_desc *frame_desc,
01265                                 float *excitation)
01266 {
01267     static const float gain_coeff[6] = {
01268         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01269     };
01270     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01271     int n, idx, gain_weight;
01272     AMRFixed fcb;
01273 
01274     assert(size <= MAX_FRAMESIZE / 2);
01275     memset(pulses, 0, sizeof(*pulses) * size);
01276 
01277     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01278     fcb.pitch_fac      = 1.0;
01279     fcb.no_repeat_mask = 0;
01280     fcb.n              = 0;
01281 
01282     
01283 
01284     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01285         aw_pulse_set1(s, gb, block_idx, &fcb);
01286         aw_pulse_set2(s, gb, block_idx, &fcb);
01287     } else  {
01288         int offset_nbits = 5 - frame_desc->log_n_blocks;
01289 
01290         fcb.no_repeat_mask = -1;
01291         
01292 
01293         for (n = 0; n < 5; n++) {
01294             float sign;
01295             int pos1, pos2;
01296 
01297             sign           = get_bits1(gb) ? 1.0 : -1.0;
01298             pos1           = get_bits(gb, offset_nbits);
01299             fcb.x[fcb.n]   = n + 5 * pos1;
01300             fcb.y[fcb.n++] = sign;
01301             if (n < frame_desc->dbl_pulses) {
01302                 pos2           = get_bits(gb, offset_nbits);
01303                 fcb.x[fcb.n]   = n + 5 * pos2;
01304                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01305             }
01306         }
01307     }
01308     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01309 
01310     
01311 
01312     idx = get_bits(gb, 7);
01313     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01314                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01315     acb_gain = wmavoice_gain_codebook_acb[idx];
01316     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01317                         -2.9957322736 ,
01318                          1.6094379124 );
01319 
01320     gain_weight = 8 >> frame_desc->log_n_blocks;
01321     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01322             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01323     for (n = 0; n < gain_weight; n++)
01324         s->gain_pred_err[n] = pred_err;
01325 
01326     
01327     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01328         int len;
01329         for (n = 0; n < size; n += len) {
01330             int next_idx_sh16;
01331             int abs_idx    = block_idx * size + n;
01332             int pitch_sh16 = (s->last_pitch_val << 16) +
01333                              s->pitch_diff_sh16 * abs_idx;
01334             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01335             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01336             idx            = idx_sh16 >> 16;
01337             if (s->pitch_diff_sh16) {
01338                 if (s->pitch_diff_sh16 > 0) {
01339                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01340                 } else
01341                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01342                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01343                               1, size - n);
01344             } else
01345                 len = size;
01346 
01347             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01348                                   wmavoice_ipol1_coeffs, 17,
01349                                   idx, 9, len);
01350         }
01351     } else  {
01352         int block_pitch = block_pitch_sh2 >> 2;
01353         idx             = block_pitch_sh2 & 3;
01354         if (idx) {
01355             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01356                                   wmavoice_ipol2_coeffs, 4,
01357                                   idx, 8, size);
01358         } else
01359             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01360                               sizeof(float) * size);
01361     }
01362 
01363     
01364     ff_weighted_vector_sumf(excitation, excitation, pulses,
01365                             acb_gain, fcb_gain, size);
01366 }
01367 
01384 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01385                         int block_idx, int size,
01386                         int block_pitch_sh2,
01387                         const double *lsps, const double *prev_lsps,
01388                         const struct frame_type_desc *frame_desc,
01389                         float *excitation, float *synth)
01390 {
01391     double i_lsps[MAX_LSPS];
01392     float lpcs[MAX_LSPS];
01393     float fac;
01394     int n;
01395 
01396     if (frame_desc->acb_type == ACB_TYPE_NONE)
01397         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01398     else
01399         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01400                             frame_desc, excitation);
01401 
01402     
01403     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01404     for (n = 0; n < s->lsps; n++) 
01405         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01406     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01407 
01408     
01409     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01410 }
01411 
01427 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01428                        float *samples,
01429                        const double *lsps, const double *prev_lsps,
01430                        float *excitation, float *synth)
01431 {
01432     WMAVoiceContext *s = ctx->priv_data;
01433     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01434     int pitch[MAX_BLOCKS], last_block_pitch;
01435 
01436     
01437     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01438         block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01439 
01440     if (bd_idx < 0) {
01441         av_log(ctx, AV_LOG_ERROR,
01442                "Invalid frame type VLC code, skipping\n");
01443         return -1;
01444     }
01445 
01446     
01447     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01448         
01449 
01450 
01451 
01452         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01453         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01454         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01455         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01456         if (s->last_acb_type == ACB_TYPE_NONE ||
01457             20 * abs(cur_pitch_val - s->last_pitch_val) >
01458                 (cur_pitch_val + s->last_pitch_val))
01459             s->last_pitch_val = cur_pitch_val;
01460 
01461         
01462         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01463             int fac = n * 2 + 1;
01464 
01465             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01466                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01467                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01468         }
01469 
01470         
01471         s->pitch_diff_sh16 =
01472             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01473     }
01474 
01475     
01476     switch (frame_descs[bd_idx].fcb_type) {
01477     case FCB_TYPE_SILENCE:
01478         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01479         break;
01480     case FCB_TYPE_AW_PULSES:
01481         aw_parse_coords(s, gb, pitch);
01482         break;
01483     }
01484 
01485     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01486         int bl_pitch_sh2;
01487 
01488         
01489         switch (frame_descs[bd_idx].acb_type) {
01490         case ACB_TYPE_HAMMING: {
01491             
01492 
01493 
01494 
01495 
01496             int block_pitch,
01497                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01498                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01499                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01500 
01501             if (n == 0) {
01502                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01503             } else
01504                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01505                                  get_bits(gb, s->block_delta_pitch_nbits);
01506             
01507             last_block_pitch = av_clip(block_pitch,
01508                                        s->block_delta_pitch_hrange,
01509                                        s->block_pitch_range -
01510                                            s->block_delta_pitch_hrange);
01511 
01512             
01513             if (block_pitch < t1) {
01514                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01515             } else {
01516                 block_pitch -= t1;
01517                 if (block_pitch < t2) {
01518                     bl_pitch_sh2 =
01519                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01520                 } else {
01521                     block_pitch -= t2;
01522                     if (block_pitch < t3) {
01523                         bl_pitch_sh2 =
01524                             (s->block_conv_table[2] + block_pitch) << 2;
01525                     } else
01526                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01527                 }
01528             }
01529             pitch[n] = bl_pitch_sh2 >> 2;
01530             break;
01531         }
01532 
01533         case ACB_TYPE_ASYMMETRIC: {
01534             bl_pitch_sh2 = pitch[n] << 2;
01535             break;
01536         }
01537 
01538         default: 
01539             bl_pitch_sh2 = 0;
01540             break;
01541         }
01542 
01543         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01544                     lsps, prev_lsps, &frame_descs[bd_idx],
01545                     &excitation[n * block_nsamples],
01546                     &synth[n * block_nsamples]);
01547     }
01548 
01549     
01550 
01551     if (s->do_apf) {
01552         double i_lsps[MAX_LSPS];
01553         float lpcs[MAX_LSPS];
01554 
01555         for (n = 0; n < s->lsps; n++) 
01556             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01557         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01558         postfilter(s, synth, samples, 80, lpcs,
01559                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01560                    frame_descs[bd_idx].fcb_type, pitch[0]);
01561 
01562         for (n = 0; n < s->lsps; n++) 
01563             i_lsps[n] = cos(lsps[n]);
01564         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01566                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01567                    frame_descs[bd_idx].fcb_type, pitch[0]);
01568     } else
01569         memcpy(samples, synth, 160 * sizeof(synth[0]));
01570 
01571     
01572     s->frame_cntr++;
01573     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; 
01574     s->last_acb_type = frame_descs[bd_idx].acb_type;
01575     switch (frame_descs[bd_idx].acb_type) {
01576     case ACB_TYPE_NONE:
01577         s->last_pitch_val = 0;
01578         break;
01579     case ACB_TYPE_ASYMMETRIC:
01580         s->last_pitch_val = cur_pitch_val;
01581         break;
01582     case ACB_TYPE_HAMMING:
01583         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01584         break;
01585     }
01586 
01587     return 0;
01588 }
01589 
01602 static void stabilize_lsps(double *lsps, int num)
01603 {
01604     int n, m, l;
01605 
01606     
01607 
01608 
01609     lsps[0]       = FFMAX(lsps[0],       0.0015 * M_PI);
01610     for (n = 1; n < num; n++)
01611         lsps[n]   = FFMAX(lsps[n],       lsps[n - 1] + 0.0125 * M_PI);
01612     lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01613 
01614     
01615 
01616     for (n = 1; n < num; n++) {
01617         if (lsps[n] < lsps[n - 1]) {
01618             for (m = 1; m < num; m++) {
01619                 double tmp = lsps[m];
01620                 for (l = m - 1; l >= 0; l--) {
01621                     if (lsps[l] <= tmp) break;
01622                     lsps[l + 1] = lsps[l];
01623                 }
01624                 lsps[l + 1] = tmp;
01625             }
01626             break;
01627         }
01628     }
01629 }
01630 
01640 static int check_bits_for_superframe(GetBitContext *orig_gb,
01641                                      WMAVoiceContext *s)
01642 {
01643     GetBitContext s_gb, *gb = &s_gb;
01644     int n, need_bits, bd_idx;
01645     const struct frame_type_desc *frame_desc;
01646 
01647     
01648     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01649     skip_bits_long(gb, get_bits_count(orig_gb));
01650     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01651 
01652     
01653     if (get_bits_left(gb) < 14)
01654         return 1;
01655     if (!get_bits1(gb))
01656         return -1;                        
01657     if (get_bits1(gb)) skip_bits(gb, 12); 
01658     if (s->has_residual_lsps) {           
01659         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01660             return 1;
01661         skip_bits_long(gb, s->sframe_lsp_bitsize);
01662     }
01663 
01664     
01665     for (n = 0; n < MAX_FRAMES; n++) {
01666         int aw_idx_is_ext = 0;
01667 
01668         if (!s->has_residual_lsps) {     
01669            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01670            skip_bits_long(gb, s->frame_lsp_bitsize);
01671         }
01672         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01673         if (bd_idx < 0)
01674             return -1;                   
01675         frame_desc = &frame_descs[bd_idx];
01676         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01677             if (get_bits_left(gb) < s->pitch_nbits)
01678                 return 1;
01679             skip_bits_long(gb, s->pitch_nbits);
01680         }
01681         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01682             skip_bits(gb, 8);
01683         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01684             int tmp = get_bits(gb, 6);
01685             if (tmp >= 0x36) {
01686                 skip_bits(gb, 2);
01687                 aw_idx_is_ext = 1;
01688             }
01689         }
01690 
01691         
01692         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01693             need_bits = s->block_pitch_nbits +
01694                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01695         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01696             need_bits = 2 * !aw_idx_is_ext;
01697         } else
01698             need_bits = 0;
01699         need_bits += frame_desc->frame_size;
01700         if (get_bits_left(gb) < need_bits)
01701             return 1;
01702         skip_bits_long(gb, need_bits);
01703     }
01704 
01705     return 0;
01706 }
01707 
01728 static int synth_superframe(AVCodecContext *ctx,
01729                             float *samples, int *data_size)
01730 {
01731     WMAVoiceContext *s = ctx->priv_data;
01732     GetBitContext *gb = &s->gb, s_gb;
01733     int n, res, n_samples = 480;
01734     double lsps[MAX_FRAMES][MAX_LSPS];
01735     const double *mean_lsf = s->lsps == 16 ?
01736         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01737     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01738     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01739 
01740     memcpy(synth,      s->synth_history,
01741            s->lsps             * sizeof(*synth));
01742     memcpy(excitation, s->excitation_history,
01743            s->history_nsamples * sizeof(*excitation));
01744 
01745     if (s->sframe_cache_size > 0) {
01746         gb = &s_gb;
01747         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01748         s->sframe_cache_size = 0;
01749     }
01750 
01751     if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01752 
01753     
01754 
01755 
01756 
01757     if (!get_bits1(gb)) {
01758         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01759         return -1;
01760     }
01761 
01762     
01763     if (get_bits1(gb)) {
01764         if ((n_samples = get_bits(gb, 12)) > 480) {
01765             av_log(ctx, AV_LOG_ERROR,
01766                    "Superframe encodes >480 samples (%d), not allowed\n",
01767                    n_samples);
01768             return -1;
01769         }
01770     }
01771     
01772     if (s->has_residual_lsps) {
01773         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01774 
01775         for (n = 0; n < s->lsps; n++)
01776             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01777 
01778         if (s->lsps == 10) {
01779             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01780         } else 
01781             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01782 
01783         for (n = 0; n < s->lsps; n++) {
01784             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01785             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01786             lsps[2][n] += mean_lsf[n];
01787         }
01788         for (n = 0; n < 3; n++)
01789             stabilize_lsps(lsps[n], s->lsps);
01790     }
01791 
01792     
01793     for (n = 0; n < 3; n++) {
01794         if (!s->has_residual_lsps) {
01795             int m;
01796 
01797             if (s->lsps == 10) {
01798                 dequant_lsp10i(gb, lsps[n]);
01799             } else 
01800                 dequant_lsp16i(gb, lsps[n]);
01801 
01802             for (m = 0; m < s->lsps; m++)
01803                 lsps[n][m] += mean_lsf[m];
01804             stabilize_lsps(lsps[n], s->lsps);
01805         }
01806 
01807         if ((res = synth_frame(ctx, gb, n,
01808                                &samples[n * MAX_FRAMESIZE],
01809                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01810                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01811                                &synth[s->lsps + n * MAX_FRAMESIZE])))
01812             return res;
01813     }
01814 
01815     
01816 
01817 
01818     if (get_bits1(gb)) {
01819         res = get_bits(gb, 4);
01820         skip_bits(gb, 10 * (res + 1));
01821     }
01822 
01823     
01824     *data_size = n_samples * sizeof(float);
01825 
01826     
01827     memcpy(s->prev_lsps,           lsps[2],
01828            s->lsps             * sizeof(*s->prev_lsps));
01829     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01830            s->lsps             * sizeof(*synth));
01831     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01832            s->history_nsamples * sizeof(*excitation));
01833     if (s->do_apf)
01834         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01835                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01836 
01837     return 0;
01838 }
01839 
01847 static int parse_packet_header(WMAVoiceContext *s)
01848 {
01849     GetBitContext *gb = &s->gb;
01850     unsigned int res;
01851 
01852     if (get_bits_left(gb) < 11)
01853         return 1;
01854     skip_bits(gb, 4);          
01855     s->has_residual_lsps = get_bits1(gb);
01856     do {
01857         res = get_bits(gb, 6); 
01858                                
01859         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01860             return 1;
01861     } while (res == 0x3F);
01862     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01863 
01864     return 0;
01865 }
01866 
01882 static void copy_bits(PutBitContext *pb,
01883                       const uint8_t *data, int size,
01884                       GetBitContext *gb, int nbits)
01885 {
01886     int rmn_bytes, rmn_bits;
01887 
01888     rmn_bits = rmn_bytes = get_bits_left(gb);
01889     if (rmn_bits < nbits)
01890         return;
01891     if (nbits > pb->size_in_bits - put_bits_count(pb))
01892         return;
01893     rmn_bits &= 7; rmn_bytes >>= 3;
01894     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01895         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01896     ff_copy_bits(pb, data + size - rmn_bytes,
01897                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01898 }
01899 
01911 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01912                                   int *data_size, AVPacket *avpkt)
01913 {
01914     WMAVoiceContext *s = ctx->priv_data;
01915     GetBitContext *gb = &s->gb;
01916     int size, res, pos;
01917 
01918     if (*data_size < 480 * sizeof(float)) {
01919         av_log(ctx, AV_LOG_ERROR,
01920                "Output buffer too small (%d given - %zu needed)\n",
01921                *data_size, 480 * sizeof(float));
01922         return -1;
01923     }
01924     *data_size = 0;
01925 
01926     
01927 
01928 
01929 
01930 
01931     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01932     if (!size)
01933         return 0;
01934     init_get_bits(&s->gb, avpkt->data, size << 3);
01935 
01936     
01937 
01938 
01939     if (size == ctx->block_align) { 
01940         if ((res = parse_packet_header(s)) < 0)
01941             return res;
01942 
01943         
01944 
01945 
01946         if (s->spillover_nbits > 0) {
01947             if (s->sframe_cache_size > 0) {
01948                 int cnt = get_bits_count(gb);
01949                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01950                 flush_put_bits(&s->pb);
01951                 s->sframe_cache_size += s->spillover_nbits;
01952                 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01953                     *data_size > 0) {
01954                     cnt += s->spillover_nbits;
01955                     s->skip_bits_next = cnt & 7;
01956                     return cnt >> 3;
01957                 } else
01958                     skip_bits_long (gb, s->spillover_nbits - cnt +
01959                                     get_bits_count(gb)); 
01960             } else
01961                 skip_bits_long(gb, s->spillover_nbits);  
01962         }
01963     } else if (s->skip_bits_next)
01964         skip_bits(gb, s->skip_bits_next);
01965 
01966     
01967     s->sframe_cache_size = 0;
01968     s->skip_bits_next = 0;
01969     pos = get_bits_left(gb);
01970     if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01971         return res;
01972     } else if (*data_size > 0) {
01973         int cnt = get_bits_count(gb);
01974         s->skip_bits_next = cnt & 7;
01975         return cnt >> 3;
01976     } else if ((s->sframe_cache_size = pos) > 0) {
01977         
01978         init_get_bits(gb, avpkt->data, size << 3);
01979         skip_bits_long(gb, (size << 3) - pos);
01980         assert(get_bits_left(gb) == pos);
01981 
01982         
01983         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01984         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01985         
01986         
01987     }
01988 
01989     return size;
01990 }
01991 
01992 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01993 {
01994     WMAVoiceContext *s = ctx->priv_data;
01995 
01996     if (s->do_apf) {
01997         ff_rdft_end(&s->rdft);
01998         ff_rdft_end(&s->irdft);
01999         ff_dct_end(&s->dct);
02000         ff_dct_end(&s->dst);
02001     }
02002 
02003     return 0;
02004 }
02005 
02006 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02007 {
02008     WMAVoiceContext *s = ctx->priv_data;
02009     int n;
02010 
02011     s->postfilter_agc    = 0;
02012     s->sframe_cache_size = 0;
02013     s->skip_bits_next    = 0;
02014     for (n = 0; n < s->lsps; n++)
02015         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02016     memset(s->excitation_history, 0,
02017            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02018     memset(s->synth_history,      0,
02019            sizeof(*s->synth_history)      * MAX_LSPS);
02020     memset(s->gain_pred_err,      0,
02021            sizeof(s->gain_pred_err));
02022 
02023     if (s->do_apf) {
02024         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02025                sizeof(*s->synth_filter_out_buf) * s->lsps);
02026         memset(s->dcf_mem,              0,
02027                sizeof(*s->dcf_mem)              * 2);
02028         memset(s->zero_exc_pf,          0,
02029                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02030         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02031     }
02032 }
02033 
02034 AVCodec ff_wmavoice_decoder = {
02035     "wmavoice",
02036     AVMEDIA_TYPE_AUDIO,
02037     CODEC_ID_WMAVOICE,
02038     sizeof(WMAVoiceContext),
02039     wmavoice_decode_init,
02040     NULL,
02041     wmavoice_decode_end,
02042     wmavoice_decode_packet,
02043     CODEC_CAP_SUBFRAMES,
02044     .flush     = wmavoice_flush,
02045     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02046 };