00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "avfft.h"
00040 #include "fft.h"
00041
/* Size limits and shared tables used throughout the decoder. */
00042 #define MAX_BLOCKS 8 /* largest n_blocks value found in frame_descs[] */
00043 #define MAX_LSPS 16 /* largest LSP count; the decoder uses 10 or 16 */
00044 #define MAX_LSPS_ALIGN16 16 /* history slots kept in front of the postfilter output buffer */
00045
00046 #define MAX_FRAMES 3 /* only used to derive MAX_SFRAMESIZE here; presumably frames per superframe */
00047 #define MAX_FRAMESIZE 160 /* samples per frame; a block is MAX_FRAMESIZE / n_blocks samples */
00048 #define MAX_SIGNAL_HISTORY 416 /* upper bound enforced on history_nsamples at init */
00049 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
00051 #define SFRAME_CACHE_MAXSIZE 256 /* payload bytes of WMAVoiceContext.sframe_cache */
00052
00053 #define VLC_NBITS 6 /* table size (in bits) passed to INIT_VLC_STATIC for frame_type_vlc */
00054
00055
/* Frame type VLC; built once in decode_vbmtree() and read in synth_frame(). */
00058 static VLC frame_type_vlc;
00059
/* Adaptive codebook types (frame_type_desc.acb_type). */
00063 enum {
00064 ACB_TYPE_NONE = 0, /* no adaptive codebook: synth_block() uses synth_block_hardcoded() */
00065 ACB_TYPE_ASYMMETRIC = 1, /* pitch is interpolated per sample using wmavoice_ipol1_coeffs */
00066
00067
00068
00069
00070 ACB_TYPE_HAMMING = 2 /* one pitch per block (block_pitch_sh2), wmavoice_ipol2_coeffs */
00071
00072
00073 };
00074
/* Fixed codebook types (frame_type_desc.fcb_type). */
00078 enum {
00079 FCB_TYPE_SILENCE = 0, /* codebook index from pRNG(), gain from s->silence_gain */
00080
00081
00082 FCB_TYPE_HARDCODED = 1, /* codebook index and gain are read from the bitstream */
00083
00084 FCB_TYPE_AW_PULSES = 2, /* pulses come from aw_pulse_set1() and aw_pulse_set2() */
00085
00086 FCB_TYPE_EXC_PULSES = 3, /* five (optionally doubled) signed pulses read directly */
00087
00088
00089 };
00090
/*
 * Per-frame-type parameters. synth_frame() selects an entry through
 * vbm_tree[] and the frame type VLC.
 */
00094 static const struct frame_type_desc {
00095 uint8_t n_blocks; /* blocks per frame; block size is MAX_FRAMESIZE / n_blocks */
00096
00097 uint8_t log_n_blocks; /* log2(n_blocks), used as a shift count */
00098 uint8_t acb_type; /* one of ACB_TYPE_* */
00099 uint8_t fcb_type; /* one of FCB_TYPE_* */
00100 uint8_t dbl_pulses; /* for FCB_TYPE_EXC_PULSES: how many of the 5 tracks carry a second pulse */
00101
00102
00103 uint16_t frame_size; /* not read in this part of the file; presumably the coded frame size in bits */
00104
00105 } frame_descs[17] = {
00106 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00107 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00108 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00109 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00110 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00111 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00112 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00113 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00114 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00115 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00116 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00117 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00118 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00119 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00120 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00121 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00122 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00123 };
00124
/*
 * Decoder state. Fields whose use is not visible in this part of the file
 * are marked as such instead of being described by guesswork.
 */
00128 typedef struct {
00135 GetBitContext gb; /* bit reader; wmavoice_decode_init() uses it to parse the VBM tree */
00136
00137
00138
00139 int8_t vbm_tree[25]; /* maps a frame type VLC code to a frame_descs[] index, -1 = unused */
00140
00141 int spillover_bitsize; /* 3 + ceil(log2(block_align)), set at init */
00142
00143
00144 int history_nsamples; /* max_pitch_val + 8, must not exceed MAX_SIGNAL_HISTORY */
00145
00146
00147
00148 int do_apf; /* extradata flag bit 0; when set, the FFT/DCT contexts and sin/cos tables are built */
00149
00150 int denoise_strength; /* extradata flag bits 2-5 (0..11); row of wmavoice_denoise_power_table */
00151
00152 int denoise_tilt_corr; /* extradata flag 0x40; enables tilt compensation in calc_input_response() */
00153
00154 int dc_level; /* extradata flag bits 7-10; postfilter() applies its 2nd-order filter when > 8 */
00155
00156
00157 int lsps; /* number of LSPs: 16 if extradata flag 0x1000 is set, else 10 */
00158 int lsp_q_mode; /* extradata flag 0x2000 */
00159 int lsp_def_mode; /* extradata flag 0x4000 */
00160
00161 int frame_lsp_bitsize; /* 24 (10 LSPs) or 34 (16 LSPs) */
00162
00163 int sframe_lsp_bitsize; /* 48 (10 LSPs) or 60 (16 LSPs) */
00164
00165
00166 int min_pitch_val; /* derived from the sample rate at init */
00167 int max_pitch_val; /* derived from the sample rate at init */
00168 int pitch_nbits; /* ceil(log2(max_pitch_val - min_pitch_val)); bits read for a frame pitch */
00169
00170 int block_pitch_nbits; /* ceil(log2(block_pitch_range)) */
00171
00172 int block_pitch_range; /* computed at init from block_conv_table[] and min_pitch_val */
00173 int block_delta_pitch_nbits; /* 1 + ceil(log2(block_delta_pitch_hrange)) */
00174
00175
00176
00177 int block_delta_pitch_hrange; /* (pitch_range >> 3) rounded down to a multiple of 16 */
00178
00179 uint16_t block_conv_table[4]; /* pitch conversion boundaries filled in at init */
00180
00181
00190 int spillover_nbits; /* not referenced in this part of the file */
00191
00192
00193
00194 int has_residual_lsps; /* not referenced in this part of the file */
00195
00196
00197
00198
00199 int skip_bits_next; /* not referenced in this part of the file */
00200
00201
00202
00203 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00206 int sframe_cache_size; /* not referenced in this part of the file */
00207
00208
00209
00210
00211 PutBitContext pb; /* not referenced in this part of the file */
00212
00221 double prev_lsps[MAX_LSPS]; /* initialised at init to evenly spaced values in (0, pi) */
00222
00223 int last_pitch_val; /* pitch of the previous frame; starts at 40 */
00224 int last_acb_type; /* ACB type of the previous frame; starts at ACB_TYPE_NONE */
00225 int pitch_diff_sh16; /* per-sample pitch change in 16.16 fixed point (see synth_block_fcb_acb()) */
00226
00227 float silence_gain; /* gain applied to FCB_TYPE_SILENCE blocks */
00228
00229 int aw_idx_is_ext; /* set by aw_parse_coords() when the 2 extension bits were read */
00230
00231 int aw_pulse_range; /* 16 or 24, chosen by aw_parse_coords() from the block pitches */
00232
00233
00234
00235
00236
00237 int aw_n_pulses[2]; /* per-block pulse counts computed by aw_parse_coords() */
00238
00239
00240 int aw_first_pulse_off[2]; /* per-block first pulse offset; may be negative */
00241
00242 int aw_next_pulse_off_cache; /* written at the end of aw_pulse_set2(), read by it for block 1 */
00243
00244
00245
00246
00247
00248 int frame_cntr; /* seed input of pRNG() */
00249
00250 float gain_pred_err[6]; /* recent clipped FCB gain values, newest first */
00251 float excitation_history[MAX_SIGNAL_HISTORY];
00255 float synth_history[MAX_LSPS];
00256
00263 RDFTContext rdft, irdft; /* forward / inverse real FFT, initialised with nbits = 7 */
00264
00265 DCTContext dct, dst; /* DCT_I and DST_I contexts, initialised with nbits = 6 */
00266
00267 float sin[511], cos[511]; /* lookup tables built at init; indexed as 255 + clipped value */
00268
00269 float postfilter_agc; /* gain memory carried between adaptive_gain_control() calls */
00270
00271 float dcf_mem[2]; /* state of the 2nd-order filter applied in postfilter() */
00272 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
00275 float denoise_filter_cache[MAX_FRAMESIZE]; /* filter tail kept by wiener_denoise() for the next block */
00276 int denoise_filter_cache_size; /* number of valid samples in denoise_filter_cache */
00277 DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80]; /* scratch buffer used by wiener_denoise() */
00279 DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80]; /* scratch buffer used by wiener_denoise() */
00281 DECLARE_ALIGNED(16, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; /* postfilter output; first MAX_LSPS_ALIGN16 slots hold filter history */
00284
00287 } WMAVoiceContext;
00288
00298 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00299 {
00300 static const uint8_t bits[] = {
00301 2, 2, 2, 4, 4, 4,
00302 6, 6, 6, 8, 8, 8,
00303 10, 10, 10, 12, 12, 12,
00304 14, 14, 14, 14
00305 };
00306 static const uint16_t codes[] = {
00307 0x0000, 0x0001, 0x0002,
00308 0x000c, 0x000d, 0x000e,
00309 0x003c, 0x003d, 0x003e,
00310 0x00fc, 0x00fd, 0x00fe,
00311 0x03fc, 0x03fd, 0x03fe,
00312 0x0ffc, 0x0ffd, 0x0ffe,
00313 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00314 };
00315 int cntr[8], n, res;
00316
00317 memset(vbm_tree, 0xff, sizeof(vbm_tree));
00318 memset(cntr, 0, sizeof(cntr));
00319 for (n = 0; n < 17; n++) {
00320 res = get_bits(gb, 3);
00321 if (cntr[res] > 3)
00322 return -1;
00323 vbm_tree[res * 3 + cntr[res]++] = n;
00324 }
00325 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00326 bits, 1, 1, codes, 2, 2, 132);
00327 return 0;
00328 }
00329
00333 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00334 {
00335 int n, flags, pitch_range, lsp16_flag;
00336 WMAVoiceContext *s = ctx->priv_data;
00337
00346 if (ctx->extradata_size != 46) {
00347 av_log(ctx, AV_LOG_ERROR,
00348 "Invalid extradata size %d (should be 46)\n",
00349 ctx->extradata_size);
00350 return -1;
00351 }
00352 flags = AV_RL32(ctx->extradata + 18);
00353 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00354 s->do_apf = flags & 0x1;
00355 if (s->do_apf) {
00356 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00357 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00358 ff_dct_init(&s->dct, 6, DCT_I);
00359 ff_dct_init(&s->dst, 6, DST_I);
00360
00361 ff_sine_window_init(s->cos, 256);
00362 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00363 for (n = 0; n < 255; n++) {
00364 s->sin[n] = -s->sin[510 - n];
00365 s->cos[510 - n] = s->cos[n];
00366 }
00367 }
00368 s->denoise_strength = (flags >> 2) & 0xF;
00369 if (s->denoise_strength >= 12) {
00370 av_log(ctx, AV_LOG_ERROR,
00371 "Invalid denoise filter strength %d (max=11)\n",
00372 s->denoise_strength);
00373 return -1;
00374 }
00375 s->denoise_tilt_corr = !!(flags & 0x40);
00376 s->dc_level = (flags >> 7) & 0xF;
00377 s->lsp_q_mode = !!(flags & 0x2000);
00378 s->lsp_def_mode = !!(flags & 0x4000);
00379 lsp16_flag = flags & 0x1000;
00380 if (lsp16_flag) {
00381 s->lsps = 16;
00382 s->frame_lsp_bitsize = 34;
00383 s->sframe_lsp_bitsize = 60;
00384 } else {
00385 s->lsps = 10;
00386 s->frame_lsp_bitsize = 24;
00387 s->sframe_lsp_bitsize = 48;
00388 }
00389 for (n = 0; n < s->lsps; n++)
00390 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00391
00392 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00393 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00394 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00395 return -1;
00396 }
00397
00398 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00399 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00400 pitch_range = s->max_pitch_val - s->min_pitch_val;
00401 s->pitch_nbits = av_ceil_log2(pitch_range);
00402 s->last_pitch_val = 40;
00403 s->last_acb_type = ACB_TYPE_NONE;
00404 s->history_nsamples = s->max_pitch_val + 8;
00405
00406 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00407 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00408 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00409
00410 av_log(ctx, AV_LOG_ERROR,
00411 "Unsupported samplerate %d (min=%d, max=%d)\n",
00412 ctx->sample_rate, min_sr, max_sr);
00413
00414 return -1;
00415 }
00416
00417 s->block_conv_table[0] = s->min_pitch_val;
00418 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00419 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00420 s->block_conv_table[3] = s->max_pitch_val - 1;
00421 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00422 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00423 s->block_pitch_range = s->block_conv_table[2] +
00424 s->block_conv_table[3] + 1 +
00425 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00426 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00427
00428 ctx->sample_fmt = SAMPLE_FMT_FLT;
00429
00430 return 0;
00431 }
00432
/**
 * Scale the postfiltered signal so that its level follows the level of the
 * unfiltered synthesis, using a first-order smoothed gain.
 *
 * The target gain is (1 - alpha) * sum|speech_synth| / sum|in|; the running
 * gain is updated per sample as mem = alpha * mem + target.
 *
 * @param out          output samples (may be written for all @p size entries)
 * @param in           postfiltered input samples
 * @param speech_synth reference (unfiltered) samples
 * @param size         number of samples in each buffer
 * @param alpha        smoothing factor of the gain recursion
 * @param gain_mem     running gain; read on entry and updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }
    /* An all-zero input block would give 0/0 (NaN) or x/0 (infinity) here,
     * and that value would then be stored in *gain_mem and poison every
     * following block. Use a zero target gain instead. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00475
00494 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00495 const float *in, float *out, int size)
00496 {
00497 int n;
00498 float optimal_gain = 0, dot;
00499 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00500 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00501 *best_hist_ptr;
00502
00503
00504 do {
00505 dot = ff_dot_productf(in, ptr, size);
00506 if (dot > optimal_gain) {
00507 optimal_gain = dot;
00508 best_hist_ptr = ptr;
00509 }
00510 } while (--ptr >= end);
00511
00512 if (optimal_gain <= 0)
00513 return -1;
00514 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00515 if (dot <= 0)
00516 return -1;
00517
00518 if (optimal_gain <= dot) {
00519 dot = dot / (dot + 0.6 * optimal_gain);
00520 } else
00521 dot = 0.625;
00522
00523
00524 for (n = 0; n < size; n++)
00525 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00526
00527 return 0;
00528 }
00529
/**
 * Compute the ratio between the lag-1 and lag-0 autocorrelation of the
 * sequence {1, lpcs[0], ..., lpcs[n_lpcs - 1]}.
 *
 * @param lpcs   coefficient array
 * @param n_lpcs number of coefficients in @p lpcs
 * @return lag-1 autocorrelation divided by lag-0 autocorrelation
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, lpcs + 1, n_lpcs - 1);

    return lag1 / lag0;
}
00549
/*
 * Derive the time-domain denoise filter coefficients from the (tilted) LPCs.
 *
 * lpcs is a 128-float work buffer that is transformed in place; coeffs is a
 * 128-float output buffer. After the call, coeffs[0..remainder-1] hold the
 * normalised filter and coeffs[remainder..127] are zero.
 * Requires the transform contexts and sin/cos tables set up when do_apf is
 * enabled.
 */
00553 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00554 int fcb_type, float *coeffs, int remainder)
00555 {
00556 float last_coeff, min = 15.0, max = -15.0;
00557 float irange, angle_mul, gain_mul, range, sq;
00558 int n, idx;
00559
00560
/* Forward real FFT in place, then replace each bin by log10 of its power
 * while tracking the smallest and largest value. lpcs[1] is converted first
 * and parked in last_coeff because lpcs[1] is overwritten by bin 1 below;
 * it ends up in lpcs[64]. */
00561 ff_rdft_calc(&s->rdft, lpcs);
00562 #define log_range(var, assign) do { \
00563 float tmp = log10f(assign); var = tmp; \
00564 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00565 } while (0)
00566 log_range(last_coeff, lpcs[1] * lpcs[1]);
00567 for (n = 1; n < 64; n++)
00568 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00569 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00570 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00571 #undef log_range
00572 range = max - min;
00573 lpcs[64] = last_coeff;
00574
00575
00576
00577
00578
00579
/* Map every log-power value onto the denoise power table (row selected by
 * denoise_strength) and turn the result into a magnitude via the energy
 * table; indices above 127 are extrapolated with powf(). */
00580 irange = 64.0 / range;
00581 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00582 (5.0 / 14.7));
00583 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00584 for (n = 0; n <= 64; n++) {
00585 float pow;
00586
00587 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00588 pow = wmavoice_denoise_power_table[s->denoise_strength][idx];
00589 lpcs[n] = angle_mul * pow;
00590
00591
00592 idx = (pow * gain_mul - 0.0295) * 70.570526123;
00593 if (idx > 127) {
00594 coeffs[n] = wmavoice_energy_table[127] *
00595 powf(1.0331663, idx - 127);
00596 } else
00597 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00598 }
00599
00600
00601
00602
00603
/* Both transforms run on the same buffer, one after the other. */
00604 ff_dct_calc(&s->dct, lpcs);
00605 ff_dct_calc(&s->dst, lpcs);
00606
00607
/* Build complex coefficients from the magnitudes in coeffs[] and table
 * lookups indexed by the (clipped) transformed lpcs values. The loop walks
 * n from 63 down to 1 and handles two bins per pass: n is decremented once
 * by the break test and once by the for statement. The value for bin 64 is
 * kept in last_coeff and stored in coeffs[1] afterwards. */
00608 idx = 255 + av_clip(lpcs[64], -255, 255);
00609 coeffs[0] = coeffs[0] * s->cos[idx];
00610 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00611 last_coeff = coeffs[64] * s->cos[idx];
00612 for (n = 63;; n--) {
00613 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00614 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00615 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00616
00617 if (!--n) break;
00618
00619 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00620 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00621 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00622 }
00623 coeffs[1] = last_coeff;
00624
00625
/* Back to the time domain. */
00626 ff_rdft_calc(&s->irdft, coeffs);
00627
00628
/* Keep only the first `remainder` taps, optionally tilt-compensate them,
 * and scale so that the sum of squares becomes (1/64)^2. */
00629 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00630 if (s->denoise_tilt_corr) {
00631 float tilt_mem = 0;
00632
00633 coeffs[remainder - 1] = 0;
00634 ff_tilt_compensation(&tilt_mem,
00635 -1.8 * tilt_factor(coeffs, remainder - 1),
00636 coeffs, remainder);
00637 }
00638 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00639 for (n = 0; n < remainder; n++)
00640 coeffs[n] *= sq;
00641 }
00642
00669 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00670 float *synth_pf, int size,
00671 const float *lpcs)
00672 {
00673 int remainder, lim, n;
00674
00675 if (fcb_type != FCB_TYPE_SILENCE) {
00676 float *tilted_lpcs = s->tilted_lpcs_pf,
00677 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00678
00679 tilted_lpcs[0] = 1.0;
00680 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00681 memset(&tilted_lpcs[s->lsps + 1], 0,
00682 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00683 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00684 tilted_lpcs, s->lsps + 2);
00685
00686
00687
00688
00689
00690 remainder = FFMIN(127 - size, size - 1);
00691 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00692
00693
00694
00695 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00696 ff_rdft_calc(&s->rdft, synth_pf);
00697 ff_rdft_calc(&s->rdft, coeffs);
00698 synth_pf[0] *= coeffs[0];
00699 synth_pf[1] *= coeffs[1];
00700 for (n = 1; n < 64; n++) {
00701 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00702 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00703 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00704 }
00705 ff_rdft_calc(&s->irdft, synth_pf);
00706 }
00707
00708
00709 if (s->denoise_filter_cache_size) {
00710 lim = FFMIN(s->denoise_filter_cache_size, size);
00711 for (n = 0; n < lim; n++)
00712 synth_pf[n] += s->denoise_filter_cache[n];
00713 s->denoise_filter_cache_size -= lim;
00714 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00715 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00716 }
00717
00718
00719 if (fcb_type != FCB_TYPE_SILENCE) {
00720 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00721 for (n = 0; n < lim; n++)
00722 s->denoise_filter_cache[n] += synth_pf[size + n];
00723 if (lim < remainder) {
00724 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00725 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00726 s->denoise_filter_cache_size = remainder;
00727 }
00728 }
00729 }
00730
00750 static void postfilter(WMAVoiceContext *s, const float *synth,
00751 float *samples, int size,
00752 const float *lpcs, float *zero_exc_pf,
00753 int fcb_type, int pitch)
00754 {
00755 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00756 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00757 *synth_filter_in = zero_exc_pf;
00758
00759 assert(size <= MAX_FRAMESIZE / 2);
00760
00761
00762 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00763
00764 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00765 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00766 synth_filter_in = synth_filter_in_buf;
00767
00768
00769 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00770 synth_filter_in, size, s->lsps);
00771 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00772 sizeof(synth_pf[0]) * s->lsps);
00773
00774 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00775
00776 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00777 &s->postfilter_agc);
00778
00779 if (s->dc_level > 8) {
00780
00781
00782
00783 ff_acelp_apply_order_2_transfer_function(samples, samples,
00784 (const float[2]) { -1.99997, 1.0 },
00785 (const float[2]) { -1.9330735188, 0.93589198496 },
00786 0.93980580475, s->dcf_mem, size);
00787 }
00788 }
/**
 * Multi-stage vector dequantiser.
 *
 * The result is the sum over all stages of
 * base_q[stage] + mul_q[stage] * entry, where entry is taken from the
 * codebook vector selected by values[stage]. The codebooks of the stages are
 * stored back to back in @p table; stage i holds sizes[i] vectors of
 * @p num bytes each.
 *
 * @param lsps     output, @p num values
 * @param num      vector dimension
 * @param values   selected vector index per stage
 * @param sizes    number of vectors per stage
 * @param n_stages number of stages
 * @param table    concatenated codebooks
 * @param mul_q    per-stage scale
 * @param base_q   per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;
    int stage = 0;

    memset(lsps, 0, num * sizeof(*lsps));

    while (stage < n_stages) {
        const uint8_t *entry = codebook + values[stage] * num;
        const double offset  = base_q[stage];
        const double scale   = mul_q[stage];
        int k;

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * entry[k];

        /* Skip to the codebook of the next stage. */
        codebook += sizes[stage] * num;
        stage++;
    }
}
00824
00836 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00837 {
00838 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00839 static const double mul_lsf[4] = {
00840 5.2187144800e-3, 1.4626986422e-3,
00841 9.6179549166e-4, 1.1325736225e-3
00842 };
00843 static const double base_lsf[4] = {
00844 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00845 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00846 };
00847 uint16_t v[4];
00848
00849 v[0] = get_bits(gb, 8);
00850 v[1] = get_bits(gb, 6);
00851 v[2] = get_bits(gb, 5);
00852 v[3] = get_bits(gb, 5);
00853
00854 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00855 mul_lsf, base_lsf);
00856 }
00857
00862 static void dequant_lsp10r(GetBitContext *gb,
00863 double *i_lsps, const double *old,
00864 double *a1, double *a2, int q_mode)
00865 {
00866 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00867 static const double mul_lsf[3] = {
00868 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00869 };
00870 static const double base_lsf[3] = {
00871 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00872 };
00873 const float (*ipol_tab)[2][10] = q_mode ?
00874 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00875 uint16_t interpol, v[3];
00876 int n;
00877
00878 dequant_lsp10i(gb, i_lsps);
00879
00880 interpol = get_bits(gb, 5);
00881 v[0] = get_bits(gb, 7);
00882 v[1] = get_bits(gb, 6);
00883 v[2] = get_bits(gb, 6);
00884
00885 for (n = 0; n < 10; n++) {
00886 double delta = old[n] - i_lsps[n];
00887 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00888 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00889 }
00890
00891 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00892 mul_lsf, base_lsf);
00893 }
00894
00898 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00899 {
00900 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00901 static const double mul_lsf[5] = {
00902 3.3439586280e-3, 6.9908173703e-4,
00903 3.3216608306e-3, 1.0334960326e-3,
00904 3.1899104283e-3
00905 };
00906 static const double base_lsf[5] = {
00907 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00908 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00909 M_PI * -1.29816e-1
00910 };
00911 uint16_t v[5];
00912
00913 v[0] = get_bits(gb, 8);
00914 v[1] = get_bits(gb, 6);
00915 v[2] = get_bits(gb, 7);
00916 v[3] = get_bits(gb, 6);
00917 v[4] = get_bits(gb, 7);
00918
00919 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00920 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00921 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00922 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00923 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00924 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00925 }
00926
00931 static void dequant_lsp16r(GetBitContext *gb,
00932 double *i_lsps, const double *old,
00933 double *a1, double *a2, int q_mode)
00934 {
00935 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00936 static const double mul_lsf[3] = {
00937 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00938 };
00939 static const double base_lsf[3] = {
00940 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00941 };
00942 const float (*ipol_tab)[2][16] = q_mode ?
00943 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00944 uint16_t interpol, v[3];
00945 int n;
00946
00947 dequant_lsp16i(gb, i_lsps);
00948
00949 interpol = get_bits(gb, 5);
00950 v[0] = get_bits(gb, 7);
00951 v[1] = get_bits(gb, 7);
00952 v[2] = get_bits(gb, 7);
00953
00954 for (n = 0; n < 16; n++) {
00955 double delta = old[n] - i_lsps[n];
00956 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00957 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00958 }
00959
00960 dequant_lsps( a2, 10, v, vec_sizes, 1,
00961 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00962 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00963 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00964 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00965 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00966 }
00967
00981 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00982 const int *pitch)
00983 {
00984 static const int16_t start_offset[94] = {
00985 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
00986 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
00987 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
00988 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
00989 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
00990 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
00991 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
00992 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
00993 };
00994 int bits, offset;
00995
00996
00997 s->aw_idx_is_ext = 0;
00998 if ((bits = get_bits(gb, 6)) >= 54) {
00999 s->aw_idx_is_ext = 1;
01000 bits += (bits - 54) * 3 + get_bits(gb, 2);
01001 }
01002
01003
01004
01005 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01006 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01007 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01008 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01009 offset += s->aw_n_pulses[0] * pitch[0];
01010 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01011 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01012
01013
01014
01015
01016 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01017 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01018 s->aw_first_pulse_off[1] -= pitch[1];
01019 if (start_offset[bits] < 0)
01020 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01021 s->aw_first_pulse_off[0] -= pitch[0];
01022 }
01023 }
01024
01032 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01033 int block_idx, AMRFixed *fcb)
01034 {
01035 uint16_t use_mask[7];
01036
01037
01038
01039
01040
01041
01042
01043 int pulse_off = s->aw_first_pulse_off[block_idx],
01044 pulse_start, n, idx, range, aidx, start_off = 0;
01045
01046
01047 if (s->aw_n_pulses[block_idx] > 0)
01048 while (pulse_off + s->aw_pulse_range < 1)
01049 pulse_off += fcb->pitch_lag;
01050
01051
01052 if (s->aw_n_pulses[0] > 0) {
01053 if (block_idx == 0) {
01054 range = 32;
01055 } else {
01056 range = 8;
01057 if (s->aw_n_pulses[block_idx] > 0)
01058 pulse_off = s->aw_next_pulse_off_cache;
01059 }
01060 } else
01061 range = 16;
01062 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01063
01064
01065
01066
01067 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01068 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01069 if (s->aw_n_pulses[block_idx] > 0)
01070 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01071 int excl_range = s->aw_pulse_range;
01072 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01073 int first_sh = 16 - (idx & 15);
01074 *use_mask_ptr++ &= 0xFFFF << first_sh;
01075 excl_range -= first_sh;
01076 if (excl_range >= 16) {
01077 *use_mask_ptr++ = 0;
01078 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01079 } else
01080 *use_mask_ptr &= 0xFFFF >> excl_range;
01081 }
01082
01083
01084 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01085 for (n = 0; n <= aidx; pulse_start++) {
01086 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01087 if (idx >= MAX_FRAMESIZE / 2) {
01088 if (use_mask[0]) idx = 0x0F;
01089 else if (use_mask[1]) idx = 0x1F;
01090 else if (use_mask[2]) idx = 0x2F;
01091 else if (use_mask[3]) idx = 0x3F;
01092 else if (use_mask[4]) idx = 0x4F;
01093 else return;
01094 idx -= av_log2_16bit(use_mask[idx >> 4]);
01095 }
01096 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01097 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01098 n++;
01099 start_off = idx;
01100 }
01101 }
01102
01103 fcb->x[fcb->n] = start_off;
01104 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01105 fcb->n++;
01106
01107
01108 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01109 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01110 }
01111
01119 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01120 int block_idx, AMRFixed *fcb)
01121 {
01122 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01123 float v;
01124
01125 if (s->aw_n_pulses[block_idx] > 0) {
01126 int n, v_mask, i_mask, sh, n_pulses;
01127
01128 if (s->aw_pulse_range == 24) {
01129 n_pulses = 3;
01130 v_mask = 8;
01131 i_mask = 7;
01132 sh = 4;
01133 } else {
01134 n_pulses = 4;
01135 v_mask = 4;
01136 i_mask = 3;
01137 sh = 3;
01138 }
01139
01140 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01141 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01142 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01143 s->aw_first_pulse_off[block_idx];
01144 while (fcb->x[fcb->n] < 0)
01145 fcb->x[fcb->n] += fcb->pitch_lag;
01146 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01147 fcb->n++;
01148 }
01149 } else {
01150 int num2 = (val & 0x1FF) >> 1, delta, idx;
01151
01152 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01153 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01154 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01155 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01156 v = (val & 0x200) ? -1.0 : 1.0;
01157
01158 fcb->no_repeat_mask |= 3 << fcb->n;
01159 fcb->x[fcb->n] = idx - delta;
01160 fcb->y[fcb->n] = v;
01161 fcb->x[fcb->n + 1] = idx;
01162 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01163 fcb->n += 2;
01164 }
01165 }
01166
/**
 * Deterministic pseudo-random index generator.
 *
 * Combines the frame counter and block number into a seed and returns a
 * value in the range [0, 1000 - block_size).
 *
 * @param frame_cntr current frame counter
 * @param block_num  block number inside the frame
 * @param block_size block size in samples
 * @return pseudo-random value smaller than 1000 - @p block_size
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* One row per residue class of the seed; the row index computed below
     * is intended to be seed % 9 (multiply-high by roughly 2^32 / 9). */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // x1 = 2.32
        { 4545,  0 * 390451573U }, // x2 = 0.35
        { 3124, 11 * 268435456U }, // x3 = 0.57
        { 2380, 15 * 204522253U }, // x4 = 0.88
        { 1922, 23 * 165191050U }, // x5 = 0.69
        { 1612, 23 * 138547333U }, // x6 = 0.73
        { 1388, 27 * 119304648U }, // x7 = 0.28
        { 1219, 16 * 104755300U }, // x8 = 0.76
        { 1086, 39 *  93368855U }  // x9 = 0.06
    };
    unsigned int seed, row, mixed;

    seed = MUL16(block_num, 1877) + frame_cntr;
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    row   = seed - 9 * MULH(477218589, seed);
    mixed = (uint16_t) (seed * div_tbl[row][0] + UMULH(seed, div_tbl[row][1]));

    return mixed % (1000 - block_size);
}
01211
01216 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01217 int block_idx, int size,
01218 const struct frame_type_desc *frame_desc,
01219 float *excitation)
01220 {
01221 float gain;
01222 int n, r_idx;
01223
01224 assert(size <= MAX_FRAMESIZE);
01225
01226
01227 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01228 r_idx = pRNG(s->frame_cntr, block_idx, size);
01229 gain = s->silence_gain;
01230 } else {
01231 r_idx = get_bits(gb, 8);
01232 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01233 }
01234
01235
01236 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01237
01238
01239 for (n = 0; n < size; n++)
01240 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01241 }
01242
/*
 * Generate the excitation of a block that uses both a fixed codebook (pulses)
 * and an adaptive codebook (pitch-delayed copy of earlier excitation).
 *
 * block_pitch_sh2 is the block pitch with 2 fractional bits. excitation must
 * be preceded by enough history samples, since the adaptive codebook reads
 * from negative indices. size must not exceed MAX_FRAMESIZE / 2.
 */
01247 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01248 int block_idx, int size,
01249 int block_pitch_sh2,
01250 const struct frame_type_desc *frame_desc,
01251 float *excitation)
01252 {
01253 static const float gain_coeff[6] = {
01254 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01255 };
01256 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01257 int n, idx, gain_weight;
01258 AMRFixed fcb;
01259
01260 assert(size <= MAX_FRAMESIZE / 2);
01261 memset(pulses, 0, sizeof(*pulses) * size);
01262
01263 fcb.pitch_lag = block_pitch_sh2 >> 2;
01264 fcb.pitch_fac = 1.0;
01265 fcb.no_repeat_mask = 0;
01266 fcb.n = 0;
01267
01268
01269
/* Fixed codebook: either the two AW pulse sets, or five tracks with one
 * sign bit and one position each, plus a second position on the first
 * dbl_pulses tracks. */
01270 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01271 aw_pulse_set1(s, gb, block_idx, &fcb);
01272 aw_pulse_set2(s, gb, block_idx, &fcb);
01273 } else {
01274 int offset_nbits = 5 - frame_desc->log_n_blocks;
01275
01276 fcb.no_repeat_mask = -1;
01277
01278
01279 for (n = 0; n < 5; n++) {
01280 float sign;
01281 int pos1, pos2;
01282
01283 sign = get_bits1(gb) ? 1.0 : -1.0;
01284 pos1 = get_bits(gb, offset_nbits);
01285 fcb.x[fcb.n] = n + 5 * pos1;
01286 fcb.y[fcb.n++] = sign;
01287 if (n < frame_desc->dbl_pulses) {
01288 pos2 = get_bits(gb, offset_nbits);
01289 fcb.x[fcb.n] = n + 5 * pos2;
01290 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01291 }
01292 }
01293 }
01294 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01295
01296
01297
/* Gains: one 7-bit index selects both the adaptive and the fixed codebook
 * gain. The fixed gain is predicted from the history in gain_pred_err and
 * applied in the log domain. */
01298 idx = get_bits(gb, 7);
01299 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01300 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01301 acb_gain = wmavoice_gain_codebook_acb[idx];
01302 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01303 -2.9957322736 ,
01304 1.6094379124 );
01305
/* Shift the prediction history and insert the new value gain_weight times
 * (4, 2 or 1 entries for 2, 4 or 8 blocks per frame). */
01306 gain_weight = 8 >> frame_desc->log_n_blocks;
01307 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01308 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01309 for (n = 0; n < gain_weight; n++)
01310 s->gain_pred_err[n] = pred_err;
01311
01312
/* Adaptive codebook. In the asymmetric case the pitch changes linearly over
 * the frame (pitch_diff_sh16 per sample, 16.16 fixed point) and the block is
 * processed in runs of len samples that share the same integer pitch and
 * interpolation phase. Otherwise one fractional pitch applies to the whole
 * block; a whole-sample pitch degenerates to a plain back-reference copy. */
01313 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01314 int len;
01315 for (n = 0; n < size; n += len) {
01316 int next_idx_sh16;
01317 int abs_idx = block_idx * size + n;
01318 int pitch_sh16 = (s->last_pitch_val << 16) +
01319 s->pitch_diff_sh16 * abs_idx;
01320 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01321 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01322 idx = idx_sh16 >> 16;
01323 if (s->pitch_diff_sh16) {
01324 if (s->pitch_diff_sh16 > 0) {
01325 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01326 } else
01327 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01328 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01329 1, size - n);
01330 } else
01331 len = size;
01332
01333 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01334 wmavoice_ipol1_coeffs, 17,
01335 idx, 9, len);
01336 }
01337 } else {
01338 int block_pitch = block_pitch_sh2 >> 2;
01339 idx = block_pitch_sh2 & 3;
01340 if (idx) {
01341 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01342 wmavoice_ipol2_coeffs, 4,
01343 idx, 8, size);
01344 } else
01345 av_memcpy_backptr(excitation, sizeof(float) * block_pitch,
01346 sizeof(float) * size);
01347 }
01348
01349
/* excitation = acb_gain * adaptive part + fcb_gain * pulses */
01350 ff_weighted_vector_sumf(excitation, excitation, pulses,
01351 acb_gain, fcb_gain, size);
01352 }
01353
01370 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01371 int block_idx, int size,
01372 int block_pitch_sh2,
01373 const double *lsps, const double *prev_lsps,
01374 const struct frame_type_desc *frame_desc,
01375 float *excitation, float *synth)
01376 {
01377 double i_lsps[MAX_LSPS];
01378 float lpcs[MAX_LSPS];
01379 float fac;
01380 int n;
01381
01382 if (frame_desc->acb_type == ACB_TYPE_NONE)
01383 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01384 else
01385 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01386 frame_desc, excitation);
01387
01388
01389 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01390 for (n = 0; n < s->lsps; n++)
01391 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01392 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01393
01394
01395 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01396 }
01397
01413 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01414 float *samples,
01415 const double *lsps, const double *prev_lsps,
01416 float *excitation, float *synth)
01417 {
01418 WMAVoiceContext *s = ctx->priv_data;
01419 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01420 int pitch[MAX_BLOCKS], last_block_pitch;
01421
01422
01423 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01424 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01425
01426 if (bd_idx < 0) {
01427 av_log(ctx, AV_LOG_ERROR,
01428 "Invalid frame type VLC code, skipping\n");
01429 return -1;
01430 }
01431
01432
01433 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01434
01435
01436
01437
01438 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01439 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01440 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01441 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01442 if (s->last_acb_type == ACB_TYPE_NONE ||
01443 20 * abs(cur_pitch_val - s->last_pitch_val) >
01444 (cur_pitch_val + s->last_pitch_val))
01445 s->last_pitch_val = cur_pitch_val;
01446
01447
01448 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01449 int fac = n * 2 + 1;
01450
01451 pitch[n] = (MUL16(fac, cur_pitch_val) +
01452 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01453 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01454 }
01455
01456
01457 s->pitch_diff_sh16 =
01458 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01459 }
01460
01461
01462 switch (frame_descs[bd_idx].fcb_type) {
01463 case FCB_TYPE_SILENCE:
01464 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01465 break;
01466 case FCB_TYPE_AW_PULSES:
01467 aw_parse_coords(s, gb, pitch);
01468 break;
01469 }
01470
01471 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01472 int bl_pitch_sh2;
01473
01474
01475 switch (frame_descs[bd_idx].acb_type) {
01476 case ACB_TYPE_HAMMING: {
01477
01478
01479
01480
01481
01482 int block_pitch,
01483 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01484 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01485 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01486
01487 if (n == 0) {
01488 block_pitch = get_bits(gb, s->block_pitch_nbits);
01489 } else
01490 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01491 get_bits(gb, s->block_delta_pitch_nbits);
01492
01493 last_block_pitch = av_clip(block_pitch,
01494 s->block_delta_pitch_hrange,
01495 s->block_pitch_range -
01496 s->block_delta_pitch_hrange);
01497
01498
01499 if (block_pitch < t1) {
01500 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01501 } else {
01502 block_pitch -= t1;
01503 if (block_pitch < t2) {
01504 bl_pitch_sh2 =
01505 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01506 } else {
01507 block_pitch -= t2;
01508 if (block_pitch < t3) {
01509 bl_pitch_sh2 =
01510 (s->block_conv_table[2] + block_pitch) << 2;
01511 } else
01512 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01513 }
01514 }
01515 pitch[n] = bl_pitch_sh2 >> 2;
01516 break;
01517 }
01518
01519 case ACB_TYPE_ASYMMETRIC: {
01520 bl_pitch_sh2 = pitch[n] << 2;
01521 break;
01522 }
01523
01524 default:
01525 bl_pitch_sh2 = 0;
01526 break;
01527 }
01528
01529 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01530 lsps, prev_lsps, &frame_descs[bd_idx],
01531 &excitation[n * block_nsamples],
01532 &synth[n * block_nsamples]);
01533 }
01534
01535
01536
01537 if (s->do_apf) {
01538 double i_lsps[MAX_LSPS];
01539 float lpcs[MAX_LSPS];
01540
01541 for (n = 0; n < s->lsps; n++)
01542 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01543 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01544 postfilter(s, synth, samples, 80, lpcs,
01545 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01546 frame_descs[bd_idx].fcb_type, pitch[0]);
01547
01548 for (n = 0; n < s->lsps; n++)
01549 i_lsps[n] = cos(lsps[n]);
01550 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01551 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01552 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01553 frame_descs[bd_idx].fcb_type, pitch[0]);
01554 } else
01555 memcpy(samples, synth, 160 * sizeof(synth[0]));
01556
01557
01558 s->frame_cntr++;
01559 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01560 s->last_acb_type = frame_descs[bd_idx].acb_type;
01561 switch (frame_descs[bd_idx].acb_type) {
01562 case ACB_TYPE_NONE:
01563 s->last_pitch_val = 0;
01564 break;
01565 case ACB_TYPE_ASYMMETRIC:
01566 s->last_pitch_val = cur_pitch_val;
01567 break;
01568 case ACB_TYPE_HAMMING:
01569 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01570 break;
01571 }
01572
01573 return 0;
01574 }
01575
/**
 * Make sure a set of LSPs is usable for LPC conversion.
 *
 * The values are forced into an increasing sequence: the first value is
 * raised to at least 0.0015 * pi, every following value is raised to at
 * least its predecessor plus 0.0125 * pi, and the last value is capped at
 * 0.9985 * pi. If the cap pushed a value below its predecessor, the whole
 * array is sorted into ascending order.
 *
 * @param lsps array of LSPs, modified in place
 * @param num  number of entries in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double first_min = 0.0015 * M_PI;
    const double last_max  = 0.9985 * M_PI;
    int i, j, k;

    /* Enforce the lower bound and the minimum distance between neighbours. */
    if (!(lsps[0] > first_min))
        lsps[0] = first_min;
    for (i = 1; i < num; i++) {
        double lowest = lsps[i - 1] + 0.0125 * M_PI;

        if (!(lsps[i] > lowest))
            lsps[i] = lowest;
    }
    /* Enforce the upper bound on the last value. */
    if (lsps[num - 1] > last_max)
        lsps[num - 1] = last_max;

    /* Look for a pair that is out of order; nothing to do if there is none. */
    for (i = 1; i < num; i++) {
        if (lsps[i] < lsps[i - 1])
            break;
    }
    if (i >= num)
        return;

    /* Insertion sort of the whole array into ascending order. */
    for (j = 1; j < num; j++) {
        double cur = lsps[j];

        k = j - 1;
        while (k >= 0 && !(lsps[k] <= cur)) {
            lsps[k + 1] = lsps[k];
            k--;
        }
        lsps[k + 1] = cur;
    }
}
01616
01626 static int check_bits_for_superframe(GetBitContext *orig_gb,
01627 WMAVoiceContext *s)
01628 {
01629 GetBitContext s_gb, *gb = &s_gb;
01630 int n, need_bits, bd_idx;
01631 const struct frame_type_desc *frame_desc;
01632
01633
01634 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01635 skip_bits_long(gb, get_bits_count(orig_gb));
01636 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01637
01638
01639 if (get_bits_left(gb) < 14)
01640 return 1;
01641 if (!get_bits1(gb))
01642 return -1;
01643 if (get_bits1(gb)) skip_bits(gb, 12);
01644 if (s->has_residual_lsps) {
01645 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01646 return 1;
01647 skip_bits_long(gb, s->sframe_lsp_bitsize);
01648 }
01649
01650
01651 for (n = 0; n < MAX_FRAMES; n++) {
01652 int aw_idx_is_ext = 0;
01653
01654 if (!s->has_residual_lsps) {
01655 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01656 skip_bits_long(gb, s->frame_lsp_bitsize);
01657 }
01658 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01659 if (bd_idx < 0)
01660 return -1;
01661 frame_desc = &frame_descs[bd_idx];
01662 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01663 if (get_bits_left(gb) < s->pitch_nbits)
01664 return 1;
01665 skip_bits_long(gb, s->pitch_nbits);
01666 }
01667 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01668 skip_bits(gb, 8);
01669 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01670 int tmp = get_bits(gb, 6);
01671 if (tmp >= 0x36) {
01672 skip_bits(gb, 2);
01673 aw_idx_is_ext = 1;
01674 }
01675 }
01676
01677
01678 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01679 need_bits = s->block_pitch_nbits +
01680 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01681 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01682 need_bits = 2 * !aw_idx_is_ext;
01683 } else
01684 need_bits = 0;
01685 need_bits += frame_desc->frame_size;
01686 if (get_bits_left(gb) < need_bits)
01687 return 1;
01688 skip_bits_long(gb, need_bits);
01689 }
01690
01691 return 0;
01692 }
01693
01714 static int synth_superframe(AVCodecContext *ctx,
01715 float *samples, int *data_size)
01716 {
01717 WMAVoiceContext *s = ctx->priv_data;
01718 GetBitContext *gb = &s->gb, s_gb;
01719 int n, res, n_samples = 480;
01720 double lsps[MAX_FRAMES][MAX_LSPS];
01721 const double *mean_lsf = s->lsps == 16 ?
01722 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01723 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01724 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01725
01726 memcpy(synth, s->synth_history,
01727 s->lsps * sizeof(*synth));
01728 memcpy(excitation, s->excitation_history,
01729 s->history_nsamples * sizeof(*excitation));
01730
01731 if (s->sframe_cache_size > 0) {
01732 gb = &s_gb;
01733 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01734 s->sframe_cache_size = 0;
01735 }
01736
01737 if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01738
01739
01740
01741
01742
01743 if (!get_bits1(gb)) {
01744 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01745 return -1;
01746 }
01747
01748
01749 if (get_bits1(gb)) {
01750 if ((n_samples = get_bits(gb, 12)) > 480) {
01751 av_log(ctx, AV_LOG_ERROR,
01752 "Superframe encodes >480 samples (%d), not allowed\n",
01753 n_samples);
01754 return -1;
01755 }
01756 }
01757
01758 if (s->has_residual_lsps) {
01759 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01760
01761 for (n = 0; n < s->lsps; n++)
01762 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01763
01764 if (s->lsps == 10) {
01765 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01766 } else
01767 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01768
01769 for (n = 0; n < s->lsps; n++) {
01770 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01771 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01772 lsps[2][n] += mean_lsf[n];
01773 }
01774 for (n = 0; n < 3; n++)
01775 stabilize_lsps(lsps[n], s->lsps);
01776 }
01777
01778
01779 for (n = 0; n < 3; n++) {
01780 if (!s->has_residual_lsps) {
01781 int m;
01782
01783 if (s->lsps == 10) {
01784 dequant_lsp10i(gb, lsps[n]);
01785 } else
01786 dequant_lsp16i(gb, lsps[n]);
01787
01788 for (m = 0; m < s->lsps; m++)
01789 lsps[n][m] += mean_lsf[m];
01790 stabilize_lsps(lsps[n], s->lsps);
01791 }
01792
01793 if ((res = synth_frame(ctx, gb, n,
01794 &samples[n * MAX_FRAMESIZE],
01795 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01796 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01797 &synth[s->lsps + n * MAX_FRAMESIZE])))
01798 return res;
01799 }
01800
01801
01802
01803
01804 if (get_bits1(gb)) {
01805 res = get_bits(gb, 4);
01806 skip_bits(gb, 10 * (res + 1));
01807 }
01808
01809
01810 *data_size = n_samples * sizeof(float);
01811
01812
01813 memcpy(s->prev_lsps, lsps[2],
01814 s->lsps * sizeof(*s->prev_lsps));
01815 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01816 s->lsps * sizeof(*synth));
01817 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01818 s->history_nsamples * sizeof(*excitation));
01819 if (s->do_apf)
01820 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01821 s->history_nsamples * sizeof(*s->zero_exc_pf));
01822
01823 return 0;
01824 }
01825
01833 static int parse_packet_header(WMAVoiceContext *s)
01834 {
01835 GetBitContext *gb = &s->gb;
01836 unsigned int res;
01837
01838 if (get_bits_left(gb) < 11)
01839 return 1;
01840 skip_bits(gb, 4);
01841 s->has_residual_lsps = get_bits1(gb);
01842 do {
01843 res = get_bits(gb, 6);
01844
01845 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01846 return 1;
01847 } while (res == 0x3F);
01848 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01849
01850 return 0;
01851 }
01852
01868 static void copy_bits(PutBitContext *pb,
01869 const uint8_t *data, int size,
01870 GetBitContext *gb, int nbits)
01871 {
01872 int rmn_bytes, rmn_bits;
01873
01874 rmn_bits = rmn_bytes = get_bits_left(gb);
01875 if (rmn_bits < nbits)
01876 return;
01877 rmn_bits &= 7; rmn_bytes >>= 3;
01878 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01879 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01880 ff_copy_bits(pb, data + size - rmn_bytes,
01881 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01882 }
01883
01895 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01896 int *data_size, AVPacket *avpkt)
01897 {
01898 WMAVoiceContext *s = ctx->priv_data;
01899 GetBitContext *gb = &s->gb;
01900 int size, res, pos;
01901
01902 if (*data_size < 480 * sizeof(float)) {
01903 av_log(ctx, AV_LOG_ERROR,
01904 "Output buffer too small (%d given - %lu needed)\n",
01905 *data_size, 480 * sizeof(float));
01906 return -1;
01907 }
01908 *data_size = 0;
01909
01910
01911
01912
01913
01914
01915 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01916 if (!size)
01917 return 0;
01918 init_get_bits(&s->gb, avpkt->data, size << 3);
01919
01920
01921
01922
01923 if (size == ctx->block_align) {
01924 if ((res = parse_packet_header(s)) < 0)
01925 return res;
01926
01927
01928
01929
01930 if (s->spillover_nbits > 0) {
01931 if (s->sframe_cache_size > 0) {
01932 int cnt = get_bits_count(gb);
01933 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01934 flush_put_bits(&s->pb);
01935 s->sframe_cache_size += s->spillover_nbits;
01936 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01937 *data_size > 0) {
01938 cnt += s->spillover_nbits;
01939 s->skip_bits_next = cnt & 7;
01940 return cnt >> 3;
01941 } else
01942 skip_bits_long (gb, s->spillover_nbits - cnt +
01943 get_bits_count(gb));
01944 } else
01945 skip_bits_long(gb, s->spillover_nbits);
01946 }
01947 } else if (s->skip_bits_next)
01948 skip_bits(gb, s->skip_bits_next);
01949
01950
01951 s->sframe_cache_size = 0;
01952 s->skip_bits_next = 0;
01953 pos = get_bits_left(gb);
01954 if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01955 return res;
01956 } else if (*data_size > 0) {
01957 int cnt = get_bits_count(gb);
01958 s->skip_bits_next = cnt & 7;
01959 return cnt >> 3;
01960 } else if ((s->sframe_cache_size = pos) > 0) {
01961
01962 init_get_bits(gb, avpkt->data, size << 3);
01963 skip_bits_long(gb, (size << 3) - pos);
01964 assert(get_bits_left(gb) == pos);
01965
01966
01967 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01968 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01969
01970
01971 }
01972
01973 return size;
01974 }
01975
01976 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01977 {
01978 WMAVoiceContext *s = ctx->priv_data;
01979
01980 if (s->do_apf) {
01981 ff_rdft_end(&s->rdft);
01982 ff_rdft_end(&s->irdft);
01983 ff_dct_end(&s->dct);
01984 ff_dct_end(&s->dst);
01985 }
01986
01987 return 0;
01988 }
01989
01990 static av_cold void wmavoice_flush(AVCodecContext *ctx)
01991 {
01992 WMAVoiceContext *s = ctx->priv_data;
01993 int n;
01994
01995 s->postfilter_agc = 0;
01996 s->sframe_cache_size = 0;
01997 s->skip_bits_next = 0;
01998 for (n = 0; n < s->lsps; n++)
01999 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02000 memset(s->excitation_history, 0,
02001 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02002 memset(s->synth_history, 0,
02003 sizeof(*s->synth_history) * MAX_LSPS);
02004 memset(s->gain_pred_err, 0,
02005 sizeof(s->gain_pred_err));
02006
02007 if (s->do_apf) {
02008 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02009 sizeof(*s->synth_filter_out_buf) * s->lsps);
02010 memset(s->dcf_mem, 0,
02011 sizeof(*s->dcf_mem) * 2);
02012 memset(s->zero_exc_pf, 0,
02013 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02014 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02015 }
02016 }
02017
02018 AVCodec wmavoice_decoder = {
02019 "wmavoice",
02020 AVMEDIA_TYPE_AUDIO,
02021 CODEC_ID_WMAVOICE,
02022 sizeof(WMAVoiceContext),
02023 wmavoice_decode_init,
02024 NULL,
02025 wmavoice_decode_end,
02026 wmavoice_decode_packet,
02027 CODEC_CAP_SUBFRAMES,
02028 .flush = wmavoice_flush,
02029 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02030 };