#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
#include "put_bits.h"
#include "wmavoice_data.h"
#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
#include "libavutil/lzo.h"
#include "avfft.h"
#include "fft.h"
Go to the source code of this file.
Data Structures | |
struct | frame_type_desc |
Description of frame types. More... | |
struct | WMAVoiceContext |
WMA Voice decoding context. More... | |
Defines | |
#define | MAX_BLOCKS 8 |
maximum number of blocks per frame | |
#define | MAX_LSPS 16 |
maximum filter order | |
#define | MAX_LSPS_ALIGN16 16 |
same as MAX_LSPS; needs to be multiple of 16 for ASM input buffer alignment | |
#define | MAX_FRAMES 3 |
maximum number of frames per superframe | |
#define | MAX_FRAMESIZE 160 |
maximum number of samples per frame | |
#define | MAX_SIGNAL_HISTORY 416 |
maximum excitation signal history | |
#define | MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES) |
maximum number of samples per superframe | |
#define | SFRAME_CACHE_MAXSIZE 256 |
maximum cache size for frame data that was split over two packets | |
#define | VLC_NBITS 6 |
number of bits to read per VLC iteration | |
#define | log_range(var, assign) |
Enumerations | |
enum | { ACB_TYPE_NONE = 0, ACB_TYPE_ASYMMETRIC = 1, ACB_TYPE_HAMMING = 2 } |
Adaptive codebook types. More... | |
enum | { FCB_TYPE_SILENCE = 0, FCB_TYPE_HARDCODED = 1, FCB_TYPE_AW_PULSES = 2, FCB_TYPE_EXC_PULSES = 3 } |
Fixed codebook types. More... | |
Functions | |
static av_cold int | decode_vbmtree (GetBitContext *gb, int8_t vbm_tree[25]) |
Sets up the variable bit mode (VBM) tree from container extradata. | |
static av_cold int | wmavoice_decode_init (AVCodecContext *ctx) |
Set up decoder with parameters from demuxer (extradata etc.). | |
static void | adaptive_gain_control (float *out, const float *in, const float *speech_synth, int size, float alpha, float *gain_mem) |
Adaptive gain control (as used in postfilter). | |
static int | kalman_smoothen (WMAVoiceContext *s, int pitch, const float *in, float *out, int size) |
Kalman smoothing function. | |
static float | tilt_factor (const float *lpcs, int n_lpcs) |
Get the tilt factor of a formant filter from its transfer function. | |
static void | calc_input_response (WMAVoiceContext *s, float *lpcs, int fcb_type, float *coeffs, int remainder) |
Derive denoise filter coefficients (in real domain) from the LPCs. | |
static void | wiener_denoise (WMAVoiceContext *s, int fcb_type, float *synth_pf, int size, const float *lpcs) |
This function applies a Wiener filter on the (noisy) speech signal as a means to denoise it. | |
static void | postfilter (WMAVoiceContext *s, const float *synth, float *samples, int size, const float *lpcs, float *zero_exc_pf, int fcb_type, int pitch) |
Averaging projection filter, the postfilter used in WMAVoice. | |
static void | dequant_lsps (double *lsps, int num, const uint16_t *values, const uint16_t *sizes, int n_stages, const uint8_t *table, const double *mul_q, const double *base_q) |
Dequantize LSPs. | |
static void | dequant_lsp10i (GetBitContext *gb, double *lsps) |
Parse 10 independently-coded LSPs. | |
static void | dequant_lsp10r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode) |
Parse 10 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). | |
static void | dequant_lsp16i (GetBitContext *gb, double *lsps) |
Parse 16 independently-coded LSPs. | |
static void | dequant_lsp16r (GetBitContext *gb, double *i_lsps, const double *old, double *a1, double *a2, int q_mode) |
Parse 16 independently-coded LSPs, and then derive the tables to generate LSPs for the other frames from them (residual coding). | |
static void | aw_parse_coords (WMAVoiceContext *s, GetBitContext *gb, const int *pitch) |
Parse the offset of the first pitch-adaptive window pulses, and the distribution of pulses between the two blocks in this frame. | |
static void | aw_pulse_set2 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb) |
Apply second set of pitch-adaptive window pulses. | |
static void | aw_pulse_set1 (WMAVoiceContext *s, GetBitContext *gb, int block_idx, AMRFixed *fcb) |
Apply first set of pitch-adaptive window pulses. | |
static int | pRNG (int frame_cntr, int block_num, int block_size) |
Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries). | |
static void | synth_block_hardcoded (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, const struct frame_type_desc *frame_desc, float *excitation) |
Parse hardcoded signal for a single block. | |
static void | synth_block_fcb_acb (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const struct frame_type_desc *frame_desc, float *excitation) |
Parse FCB/ACB signal for a single block. | |
static void | synth_block (WMAVoiceContext *s, GetBitContext *gb, int block_idx, int size, int block_pitch_sh2, const double *lsps, const double *prev_lsps, const struct frame_type_desc *frame_desc, float *excitation, float *synth) |
Parse data in a single block. | |
static int | synth_frame (AVCodecContext *ctx, GetBitContext *gb, int frame_idx, float *samples, const double *lsps, const double *prev_lsps, float *excitation, float *synth) |
Synthesize output samples for a single frame. | |
static void | stabilize_lsps (double *lsps, int num) |
Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering. | |
static int | check_bits_for_superframe (GetBitContext *orig_gb, WMAVoiceContext *s) |
Test if there are enough bits to read 1 superframe. | |
static int | synth_superframe (AVCodecContext *ctx, float *samples, int *data_size) |
Synthesize output samples for a single superframe. | |
static int | parse_packet_header (WMAVoiceContext *s) |
Parse the packet header at the start of each packet (input data to this decoder). | |
static void | copy_bits (PutBitContext *pb, const uint8_t *data, int size, GetBitContext *gb, int nbits) |
Copy (unaligned) bits from gb/data/size to pb. | |
static int | wmavoice_decode_packet (AVCodecContext *ctx, void *data, int *data_size, AVPacket *avpkt) |
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output). | |
static av_cold int | wmavoice_decode_end (AVCodecContext *ctx) |
static av_cold void | wmavoice_flush (AVCodecContext *ctx) |
Variables | |
static VLC | frame_type_vlc |
Frame type VLC coding. | |
static struct frame_type_desc | frame_descs [17] |
Description of frame types. | |
AVCodec | wmavoice_decoder |
Definition in file wmavoice.c.
#define log_range | ( | var, | |||
assign | ) |
Value:
do { \
    float tmp = log10f(assign); var = tmp; \
    max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
} while (0)
Referenced by calc_input_response().
#define MAX_BLOCKS 8 |
#define MAX_FRAMES 3 |
maximum number of frames per superframe
Definition at line 46 of file wmavoice.c.
Referenced by check_bits_for_superframe(), and synth_superframe().
#define MAX_FRAMESIZE 160 |
#define MAX_LSPS 16 |
maximum filter order
Definition at line 43 of file wmavoice.c.
Referenced by synth_block(), synth_frame(), synth_superframe(), and wmavoice_flush().
#define MAX_LSPS_ALIGN16 16 |
same as MAX_LSPS; needs to be multiple
of 16 for ASM input buffer alignment
Definition at line 44 of file wmavoice.c.
Referenced by postfilter(), and wmavoice_flush().
#define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES) |
maximum number of samples per superframe
Definition at line 49 of file wmavoice.c.
Referenced by synth_superframe().
#define MAX_SIGNAL_HISTORY 416 |
maximum excitation signal history
Definition at line 48 of file wmavoice.c.
Referenced by synth_superframe(), wmavoice_decode_init(), and wmavoice_flush().
#define SFRAME_CACHE_MAXSIZE 256 |
maximum cache size for frame data that
was split over two packets
Definition at line 51 of file wmavoice.c.
Referenced by wmavoice_decode_packet().
#define VLC_NBITS 6 |
number of bits to read per VLC iteration
Definition at line 53 of file wmavoice.c.
Referenced by decode_vbmtree().
anonymous enum |
Adaptive codebook types.
ACB_TYPE_NONE | no adaptive codebook (only hardcoded fixed) |
ACB_TYPE_ASYMMETRIC |
adaptive codebook with per-frame pitch, which we interpolate to get a per-sample pitch.
Signal is generated using an asymmetric sinc window function
|
ACB_TYPE_HAMMING |
Per-block pitch with signal generation using a Hamming sinc window function.
|
Definition at line 63 of file wmavoice.c.
anonymous enum |
Fixed codebook types.
Definition at line 78 of file wmavoice.c.
static int check_bits_for_superframe | ( | GetBitContext * | orig_gb, | |
WMAVoiceContext * | s | |||
) | [static] |
Test if there are enough bits to read 1 superframe.
orig_gb | bit I/O context used for reading. This function does not modify the state of the bitreader; it only uses it to copy the current stream position | |
s | WMA Voice decoding context private data |
Definition at line 1626 of file wmavoice.c.
Referenced by synth_superframe().
static void copy_bits | ( | PutBitContext * | pb, | |
const uint8_t * | data, | |||
int | size, | |||
GetBitContext * | gb, | |||
int | nbits | |||
) | [static] |
Copy (unaligned) bits from gb/data/size to pb.
pb | target buffer to copy bits into | |
data | source buffer to copy bits from | |
size | size of the source data, in bytes | |
gb | bit I/O context specifying the current position in the source data. This function might use this to align the bit position to a whole-byte boundary before calling ff_copy_bits() on aligned source data | |
nbits | the amount of bits to copy from source to target |
Definition at line 1868 of file wmavoice.c.
static av_cold int decode_vbmtree | ( | GetBitContext * | gb, | |
int8_t | vbm_tree[25] | |||
) | [static] |
Sets up the variable bit mode (VBM) tree from container extradata.
gb | bit I/O context. The bit context (s->gb) should be loaded with byte 23-46 of the container extradata (i.e. the ones containing the VBM tree). | |
vbm_tree | pointer to array to which the decoded VBM tree will be written. |
Definition at line 298 of file wmavoice.c.
Referenced by wmavoice_decode_init().
static void dequant_lsps | ( | double * | lsps, | |
int | num, | |||
const uint16_t * | values, | |||
const uint16_t * | sizes, | |||
int | n_stages, | |||
const uint8_t * | table, | |||
const double * | mul_q, | |||
const double * | base_q | |||
) | [static] |
Dequantize LSPs.
lsps | output pointer to the array that will hold the LSPs | |
num | number of LSPs to be dequantized | |
values | quantized values, contains n_stages values | |
sizes | range (i.e. max value) of each quantized value | |
n_stages | number of dequantization runs | |
table | dequantization table to be used | |
mul_q | LSF multiplier | |
base_q | base (lowest) LSF values |
Definition at line 804 of file wmavoice.c.
Referenced by dequant_lsp10i(), dequant_lsp10r(), dequant_lsp16i(), and dequant_lsp16r().
static int parse_packet_header | ( | WMAVoiceContext * | s | ) | [static] |
Parse the packet header at the start of each packet (input data to this decoder).
s | WMA Voice decoding context private data |
Definition at line 1833 of file wmavoice.c.
Referenced by wmavoice_decode_packet().
static int pRNG | ( | int | frame_cntr, | |
int | block_num, | |||
int | block_size | |||
) | [static] |
Generate a random number from frame_cntr and block_num, which will lie in the range [0, 1000 - block_size] (so it can be used as an index in a table of size 1000 of which you want to read block_size entries).
frame_cntr | current frame number | |
block_num | current block index | |
block_size | amount of entries we want to read from a table that has 1000 entries |
Definition at line 1180 of file wmavoice.c.
Referenced by synth_block_hardcoded().
static void stabilize_lsps | ( | double * | lsps, | |
int | num | |||
) | [static] |
Ensure minimum value for first item, maximum value for last value, proper spacing between each value and proper ordering.
lsps | array of LSPs | |
num | size of LSP array |
Definition at line 1588 of file wmavoice.c.
Referenced by synth_superframe().
static void synth_block | ( | WMAVoiceContext * | s, | |
GetBitContext * | gb, | |||
int | block_idx, | |||
int | size, | |||
int | block_pitch_sh2, | |||
const double * | lsps, | |||
const double * | prev_lsps, | |||
const struct frame_type_desc * | frame_desc, | |||
float * | excitation, | |||
float * | synth | |||
) | [static] |
Parse data in a single block.
s | WMA Voice decoding context private data | |
gb | bit I/O context | |
block_idx | index of the to-be-read block | |
size | amount of samples to be read in this block | |
block_pitch_sh2 | pitch for this block << 2 | |
lsps | LSPs for (the end of) this frame | |
prev_lsps | LSPs for the last frame | |
frame_desc | frame type descriptor | |
excitation | target memory for the ACB+FCB interpolated signal | |
synth | target memory for the speech synthesis filter output |
Definition at line 1370 of file wmavoice.c.
Referenced by synth_frame().
static void synth_block_fcb_acb | ( | WMAVoiceContext * | s, | |
GetBitContext * | gb, | |||
int | block_idx, | |||
int | size, | |||
int | block_pitch_sh2, | |||
const struct frame_type_desc * | frame_desc, | |||
float * | excitation | |||
) | [static] |
Parse FCB/ACB signal for a single block.
Definition at line 1247 of file wmavoice.c.
Referenced by synth_block().
static void synth_block_hardcoded | ( | WMAVoiceContext * | s, | |
GetBitContext * | gb, | |||
int | block_idx, | |||
int | size, | |||
const struct frame_type_desc * | frame_desc, | |||
float * | excitation | |||
) | [static] |
Parse hardcoded signal for a single block.
Definition at line 1216 of file wmavoice.c.
Referenced by synth_block().
static int synth_frame | ( | AVCodecContext * | ctx, | |
GetBitContext * | gb, | |||
int | frame_idx, | |||
float * | samples, | |||
const double * | lsps, | |||
const double * | prev_lsps, | |||
float * | excitation, | |||
float * | synth | |||
) | [static] |
Synthesize output samples for a single frame.
ctx | WMA Voice decoder context | |
gb | bit I/O context (s->gb or one for cross-packet superframes) | |
frame_idx | Frame number within superframe [0-2] | |
samples | pointer to output sample buffer, has space for at least 160 samples | |
lsps | LSP array | |
prev_lsps | array of previous frame's LSPs | |
excitation | target buffer for excitation signal | |
synth | target buffer for synthesized speech data |
Definition at line 1413 of file wmavoice.c.
Referenced by synth_superframe().
static int synth_superframe | ( | AVCodecContext * | ctx, | |
float * | samples, | |||
int * | data_size | |||
) | [static] |
Synthesize output samples for a single superframe.
If we have any data cached in s->sframe_cache, that will be used instead of whatever is loaded in s->gb.
WMA Voice superframes contain 3 frames, each containing 160 audio samples, to give a total of 480 samples per superframe. See synth_frame() for frame parsing. In addition to 3 frames, superframes can also contain the LSPs (if these are globally specified for all frames (residually); they can also be specified individually per-frame. See the s->has_residual_lsps option), and can specify the number of samples encoded in this superframe (if less than 480), usually used to prevent blanks at track boundaries.
ctx | WMA Voice decoder context | |
samples | pointer to output buffer for voice samples | |
data_size | pointer containing the size of samples on input, and the amount of samples filled on output |
Definition at line 1714 of file wmavoice.c.
Referenced by wmavoice_decode_packet().
static av_cold int wmavoice_decode_end | ( | AVCodecContext * | ctx | ) | [static] |
Definition at line 1976 of file wmavoice.c.
static av_cold int wmavoice_decode_init | ( | AVCodecContext * | ctx | ) | [static] |
Set up decoder with parameters from demuxer (extradata etc.).
Extradata layout:
Definition at line 333 of file wmavoice.c.
static int wmavoice_decode_packet | ( | AVCodecContext * | ctx, | |
void * | data, | |||
int * | data_size, | |||
AVPacket * | avpkt | |||
) | [static] |
Packet decoding: a packet is anything that the (ASF) demuxer contains, and we expect that the demuxer / application provides it to us as such (else you'll probably get garbage as output).
Every packet has a size of ctx->block_align bytes, starts with a packet header (see parse_packet_header()), and then a series of superframes. Superframe boundaries may exceed packets, i.e. superframes can split data over multiple (two) packets.
For more information about frames, see synth_superframe().
Definition at line 1895 of file wmavoice.c.
static av_cold void wmavoice_flush | ( | AVCodecContext * | ctx | ) | [static] |
Definition at line 1990 of file wmavoice.c.
struct frame_type_desc frame_descs[17] [static] |
VLC frame_type_vlc [static] |
AVCodec wmavoice_decoder |
Initial value:
{ "wmavoice", AVMEDIA_TYPE_AUDIO, CODEC_ID_WMAVOICE, sizeof(WMAVoiceContext), wmavoice_decode_init, NULL, wmavoice_decode_end, wmavoice_decode_packet, CODEC_CAP_SUBFRAMES, .flush = wmavoice_flush, .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"), }
Definition at line 2018 of file wmavoice.c.