Go to the documentation of this file.
32 float *
bits,
float lambda)
35 uint32_t
cm[2] = { (1 <<
f->blocks) - 1, (1 <<
f->blocks) - 1 };
37 float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176];
38 float dist, cost, err_x = 0.0f, err_y = 0.0f;
41 float *
Y = (
f->channels == 2) ? &buf[176] :
NULL;
45 memcpy(
X, X_orig, band_size*
sizeof(
float));
47 memcpy(
Y, Y_orig, band_size*
sizeof(
float));
50 if (band <= f->coded_bands - 1) {
51 int curr_balance =
f->remaining /
FFMIN(3,
f->coded_bands - band);
56 pvq->
quant_band(pvq,
f, rc, band,
X,
NULL, band_size,
b / 2,
f->blocks,
NULL,
57 f->size, norm1, 0, 1.0f, lowband_scratch,
cm[0]);
59 pvq->
quant_band(pvq,
f, rc, band,
Y,
NULL, band_size,
b / 2,
f->blocks,
NULL,
60 f->size, norm2, 0, 1.0f, lowband_scratch,
cm[1]);
62 pvq->
quant_band(pvq,
f, rc, band,
X,
Y, band_size,
b,
f->blocks,
NULL,
f->size,
63 norm1, 0, 1.0f, lowband_scratch,
cm[0] |
cm[1]);
66 for (
i = 0;
i < band_size;
i++) {
67 err_x += (
X[
i] - X_orig[
i])*(
X[
i] - X_orig[
i]);
69 err_y += (
Y[
i] - Y_orig[
i])*(
Y[
i] - Y_orig[
i]);
78 return lambda*dist*cost;
84 int silence = 0, ch,
i, j;
94 const int step_samples =
s->avctx->frame_size;
95 const int steps_per_half = half_samples / step_samples;
100 for (ch = 0; ch <
s->avctx->ch_layout.nb_channels; ch++) {
101 memset(
s->scratch, 0,
sizeof(
float) * (half_samples << 1));
104 const int offset = (steps_per_half -
i) * step_samples;
108 for (
i = 0;
i < steps_per_half;
i++) {
109 if (
index + 1 +
i >=
s->bufqueue->available)
111 const int offset = (steps_per_half +
i) * step_samples;
116 s->dsp->vector_fmul(
s->scratch,
s->scratch,
s->window[
s->bsize_analysis],
119 s->mdct_fn[
s->bsize_analysis](
s->mdct[
s->bsize_analysis], st->
coeffs[ch],
120 s->scratch,
sizeof(
float));
126 for (ch = 0; ch <
s->avctx->ch_layout.nb_channels; ch++) {
128 float avg_c_s, energy = 0.0f, dist_dev = 0.0f;
130 const float *coeffs = st->
bands[ch][
i];
131 for (j = 0; j <
range; j++)
132 energy += coeffs[j]*coeffs[j];
135 silence |= !!st->
energy[ch][
i];
136 avg_c_s = energy /
range;
138 for (j = 0; j <
range; j++) {
139 const float c_s = coeffs[j]*coeffs[j];
140 dist_dev += (avg_c_s - c_s)*(avg_c_s - c_s);
149 if (
s->avctx->ch_layout.nb_channels > 1) {
151 float incompat = 0.0f;
152 const float *coeffs1 = st->
bands[0][
i];
153 const float *coeffs2 = st->
bands[1][
i];
155 for (j = 0; j <
range; j++)
156 incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]);
161 for (ch = 0; ch <
s->avctx->ch_layout.nb_channels; ch++) {
187 float c_change = 0.0f;
190 for (
i = offset_s;
i < offset_e;
i++) {
191 c_change +=
s->steps[
i]->total_change;
192 if (c_change > tgt_change)
198 s->inflection_points[
s->inflection_points_count++] =
i;
210 const int step_samples =
s->avctx->frame_size;
211 int fsize, silent_frames;
213 for (silent_frames = 0; silent_frames <
s->buffered_steps; silent_frames++)
214 if (!
s->steps[silent_frames]->silence)
216 if (--silent_frames < 0)
221 const int steps_per_packet = packet_samples / step_samples;
223 if (steps_per_packet < 1 || silent_frames < steps_per_packet)
227 s->p.frames =
FFMIN(silent_frames / steps_per_packet,
239 int max_delay_samples = (
s->options->max_delay_ms*
s->avctx->sample_rate)/1000;
257 float total_energy_change = 0.0f;
259 if (
s->buffered_steps <
s->max_steps && !
s->eof) {
264 if (++
s->steps_to_process >= awin) {
266 s->steps_to_process = 0;
268 if ((++
s->buffered_steps) <
s->max_steps)
272 for (
i = 0;
i <
s->buffered_steps;
i++)
273 total_energy_change +=
s->steps[
i]->total_change;
276 s->buffered_steps, 1, 0);
280 p->frames =
s->p.frames;
281 p->framesize =
s->p.framesize;
283 p->bandwidth =
s->p.bandwidth;
290 int i, neighbouring_points = 0, start_offset = 0;
292 int step_offset = steps_per_frame*
index;
297 f->channels =
s->avctx->ch_layout.nb_channels;
298 f->size =
s->p.framesize;
300 for (
i = 0;
i < steps_per_frame;
i++)
301 silence &=
s->steps[
index * steps_per_frame +
i]->silence;
309 if (
s->eof && step_offset >=
s->buffered_steps)
312 f->silence = silence;
318 for (
i = 0;
i <
s->inflection_points_count;
i++) {
319 if (
s->inflection_points[
i] >= step_offset) {
325 for (
i = start_offset;
i <
FFMIN(steps_per_frame,
s->inflection_points_count - start_offset);
i++) {
326 if (
s->inflection_points[
i] < (step_offset + steps_per_frame)) {
327 neighbouring_points++;
332 f->transient = neighbouring_points > 0;
346 f->skip_band_floor =
f->end_band;
347 f->intensity_stereo =
f->end_band;
359 int steps_per_frame =
frame_size /
s->avctx->frame_size;
360 float rate, frame_bits = 0;
367 float max_score = 1.0f;
372 float tonal_contrib = 0.0f;
373 for (
f = 0;
f < steps_per_frame;
f++) {
375 for (ch = 0; ch <
s->avctx->ch_layout.nb_channels; ch++) {
377 tonal_contrib += start[
f]->
tone[ch][
i];
380 tonal += tonal_contrib;
387 if (band_score[
i] > max_score)
388 max_score = band_score[
i];
392 f_out->
alloc_boost[
i] = (int)((band_score[
i]/max_score)*3.0f);
432 if (
s->avctx->ch_layout.nb_channels < 2)
439 f->dual_stereo = td2 < td1;
440 s->dual_stereo_used += td2 < td1;
446 float dist, best_dist = FLT_MAX;
450 if (
s->avctx->ch_layout.nb_channels < 2)
453 for (
i =
f->end_band;
i >= end_band;
i--) {
454 f->intensity_stereo =
i;
456 if (best_dist > dist) {
462 f->intensity_stereo = best_band;
463 s->avg_is_band = (
s->avg_is_band +
f->intensity_stereo)/2.0
f;
470 float score[2] = { 0 };
472 for (cway = 0; cway < 2; cway++) {
474 int base =
f->transient ? 120 : 960;
476 for (
i = 0;
i < 2;
i++) {
482 float iscore0 = 0.0f;
483 float iscore1 = 0.0f;
484 for (j = 0; j < steps_per_frame; j++) {
485 for (k = 0; k <
s->avctx->ch_layout.nb_channels; k++) {
491 score[cway] +=
config[cway][
i] ? iscore1 : iscore0;
495 f->tf_select = score[0] < score[1];
503 int start_transient_flag =
f->transient;
515 if (
f->transient != start_transient_flag) {
526 int steps_out =
s->p.frames*(
frame_size/
s->avctx->frame_size);
530 for (
i = 0;
i < steps_out;
i++)
533 for (
i = 0;
i <
s->max_steps;
i++)
536 for (
i = 0;
i <
s->max_steps;
i++) {
537 const int i_new =
i - steps_out;
538 s->steps[i_new < 0 ?
s->max_steps + i_new : i_new] =
tmp[
i];
541 for (
i = steps_out;
i <
s->buffered_steps;
i++)
542 s->steps[
i]->index -= steps_out;
544 ideal_fbits =
s->avctx->bit_rate/(
s->avctx->sample_rate/
frame_size);
546 for (
i = 0;
i <
s->p.frames;
i++) {
547 s->avg_is_band +=
f[
i].intensity_stereo;
548 if (
f[
i].framebits > 0)
549 s->lambda *= ideal_fbits /
f[
i].framebits;
552 s->avg_is_band /= (
s->p.frames + 1);
554 s->steps_to_process = 0;
555 s->buffered_steps -= steps_out;
556 s->total_packets_out +=
s->p.frames;
557 s->inflection_points_count = 0;
568 s->bufqueue = bufqueue;
574 s->inflection_points_count = 0;
576 s->inflection_points =
av_mallocz(
sizeof(*
s->inflection_points)*
s->max_steps);
577 if (!
s->inflection_points) {
588 for (ch = 0; ch <
s->avctx->ch_layout.nb_channels; ch++) {
595 for (
i = 0;
i <
s->max_steps;
i++) {
614 0, 15 << (
i + 3), &
scale, 0);
630 for (
i = 0;
i <
s->max_steps;
i++)
653 for (
i = 0;
i <
s->max_steps;
i++)
657 av_log(
s->avctx,
AV_LOG_INFO,
"Dual Stereo used: %0.2f%%\n", ((
float)
s->dual_stereo_used/
s->total_packets_out)*100.0f);
float stereo[CELT_MAX_BANDS]
static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f)
int frame_size
Number of samples per channel in an audio frame.
static int flush_silent_frames(OpusPsyContext *s)
static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
int sample_rate
samples per second
#define OPUS_MAX_FRAME_SIZE
static void search_for_change_points(OpusPsyContext *s, float tgt_change, int offset_s, int offset_e, int resolution, int level)
static void step_collect_psy_metrics(OpusPsyContext *s, int index)
This structure describes decoded (raw) audio or video data.
static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f)
@ OPUS_BANDWIDTH_FULLBAND
float coeffs[OPUS_MAX_CHANNELS][OPUS_BLOCK_SIZE(CELT_BLOCK_960)]
static __device__ float ceilf(float a)
av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type, int inv, int len, const void *scale, uint64_t flags)
Initialize a transform context with the given configuration (i)MDCTs with an odd length are currently...
#define OPUS_RC_CHECKPOINT_SPAWN(rc)
void ff_opus_rc_enc_init(OpusRangeCoder *rc)
void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f)
The official guide to swscale for confused that consecutive non overlapping rectangles of slice_bottom special converter These generally are unscaled converters of common like for each output line the vertical scaler pulls lines from a ring buffer When the ring buffer does not contain the wanted then it is pulled from the input slice through the input converter and horizontal scaler The result is also stored in the ring buffer to serve future vertical scaler requests When no more output can be generated because lines from a future slice would be then all remaining lines in the current slice are horizontally scaled and put in the ring buffer[This is done for luma and chroma, each with possibly different numbers of lines per picture.] Input to YUV Converter When the input to the main path is not planar bits per component YUV or bit it is converted to planar bit YUV Two sets of converters exist for this the other leaves the full chroma resolution
float change_amp[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
int flags
AV_CODEC_FLAG_*.
const h264_weight_func weight
void ff_opus_psy_signal_eof(OpusPsyContext *s)
#define OPUS_SAMPLES_TO_BLOCK_SIZE(x)
static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f_out)
static float bessel_filter(FFBesselFilter *s, float x)
@ AV_TX_FLOAT_MDCT
Standard MDCT with a sample data type of float, double or int32_t, respectively.
static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f)
int alloc_boost[CELT_MAX_BANDS]
static int64_t fsize(FILE *f)
#define OPUS_BLOCK_SIZE(x)
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
const int8_t ff_celt_tf_select[4][2][2][2]
static av_always_inline uint32_t opus_rc_tell_frac(const OpusRangeCoder *rc)
const uint8_t ff_celt_freq_range[]
av_cold int ff_opus_psy_end(OpusPsyContext *s)
static void psy_output_groups(OpusPsyContext *s)
static __device__ float sqrtf(float a)
static void generate_window_func(float *lut, int N, int win_func, float *overlap)
void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
const OptionDef options[]
float * bands[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
#define i(width, name, range_min, range_max)
float tone[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf offset
int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
static int bessel_init(FFBesselFilter *s, float n, float f0, float fs, int highpass)
av_cold void av_tx_uninit(AVTXContext **ctx)
Frees a context and sets *ctx to NULL, does nothing when *ctx == NULL.
#define AV_LOG_INFO
Standard information.
static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band, float *bits, float lambda)
int nb_samples
number of audio samples (per channel) described by this frame
static AVFrame * ff_bufqueue_peek(struct FFBufQueue *queue, unsigned index)
Get a buffer from the queue without altering it.
Structure holding the queue.
uint8_t ** extended_data
pointers to the data planes/channels.
#define CELT_SHORT_BLOCKSIZE
float energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS]
void ff_celt_bitalloc(CeltFrame *f, OpusRangeCoder *rc, int encode)
main external API structure.
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
#define AV_CODEC_FLAG_BITEXACT
Use only bitexact stuff (except (I)DCT).
static void scale(int *out, const int *in, const int w, const int h, const int shift)
av_cold AVFloatDSPContext * avpriv_float_dsp_alloc(int bit_exact)
Allocate a float DSP context.
#define OPUS_RC_CHECKPOINT_ROLLBACK(rc)
const uint8_t ff_celt_band_end[]
const uint8_t ff_celt_freq_bands[]
av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx, struct FFBufQueue *bufqueue, OpusEncOptions *options)
#define OPUS_RC_CHECKPOINT_BITS(rc)