Go to the documentation of this file.
55 #define NOISE_SPREAD_THRESHOLD 0.9f
59 #define NOISE_LAMBDA_REPLACE 1.948f
64 const float *in,
float *
quant,
const float *scaled,
65 int size,
int scale_idx,
int cb,
66 const float lambda,
const float uplim,
67 int *
bits,
float *energy);
77 const float *scaled,
int size,
int scale_idx,
78 int cb,
const float lambda,
const float uplim,
79 int *
bits,
float *energy,
int BT_ZERO,
int BT_UNSIGNED,
80 int BT_PAIR,
int BT_ESC,
int BT_NOISE,
int BT_STEREO,
87 const float CLIPPED_ESCAPE = 165140.0f*IQ;
90 const int dim = BT_PAIR ? 2 : 4;
94 if (BT_ZERO || BT_NOISE || BT_STEREO) {
103 for (
int j = 0; j <
dim; j++)
109 s->aacdsp.abs_pow34(
s->scoefs, in,
size);
120 int *quants =
s->qcoefs +
i;
123 float quantized, rd = 0.0f;
124 for (
int j = 0; j <
dim; j++) {
126 curidx += quants[j] + off;
131 for (
int j = 0; j <
dim; j++) {
134 if (BT_ESC && vec[j] == 64.0
f) {
135 if (t >= CLIPPED_ESCAPE) {
136 quantized = CLIPPED_ESCAPE;
144 quantized = vec[j]*IQ;
148 out[
i+j] = in[
i+j] >= 0 ? quantized : -quantized;
151 qenergy += quantized*quantized;
155 for (
int j = 0; j <
dim; j++) {
156 quantized = vec[j]*IQ;
157 qenergy += quantized*quantized;
159 out[
i+j] = quantized;
160 rd += (in[
i+j] - quantized)*(in[
i+j] - quantized);
163 cost += rd *
lambda + curbits;
170 for (
int j = 0; j <
dim; j++)
174 for (
int j = 0; j < 2; j++) {
195 const float *in,
float *
quant,
const float *scaled,
196 int size,
int scale_idx,
int cb,
197 const float lambda,
const float uplim,
198 int *
bits,
float *energy) {
203 #define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING) \
204 static float quantize_and_encode_band_cost_ ## NAME( \
205 struct AACEncContext *s, \
206 PutBitContext *pb, const float *in, float *quant, \
207 const float *scaled, int size, int scale_idx, \
208 int cb, const float lambda, const float uplim, \
209 int *bits, float *energy) { \
210 return quantize_and_encode_band_cost_template( \
211 s, pb, in, quant, scaled, size, scale_idx, \
212 BT_ESC ? ESC_BT : cb, lambda, uplim, bits, energy, \
213 BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \
229 quantize_and_encode_band_cost_ZERO,
230 quantize_and_encode_band_cost_SQUAD,
231 quantize_and_encode_band_cost_SQUAD,
232 quantize_and_encode_band_cost_UQUAD,
233 quantize_and_encode_band_cost_UQUAD,
234 quantize_and_encode_band_cost_SPAIR,
235 quantize_and_encode_band_cost_SPAIR,
236 quantize_and_encode_band_cost_UPAIR,
237 quantize_and_encode_band_cost_UPAIR,
238 quantize_and_encode_band_cost_UPAIR,
239 quantize_and_encode_band_cost_UPAIR,
240 quantize_and_encode_band_cost_ESC,
242 quantize_and_encode_band_cost_NOISE,
243 quantize_and_encode_band_cost_STEREO,
244 quantize_and_encode_band_cost_STEREO,
249 quantize_and_encode_band_cost_ZERO,
250 quantize_and_encode_band_cost_SQUAD,
251 quantize_and_encode_band_cost_SQUAD,
252 quantize_and_encode_band_cost_UQUAD,
253 quantize_and_encode_band_cost_UQUAD,
254 quantize_and_encode_band_cost_SPAIR,
255 quantize_and_encode_band_cost_SPAIR,
256 quantize_and_encode_band_cost_UPAIR,
257 quantize_and_encode_band_cost_UPAIR,
258 quantize_and_encode_band_cost_UPAIR,
259 quantize_and_encode_band_cost_UPAIR,
260 quantize_and_encode_band_cost_ESC_RTZ,
262 quantize_and_encode_band_cost_NOISE,
263 quantize_and_encode_band_cost_STEREO,
264 quantize_and_encode_band_cost_STEREO,
268 const float *in,
float *
quant,
const float *scaled,
269 int size,
int scale_idx,
int cb,
270 const float lambda,
const float uplim,
271 int *
bits,
float *energy)
279 const float *in,
float *
out,
int size,
int scale_idx,
280 int cb,
const float lambda,
int rtz)
300 #define TRELLIS_STAGES 121
301 #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
306 int prevscaler_n = -255, prevscaler_i = 0;
318 if (prevscaler_n == -255)
346 int start = 0,
i,
w, w2,
g;
348 float dists[128] = { 0 }, uplims[128] = { 0 };
350 int fflag, minscaler;
357 destbits =
FFMIN(destbits, 5800);
366 FFPsyBand *band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
374 uplims[
w*16+
g] = uplim *512;
378 minthr =
FFMIN(minthr, uplim);
395 s->aacdsp.abs_pow34(
s->scoefs, sce->
coeffs, 1024);
401 const float *scaled =
s->scoefs + start;
411 minscaler = sce->
sf_idx[0];
413 qstep = its ? 1 : 32;
420 const float *coefs = sce->
coeffs + start;
421 const float *scaled =
s->scoefs + start;
443 dists[
w*16+
g] = dist -
bits;
452 if (tbits > destbits) {
453 for (
i = 0;
i < 128;
i++)
454 if (sce->
sf_idx[
i] < 218 - qstep)
457 for (
i = 0;
i < 128;
i++)
458 if (sce->
sf_idx[
i] > 60 - qstep)
462 if (!qstep && tbits > destbits*1.02 && sce->
sf_idx[0] < 217)
472 if (dists[
w*16+
g] > uplims[
w*16+
g] && sce->
sf_idx[
w*16+
g] > 60) {
486 }
while (fflag && its < 10);
494 int bandwidth, cutoff;
495 float *PNS = &
s->scoefs[0*128], *PNS34 = &
s->scoefs[1*128];
496 float *NOR34 = &
s->scoefs[3*128];
497 uint8_t nextband[128];
498 const float lambda =
s->lambda;
499 const float freq_mult = avctx->
sample_rate*0.5f/wlen;
502 const float dist_bias =
av_clipf(4.
f * 120 / lambda, 0.25
f, 4.0
f);
503 const float pns_transient_energy_r =
FFMIN(0.7
f, lambda / 140.
f);
510 float rate_bandwidth_multiplier = 1.5f;
511 int prev = -1000, prev_sf = -1;
513 ? (refbits * rate_bandwidth_multiplier * avctx->
sample_rate / 1024)
516 frame_bit_rate *= 1.15f;
519 bandwidth = avctx->
cutoff;
524 cutoff = bandwidth * 2 * wlen / avctx->
sample_rate;
532 float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
533 float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
534 float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
535 float min_energy = -1.0f, max_energy = 0.0f;
537 const float freq = (start-wstart)*freq_mult;
545 band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
546 sfb_energy += band->
energy;
550 min_energy = max_energy = band->
energy;
568 ((sce->
zeroes[
w*16+
g] || !sce->
band_alt[
w*16+
g]) && sfb_energy < threshold*
sqrtf(1.0
f/freq_boost)) || spread < spread_threshold ||
569 (!sce->
zeroes[
w*16+
g] && sce->
band_alt[
w*16+
g] && sfb_energy > threshold*thr_mult*freq_boost) ||
570 min_energy < pns_transient_energy_r * max_energy ) {
577 pns_tgt_energy = sfb_energy*
FFMIN(1.0
f, spread*spread);
589 float band_energy,
scale, pns_senergy;
591 band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
594 PNS[
i] =
s->random_state;
596 band_energy =
s->fdsp->scalarproduct_float(PNS, PNS, sce->
ics.
swb_sizes[
g]);
599 pns_senergy =
s->fdsp->scalarproduct_float(PNS, PNS, sce->
ics.
swb_sizes[
g]);
600 pns_energy += pns_senergy;
617 energy_ratio = pns_tgt_energy/pns_energy;
618 sce->
pns_ener[
w*16+
g] = energy_ratio*pns_tgt_energy;
619 if (sce->
zeroes[
w*16+
g] || !sce->
band_alt[
w*16+
g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
636 int bandwidth, cutoff;
637 const float lambda =
s->lambda;
638 const float freq_mult = avctx->
sample_rate*0.5f/wlen;
640 const float pns_transient_energy_r =
FFMIN(0.7
f, lambda / 140.
f);
647 float rate_bandwidth_multiplier = 1.5f;
649 ? (refbits * rate_bandwidth_multiplier * avctx->
sample_rate / 1024)
652 frame_bit_rate *= 1.15f;
655 bandwidth = avctx->
cutoff;
660 cutoff = bandwidth * 2 * wlen / avctx->
sample_rate;
665 float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
666 float min_energy = -1.0f, max_energy = 0.0f;
668 const float freq = start*freq_mult;
670 if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
675 band = &
s->psy.ch[
s->cur_channel].psy_bands[(
w+w2)*16+
g];
676 sfb_energy += band->
energy;
680 min_energy = max_energy = band->
energy;
693 if (sfb_energy < threshold*
sqrtf(1.5
f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
704 int start = 0,
i,
w, w2,
g, sid_sf_boost, prev_mid, prev_side;
705 uint8_t nextband0[128], nextband1[128];
706 float *
M =
s->scoefs + 128*0, *
S =
s->scoefs + 128*1;
707 float *L34 =
s->scoefs + 128*2, *R34 =
s->scoefs + 128*3;
708 float *M34 =
s->scoefs + 128*4, *S34 =
s->scoefs + 128*5;
709 const float lambda =
s->lambda;
710 const float mslambda =
FFMIN(1.0
f, lambda / 120.
f);
720 prev_mid = sce0->
sf_idx[0];
721 prev_side = sce1->
sf_idx[0];
729 float Mmax = 0.0f, Smax = 0.0f;
735 + sce1->
coeffs[start+(
w+w2)*128+
i]) * 0.5;
742 Mmax =
FFMAX(Mmax, M34[
i]);
743 Smax =
FFMAX(Smax, S34[
i]);
747 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
748 float dist1 = 0.0f, dist2 = 0.0f;
768 midcb =
FFMAX(1,midcb);
769 sidcb =
FFMAX(1,sidcb);
772 FFPsyBand *band0 = &
s->psy.ch[
s->cur_channel+0].psy_bands[(
w+w2)*16+
g];
773 FFPsyBand *band1 = &
s->psy.ch[
s->cur_channel+1].psy_bands[(
w+w2)*16+
g];
778 + sce1->
coeffs[start+(
w+w2)*128+
i]) * 0.5;
810 mslambda / (minthr * bmax + FLT_MIN),
INFINITY, &b4,
NULL);
828 }
else if (
B1 >
B0) {
enum BandType band_alt[128]
alternative band type
uint8_t can_pns[128]
band is allowed to PNS (informative)
static av_always_inline float quantize_and_encode_band_cost_template(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED, int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO, const float ROUNDING)
Calculate rate distortion cost for quantizing with given codebook.
int sample_rate
samples per second
static double cb(void *priv, double x, double y)
#define AV_CODEC_FLAG_QSCALE
Use fixed qscale.
static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
static void put_sbits(PutBitContext *pb, int n, int32_t value)
uint8_t zeroes[128]
band is not coded
static void put_bits(Jpeg2000EncoderContext *s, int val, int n)
Write the bit val n times.
static av_always_inline float bval2bmax(float b)
approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f)))
const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB]
static const uint8_t aac_cb_maxval[12]
static int ff_sfdelta_can_remove_band(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int band)
static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
int nb_channels
Number of channels in this layout.
SingleChannelElement ch[2]
static __device__ float ceilf(float a)
static av_always_inline av_const float roundf(float x)
#define SCALE_MAX_POS
scalefactor index maximum value
int num_swb
number of scalefactor window bands
float coeffs[1024]
coefficients for IMDCT, maybe processed
static double b1(void *priv, double x, double y)
AVChannelLayout ch_layout
Audio channel layout.
#define SCALE_DIV_512
scalefactor difference that corresponds to a 512-fold difference in scale
static int ff_sfdelta_can_replace(const SingleChannelElement *sce, const uint8_t *nextband, int prev_sf, int new_sf, int band)
int flags
AV_CODEC_FLAG_*.
#define POW_SF2_ZERO
ff_aac_pow2sf_tab index corresponding to pow(2, 0);
static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz)
static const quantize_and_encode_band_func quantize_and_encode_band_cost_arr[]
static __device__ float fabsf(float a)
IndividualChannelStream ics
static const uint8_t quant[64]
#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, ROUNDING)
static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
@ NOISE_BT
Spectral data are scaled white noise not coded in the bitstream.
static double b3(void *priv, double x, double y)
float ff_quantize_and_encode_band_cost(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
const uint8_t * swb_sizes
table of scalefactor band sizes for a particular window
@ INTENSITY_BT2
Scalefactor data are intensity stereo positions (out of phase).
#define av_assert0(cond)
assert() equivalent, that is always enabled.
static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
static const float bands[]
#define SCALE_DIFF_ZERO
codebook index corresponding to zero scalefactor indices difference
static float quantize_band_cost_cached(struct AACEncContext *s, int w, int g, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy, int rtz)
@ INTENSITY_BT
Scalefactor data are intensity stereo positions (in phase).
uint8_t is_mask[128]
Set if intensity stereo is used.
float is_ener[128]
Intensity stereo pos.
static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda)
void ff_aac_apply_tns(AACEncContext *s, SingleChannelElement *sce)
int64_t bit_rate
the average bitrate
static const uint8_t aac_cb_range[12]
uint8_t ms_mask[128]
Set if mid/side stereo is used for each scalefactor window band.
single band psychoacoustic information
void ff_aac_encode_tns_info(AACEncContext *s, SingleChannelElement *sce)
Encode TNS data.
static __device__ float sqrtf(float a)
static void ff_init_nextband_map(const SingleChannelElement *sce, uint8_t *nextband)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c[... text truncated in extraction]
int sf_idx[128]
scalefactor indices
float ff_aac_pow34sf_tab[428]
const uint8_t ff_aac_scalefactor_bits[121]
#define NOISE_SPREAD_THRESHOLD
const uint16_t *const ff_aac_spectral_codes[11]
static double b2(void *priv, double x, double y)
int common_window
Set if channels share a common 'IndividualChannelStream' in bitstream.
static void search_for_quantizers_twoloop(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, const float lambda)
two-loop quantizers search taken from ISO 13818-7 Appendix C
static const quantize_and_encode_band_func quantize_and_encode_band_cost_rtz_arr[]
enum BandType band_type[128]
band types
#define SCALE_MAX_DIFF
maximum scalefactor difference allowed by standard
float pns_ener[128]
Noise energy values.
#define AAC_CUTOFF_FROM_BITRATE(bit_rate, channels, sample_rate)
float(* quantize_and_encode_band_func)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
Single Channel Element - used for both SCE and LFE elements.
#define i(width, name, range_min, range_max)
#define SCALE_ONE_POS
scalefactor index that corresponds to scale=1.0
static int find_min_book(float maxval, int sf)
void ff_aac_search_for_tns(AACEncContext *s, SingleChannelElement *sce)
channel element - generic struct for SCE/CPE/CCE/LFE
const uint16_t * swb_offset
table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
int cutoff
Audio cutoff bandwidth (0 means "automatic")
static av_always_inline float cbrtf(float x)
static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
const float *const ff_aac_codebook_vectors[]
#define NOISE_LOW_LIMIT
This file contains a template for the twoloop coder function.
void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe)
structure used in optimal codebook search
const uint8_t *const ff_aac_spectral_bits[11]
@ RESERVED_BT
Band types following are encoded differently from others.
main external API structure.
static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, float *energy)
static float find_max_val(int group_len, int swb_size, const float *scaled)
static av_always_inline int lcg_random(unsigned previous_val)
linear congruential pseudorandom number generator
int prev_idx
pointer to the previous path point
static void scale(int *out, const int *in, const int w, const int h, const int shift)
float ff_aac_pow2sf_tab[428]
void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
#define NOISE_LAMBDA_REPLACE