00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "dct.h"
00040 #include "rdft.h"
00041 #include "sinewin.h"
00042
/* Largest n_blocks value in frame_descs[]; sizes the per-block pitch array
 * in synth_frame(). */
00043 #define MAX_BLOCKS 8
/* Largest LSP order handled (the decoder is configured for 10 or 16 LSPs);
 * sizes the LSP/LPC work arrays. */
00044 #define MAX_LSPS 16
/* Headroom, in floats, kept in front of the postfilter synthesis output in
 * synth_filter_out_buf; postfilter() stores s->lsps samples there.
 * NOTE(review): presumably MAX_LSPS rounded up to a multiple of 16 -- confirm. */
00045 #define MAX_LSPS_ALIGN16 16
00046
/* NOTE(review): presumably the maximum number of frames per superframe --
 * confirm against the superframe parser, which is not visible here. */
00047 #define MAX_FRAMES 3
/* Samples per frame; each block is MAX_FRAMESIZE / n_blocks samples long. */
00048 #define MAX_FRAMESIZE 160
/* Upper bound for history_nsamples (max_pitch_val + 8), enforced in
 * wmavoice_decode_init(); also sizes excitation_history. */
00049 #define MAX_SIGNAL_HISTORY 416
/* MAX_FRAMES frames of MAX_FRAMESIZE samples each. */
00050 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)
/* Size in bytes of sframe_cache, excluding the input-buffer padding. */
00052 #define SFRAME_CACHE_MAXSIZE 256
00053
/* Table-bits argument passed to INIT_VLC_STATIC() for frame_type_vlc. */
00054 #define VLC_NBITS 6
00055
00056
/* VLC used to read the frame type code. It is initialised in
 * decode_vbmtree() and read in synth_frame(), where the decoded symbol
 * indexes vbm_tree[]. */
00059 static VLC frame_type_vlc;
00060
/* Adaptive codebook (ACB) modes, stored in frame_type_desc.acb_type. */
00064 enum {
/* No adaptive codebook: synth_block() takes the synth_block_hardcoded()
 * path for these frames. */
00065 ACB_TYPE_NONE = 0,
/* The pitch is interpolated per sample over the frame (last_pitch_val plus
 * pitch_diff_sh16 times the sample position) and the excitation history is
 * read through wmavoice_ipol1_coeffs. */
00066 ACB_TYPE_ASYMMETRIC = 1,
00067
00068
00069
00070
/* One pitch value per block in quarter-sample units (block_pitch_sh2);
 * fractional lags are read through wmavoice_ipol2_coeffs, integer lags are
 * copied directly from the history. */
00071 ACB_TYPE_HAMMING = 2
00072
00073
00074 };
00075
/* Fixed codebook (FCB) modes, stored in frame_type_desc.fcb_type. */
00079 enum {
/* Excitation is taken from wmavoice_std_codebook at an offset produced by
 * pRNG() and scaled by silence_gain; nothing is read from the bitstream
 * for it in synth_block_hardcoded(). */
00080 FCB_TYPE_SILENCE = 0,
00081
00082
/* Excitation is taken from wmavoice_std_codebook; the offset (8 bits) and
 * the gain index (6 bits) are read from the bitstream. */
00083 FCB_TYPE_HARDCODED = 1,
00084
/* Pulses are decoded by aw_pulse_set1() and aw_pulse_set2(), using the
 * layout prepared by aw_parse_coords(). */
00085 FCB_TYPE_AW_PULSES = 2,
00086
/* Five interleaved pulse tracks, each with a sign and a position and
 * optionally a second pulse (see frame_type_desc.dbl_pulses). */
00087 FCB_TYPE_EXC_PULSES = 3,
00088
00089
00090 };
00091
/*
 * Static description of each of the 17 frame types. synth_frame() indexes
 * this table with the value found in vbm_tree[] for the decoded frame-type
 * VLC symbol.
 */
00095 static const struct frame_type_desc {
/* Number of blocks the frame is split into (1, 2, 4 or 8 in this table). */
00096 uint8_t n_blocks;
00097
/* log2(n_blocks); used for shift amounts and bit counts. */
00098 uint8_t log_n_blocks;
/* Adaptive codebook mode, one of ACB_TYPE_*. */
00099 uint8_t acb_type;
/* Fixed codebook mode, one of FCB_TYPE_*. */
00100 uint8_t fcb_type;
/* For FCB_TYPE_EXC_PULSES: pulse tracks with index < dbl_pulses (out of 5)
 * carry a second pulse. */
00101 uint8_t dbl_pulses;
00102
00103
/* NOTE(review): not read in the visible part of the file; presumably the
 * size of the frame in bits -- confirm against the code that uses it. */
00104 uint16_t frame_size;
00105
00106 } frame_descs[17] = {
00107 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
00108 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
00109 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
00110 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
00111 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00112 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00113 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00114 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00115 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
00116 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
00117 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
00118 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
00119 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
00120 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
00121 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
00122 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
00123 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
00124 };
00125
/*
 * Decoder state. Fields in the first half are configured once in
 * wmavoice_decode_init(); the rest is state carried between blocks/frames.
 */
00129 typedef struct {
/* Bit reader. wmavoice_decode_init() points it at extradata + 22 to parse
 * the VBM tree. NOTE(review): presumably reused for packet data later. */
00134 GetBitContext gb;
00135
00136
00137
/* Filled by decode_vbmtree(): maps a frame-type VLC symbol to an index
 * into frame_descs[]; unused slots hold -1 (0xff). */
00138 int8_t vbm_tree[25];
00139
/* 3 + ceil(log2(block_align)), set at init. */
00140 int spillover_bitsize;
00141
00142
/* max_pitch_val + 8; init rejects values above MAX_SIGNAL_HISTORY. */
00143 int history_nsamples;
00144
00145
00146
/* Extradata flag bit 0. When set, init prepares the RDFT/DCT contexts and
 * the sin/cos tables used by the postfilter code. */
00147 int do_apf;
00148
/* Extradata flag bits 2..5; values >= 12 are rejected at init. Selects the
 * row of wmavoice_denoise_power_table in calc_input_response(). */
00149 int denoise_strength;
00150
/* Extradata flag bit 6; enables the tilt compensation step at the end of
 * calc_input_response(). */
00151 int denoise_tilt_corr;
00152
/* Extradata flag bits 7..10; postfilter() applies its second-order filter
 * only when this is > 8. */
00153 int dc_level;
00154
00155
/* Number of LSPs: 16 if extradata flag 0x1000 is set, otherwise 10. */
00156 int lsps;
/* Extradata flag 0x2000; selects the interpolation coefficient table in
 * dequant_lsp10r()/dequant_lsp16r() (passed there as q_mode). */
00157 int lsp_q_mode;
/* Extradata flag 0x4000. NOTE(review): not read in the visible code. */
00158 int lsp_def_mode;
00159
/* 24 (10 LSPs) or 34 (16 LSPs), set at init. */
00160 int frame_lsp_bitsize;
00161
/* 48 (10 LSPs) or 60 (16 LSPs), set at init. */
00162 int sframe_lsp_bitsize;
00163
00164
/* Pitch bounds derived from the sample rate at init; min_pitch_val must be
 * at least 1. */
00165 int min_pitch_val;
00166 int max_pitch_val;
/* ceil(log2(max_pitch_val - min_pitch_val)); number of bits read for the
 * frame pitch in synth_frame(). */
00167 int pitch_nbits;
00168
/* ceil(log2(block_pitch_range)). */
00169 int block_pitch_nbits;
00170
/* Computed at init from block_conv_table[] and min_pitch_val. */
00171 int block_pitch_range;
/* 1 + ceil(log2(block_delta_pitch_hrange)). */
00172 int block_delta_pitch_nbits;
00173
00174
00175
/* (pitch_range >> 3) with the low 4 bits cleared; must be > 0. */
00176 int block_delta_pitch_hrange;
00177
/* Four thresholds derived from the pitch range at init:
 * { min_pitch_val, range * 25 / 64, range * 44 / 64, max_pitch_val - 1 }. */
00178 uint16_t block_conv_table[4];
00179
00180
/* NOTE(review): the next five fields are not used in the visible code;
 * presumably superframe/packet parsing state -- confirm. */
00190 int spillover_nbits;
00191
00192
00193
00194 int has_residual_lsps;
00195
00196
00197
00198
00199 int skip_bits_next;
00200
00201
00202
00203 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
00206 int sframe_cache_size;
00207
00208
00209
00210
/* NOTE(review): bit writer, not used in the visible code. */
00211 PutBitContext pb;
00212
/* LSPs of the previous frame; initialised to lsps values evenly spaced in
 * (0, pi): pi * (n + 1) / (lsps + 1). */
00222 double prev_lsps[MAX_LSPS];
00223
/* Pitch of the previous frame; starts at 40. */
00224 int last_pitch_val;
/* ACB type of the previous frame; starts as ACB_TYPE_NONE. */
00225 int last_acb_type;
/* Per-sample pitch change in 16.16 fixed point, used by the
 * ACB_TYPE_ASYMMETRIC path of synth_block_fcb_acb(). */
00226 int pitch_diff_sh16;
00227
/* Gain applied to the codebook excitation for FCB_TYPE_SILENCE frames. */
00228 float silence_gain;
00229
/* Set by aw_parse_coords() when the extended (8-bit) index form was used;
 * shortens the first block's field in aw_pulse_set1() by 2 bits. */
00230 int aw_idx_is_ext;
00231
/* 16 or 24, chosen by aw_parse_coords() from the two block pitches. */
00232 int aw_pulse_range;
00233
00234
00235
00236
00237
/* Per-block pulse counts computed by aw_parse_coords(). */
00238 int aw_n_pulses[2];
00239
00240
/* Per-block offset of the first pulse window; may be negative. */
00241 int aw_first_pulse_off[2];
00242
/* Written at the end of aw_pulse_set2() for one block and read by it as
 * the starting pulse offset of the second block. */
00243 int aw_next_pulse_off_cache;
00244
00245
00246
00247
00248
/* Frame counter; seeds pRNG() for FCB_TYPE_SILENCE frames. */
00249 int frame_cntr;
00250
/* History of clipped gain prediction errors, newest first; cleared by
 * synth_block_hardcoded(). */
00251 float gain_pred_err[6];
/* NOTE(review): excitation/synthesis history buffers; their use is outside
 * the visible code. */
00252 float excitation_history[MAX_SIGNAL_HISTORY];
00256 float synth_history[MAX_LSPS];
00257
/* Forward and inverse real FFT contexts, initialised with nbits = 7. */
00266 RDFTContext rdft, irdft;
00267
/* DCT-I and DST-I contexts, initialised with nbits = 6. */
00268 DCTContext dct, dst;
00269
/* Lookup tables indexed by 255 + clip(x, -255, 255). Init makes cos[]
 * symmetric and sin[] antisymmetric around index 255. */
00270 float sin[511], cos[511];
00271
/* Gain memory carried between adaptive_gain_control() calls. */
00272 float postfilter_agc;
00273
/* Memory of the second-order filter applied at the end of postfilter(). */
00274 float dcf_mem[2];
/* NOTE(review): passed to postfilter() as zero_exc_pf by code outside the
 * visible range -- confirm. */
00275 float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
/* Filter tail that did not fit in the current block; wiener_denoise() adds
 * it to the start of following blocks. */
00278 float denoise_filter_cache[MAX_FRAMESIZE];
/* Number of valid samples in denoise_filter_cache. */
00279 int denoise_filter_cache_size;
/* 128-float work buffer: zero-padded, tilt-compensated LPCs. */
00280 DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
/* 128-float work buffer: denoise filter coefficients. */
00282 DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
/* Postfilter synthesis output; the first MAX_LSPS_ALIGN16 floats are
 * headroom holding the filter history of the previous block. */
00284 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
00287
00290 } WMAVoiceContext;
00291
00301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00302 {
00303 static const uint8_t bits[] = {
00304 2, 2, 2, 4, 4, 4,
00305 6, 6, 6, 8, 8, 8,
00306 10, 10, 10, 12, 12, 12,
00307 14, 14, 14, 14
00308 };
00309 static const uint16_t codes[] = {
00310 0x0000, 0x0001, 0x0002,
00311 0x000c, 0x000d, 0x000e,
00312 0x003c, 0x003d, 0x003e,
00313 0x00fc, 0x00fd, 0x00fe,
00314 0x03fc, 0x03fd, 0x03fe,
00315 0x0ffc, 0x0ffd, 0x0ffe,
00316 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff
00317 };
00318 int cntr[8], n, res;
00319
00320 memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00321 memset(cntr, 0, sizeof(cntr));
00322 for (n = 0; n < 17; n++) {
00323 res = get_bits(gb, 3);
00324 if (cntr[res] > 3)
00325 return -1;
00326 vbm_tree[res * 3 + cntr[res]++] = n;
00327 }
00328 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00329 bits, 1, 1, codes, 2, 2, 132);
00330 return 0;
00331 }
00332
00336 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00337 {
00338 int n, flags, pitch_range, lsp16_flag;
00339 WMAVoiceContext *s = ctx->priv_data;
00340
00349 if (ctx->extradata_size != 46) {
00350 av_log(ctx, AV_LOG_ERROR,
00351 "Invalid extradata size %d (should be 46)\n",
00352 ctx->extradata_size);
00353 return -1;
00354 }
00355 flags = AV_RL32(ctx->extradata + 18);
00356 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00357 s->do_apf = flags & 0x1;
00358 if (s->do_apf) {
00359 ff_rdft_init(&s->rdft, 7, DFT_R2C);
00360 ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00361 ff_dct_init(&s->dct, 6, DCT_I);
00362 ff_dct_init(&s->dst, 6, DST_I);
00363
00364 ff_sine_window_init(s->cos, 256);
00365 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00366 for (n = 0; n < 255; n++) {
00367 s->sin[n] = -s->sin[510 - n];
00368 s->cos[510 - n] = s->cos[n];
00369 }
00370 }
00371 s->denoise_strength = (flags >> 2) & 0xF;
00372 if (s->denoise_strength >= 12) {
00373 av_log(ctx, AV_LOG_ERROR,
00374 "Invalid denoise filter strength %d (max=11)\n",
00375 s->denoise_strength);
00376 return -1;
00377 }
00378 s->denoise_tilt_corr = !!(flags & 0x40);
00379 s->dc_level = (flags >> 7) & 0xF;
00380 s->lsp_q_mode = !!(flags & 0x2000);
00381 s->lsp_def_mode = !!(flags & 0x4000);
00382 lsp16_flag = flags & 0x1000;
00383 if (lsp16_flag) {
00384 s->lsps = 16;
00385 s->frame_lsp_bitsize = 34;
00386 s->sframe_lsp_bitsize = 60;
00387 } else {
00388 s->lsps = 10;
00389 s->frame_lsp_bitsize = 24;
00390 s->sframe_lsp_bitsize = 48;
00391 }
00392 for (n = 0; n < s->lsps; n++)
00393 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00394
00395 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00396 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00397 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00398 return -1;
00399 }
00400
00401 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
00402 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00403 pitch_range = s->max_pitch_val - s->min_pitch_val;
00404 if (pitch_range <= 0) {
00405 av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
00406 return -1;
00407 }
00408 s->pitch_nbits = av_ceil_log2(pitch_range);
00409 s->last_pitch_val = 40;
00410 s->last_acb_type = ACB_TYPE_NONE;
00411 s->history_nsamples = s->max_pitch_val + 8;
00412
00413 if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00414 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00415 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00416
00417 av_log(ctx, AV_LOG_ERROR,
00418 "Unsupported samplerate %d (min=%d, max=%d)\n",
00419 ctx->sample_rate, min_sr, max_sr);
00420
00421 return -1;
00422 }
00423
00424 s->block_conv_table[0] = s->min_pitch_val;
00425 s->block_conv_table[1] = (pitch_range * 25) >> 6;
00426 s->block_conv_table[2] = (pitch_range * 44) >> 6;
00427 s->block_conv_table[3] = s->max_pitch_val - 1;
00428 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00429 if (s->block_delta_pitch_hrange <= 0) {
00430 av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
00431 return -1;
00432 }
00433 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00434 s->block_pitch_range = s->block_conv_table[2] +
00435 s->block_conv_table[3] + 1 +
00436 2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00437 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
00438
00439 ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
00440
00441 return 0;
00442 }
00443
/**
 * Adaptive gain control: scale the postfiltered signal so that its level
 * follows that of the unfiltered speech signal.
 *
 * The target gain is (1 - alpha) * sum|speech_synth| / sum|in|, and the
 * applied gain is smoothed per sample as mem = alpha * mem + target.
 *
 * If the postfiltered input is all zeros its energy sum is 0. The target
 * gain is then taken as 0 instead of dividing by zero, which would put a
 * NaN/Inf into *gain_mem and corrupt every later call.
 *
 * @param out           output samples (may be the same buffer as in)
 * @param in            postfiltered input samples
 * @param speech_synth  reference (unfiltered) samples
 * @param size          number of samples in each buffer
 * @param alpha         smoothing factor of the gain recursion
 * @param gain_mem      in/out: smoothed gain carried between calls
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* Guard against 0/0 (NaN) and x/0 (Inf) for a silent input. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
00486
00505 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00506 const float *in, float *out, int size)
00507 {
00508 int n;
00509 float optimal_gain = 0, dot;
00510 const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00511 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00512 *best_hist_ptr;
00513
00514
00515 do {
00516 dot = ff_dot_productf(in, ptr, size);
00517 if (dot > optimal_gain) {
00518 optimal_gain = dot;
00519 best_hist_ptr = ptr;
00520 }
00521 } while (--ptr >= end);
00522
00523 if (optimal_gain <= 0)
00524 return -1;
00525 dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00526 if (dot <= 0)
00527 return -1;
00528
00529 if (optimal_gain <= dot) {
00530 dot = dot / (dot + 0.6 * optimal_gain);
00531 } else
00532 dot = 0.625;
00533
00534
00535 for (n = 0; n < size; n++)
00536 out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00537
00538 return 0;
00539 }
00540
/**
 * Compute the tilt of a filter as the ratio of its lag-1 autocorrelation
 * to its lag-0 autocorrelation, with an implicit leading coefficient of 1.0
 * in front of lpcs[0].
 *
 * @param lpcs    filter coefficients (without the leading 1.0)
 * @param n_lpcs  number of coefficients
 * @return the lag-1 / lag-0 ratio
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    const float lag0 = 1.0     + ff_dot_productf(lpcs,  lpcs,    n_lpcs);
    const float lag1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return lag1 / lag0;
}
00560
/**
 * Derive the time-domain denoise filter coefficients from the spectrum of
 * the (tilted) LPC filter.
 *
 * @param s          decoder context; uses the RDFT/DCT/DST contexts, the
 *                   sin/cos tables, denoise_strength and denoise_tilt_corr
 * @param lpcs       in: zero-padded LPCs (the caller passes a 128-float
 *                   buffer); overwritten, used as scratch space
 * @param fcb_type   fixed codebook type; only selects the gain multiplier
 * @param coeffs     out: 128-float buffer. The first `remainder` entries
 *                   hold the normalised filter, the rest is zeroed.
 * @param remainder  number of filter taps to keep
 */
00564 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00565 int fcb_type, float *coeffs, int remainder)
00566 {
00567 float last_coeff, min = 15.0, max = -15.0;
00568 float irange, angle_mul, gain_mul, range, sq;
00569 int n, idx;
00570
00571
/* Forward real FFT of the LPCs, in place. */
00572 s->rdft.rdft_calc(&s->rdft, lpcs);
/* log_range(): store log10(assign) in var and track the running min/max. */
00573 #define log_range(var, assign) do { \
00574 float tmp = log10f(assign); var = tmp; \
00575 max = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00576 } while (0)
/* Replace each bin by the log10 of its power. lpcs[0] and lpcs[1] are
 * handled as single real values, the others as (re, im) pairs.
 * NOTE(review): presumably lpcs[1] is the Nyquist bin of the packed RDFT
 * output; its result ends up in lpcs[64] below -- confirm. */
00577 log_range(last_coeff, lpcs[1] * lpcs[1]);
00578 for (n = 1; n < 64; n++)
00579 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
00580 lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00581 log_range(lpcs[0], lpcs[0] * lpcs[0]);
00582 #undef log_range
00583 range = max - min;
00584 lpcs[64] = last_coeff;
00585
00586
00587
00588
00589
00590
/* Map each bin's distance from the maximum onto 64 steps to look up the
 * denoise power, then convert that power into a magnitude (coeffs[n]) and
 * a scaled value (lpcs[n]) used for the phase below.
 * NOTE(review): range is 0 if all bins have equal power, making irange
 * infinite -- confirm this cannot happen for real input. */
00591 irange = 64.0 / range;
00592 gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00593 (5.0 / 14.7));
00594 angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00595 for (n = 0; n <= 64; n++) {
00596 float pwr;
00597
00598 idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00599 pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00600 lpcs[n] = angle_mul * pwr;
00601
00602
/* Energy table lookup; indices above 127 are extrapolated geometrically
 * from the last table entry, negative ones use entry 0. */
00603 idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00604 if (idx > 127) {
00605 coeffs[n] = wmavoice_energy_table[127] *
00606 powf(1.0331663, idx - 127);
00607 } else
00608 coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00609 }
00610
00611
00612
00613
00614
/* DCT-I followed by DST-I, both in place on lpcs; the result is used as
 * the phase angle of each bin. */
00615 s->dct.dct_calc(&s->dct, lpcs);
00616 s->dst.dct_calc(&s->dst, lpcs);
00617
00618
/* Combine magnitude (coeffs[n]) and phase (from lpcs) into (re, im) pairs
 * via the sin/cos tables. The loop runs from n = 63 down to 1 and handles
 * two bins per pass with opposite signs of lpcs[64]; it writes
 * coeffs[2n] and coeffs[2n + 1] from coeffs[n], which is safe because n
 * decreases. */
00619 idx = 255 + av_clip(lpcs[64], -255, 255);
00620 coeffs[0] = coeffs[0] * s->cos[idx];
00621 idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00622 last_coeff = coeffs[64] * s->cos[idx];
00623 for (n = 63;; n--) {
00624 idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00625 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00626 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00627
00628 if (!--n) break;
00629
00630 idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00631 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00632 coeffs[n * 2] = coeffs[n] * s->cos[idx];
00633 }
00634 coeffs[1] = last_coeff;
00635
00636
/* Back to the time domain. */
00637 s->irdft.rdft_calc(&s->irdft, coeffs);
00638
00639
/* Keep only the first `remainder` taps. */
00640 memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00641 if (s->denoise_tilt_corr) {
00642 float tilt_mem = 0;
00643
/* Drop the last tap, then tilt-compensate using the tilt of the rest. */
00644 coeffs[remainder - 1] = 0;
00645 ff_tilt_compensation(&tilt_mem,
00646 -1.8 * tilt_factor(coeffs, remainder - 1),
00647 coeffs, remainder);
00648 }
/* Normalise the taps to an L2 norm of 1/64. */
00649 sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00650 for (n = 0; n < remainder; n++)
00651 coeffs[n] *= sq;
00652 }
00653
/**
 * Apply the denoise filter to one block by multiplying spectra, and carry
 * the filter tail over to following blocks through denoise_filter_cache.
 *
 * @param s         decoder context
 * @param fcb_type  fixed codebook type; for FCB_TYPE_SILENCE no filtering
 *                  is done and only the cached tail is added
 * @param synth_pf  in/out: block samples; must have room for 128 floats,
 *                  entries from `size` on are used as scratch
 * @param size      number of valid samples in synth_pf
 * @param lpcs      LPC coefficients of this block (s->lsps values)
 */
00680 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00681 float *synth_pf, int size,
00682 const float *lpcs)
00683 {
00684 int remainder, lim, n;
00685
00686 if (fcb_type != FCB_TYPE_SILENCE) {
00687 float *tilted_lpcs = s->tilted_lpcs_pf,
00688 *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00689
/* Build { 1.0, lpcs..., 0... } in the 128-float buffer and apply tilt
 * compensation to the first lsps + 2 entries. */
00690 tilted_lpcs[0] = 1.0;
00691 memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00692 memset(&tilted_lpcs[s->lsps + 1], 0,
00693 sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00694 ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00695 tilted_lpcs, s->lsps + 2);
00696
00697
00698
00699
00700
/* Number of filter taps, limited so that the filtered block
 * (size + remainder samples) stays within the 128-point transform. */
00701 remainder = FFMIN(127 - size, size - 1);
00702 calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00703
00704
00705
/* Zero-pad the block, transform block and filter, multiply the spectra
 * (entries 0 and 1 as plain reals, the rest as complex pairs) and
 * transform back. */
00706 memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00707 s->rdft.rdft_calc(&s->rdft, synth_pf);
00708 s->rdft.rdft_calc(&s->rdft, coeffs);
00709 synth_pf[0] *= coeffs[0];
00710 synth_pf[1] *= coeffs[1];
00711 for (n = 1; n < 64; n++) {
00712 float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00713 synth_pf[n * 2] = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00714 synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00715 }
00716 s->irdft.rdft_calc(&s->irdft, synth_pf);
00717 }
00718
00719
/* Add the tail cached by earlier blocks to the start of this block, then
 * shift whatever is left of the cache to its front. */
00720 if (s->denoise_filter_cache_size) {
00721 lim = FFMIN(s->denoise_filter_cache_size, size);
00722 for (n = 0; n < lim; n++)
00723 synth_pf[n] += s->denoise_filter_cache[n];
00724 s->denoise_filter_cache_size -= lim;
00725 memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00726 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00727 }
00728
00729
/* Store this block's tail (synth_pf[size .. size + remainder - 1]): add it
 * onto the part of the cache that is still occupied and append the rest.
 * `remainder` is only set, and only read, when fcb_type is not SILENCE. */
00730 if (fcb_type != FCB_TYPE_SILENCE) {
00731 lim = FFMIN(remainder, s->denoise_filter_cache_size);
00732 for (n = 0; n < lim; n++)
00733 s->denoise_filter_cache[n] += synth_pf[size + n];
00734 if (lim < remainder) {
00735 memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00736 sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00737 s->denoise_filter_cache_size = remainder;
00738 }
00739 }
00740 }
00741
00762 static void postfilter(WMAVoiceContext *s, const float *synth,
00763 float *samples, int size,
00764 const float *lpcs, float *zero_exc_pf,
00765 int fcb_type, int pitch)
00766 {
00767 float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00768 *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00769 *synth_filter_in = zero_exc_pf;
00770
00771 assert(size <= MAX_FRAMESIZE / 2);
00772
00773
00774 ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00775
00776 if (fcb_type >= FCB_TYPE_AW_PULSES &&
00777 !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00778 synth_filter_in = synth_filter_in_buf;
00779
00780
00781 ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00782 synth_filter_in, size, s->lsps);
00783 memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00784 sizeof(synth_pf[0]) * s->lsps);
00785
00786 wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00787
00788 adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00789 &s->postfilter_agc);
00790
00791 if (s->dc_level > 8) {
00792
00793
00794
00795 ff_acelp_apply_order_2_transfer_function(samples, samples,
00796 (const float[2]) { -1.99997, 1.0 },
00797 (const float[2]) { -1.9330735188, 0.93589198496 },
00798 0.93980580475, s->dcf_mem, size);
00799 }
00800 }
/**
 * Multi-stage vector dequantisation of LSPs.
 *
 * The codebooks of all stages are stored back to back in `table`; stage k
 * holds sizes[k] vectors of `num` bytes. For each stage the vector selected
 * by values[k] is scaled and offset, and the results of all stages are
 * summed:  lsps[m] = sum_k (base_q[k] + mul_q[k] * vector_k[m]).
 *
 * @param lsps      output, num values (overwritten)
 * @param num       vector length
 * @param values    per-stage vector index
 * @param sizes     per-stage number of vectors in the codebook
 * @param n_stages  number of stages
 * @param table     concatenated codebooks
 * @param mul_q     per-stage scale
 * @param base_q    per-stage offset
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    const uint8_t *codebook = table;   /* start of the current stage */
    int stage;

    memset(lsps, 0, num * sizeof(*lsps));

    for (stage = 0; stage < n_stages; stage++) {
        const uint8_t *vec   = &codebook[values[stage] * num];
        const double  offset = base_q[stage];
        const double  scale  = mul_q[stage];
        int k;

        for (k = 0; k < num; k++)
            lsps[k] += offset + scale * vec[k];

        /* Skip to the next stage's codebook. */
        codebook += sizes[stage] * num;
    }
}
00836
00848 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00849 {
00850 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00851 static const double mul_lsf[4] = {
00852 5.2187144800e-3, 1.4626986422e-3,
00853 9.6179549166e-4, 1.1325736225e-3
00854 };
00855 static const double base_lsf[4] = {
00856 M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00857 M_PI * -3.3486e-2, M_PI * -5.7408e-2
00858 };
00859 uint16_t v[4];
00860
00861 v[0] = get_bits(gb, 8);
00862 v[1] = get_bits(gb, 6);
00863 v[2] = get_bits(gb, 5);
00864 v[3] = get_bits(gb, 5);
00865
00866 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00867 mul_lsf, base_lsf);
00868 }
00869
00874 static void dequant_lsp10r(GetBitContext *gb,
00875 double *i_lsps, const double *old,
00876 double *a1, double *a2, int q_mode)
00877 {
00878 static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00879 static const double mul_lsf[3] = {
00880 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3
00881 };
00882 static const double base_lsf[3] = {
00883 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00884 };
00885 const float (*ipol_tab)[2][10] = q_mode ?
00886 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00887 uint16_t interpol, v[3];
00888 int n;
00889
00890 dequant_lsp10i(gb, i_lsps);
00891
00892 interpol = get_bits(gb, 5);
00893 v[0] = get_bits(gb, 7);
00894 v[1] = get_bits(gb, 6);
00895 v[2] = get_bits(gb, 6);
00896
00897 for (n = 0; n < 10; n++) {
00898 double delta = old[n] - i_lsps[n];
00899 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00900 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00901 }
00902
00903 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00904 mul_lsf, base_lsf);
00905 }
00906
00910 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00911 {
00912 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00913 static const double mul_lsf[5] = {
00914 3.3439586280e-3, 6.9908173703e-4,
00915 3.3216608306e-3, 1.0334960326e-3,
00916 3.1899104283e-3
00917 };
00918 static const double base_lsf[5] = {
00919 M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00920 M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00921 M_PI * -1.29816e-1
00922 };
00923 uint16_t v[5];
00924
00925 v[0] = get_bits(gb, 8);
00926 v[1] = get_bits(gb, 6);
00927 v[2] = get_bits(gb, 7);
00928 v[3] = get_bits(gb, 6);
00929 v[4] = get_bits(gb, 7);
00930
00931 dequant_lsps( lsps, 5, v, vec_sizes, 2,
00932 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
00933 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,
00934 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00935 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00936 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00937 }
00938
00943 static void dequant_lsp16r(GetBitContext *gb,
00944 double *i_lsps, const double *old,
00945 double *a1, double *a2, int q_mode)
00946 {
00947 static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00948 static const double mul_lsf[3] = {
00949 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3
00950 };
00951 static const double base_lsf[3] = {
00952 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00953 };
00954 const float (*ipol_tab)[2][16] = q_mode ?
00955 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00956 uint16_t interpol, v[3];
00957 int n;
00958
00959 dequant_lsp16i(gb, i_lsps);
00960
00961 interpol = get_bits(gb, 5);
00962 v[0] = get_bits(gb, 7);
00963 v[1] = get_bits(gb, 7);
00964 v[2] = get_bits(gb, 7);
00965
00966 for (n = 0; n < 16; n++) {
00967 double delta = old[n] - i_lsps[n];
00968 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00969 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00970 }
00971
00972 dequant_lsps( a2, 10, v, vec_sizes, 1,
00973 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);
00974 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00975 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00976 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00977 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00978 }
00979
/**
 * Parse the pulse layout shared by the two blocks of an FCB_TYPE_AW_PULSES
 * frame. Fills aw_idx_is_ext, aw_pulse_range, aw_n_pulses[] and
 * aw_first_pulse_off[] in the context.
 *
 * @param s      decoder context
 * @param gb     bit reader
 * @param pitch  per-block pitch values; pitch[0] and pitch[1] are read and
 *               must be positive (they are used as loop steps and divisors)
 */
00993 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00994 const int *pitch)
00995 {
/* Start offset of the first pulse, indexed by the value decoded below
 * (0..93). */
00996 static const int16_t start_offset[94] = {
00997 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,
00998 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,
00999 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,
01000 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,
01001 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,
01002 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,
01003 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
01004 141, 143, 145, 147, 149, 151, 153, 155, 157, 159
01005 };
01006 int bits, offset;
01007
01008
/* 6-bit index; values 54..63 are extended with 2 more bits, giving
 * 54 + 4 * (bits - 54) + extra, i.e. at most 93. */
01009 s->aw_idx_is_ext = 0;
01010 if ((bits = get_bits(gb, 6)) >= 54) {
01011 s->aw_idx_is_ext = 1;
01012 bits += (bits - 54) * 3 + get_bits(gb, 2);
01013 }
01014
01015
01016
/* Wider pulse window when both pitches exceed 32. */
01017 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
/* Move a negative start offset forward by whole pitch periods until it is
 * non-negative. */
01018 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
/* Block 0: number of pitch-spaced positions from `offset` that fall before
 * sample MAX_FRAMESIZE / 2, and the start of the first pulse window. */
01019 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01020 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
/* Block 1: continue from the first position past block 0, now stepping
 * with pitch[1]; the offset is made relative to the start of block 1. */
01021 offset += s->aw_n_pulses[0] * pitch[0];
01022 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01023 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01024
01025
01026
01027
/* If the table start offset lies in block 0, pull the block-1 window start
 * back by whole pitch periods for as long as the shifted window would
 * still end after sample 0; likewise for block 0 when the table offset was
 * negative. */
01028 if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01029 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01030 s->aw_first_pulse_off[1] -= pitch[1];
01031 if (start_offset[bits] < 0)
01032 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01033 s->aw_first_pulse_off[0] -= pitch[0];
01034 }
01035 }
01036
/**
 * Decode the single extra pulse of an FCB_TYPE_AW_PULSES block. Its
 * position is coded as "the aidx-th sample that is not covered by one of
 * the pulse windows", so a bit mask of still-available positions is built
 * first.
 *
 * On the normal path exactly one pulse is appended to fcb and
 * aw_next_pulse_off_cache is updated. If the mask runs out of free
 * positions the function returns early without appending anything.
 *
 * @param s          decoder context (layout from aw_parse_coords())
 * @param gb         bit reader
 * @param block_idx  block index within the frame (0 or 1)
 * @param fcb        pulse list to append to; fcb->pitch_lag must be set
 */
01044 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01045 int block_idx, AMRFixed *fcb)
01046 {
/* One bit per sample of the block (5 words x 16 bits = 80 positions, MSB
 * first: bit 0x8000 >> (idx & 15) of word idx >> 4), with 2 padding words
 * on either side so that window writes near the edges stay in bounds. */
01047 uint16_t use_mask_mem[9];
01048 uint16_t *use_mask = use_mask_mem + 2;
01049
01050
01051
01052
01053
01054
01055
01056 int pulse_off = s->aw_first_pulse_off[block_idx],
01057 pulse_start, n, idx, range, aidx, start_off = 0;
01058
01059
/* Advance the first window by whole pitch lags until it reaches into the
 * block (window end >= 1). */
01060 if (s->aw_n_pulses[block_idx] > 0)
01061 while (pulse_off + s->aw_pulse_range < 1)
01062 pulse_off += fcb->pitch_lag;
01063
01064
/* Search range for the pulse; for the second block the window start comes
 * from the value cached by the previous call. */
01065 if (s->aw_n_pulses[0] > 0) {
01066 if (block_idx == 0) {
01067 range = 32;
01068 } else {
01069 range = 8;
01070 if (s->aw_n_pulses[block_idx] > 0)
01071 pulse_off = s->aw_next_pulse_off_cache;
01072 }
01073 } else
01074 range = 16;
01075 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01076
01077
01078
01079
/* Start with all 80 positions free (padding words cleared), then clear the
 * aw_pulse_range bits of every window, one window per pitch lag. A window
 * can span up to three mask words. */
01080 memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01081 memset( use_mask, -1, 5 * sizeof(use_mask[0]));
01082 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01083 if (s->aw_n_pulses[block_idx] > 0)
01084 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01085 int excl_range = s->aw_pulse_range;
01086 uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01087 int first_sh = 16 - (idx & 15);
01088 *use_mask_ptr++ &= 0xFFFFu << first_sh;
01089 excl_range -= first_sh;
01090 if (excl_range >= 16) {
01091 *use_mask_ptr++ = 0;
01092 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);
01093 } else
01094 *use_mask_ptr &= 0xFFFF >> excl_range;
01095 }
01096
01097
/* Read the index (5, 3 or 4 bits) and walk forward from pulse_start,
 * counting free positions until aidx + 1 of them were found. Each counted
 * position is marked used. Negative candidates are wrapped forward by the
 * pitch lag; candidates past the block restart at the first free position
 * of the mask, or abort if none is left. */
01098 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01099 for (n = 0; n <= aidx; pulse_start++) {
01100 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01101 if (idx >= MAX_FRAMESIZE / 2) {
01102 if (use_mask[0]) idx = 0x0F;
01103 else if (use_mask[1]) idx = 0x1F;
01104 else if (use_mask[2]) idx = 0x2F;
01105 else if (use_mask[3]) idx = 0x3F;
01106 else if (use_mask[4]) idx = 0x4F;
01107 else return;
01108 idx -= av_log2_16bit(use_mask[idx >> 4]);
01109 }
01110 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01111 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01112 n++;
01113 start_off = idx;
01114 }
01115 }
01116
/* Append the pulse at the last counted position, with a 1-bit sign. */
01117 fcb->x[fcb->n] = start_off;
01118 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01119 fcb->n++;
01120
01121
/* Remember where the pitch-spaced continuation of this pulse falls in the
 * following block. */
01122 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01123 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01124 }
01125
01133 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01134 int block_idx, AMRFixed *fcb)
01135 {
01136 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01137 float v;
01138
01139 if (s->aw_n_pulses[block_idx] > 0) {
01140 int n, v_mask, i_mask, sh, n_pulses;
01141
01142 if (s->aw_pulse_range == 24) {
01143 n_pulses = 3;
01144 v_mask = 8;
01145 i_mask = 7;
01146 sh = 4;
01147 } else {
01148 n_pulses = 4;
01149 v_mask = 4;
01150 i_mask = 3;
01151 sh = 3;
01152 }
01153
01154 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01155 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01156 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01157 s->aw_first_pulse_off[block_idx];
01158 while (fcb->x[fcb->n] < 0)
01159 fcb->x[fcb->n] += fcb->pitch_lag;
01160 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01161 fcb->n++;
01162 }
01163 } else {
01164 int num2 = (val & 0x1FF) >> 1, delta, idx;
01165
01166 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }
01167 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01168 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01169 else { delta = 7; idx = num2 + 1 - 3 * 75; }
01170 v = (val & 0x200) ? -1.0 : 1.0;
01171
01172 fcb->no_repeat_mask |= 3 << fcb->n;
01173 fcb->x[fcb->n] = idx - delta;
01174 fcb->y[fcb->n] = v;
01175 fcb->x[fcb->n + 1] = idx;
01176 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;
01177 fcb->n += 2;
01178 }
01179 }
01180
/**
 * Deterministic pseudo-random offset generator used for silence frames.
 *
 * The result depends only on the arguments, so the same frame counter and
 * block number always give the same offset.
 *
 * @param frame_cntr  frame counter
 * @param block_num   block index within the frame
 * @param block_size  block size in samples; must be below 1000
 * @return a value in [0, 1000 - block_size)
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* One row per value of (seed % 9): a multiplier and a 32-bit
     * fixed-point factor. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U },
        { 4545,  0 * 390451573U },
        { 3124, 11 * 268435456U },
        { 2380, 15 * 204522253U },
        { 1922, 23 * 165191050U },
        { 1612, 23 * 138547333U },
        { 1388, 27 * 119304648U },
        { 1219, 16 * 104755300U },
        { 1086, 39 *  93368855U }
    };
    unsigned int seed = MUL16(block_num, 1877) + frame_cntr;
    unsigned int row, mixed;

    /* Fold the seed back below 0xFFFF with a single subtraction. */
    if (seed >= 0xFFFF)
        seed -= 0xFFFF;

    /* row = seed % 9, assuming MULH() yields the high 32 bits of the 64-bit
     * product (477218589 is about 2^32 / 9). */
    row = seed - 9 * MULH(477218589, seed);

    /* Only the low 16 bits of the mix are kept. */
    mixed = (uint16_t) (seed * div_tbl[row][0] + UMULH(seed, div_tbl[row][1]));

    return mixed % (1000 - block_size);
}
01225
01230 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01231 int block_idx, int size,
01232 const struct frame_type_desc *frame_desc,
01233 float *excitation)
01234 {
01235 float gain;
01236 int n, r_idx;
01237
01238 assert(size <= MAX_FRAMESIZE);
01239
01240
01241 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01242 r_idx = pRNG(s->frame_cntr, block_idx, size);
01243 gain = s->silence_gain;
01244 } else {
01245 r_idx = get_bits(gb, 8);
01246 gain = wmavoice_gain_universal[get_bits(gb, 6)];
01247 }
01248
01249
01250 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01251
01252
01253 for (n = 0; n < size; n++)
01254 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01255 }
01256
/**
 * Generate the excitation of a block from a fixed codebook (pulses) and an
 * adaptive codebook (pitch-delayed copy of the excitation history), each
 * with its own gain.
 *
 * @param s                decoder context
 * @param gb               bit reader
 * @param block_idx        block index within the frame
 * @param size             number of samples (<= MAX_FRAMESIZE / 2)
 * @param block_pitch_sh2  block pitch in quarter-sample units
 * @param frame_desc       frame type description
 * @param excitation       in/out: output for this block; samples before
 *                         index 0 are read as history
 */
01261 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01262 int block_idx, int size,
01263 int block_pitch_sh2,
01264 const struct frame_type_desc *frame_desc,
01265 float *excitation)
01266 {
/* Weights applied to the gain prediction error history. */
01267 static const float gain_coeff[6] = {
01268 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01269 };
01270 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01271 int n, idx, gain_weight;
01272 AMRFixed fcb;
01273
01274 assert(size <= MAX_FRAMESIZE / 2);
01275 memset(pulses, 0, sizeof(*pulses) * size);
01276
/* Start with an empty pulse list; the integer part of the pitch is the
 * pulse repetition lag. */
01277 fcb.pitch_lag = block_pitch_sh2 >> 2;
01278 fcb.pitch_fac = 1.0;
01279 fcb.no_repeat_mask = 0;
01280 fcb.n = 0;
01281
01282
01283
/* Fixed codebook: read the pulse positions and signs. */
01284 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01285 aw_pulse_set1(s, gb, block_idx, &fcb);
01286 aw_pulse_set2(s, gb, block_idx, &fcb);
01287 } else {
01288 int offset_nbits = 5 - frame_desc->log_n_blocks;
01289
/* All bits set: every pulse is flagged in no_repeat_mask. */
01290 fcb.no_repeat_mask = -1;
01291
01292
/* Five interleaved tracks (positions n, n + 5, n + 10, ...), each with a
 * sign bit and a position; the first dbl_pulses tracks carry a second
 * pulse whose sign is flipped when it lies after the first one. */
01293 for (n = 0; n < 5; n++) {
01294 float sign;
01295 int pos1, pos2;
01296
01297 sign = get_bits1(gb) ? 1.0 : -1.0;
01298 pos1 = get_bits(gb, offset_nbits);
01299 fcb.x[fcb.n] = n + 5 * pos1;
01300 fcb.y[fcb.n++] = sign;
01301 if (n < frame_desc->dbl_pulses) {
01302 pos2 = get_bits(gb, offset_nbits);
01303 fcb.x[fcb.n] = n + 5 * pos2;
01304 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01305 }
01306 }
01307 }
01308 ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01309
01310
01311
/* One 7-bit index selects both gains. The fixed codebook gain is predicted
 * in the log domain from the error history; the error stored for later
 * blocks is the codebook value clipped to a fixed range. */
01312 idx = get_bits(gb, 7);
01313 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01314 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01315 acb_gain = wmavoice_gain_codebook_acb[idx];
01316 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01317 -2.9957322736 ,
01318 1.6094379124 );
01319
/* Shift the history and insert the new error 8 >> log_n_blocks times, so
 * that frames with fewer blocks fill more history slots per block. */
01320 gain_weight = 8 >> frame_desc->log_n_blocks;
01321 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01322 sizeof(*s->gain_pred_err) * (6 - gain_weight));
01323 for (n = 0; n < gain_weight; n++)
01324 s->gain_pred_err[n] = pred_err;
01325
01326
/* Adaptive codebook. */
01327 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
/* The pitch changes linearly over the frame (16.16 fixed point). The block
 * is processed in runs of `len` samples; for each run the rounded pitch and
 * the fractional index are recomputed, and `len` is the number of samples
 * until the fractional index reaches its next integer step (at least 1, at
 * most the rest of the block). */
01328 int len;
01329 for (n = 0; n < size; n += len) {
01330 int next_idx_sh16;
01331 int abs_idx = block_idx * size + n;
01332 int pitch_sh16 = (s->last_pitch_val << 16) +
01333 s->pitch_diff_sh16 * abs_idx;
01334 int pitch = (pitch_sh16 + 0x6FFF) >> 16;
01335 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01336 idx = idx_sh16 >> 16;
01337 if (s->pitch_diff_sh16) {
01338 if (s->pitch_diff_sh16 > 0) {
01339 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01340 } else
01341 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01342 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01343 1, size - n);
01344 } else
01345 len = size;
01346
01347 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01348 wmavoice_ipol1_coeffs, 17,
01349 idx, 9, len);
01350 }
01351 } else {
/* Constant pitch per block: interpolate for a fractional lag, otherwise
 * copy straight from the history. The copy may overlap its destination
 * when the lag is shorter than the block. */
01352 int block_pitch = block_pitch_sh2 >> 2;
01353 idx = block_pitch_sh2 & 3;
01354 if (idx) {
01355 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01356 wmavoice_ipol2_coeffs, 4,
01357 idx, 8, size);
01358 } else
01359 av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01360 sizeof(float) * size);
01361 }
01362
01363
/* excitation = acb_gain * adaptive part + fcb_gain * pulses. */
01364 ff_weighted_vector_sumf(excitation, excitation, pulses,
01365 acb_gain, fcb_gain, size);
01366 }
01367
01384 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01385 int block_idx, int size,
01386 int block_pitch_sh2,
01387 const double *lsps, const double *prev_lsps,
01388 const struct frame_type_desc *frame_desc,
01389 float *excitation, float *synth)
01390 {
01391 double i_lsps[MAX_LSPS];
01392 float lpcs[MAX_LSPS];
01393 float fac;
01394 int n;
01395
01396 if (frame_desc->acb_type == ACB_TYPE_NONE)
01397 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01398 else
01399 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01400 frame_desc, excitation);
01401
01402
01403 fac = (block_idx + 0.5) / frame_desc->n_blocks;
01404 for (n = 0; n < s->lsps; n++)
01405 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01406 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01407
01408
01409 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01410 }
01411
01427 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01428 float *samples,
01429 const double *lsps, const double *prev_lsps,
01430 float *excitation, float *synth)
01431 {
01432 WMAVoiceContext *s = ctx->priv_data;
01433 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01434 int pitch[MAX_BLOCKS], last_block_pitch;
01435
01436
01437 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01438 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01439
01440 if (bd_idx < 0) {
01441 av_log(ctx, AV_LOG_ERROR,
01442 "Invalid frame type VLC code, skipping\n");
01443 return -1;
01444 }
01445
01446
01447 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01448
01449
01450
01451
01452 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
01453 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
01454 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01455 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01456 if (s->last_acb_type == ACB_TYPE_NONE ||
01457 20 * abs(cur_pitch_val - s->last_pitch_val) >
01458 (cur_pitch_val + s->last_pitch_val))
01459 s->last_pitch_val = cur_pitch_val;
01460
01461
01462 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01463 int fac = n * 2 + 1;
01464
01465 pitch[n] = (MUL16(fac, cur_pitch_val) +
01466 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01467 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01468 }
01469
01470
01471 s->pitch_diff_sh16 =
01472 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01473 }
01474
01475
01476 switch (frame_descs[bd_idx].fcb_type) {
01477 case FCB_TYPE_SILENCE:
01478 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01479 break;
01480 case FCB_TYPE_AW_PULSES:
01481 aw_parse_coords(s, gb, pitch);
01482 break;
01483 }
01484
01485 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01486 int bl_pitch_sh2;
01487
01488
01489 switch (frame_descs[bd_idx].acb_type) {
01490 case ACB_TYPE_HAMMING: {
01491
01492
01493
01494
01495
01496 int block_pitch,
01497 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01498 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01499 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
01500
01501 if (n == 0) {
01502 block_pitch = get_bits(gb, s->block_pitch_nbits);
01503 } else
01504 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01505 get_bits(gb, s->block_delta_pitch_nbits);
01506
01507 last_block_pitch = av_clip(block_pitch,
01508 s->block_delta_pitch_hrange,
01509 s->block_pitch_range -
01510 s->block_delta_pitch_hrange);
01511
01512
01513 if (block_pitch < t1) {
01514 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01515 } else {
01516 block_pitch -= t1;
01517 if (block_pitch < t2) {
01518 bl_pitch_sh2 =
01519 (s->block_conv_table[1] << 2) + (block_pitch << 1);
01520 } else {
01521 block_pitch -= t2;
01522 if (block_pitch < t3) {
01523 bl_pitch_sh2 =
01524 (s->block_conv_table[2] + block_pitch) << 2;
01525 } else
01526 bl_pitch_sh2 = s->block_conv_table[3] << 2;
01527 }
01528 }
01529 pitch[n] = bl_pitch_sh2 >> 2;
01530 break;
01531 }
01532
01533 case ACB_TYPE_ASYMMETRIC: {
01534 bl_pitch_sh2 = pitch[n] << 2;
01535 break;
01536 }
01537
01538 default:
01539 bl_pitch_sh2 = 0;
01540 break;
01541 }
01542
01543 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01544 lsps, prev_lsps, &frame_descs[bd_idx],
01545 &excitation[n * block_nsamples],
01546 &synth[n * block_nsamples]);
01547 }
01548
01549
01550
01551 if (s->do_apf) {
01552 double i_lsps[MAX_LSPS];
01553 float lpcs[MAX_LSPS];
01554
01555 for (n = 0; n < s->lsps; n++)
01556 i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01557 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01558 postfilter(s, synth, samples, 80, lpcs,
01559 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01560 frame_descs[bd_idx].fcb_type, pitch[0]);
01561
01562 for (n = 0; n < s->lsps; n++)
01563 i_lsps[n] = cos(lsps[n]);
01564 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01565 postfilter(s, &synth[80], &samples[80], 80, lpcs,
01566 &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01567 frame_descs[bd_idx].fcb_type, pitch[0]);
01568 } else
01569 memcpy(samples, synth, 160 * sizeof(synth[0]));
01570
01571
01572 s->frame_cntr++;
01573 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF;
01574 s->last_acb_type = frame_descs[bd_idx].acb_type;
01575 switch (frame_descs[bd_idx].acb_type) {
01576 case ACB_TYPE_NONE:
01577 s->last_pitch_val = 0;
01578 break;
01579 case ACB_TYPE_ASYMMETRIC:
01580 s->last_pitch_val = cur_pitch_val;
01581 break;
01582 case ACB_TYPE_HAMMING:
01583 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01584 break;
01585 }
01586
01587 return 0;
01588 }
01589
/**
 * Make a set of LSPs usable for LPC conversion: enforce a lower bound on
 * the first value, a minimum distance between neighbours and an upper
 * bound on the last value, then make sure the set is in ascending order.
 *
 * @param lsps array of LSPs, modified in place
 * @param num  number of entries in lsps
 */
static void stabilize_lsps(double *lsps, int num)
{
    const double floor_first = 0.0015 * M_PI;
    const double min_gap     = 0.0125 * M_PI;
    const double ceil_last   = 0.9985 * M_PI;
    int i, j, k;

    /* Raise values that are too small / too close to their predecessor.
     * The comparisons are written as negations so that their outcome is
     * the same as a "a > b ? a : b" maximum for every input. */
    if (!(lsps[0] > floor_first))
        lsps[0] = floor_first;
    for (i = 1; i < num; i++) {
        const double lowest = lsps[i - 1] + min_gap;

        if (!(lsps[i] > lowest))
            lsps[i] = lowest;
    }
    if (lsps[num - 1] > ceil_last)
        lsps[num - 1] = ceil_last;

    /* Capping the last value can put it below its predecessor. Look for
     * any descending pair; when there is none the set is already sorted. */
    for (i = 1; i < num; i++)
        if (lsps[i] < lsps[i - 1])
            break;
    if (i >= num)
        return;

    /* Insertion sort over the whole set. */
    for (j = 1; j < num; j++) {
        const double key = lsps[j];

        k = j;
        while (k > 0 && !(lsps[k - 1] <= key)) {
            lsps[k] = lsps[k - 1];
            k--;
        }
        lsps[k] = key;
    }
}
01630
01640 static int check_bits_for_superframe(GetBitContext *orig_gb,
01641 WMAVoiceContext *s)
01642 {
01643 GetBitContext s_gb, *gb = &s_gb;
01644 int n, need_bits, bd_idx;
01645 const struct frame_type_desc *frame_desc;
01646
01647
01648 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01649 skip_bits_long(gb, get_bits_count(orig_gb));
01650 assert(get_bits_left(gb) == get_bits_left(orig_gb));
01651
01652
01653 if (get_bits_left(gb) < 14)
01654 return 1;
01655 if (!get_bits1(gb))
01656 return -1;
01657 if (get_bits1(gb)) skip_bits(gb, 12);
01658 if (s->has_residual_lsps) {
01659 if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01660 return 1;
01661 skip_bits_long(gb, s->sframe_lsp_bitsize);
01662 }
01663
01664
01665 for (n = 0; n < MAX_FRAMES; n++) {
01666 int aw_idx_is_ext = 0;
01667
01668 if (!s->has_residual_lsps) {
01669 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01670 skip_bits_long(gb, s->frame_lsp_bitsize);
01671 }
01672 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01673 if (bd_idx < 0)
01674 return -1;
01675 frame_desc = &frame_descs[bd_idx];
01676 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01677 if (get_bits_left(gb) < s->pitch_nbits)
01678 return 1;
01679 skip_bits_long(gb, s->pitch_nbits);
01680 }
01681 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01682 skip_bits(gb, 8);
01683 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01684 int tmp = get_bits(gb, 6);
01685 if (tmp >= 0x36) {
01686 skip_bits(gb, 2);
01687 aw_idx_is_ext = 1;
01688 }
01689 }
01690
01691
01692 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01693 need_bits = s->block_pitch_nbits +
01694 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01695 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01696 need_bits = 2 * !aw_idx_is_ext;
01697 } else
01698 need_bits = 0;
01699 need_bits += frame_desc->frame_size;
01700 if (get_bits_left(gb) < need_bits)
01701 return 1;
01702 skip_bits_long(gb, need_bits);
01703 }
01704
01705 return 0;
01706 }
01707
01728 static int synth_superframe(AVCodecContext *ctx,
01729 float *samples, int *data_size)
01730 {
01731 WMAVoiceContext *s = ctx->priv_data;
01732 GetBitContext *gb = &s->gb, s_gb;
01733 int n, res, n_samples = 480;
01734 double lsps[MAX_FRAMES][MAX_LSPS];
01735 const double *mean_lsf = s->lsps == 16 ?
01736 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01737 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01738 float synth[MAX_LSPS + MAX_SFRAMESIZE];
01739
01740 memcpy(synth, s->synth_history,
01741 s->lsps * sizeof(*synth));
01742 memcpy(excitation, s->excitation_history,
01743 s->history_nsamples * sizeof(*excitation));
01744
01745 if (s->sframe_cache_size > 0) {
01746 gb = &s_gb;
01747 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01748 s->sframe_cache_size = 0;
01749 }
01750
01751 if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01752
01753
01754
01755
01756
01757 if (!get_bits1(gb)) {
01758 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01759 return -1;
01760 }
01761
01762
01763 if (get_bits1(gb)) {
01764 if ((n_samples = get_bits(gb, 12)) > 480) {
01765 av_log(ctx, AV_LOG_ERROR,
01766 "Superframe encodes >480 samples (%d), not allowed\n",
01767 n_samples);
01768 return -1;
01769 }
01770 }
01771
01772 if (s->has_residual_lsps) {
01773 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01774
01775 for (n = 0; n < s->lsps; n++)
01776 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01777
01778 if (s->lsps == 10) {
01779 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01780 } else
01781 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01782
01783 for (n = 0; n < s->lsps; n++) {
01784 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
01785 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01786 lsps[2][n] += mean_lsf[n];
01787 }
01788 for (n = 0; n < 3; n++)
01789 stabilize_lsps(lsps[n], s->lsps);
01790 }
01791
01792
01793 for (n = 0; n < 3; n++) {
01794 if (!s->has_residual_lsps) {
01795 int m;
01796
01797 if (s->lsps == 10) {
01798 dequant_lsp10i(gb, lsps[n]);
01799 } else
01800 dequant_lsp16i(gb, lsps[n]);
01801
01802 for (m = 0; m < s->lsps; m++)
01803 lsps[n][m] += mean_lsf[m];
01804 stabilize_lsps(lsps[n], s->lsps);
01805 }
01806
01807 if ((res = synth_frame(ctx, gb, n,
01808 &samples[n * MAX_FRAMESIZE],
01809 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01810 &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01811 &synth[s->lsps + n * MAX_FRAMESIZE])))
01812 return res;
01813 }
01814
01815
01816
01817
01818 if (get_bits1(gb)) {
01819 res = get_bits(gb, 4);
01820 skip_bits(gb, 10 * (res + 1));
01821 }
01822
01823
01824 *data_size = n_samples * sizeof(float);
01825
01826
01827 memcpy(s->prev_lsps, lsps[2],
01828 s->lsps * sizeof(*s->prev_lsps));
01829 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],
01830 s->lsps * sizeof(*synth));
01831 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01832 s->history_nsamples * sizeof(*excitation));
01833 if (s->do_apf)
01834 memmove(s->zero_exc_pf, &s->zero_exc_pf[MAX_SFRAMESIZE],
01835 s->history_nsamples * sizeof(*s->zero_exc_pf));
01836
01837 return 0;
01838 }
01839
01847 static int parse_packet_header(WMAVoiceContext *s)
01848 {
01849 GetBitContext *gb = &s->gb;
01850 unsigned int res;
01851
01852 if (get_bits_left(gb) < 11)
01853 return 1;
01854 skip_bits(gb, 4);
01855 s->has_residual_lsps = get_bits1(gb);
01856 do {
01857 res = get_bits(gb, 6);
01858
01859 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01860 return 1;
01861 } while (res == 0x3F);
01862 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
01863
01864 return 0;
01865 }
01866
01882 static void copy_bits(PutBitContext *pb,
01883 const uint8_t *data, int size,
01884 GetBitContext *gb, int nbits)
01885 {
01886 int rmn_bytes, rmn_bits;
01887
01888 rmn_bits = rmn_bytes = get_bits_left(gb);
01889 if (rmn_bits < nbits)
01890 return;
01891 if (nbits > pb->size_in_bits - put_bits_count(pb))
01892 return;
01893 rmn_bits &= 7; rmn_bytes >>= 3;
01894 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01895 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01896 ff_copy_bits(pb, data + size - rmn_bytes,
01897 FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01898 }
01899
01911 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01912 int *data_size, AVPacket *avpkt)
01913 {
01914 WMAVoiceContext *s = ctx->priv_data;
01915 GetBitContext *gb = &s->gb;
01916 int size, res, pos;
01917
01918 if (*data_size < 480 * sizeof(float)) {
01919 av_log(ctx, AV_LOG_ERROR,
01920 "Output buffer too small (%d given - %zu needed)\n",
01921 *data_size, 480 * sizeof(float));
01922 return -1;
01923 }
01924 *data_size = 0;
01925
01926
01927
01928
01929
01930
01931 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01932 if (!size)
01933 return 0;
01934 init_get_bits(&s->gb, avpkt->data, size << 3);
01935
01936
01937
01938
01939 if (size == ctx->block_align) {
01940 if ((res = parse_packet_header(s)) < 0)
01941 return res;
01942
01943
01944
01945
01946 if (s->spillover_nbits > 0) {
01947 if (s->sframe_cache_size > 0) {
01948 int cnt = get_bits_count(gb);
01949 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01950 flush_put_bits(&s->pb);
01951 s->sframe_cache_size += s->spillover_nbits;
01952 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01953 *data_size > 0) {
01954 cnt += s->spillover_nbits;
01955 s->skip_bits_next = cnt & 7;
01956 return cnt >> 3;
01957 } else
01958 skip_bits_long (gb, s->spillover_nbits - cnt +
01959 get_bits_count(gb));
01960 } else
01961 skip_bits_long(gb, s->spillover_nbits);
01962 }
01963 } else if (s->skip_bits_next)
01964 skip_bits(gb, s->skip_bits_next);
01965
01966
01967 s->sframe_cache_size = 0;
01968 s->skip_bits_next = 0;
01969 pos = get_bits_left(gb);
01970 if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01971 return res;
01972 } else if (*data_size > 0) {
01973 int cnt = get_bits_count(gb);
01974 s->skip_bits_next = cnt & 7;
01975 return cnt >> 3;
01976 } else if ((s->sframe_cache_size = pos) > 0) {
01977
01978 init_get_bits(gb, avpkt->data, size << 3);
01979 skip_bits_long(gb, (size << 3) - pos);
01980 assert(get_bits_left(gb) == pos);
01981
01982
01983 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01984 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01985
01986
01987 }
01988
01989 return size;
01990 }
01991
01992 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01993 {
01994 WMAVoiceContext *s = ctx->priv_data;
01995
01996 if (s->do_apf) {
01997 ff_rdft_end(&s->rdft);
01998 ff_rdft_end(&s->irdft);
01999 ff_dct_end(&s->dct);
02000 ff_dct_end(&s->dst);
02001 }
02002
02003 return 0;
02004 }
02005
02006 static av_cold void wmavoice_flush(AVCodecContext *ctx)
02007 {
02008 WMAVoiceContext *s = ctx->priv_data;
02009 int n;
02010
02011 s->postfilter_agc = 0;
02012 s->sframe_cache_size = 0;
02013 s->skip_bits_next = 0;
02014 for (n = 0; n < s->lsps; n++)
02015 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02016 memset(s->excitation_history, 0,
02017 sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02018 memset(s->synth_history, 0,
02019 sizeof(*s->synth_history) * MAX_LSPS);
02020 memset(s->gain_pred_err, 0,
02021 sizeof(s->gain_pred_err));
02022
02023 if (s->do_apf) {
02024 memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02025 sizeof(*s->synth_filter_out_buf) * s->lsps);
02026 memset(s->dcf_mem, 0,
02027 sizeof(*s->dcf_mem) * 2);
02028 memset(s->zero_exc_pf, 0,
02029 sizeof(*s->zero_exc_pf) * s->history_nsamples);
02030 memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02031 }
02032 }
02033
02034 AVCodec ff_wmavoice_decoder = {
02035 "wmavoice",
02036 AVMEDIA_TYPE_AUDIO,
02037 CODEC_ID_WMAVOICE,
02038 sizeof(WMAVoiceContext),
02039 wmavoice_decode_init,
02040 NULL,
02041 wmavoice_decode_end,
02042 wmavoice_decode_packet,
02043 CODEC_CAP_SUBFRAMES,
02044 .flush = wmavoice_flush,
02045 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02046 };