Libav 0.7.1
libavcodec/wmavoice.c
00001 /*
00002  * Windows Media Audio Voice decoder.
00003  * Copyright (c) 2009 Ronald S. Bultje
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <math.h>
00029 #include "avcodec.h"
00030 #include "get_bits.h"
00031 #include "put_bits.h"
00032 #include "wmavoice_data.h"
00033 #include "celp_math.h"
00034 #include "celp_filters.h"
00035 #include "acelp_vectors.h"
00036 #include "acelp_filters.h"
00037 #include "lsp.h"
00038 #include "libavutil/lzo.h"
00039 #include "dct.h"
00040 #include "rdft.h"
00041 #include "sinewin.h"
00042 
00043 #define MAX_BLOCKS           8   ///< maximum number of blocks per frame
00044 #define MAX_LSPS             16  ///< maximum filter order
00045 #define MAX_LSPS_ALIGN16     16  ///< same as #MAX_LSPS; needs to be a multiple of 16 for buffer alignment
00046 
00047 #define MAX_FRAMES           3   ///< maximum number of frames per superframe
00048 #define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
00049 #define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
00050 #define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES) ///< maximum number of samples per superframe
00051 
00052 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size (in bytes) for frame data that spills over from one packet to the next
00053 
00054 #define VLC_NBITS            6   ///< number of bits to read per VLC iteration
00055 
00059 static VLC frame_type_vlc;       ///< frame type VLC, initialized in decode_vbmtree()
00060 
00064 enum {
00065     ACB_TYPE_NONE       = 0, ///< no adaptive codebook (only a hardcoded fixed codebook)
00066     ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, interpolated per sample (asymmetric sinc window; see wmavoice_ipol1_coeffs)
00071     ACB_TYPE_HAMMING    = 2  ///< adaptive codebook with per-block pitch (Hamming sinc window; see wmavoice_ipol2_coeffs)
00074 };
00075 
00079 enum {
00080     FCB_TYPE_SILENCE    = 0, ///< comfort noise during silence: a pseudo-randomly chosen slice of the hardcoded codebook, scaled by a separately coded silence gain
00083     FCB_TYPE_HARDCODED  = 1, ///< hardcoded codebook with per-frame gain
00085     FCB_TYPE_AW_PULSES  = 2, ///< pitch-adaptive-window (AW) pulses
00087     FCB_TYPE_EXC_PULSES = 3, ///< innovation (fixed) codebook pulses, coded as single pulses or pulse pairs
00090 };
00091 
00095 static const struct frame_type_desc {
00096     uint8_t n_blocks;     ///< number of blocks per frame (each block holds 160 / n_blocks samples)
00098     uint8_t log_n_blocks; ///< log2(n_blocks)
00099     uint8_t acb_type;     ///< adaptive codebook type (ACB_TYPE_*)
00100     uint8_t fcb_type;     ///< fixed codebook type (FCB_TYPE_*)
00101     uint8_t dbl_pulses;   ///< number of excitation pulses coded as pulse pairs rather than single pulses (FCB_TYPE_EXC_PULSES only)
00104     uint16_t frame_size;  ///< number of bits of block data per frame
00106 } frame_descs[17] = {
00107     { 1, 0, ACB_TYPE_NONE,       FCB_TYPE_SILENCE,    0,   0 },
00108     { 2, 1, ACB_TYPE_NONE,       FCB_TYPE_HARDCODED,  0,  28 },
00109     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
00110     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
00111     { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
00112     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
00113     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
00114     { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
00115     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0,  64 },
00116     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2,  80 },
00117     { 2, 1, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 104 },
00118     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 108 },
00119     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 132 },
00120     { 4, 2, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 168 },
00121     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 0, 176 },
00122     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 2, 208 },
00123     { 8, 3, ACB_TYPE_HAMMING,    FCB_TYPE_EXC_PULSES, 5, 256 }
00124 };
00125 
00129 typedef struct {
00134     GetBitContext gb;             ///< packet bitreader; also used to parse the extradata at init time
00138     int8_t vbm_tree[25];          ///< maps VLC codes to frame type indices (see decode_vbmtree())
00140     int spillover_bitsize;        ///< number of bits used to code the spillover field in the packet header (3 + ceil(log2(block_align)))
00143     int history_nsamples;         ///< number of samples of signal history kept for the adaptive codebook (max_pitch_val + 8)
00146     /* postfilter specific values */
00147     int do_apf;                   ///< whether the averaging projection filter (postfilter) is enabled
00149     int denoise_strength;         ///< denoiser strength [0..11], indexes wmavoice_denoise_power_table
00151     int denoise_tilt_corr;        ///< whether to apply tilt correction to the Wiener denoise coefficients
00153     int dc_level;                 ///< DC level; values > 8 enable the highpass DC-removal filter in the postfilter
00156     int lsps;                     ///< LSP/LPC filter order (10 or 16)
00157     int lsp_q_mode;               ///< LSP quantization mode (selects the interpolation coefficient table)
00158     int lsp_def_mode;             ///< LSP definition mode (selects the mean LSF table)
00160     int frame_lsp_bitsize;        ///< size (in bits) of per-frame (independent) LSP data
00162     int sframe_lsp_bitsize;       ///< size (in bits) of per-superframe (residual) LSP data
00165     int min_pitch_val;            ///< minimum pitch lag, in samples
00166     int max_pitch_val;            ///< maximum pitch lag, in samples
00167     int pitch_nbits;              ///< number of bits used to code the per-frame pitch
00169     int block_pitch_nbits;        ///< number of bits used to code the first block's pitch
00171     int block_pitch_range;        ///< range of the block pitch
00172     int block_delta_pitch_nbits;  ///< number of bits used to code the delta pitch of subsequent blocks
00176     int block_delta_pitch_hrange; ///< half-range of the delta pitch
00178     uint16_t block_conv_table[4]; ///< boundaries for converting the semi-logarithmic block pitch back to normal scale
00190     int spillover_nbits;          ///< number of bits of superframe data that spill over between packets (see sframe_cache)
00194     int has_residual_lsps;        ///< whether LSPs are coded once per superframe (residual) rather than once per frame
00199     int skip_bits_next;           ///< number of bits to skip at the start of the next packet
00203     uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< cache for superframe data split over multiple packets
00206     int sframe_cache_size;        ///< amount of cached superframe data (in bits); >0 if data spilled over from the previous packet
00211     PutBitContext pb;             ///< bitstream writer for sframe_cache
00222     double prev_lsps[MAX_LSPS];   ///< LSPs of the last frame of the previous superframe
00224     int last_pitch_val;           ///< pitch of the previous frame
00225     int last_acb_type;            ///< adaptive codebook type of the previous frame
00226     int pitch_diff_sh16;          ///< per-sample pitch increment, ((cur - last) << 16) / MAX_FRAMESIZE
00228     float silence_gain;           ///< gain for the comfort-noise (silence) frames
00230     int aw_idx_is_ext;            ///< whether the AW start-offset index was extended (2 extra bits read)
00232     int aw_pulse_range;           ///< AW pulse exclusion range (16 or 24 samples)
00238     int aw_n_pulses[2];           ///< number of AW pulse repetitions in each half-frame
00241     int aw_first_pulse_off[2];    ///< offset of the first AW pulse in each half-frame
00243     int aw_next_pulse_off_cache;  ///< offset of the first pulse in the next block, carried over from aw_pulse_set2()
00249     int frame_cntr;               ///< frame counter (wraps at 0xFFFF), used as pRNG seed for silence frames
00251     float gain_pred_err[6];       ///< fixed-codebook gain prediction error history
00252     float excitation_history[MAX_SIGNAL_HISTORY]; ///< excitation signal history, used by the adaptive codebook
00256     float synth_history[MAX_LSPS]; ///< LP synthesis filter memory
00266     RDFTContext rdft, irdft;      ///< (inverse) 128-point RDFT contexts for the postfilter's Wiener denoiser
00268     DCTContext dct, dst;          ///< DCT/DST contexts used to phase-shift the denoise gains (Hilbert transform)
00270     float sin[511], cos[511];     ///< sine/cosine lookup tables for the phase shift
00272     float postfilter_agc;         ///< adaptive gain control memory (see adaptive_gain_control())
00274     float dcf_mem[2];             ///< DC highpass filter memory
00275     float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE]; ///< zero-synthesis excitation history for the postfilter
00278     float denoise_filter_cache[MAX_FRAMESIZE]; ///< overlap-add tail of the denoise filter output, applied to the next frame
00279     int   denoise_filter_cache_size; ///< number of cached samples in denoise_filter_cache
00280     DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];       ///< tilted LPCs, input to calc_input_response()
00282     DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];    ///< denoise filter coefficients
00284     DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16]; ///< postfilter synthesis output, preceded by filter memory
00290 } WMAVoiceContext;
00291 
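/*
 * Set up the variable bit mode (VBM) tree from container extradata.
 * Seventeen frame types are each assigned (by a 3-bit index) to one of
 * eight VLC "buckets" of increasing code length (2, 4, ..., 14 bits;
 * three codes per bucket, four in the last); the mapping is stored in
 * vbm_tree[] and the shared frame_type_vlc table is initialized.
 * Returns 0 on success, <0 if a bucket is over-populated (broken
 * extradata).
 */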
00301 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
00302 {
00303     static const uint8_t bits[] = {
00304          2,  2,  2,  4,  4,  4,
00305          6,  6,  6,  8,  8,  8,
00306         10, 10, 10, 12, 12, 12,
00307         14, 14, 14, 14
00308     };
00309     static const uint16_t codes[] = {
00310           0x0000, 0x0001, 0x0002,        //              00/01/10
00311           0x000c, 0x000d, 0x000e,        //           11+00/01/10
00312           0x003c, 0x003d, 0x003e,        //         1111+00/01/10
00313           0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
00314           0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
00315           0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
00316           0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
00317     };
00318     int cntr[8], n, res;
00319 
00320     memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
00321     memset(cntr,     0,    sizeof(cntr));
00322     for (n = 0; n < 17; n++) {
00323         res = get_bits(gb, 3);
00324         if (cntr[res] > 3) // should be >= 3 + (res == 7)
00325             return -1;
00326         vbm_tree[res * 3 + cntr[res]++] = n;
00327     }
00328     INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
00329                     bits, 1, 1, codes, 2, 2, 132);
00330     return 0;
00331 }
00332 
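/*
 * Decoder initialization. The codec private data ("extradata") must be
 * exactly 46 bytes: a 32-bit flags field at offset 18 selects the
 * postfilter (APF), denoiser strength, tilt correction, DC level and
 * LSP order (10 or 16), and the VBM tree is parsed starting at offset
 * 22. Pitch limits and the block-pitch conversion tables are derived
 * from the sample rate.
 */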
00336 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
00337 {
00338     int n, flags, pitch_range, lsp16_flag;
00339     WMAVoiceContext *s = ctx->priv_data;
00340 
00349     if (ctx->extradata_size != 46) {
00350         av_log(ctx, AV_LOG_ERROR,
00351                "Invalid extradata size %d (should be 46)\n",
00352                ctx->extradata_size);
00353         return -1;
00354     }
00355     flags                = AV_RL32(ctx->extradata + 18);
00356     s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
00357     s->do_apf            =    flags & 0x1;
00358     if (s->do_apf) {
00359         ff_rdft_init(&s->rdft,  7, DFT_R2C);
00360         ff_rdft_init(&s->irdft, 7, IDFT_C2R);
00361         ff_dct_init(&s->dct,  6, DCT_I);
00362         ff_dct_init(&s->dst,  6, DST_I);
00363 
00364         ff_sine_window_init(s->cos, 256);
00365         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
00366         for (n = 0; n < 255; n++) {
00367             s->sin[n]       = -s->sin[510 - n];
00368             s->cos[510 - n] =  s->cos[n];
00369         }
00370     }
00371     s->denoise_strength  =   (flags >> 2) & 0xF;
00372     if (s->denoise_strength >= 12) {
00373         av_log(ctx, AV_LOG_ERROR,
00374                "Invalid denoise filter strength %d (max=11)\n",
00375                s->denoise_strength);
00376         return -1;
00377     }
00378     s->denoise_tilt_corr = !!(flags & 0x40);
00379     s->dc_level          =   (flags >> 7) & 0xF;
00380     s->lsp_q_mode        = !!(flags & 0x2000);
00381     s->lsp_def_mode      = !!(flags & 0x4000);
00382     lsp16_flag           =    flags & 0x1000;
00383     if (lsp16_flag) {
00384         s->lsps               = 16;
00385         s->frame_lsp_bitsize  = 34;
00386         s->sframe_lsp_bitsize = 60;
00387     } else {
00388         s->lsps               = 10;
00389         s->frame_lsp_bitsize  = 24;
00390         s->sframe_lsp_bitsize = 48;
00391     }
00392     for (n = 0; n < s->lsps; n++)
00393         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
00394 
00395     init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
00396     if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
00397         av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
00398         return -1;
00399     }
00400 
00401     s->min_pitch_val    = ((ctx->sample_rate << 8)      /  400 + 50) >> 8;
00402     s->max_pitch_val    = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;
00403     pitch_range         = s->max_pitch_val - s->min_pitch_val;
00404     s->pitch_nbits      = av_ceil_log2(pitch_range);
00405     s->last_pitch_val   = 40;
00406     s->last_acb_type    = ACB_TYPE_NONE;
00407     s->history_nsamples = s->max_pitch_val + 8;
00408 
00409     if (s->min_pitch_val < 1 || s->history_nsamples > MAX_SIGNAL_HISTORY) {
00410         int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,
00411             max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;
00412 
00413         av_log(ctx, AV_LOG_ERROR,
00414                "Unsupported samplerate %d (min=%d, max=%d)\n",
00415                ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
00416 
00417         return -1;
00418     }
00419 
00420     s->block_conv_table[0]      = s->min_pitch_val;
00421     s->block_conv_table[1]      = (pitch_range * 25) >> 6;
00422     s->block_conv_table[2]      = (pitch_range * 44) >> 6;
00423     s->block_conv_table[3]      = s->max_pitch_val - 1;
00424     s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
00425     s->block_delta_pitch_nbits  = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
00426     s->block_pitch_range        = s->block_conv_table[2] +
00427                                   s->block_conv_table[3] + 1 +
00428                                   2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
00429     s->block_pitch_nbits        = av_ceil_log2(s->block_pitch_range);
00430 
00431     ctx->sample_fmt             = AV_SAMPLE_FMT_FLT;
00432 
00433     return 0;
00434 }
00435 
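/*
 * Adaptive gain control, as used in the postfilter: scale the filtered
 * signal so that its summed magnitude matches that of the original
 * speech synthesis, smoothing the scale factor over time with a
 * one-pole filter (coefficient 'alpha', state kept in 'gain_mem'
 * across calls).
 */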
00457 static void adaptive_gain_control(float *out, const float *in,
00458                                   const float *speech_synth,
00459                                   int size, float alpha, float *gain_mem)
00460 {
00461     int i;
00462     float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
00463     float mem = *gain_mem;
00464 
00465     for (i = 0; i < size; i++) {
00466         speech_energy     += fabsf(speech_synth[i]);
00467         postfilter_energy += fabsf(in[i]);
00468     }
00469     gain_scale_factor = (1.0 - alpha) * speech_energy / postfilter_energy;
00470 
00471     for (i = 0; i < size; i++) {
00472         mem = alpha * mem + gain_scale_factor;
00473         out[i] = in[i] * mem;
00474     }
00475 
00476     *gain_mem = mem;
00477 }
00478 
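/*
 * Kalman-style smoothing of the excitation: search the signal history
 * at lags around the transmitted pitch (+/- 3 samples, clamped to the
 * codec's pitch range) for the segment that correlates best with the
 * current excitation, then cross-fade between that segment and the
 * current signal with a weight of 0.625..1.0 on the current signal.
 * Returns -1 if no positively correlated segment exists, in which case
 * the caller uses the unsmoothed excitation.
 */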
00497 static int kalman_smoothen(WMAVoiceContext *s, int pitch,
00498                            const float *in, float *out, int size)
00499 {
00500     int n;
00501     float optimal_gain = 0, dot;
00502     const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
00503                 *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
00504                 *best_hist_ptr;
00505 
00506     /* find best fitting point in history */
00507     do {
00508         dot = ff_dot_productf(in, ptr, size);
00509         if (dot > optimal_gain) {
00510             optimal_gain  = dot;
00511             best_hist_ptr = ptr;
00512         }
00513     } while (--ptr >= end);
00514 
00515     if (optimal_gain <= 0)
00516         return -1;
00517     dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
00518     if (dot <= 0) // would be 1.0
00519         return -1;
00520 
00521     if (optimal_gain <= dot) {
00522         dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
00523     } else
00524         dot = 0.625;
00525 
00526     /* actual smoothing */
00527     for (n = 0; n < size; n++)
00528         out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
00529 
00530     return 0;
00531 }
00532 
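/*
 * Spectral tilt of a filter's coefficients, computed as the ratio
 * rh1/rh0 of its first two autocorrelation terms; used below to set
 * the strength of ff_tilt_compensation().
 */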
00543 static float tilt_factor(const float *lpcs, int n_lpcs)
00544 {
00545     float rh0, rh1;
00546 
00547     rh0 = 1.0     + ff_dot_productf(lpcs,  lpcs,    n_lpcs);
00548     rh1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);
00549 
00550     return rh1 / rh0;
00551 }
00552 
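/*
 * Derive the time-domain Wiener denoise coefficients from the LPCs:
 * take the log power spectrum of the LPC filter (RDFT), map relative
 * per-bin power to gains through the denoise power/energy tables,
 * phase-shift the gains with a DCT/DST pair (Hilbert transform),
 * convert back with the inverse RDFT, then apply optional tilt
 * correction and normalize the coefficients.
 */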
00556 static void calc_input_response(WMAVoiceContext *s, float *lpcs,
00557                                 int fcb_type, float *coeffs, int remainder)
00558 {
00559     float last_coeff, min = 15.0, max = -15.0;
00560     float irange, angle_mul, gain_mul, range, sq;
00561     int n, idx;
00562 
00563     /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
00564     s->rdft.rdft_calc(&s->rdft, lpcs);
00565 #define log_range(var, assign) do { \
00566         float tmp = log10f(assign);  var = tmp; \
00567         max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
00568     } while (0)
00569     log_range(last_coeff,  lpcs[1]         * lpcs[1]);
00570     for (n = 1; n < 64; n++)
00571         log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
00572                            lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
00573     log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
00574 #undef log_range
00575     range    = max - min;
00576     lpcs[64] = last_coeff;
00577 
00578     /* Now, use this spectrum to pick out those frequencies with higher
00579      * (relative) power/energy (which we then take to be "not noise"),
00580      * and set up a table (still in lpcs[]) of (relative) gains per frequency.
00581      * These frequencies will be maintained, while others ("noise") will be
00582      * attenuated in the filter output. */
00583     irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
00584     gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
00585                                                           (5.0 / 14.7));
00586     angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
00587     for (n = 0; n <= 64; n++) {
00588         float pwr;
00589 
00590         idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
00591         pwr = wmavoice_denoise_power_table[s->denoise_strength][idx];
00592         lpcs[n] = angle_mul * pwr;
00593 
00594         /* 70.57 =~ 1/log10(1.0331663) */
00595         idx = (pwr * gain_mul - 0.0295) * 70.570526123;
00596         if (idx > 127) { // fallback if index falls outside table range
00597             coeffs[n] = wmavoice_energy_table[127] *
00598                         powf(1.0331663, idx - 127);
00599         } else
00600             coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
00601     }
00602 
00603     /* calculate the Hilbert transform of the gains, which we do (since this
00604      * is a sine input) by applying a phase shift (in theory, H(sin())=cos()).
00605      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
00606      * "moment" of the LPCs in this filter. */
00607     s->dct.dct_calc(&s->dct, lpcs);
00608     s->dst.dct_calc(&s->dst, lpcs);
00609 
00610     /* Split out the coefficient indexes into phase/magnitude pairs */
00611     idx = 255 + av_clip(lpcs[64],               -255, 255);
00612     coeffs[0]  = coeffs[0]  * s->cos[idx];
00613     idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
00614     last_coeff = coeffs[64] * s->cos[idx];
00615     for (n = 63;; n--) {
00616         idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00617         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00618         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00619 
00620         if (!--n) break;
00621 
00622         idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
00623         coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
00624         coeffs[n * 2]     = coeffs[n] * s->cos[idx];
00625     }
00626     coeffs[1] = last_coeff;
00627 
00628     /* move into real domain */
00629     s->irdft.rdft_calc(&s->irdft, coeffs);
00630 
00631     /* tilt correction and normalize scale */
00632     memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
00633     if (s->denoise_tilt_corr) {
00634         float tilt_mem = 0;
00635 
00636         coeffs[remainder - 1] = 0;
00637         ff_tilt_compensation(&tilt_mem,
00638                              -1.8 * tilt_factor(coeffs, remainder - 1),
00639                              coeffs, remainder);
00640     }
00641     sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
00642     for (n = 0; n < remainder; n++)
00643         coeffs[n] *= sq;
00644 }
00645 
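/*
 * Wiener denoise filter for the postfilter output: multiply the
 * re-synthesized speech with the coefficients from
 * calc_input_response() in the frequency domain (128-point RDFTs) and
 * overlap-add the filter tail into the next frame via
 * denoise_filter_cache. For silence frames, only the cached tail of
 * the previous run is mixed in.
 */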
00672 static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
00673                            float *synth_pf, int size,
00674                            const float *lpcs)
00675 {
00676     int remainder, lim, n;
00677 
00678     if (fcb_type != FCB_TYPE_SILENCE) {
00679         float *tilted_lpcs = s->tilted_lpcs_pf,
00680               *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
00681 
00682         tilted_lpcs[0]           = 1.0;
00683         memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
00684         memset(&tilted_lpcs[s->lsps + 1], 0,
00685                sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
00686         ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
00687                              tilted_lpcs, s->lsps + 2);
00688 
00689         /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
00690          * size is applied to the next frame. All input beyond this is zero,
00691          * and thus all output beyond this will go towards zero, hence we can
00692          * limit to min(size-1, 127-size) as a performance consideration. */
00693         remainder = FFMIN(127 - size, size - 1);
00694         calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
00695 
00696         /* apply coefficients (in frequency spectrum domain), i.e. complex
00697          * number multiplication */
00698         memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
00699         s->rdft.rdft_calc(&s->rdft, synth_pf);
00700         s->rdft.rdft_calc(&s->rdft, coeffs);
00701         synth_pf[0] *= coeffs[0];
00702         synth_pf[1] *= coeffs[1];
00703         for (n = 1; n < 64; n++) {
00704             float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
00705             synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
00706             synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
00707         }
00708         s->irdft.rdft_calc(&s->irdft, synth_pf);
00709     }
00710 
00711     /* merge filter output with the history of previous runs */
00712     if (s->denoise_filter_cache_size) {
00713         lim = FFMIN(s->denoise_filter_cache_size, size);
00714         for (n = 0; n < lim; n++)
00715             synth_pf[n] += s->denoise_filter_cache[n];
00716         s->denoise_filter_cache_size -= lim;
00717         memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
00718                 sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
00719     }
00720 
00721     /* move remainder of filter output into a cache for future runs */
00722     if (fcb_type != FCB_TYPE_SILENCE) {
00723         lim = FFMIN(remainder, s->denoise_filter_cache_size);
00724         for (n = 0; n < lim; n++)
00725             s->denoise_filter_cache[n] += synth_pf[size + n];
00726         if (lim < remainder) {
00727             memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
00728                    sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
00729             s->denoise_filter_cache_size = remainder;
00730         }
00731     }
00732 }
00733 
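/*
 * Averaging projection filter, the actual postfilter. Works on
 * 80-sample half-frames: derive a zero-synthesis excitation from the
 * decoded speech, optionally smooth it around the pitch lag
 * (kalman_smoothen()), run the LP synthesis filter again, denoise the
 * result (wiener_denoise()), match its gain to the original synthesis
 * (adaptive_gain_control()) and, if dc_level > 8, remove
 * ultra-low-frequency noise with a second-order highpass.
 */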
00754 static void postfilter(WMAVoiceContext *s, const float *synth,
00755                        float *samples,    int size,
00756                        const float *lpcs, float *zero_exc_pf,
00757                        int fcb_type,      int pitch)
00758 {
00759     float synth_filter_in_buf[MAX_FRAMESIZE / 2],
00760           *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
00761           *synth_filter_in = zero_exc_pf;
00762 
00763     assert(size <= MAX_FRAMESIZE / 2);
00764 
00765     /* generate excitation from input signal */
00766     ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
00767 
00768     if (fcb_type >= FCB_TYPE_AW_PULSES &&
00769         !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
00770         synth_filter_in = synth_filter_in_buf;
00771 
00772     /* re-synthesize speech after smoothening, and keep history */
00773     ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
00774                                  synth_filter_in, size, s->lsps);
00775     memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
00776            sizeof(synth_pf[0]) * s->lsps);
00777 
00778     wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
00779 
00780     adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
00781                           &s->postfilter_agc);
00782 
00783     if (s->dc_level > 8) {
00784         /* remove ultra-low frequency DC noise / highpass filter;
00785          * coefficients are identical to those used in SIPR decoding,
00786          * and very closely resemble those used in AMR-NB decoding. */
00787         ff_acelp_apply_order_2_transfer_function(samples, samples,
00788             (const float[2]) { -1.99997,      1.0 },
00789             (const float[2]) { -1.9330735188, 0.93589198496 },
00790             0.93980580475, s->dcf_mem, size);
00791     }
00792 }
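
/*
 * Generic LSP dequantization: for each of n_stages codebook stages,
 * add base_q[n] plus mul_q[n] times the selected table row to the
 * output vector of 'num' LSPs.
 */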
00808 static void dequant_lsps(double *lsps, int num,
00809                          const uint16_t *values,
00810                          const uint16_t *sizes,
00811                          int n_stages, const uint8_t *table,
00812                          const double *mul_q,
00813                          const double *base_q)
00814 {
00815     int n, m;
00816 
00817     memset(lsps, 0, num * sizeof(*lsps));
00818     for (n = 0; n < n_stages; n++) {
00819         const uint8_t *t_off = &table[values[n] * num];
00820         double base = base_q[n], mul = mul_q[n];
00821 
00822         for (m = 0; m < num; m++)
00823             lsps[m] += base + mul * t_off[m];
00824 
00825         table += sizes[n] * num;
00826     }
00827 }
00828 
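/*
 * Parse and dequantize 10 independently coded LSPs from a 24-bit field
 * (codebook indices of 8, 6, 5 and 5 bits).
 */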
00840 static void dequant_lsp10i(GetBitContext *gb, double *lsps)
00841 {
00842     static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
00843     static const double mul_lsf[4] = {
00844         5.2187144800e-3,    1.4626986422e-3,
00845         9.6179549166e-4,    1.1325736225e-3
00846     };
00847     static const double base_lsf[4] = {
00848         M_PI * -2.15522e-1, M_PI * -6.1646e-2,
00849         M_PI * -3.3486e-2,  M_PI * -5.7408e-2
00850     };
00851     uint16_t v[4];
00852 
00853     v[0] = get_bits(gb, 8);
00854     v[1] = get_bits(gb, 6);
00855     v[2] = get_bits(gb, 5);
00856     v[3] = get_bits(gb, 5);
00857 
00858     dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
00859                  mul_lsf, base_lsf);
00860 }
00861 
00866 static void dequant_lsp10r(GetBitContext *gb,
00867                            double *i_lsps, const double *old,
00868                            double *a1, double *a2, int q_mode)
00869 {
00870     static const uint16_t vec_sizes[3] = { 128, 64, 64 };
00871     static const double mul_lsf[3] = {
00872         2.5807601174e-3,    1.2354460219e-3,   1.1763821673e-3
00873     };
00874     static const double base_lsf[3] = {
00875         M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2
00876     };
00877     const float (*ipol_tab)[2][10] = q_mode ?
00878         wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;
00879     uint16_t interpol, v[3];
00880     int n;
00881 
00882     dequant_lsp10i(gb, i_lsps);
00883 
00884     interpol = get_bits(gb, 5);
00885     v[0]     = get_bits(gb, 7);
00886     v[1]     = get_bits(gb, 6);
00887     v[2]     = get_bits(gb, 6);
00888 
00889     for (n = 0; n < 10; n++) {
00890         double delta = old[n] - i_lsps[n];
00891         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00892         a1[10 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00893     }
00894 
00895     dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,
00896                  mul_lsf, base_lsf);
00897 }
00898 
00902 static void dequant_lsp16i(GetBitContext *gb, double *lsps)
00903 {
00904     static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
00905     static const double mul_lsf[5] = {
00906         3.3439586280e-3,    6.9908173703e-4,
00907         3.3216608306e-3,    1.0334960326e-3,
00908         3.1899104283e-3
00909     };
00910     static const double base_lsf[5] = {
00911         M_PI * -1.27576e-1, M_PI * -2.4292e-2,
00912         M_PI * -1.28094e-1, M_PI * -3.2128e-2,
00913         M_PI * -1.29816e-1
00914     };
00915     uint16_t v[5];
00916 
00917     v[0] = get_bits(gb, 8);
00918     v[1] = get_bits(gb, 6);
00919     v[2] = get_bits(gb, 7);
00920     v[3] = get_bits(gb, 6);
00921     v[4] = get_bits(gb, 7);
00922 
00923     dequant_lsps( lsps,     5,  v,     vec_sizes,    2,
00924                  wmavoice_dq_lsp16i1,  mul_lsf,     base_lsf);
00925     dequant_lsps(&lsps[5],  5, &v[2], &vec_sizes[2], 2,
00926                  wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);
00927     dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,
00928                  wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);
00929 }
00930 
00935 static void dequant_lsp16r(GetBitContext *gb,
00936                            double *i_lsps, const double *old,
00937                            double *a1, double *a2, int q_mode)
00938 {
00939     static const uint16_t vec_sizes[3] = { 128, 128, 128 };
00940     static const double mul_lsf[3] = {
00941         1.2232979501e-3,   1.4062241527e-3,   1.6114744851e-3
00942     };
00943     static const double base_lsf[3] = {
00944         M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2
00945     };
00946     const float (*ipol_tab)[2][16] = q_mode ?
00947         wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;
00948     uint16_t interpol, v[3];
00949     int n;
00950 
00951     dequant_lsp16i(gb, i_lsps);
00952 
00953     interpol = get_bits(gb, 5);
00954     v[0]     = get_bits(gb, 7);
00955     v[1]     = get_bits(gb, 7);
00956     v[2]     = get_bits(gb, 7);
00957 
00958     for (n = 0; n < 16; n++) {
00959         double delta = old[n] - i_lsps[n];
00960         a1[n]        = ipol_tab[interpol][0][n] * delta + i_lsps[n];
00961         a1[16 + n]   = ipol_tab[interpol][1][n] * delta + i_lsps[n];
00962     }
00963 
00964     dequant_lsps( a2,     10,  v,     vec_sizes,    1,
00965                  wmavoice_dq_lsp16r1,  mul_lsf,     base_lsf);
00966     dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,
00967                  wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);
00968     dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,
00969                  wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);
00970 }
00971 
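/*
 * Parse the coordinates of the pitch-adaptive window (AW) pulses for a
 * frame: read the (possibly extended) 6-bit start-offset index, then
 * precompute, for each half-frame, how many pitch-spaced pulse
 * repetitions fit and where the first one starts, for use by
 * aw_pulse_set1() and aw_pulse_set2().
 */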
00985 static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
00986                             const int *pitch)
00987 {
00988     static const int16_t start_offset[94] = {
00989         -11,  -9,  -7,  -5,  -3,  -1,   1,   3,   5,   7,   9,  11,
00990          13,  15,  18,  17,  19,  20,  21,  22,  23,  24,  25,  26,
00991          27,  28,  29,  30,  31,  32,  33,  35,  37,  39,  41,  43,
00992          45,  47,  49,  51,  53,  55,  57,  59,  61,  63,  65,  67,
00993          69,  71,  73,  75,  77,  79,  81,  83,  85,  87,  89,  91,
00994          93,  95,  97,  99, 101, 103, 105, 107, 109, 111, 113, 115,
00995         117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,
00996         141, 143, 145, 147, 149, 151, 153, 155, 157, 159
00997     };
00998     int bits, offset;
00999 
01000     /* position of pulse */
01001     s->aw_idx_is_ext = 0;
01002     if ((bits = get_bits(gb, 6)) >= 54) {
01003         s->aw_idx_is_ext = 1;
01004         bits += (bits - 54) * 3 + get_bits(gb, 2);
01005     }
01006 
01007     /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
01008      * the distribution of the pulses in each block contained in this frame. */
01009     s->aw_pulse_range        = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;
01010     for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;
01011     s->aw_n_pulses[0]        = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pitch[0];
01012     s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;
01013     offset                  += s->aw_n_pulses[0] * pitch[0];
01014     s->aw_n_pulses[1]        = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1];
01015     s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;
01016 
01017     /* if continuing from a position before the block, reset position to
01018      * start of block (when corrected for the range over which it can be
01019      * spread in aw_pulse_set1()). */
01020     if (start_offset[bits] < MAX_FRAMESIZE / 2) {
01021         while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)
01022             s->aw_first_pulse_off[1] -= pitch[1];
01023         if (start_offset[bits] < 0)
01024             while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)
01025                 s->aw_first_pulse_off[0] -= pitch[0];
01026     }
01027 }
01028 
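/*
 * Apply the second set of pitch-adaptive window pulses for one block:
 * build an 80-bit use_mask that excludes the positions already pulsed
 * by aw_pulse_set1(), then read an index selecting one of the
 * remaining allowed positions and add a single signed pulse there to
 * the fixed codebook vector.
 */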
01036 static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
01037                           int block_idx, AMRFixed *fcb)
01038 {
01039     uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
01040     uint16_t *use_mask = use_mask_mem + 2;
01041     /* in this function, idx is the index in the 80-bit (+ padding) use_mask
01042      * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits
01043      * of idx are the position of the bit within a particular item in the
01044      * array (0 being the most significant bit, and 15 being the least
01045      * significant bit), and the remainder (>> 4) is the index in the
01046      * use_mask[]-array. This is faster and uses less memory than using an
01047      * 80-byte/80-int array. */
01048     int pulse_off = s->aw_first_pulse_off[block_idx],
01049         pulse_start, n, idx, range, aidx, start_off = 0;
01050 
01051     /* set offset of first pulse to within this block */
01052     if (s->aw_n_pulses[block_idx] > 0)
01053         while (pulse_off + s->aw_pulse_range < 1)
01054             pulse_off += fcb->pitch_lag;
01055 
01056     /* find range per pulse */
01057     if (s->aw_n_pulses[0] > 0) {
01058         if (block_idx == 0) {
01059             range = 32;
01060         } else /* block_idx = 1 */ {
01061             range = 8;
01062             if (s->aw_n_pulses[block_idx] > 0)
01063                 pulse_off = s->aw_next_pulse_off_cache;
01064         }
01065     } else
01066         range = 16;
01067     pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;
01068 
01069     /* aw_pulse_set1() already applies pulses around pulse_off (to be exact,
01070      * in the range [pulse_off, pulse_off + s->aw_pulse_range]), and thus
01071      * we exclude that range from being pulsed again in this function. */
01072     memset(&use_mask[-2], 0, 2 * sizeof(use_mask[0]));
01073     memset( use_mask,   -1, 5 * sizeof(use_mask[0]));
01074     memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));
01075     if (s->aw_n_pulses[block_idx] > 0)
01076         for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {
01077             int excl_range         = s->aw_pulse_range; // always 16 or 24
01078             uint16_t *use_mask_ptr = &use_mask[idx >> 4];
01079             int first_sh           = 16 - (idx & 15);
01080             *use_mask_ptr++       &= 0xFFFF << first_sh;
01081             excl_range            -= first_sh;
01082             if (excl_range >= 16) {
01083                 *use_mask_ptr++    = 0;
01084                 *use_mask_ptr     &= 0xFFFF >> (excl_range - 16);
01085             } else
01086                 *use_mask_ptr     &= 0xFFFF >> excl_range;
01087         }
01088 
01089     /* find the 'aidx'th offset that is not excluded */
01090     aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
01091     for (n = 0; n <= aidx; pulse_start++) {
01092         for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
01093         if (idx >= MAX_FRAMESIZE / 2) { // find from zero
01094             if (use_mask[0])      idx = 0x0F;
01095             else if (use_mask[1]) idx = 0x1F;
01096             else if (use_mask[2]) idx = 0x2F;
01097             else if (use_mask[3]) idx = 0x3F;
01098             else if (use_mask[4]) idx = 0x4F;
01099             else                  return;
01100             idx -= av_log2_16bit(use_mask[idx >> 4]);
01101         }
01102         if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
01103             use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));
01104             n++;
01105             start_off = idx;
01106         }
01107     }
01108 
01109     fcb->x[fcb->n] = start_off;
01110     fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
01111     fcb->n++;
01112 
01113     /* set offset for next block, relative to start of that block */
01114     n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
01115     s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
01116 }
01117 
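/*
 * Apply the first set of pitch-adaptive window pulses for one block.
 * If the block contains pitch repetitions, 3 or 4 signed pulses
 * (depending on aw_pulse_range) are placed relative to the first pulse
 * offset; otherwise a pulse pair with coded signs and a small coded
 * spacing (1, 3, 5 or 7 samples) is used.
 */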
01125 static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
01126                           int block_idx, AMRFixed *fcb)
01127 {
01128     int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
01129     float v;
01130 
01131     if (s->aw_n_pulses[block_idx] > 0) {
01132         int n, v_mask, i_mask, sh, n_pulses;
01133 
01134         if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each
01135             n_pulses = 3;
01136             v_mask   = 8;
01137             i_mask   = 7;
01138             sh       = 4;
01139         } else { // 4 pulses, 1:sign + 2:index each
01140             n_pulses = 4;
01141             v_mask   = 4;
01142             i_mask   = 3;
01143             sh       = 3;
01144         }
01145 
01146         for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {
01147             fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;
01148             fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +
01149                                  s->aw_first_pulse_off[block_idx];
01150             while (fcb->x[fcb->n] < 0)
01151                 fcb->x[fcb->n] += fcb->pitch_lag;
01152             if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)
01153                 fcb->n++;
01154         }
01155     } else {
01156         int num2 = (val & 0x1FF) >> 1, delta, idx;
01157 
01158         if (num2 < 1 * 79)      { delta = 1; idx = num2 + 1; }
01159         else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }
01160         else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }
01161         else                    { delta = 7; idx = num2 + 1 - 3 * 75; }
01162         v = (val & 0x200) ? -1.0 : 1.0;
01163 
01164         fcb->no_repeat_mask |= 3 << fcb->n;
01165         fcb->x[fcb->n]       = idx - delta;
01166         fcb->y[fcb->n]       = v;
01167         fcb->x[fcb->n + 1]   = idx;
01168         fcb->y[fcb->n + 1]   = (val & 1) ? -v : v;
01169         fcb->n              += 2;
01170     }
01171 }
01172 
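/*
 * Pseudo-random number generator for the comfort-noise (silence)
 * codebook offset. Computes z = x * 49995 / ((x % 9) * 5 + 6) with
 * x = block_num * 1877 + frame_cntr (reduced modulo 0xFFFF), using a
 * lookup table and FASTDIV-style multiplications instead of divisions,
 * and returns z modulo (1000 - block_size).
 */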
01186 static int pRNG(int frame_cntr, int block_num, int block_size)
01187 {
01188     /* array to simplify the calculation of z:
01189      * y = (x % 9) * 5 + 6;
01190      * z = (49995 * x) / y;
01191      * Since y only has 9 values, we can remove the division by using a
01192      * LUT and using FASTDIV-style divisions. For each of the 9 values
01193      * of y, we can rewrite z as:
01194      * z = x * (49995 / y) + x * ((49995 % y) / y)
01195      * In this table, each col represents one possible value of y, the
01196      * first number is 49995 / y, and the second is the FASTDIV variant
01197      * of 49995 % y / y. */
01198     static const unsigned int div_tbl[9][2] = {
01199         { 8332,  3 * 715827883U }, // y =  6
01200         { 4545,  0 * 390451573U }, // y = 11
01201         { 3124, 11 * 268435456U }, // y = 16
01202         { 2380, 15 * 204522253U }, // y = 21
01203         { 1922, 23 * 165191050U }, // y = 26
01204         { 1612, 23 * 138547333U }, // y = 31
01205         { 1388, 27 * 119304648U }, // y = 36
01206         { 1219, 16 * 104755300U }, // y = 41
01207         { 1086, 39 *  93368855U }  // y = 46
01208     };
01209     unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
01210     if (x >= 0xFFFF) x -= 0xFFFF;   // max value of x is 8*1877+0xFFFE=0x13AA6,
01211                                     // so this is effectively a modulo (%)
01212     y = x - 9 * MULH(477218589, x); // x % 9
01213     z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
01214                                     // z = x * 49995 / (y * 5 + 6)
01215     return z % (1000 - block_size);
01216 }
01217 
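/*
 * Build the excitation for a block without an adaptive codebook
 * (ACB_TYPE_NONE): take 'size' samples from wmavoice_std_codebook at
 * either a pseudo-random offset (silence frames) or a coded offset
 * (hardcoded frames), scale them by the appropriate gain, and reset
 * the gain-prediction history.
 */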
01222 static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
01223                                  int block_idx, int size,
01224                                  const struct frame_type_desc *frame_desc,
01225                                  float *excitation)
01226 {
01227     float gain;
01228     int n, r_idx;
01229 
01230     assert(size <= MAX_FRAMESIZE);
01231 
01232     /* Set the offset from which we start reading wmavoice_std_codebook */
01233     if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01234         r_idx = pRNG(s->frame_cntr, block_idx, size);
01235         gain  = s->silence_gain;
01236     } else /* FCB_TYPE_HARDCODED */ {
01237         r_idx = get_bits(gb, 8);
01238         gain  = wmavoice_gain_universal[get_bits(gb, 6)];
01239     }
01240 
01241     /* Clear gain prediction parameters */
01242     memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));
01243 
01244     /* Apply gain to hardcoded codebook and use that as excitation signal */
01245     for (n = 0; n < size; n++)
01246         excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;
01247 }
01248 
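/*
 * Build the excitation for a block with both a fixed and an adaptive
 * codebook: decode the fixed-codebook pulses (AW pulses or coded
 * excitation pulses), look up the ACB/FCB gains from a shared 7-bit
 * index (with FCB gain prediction), generate the adaptive-codebook
 * contribution by interpolating past excitation at the per-sample or
 * per-block pitch lag, and mix the two contributions into
 * 'excitation'.
 */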
01253 static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
01254                                 int block_idx, int size,
01255                                 int block_pitch_sh2,
01256                                 const struct frame_type_desc *frame_desc,
01257                                 float *excitation)
01258 {
01259     static const float gain_coeff[6] = {
01260         0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458
01261     };
01262     float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;
01263     int n, idx, gain_weight;
01264     AMRFixed fcb;
01265 
01266     assert(size <= MAX_FRAMESIZE / 2);
01267     memset(pulses, 0, sizeof(*pulses) * size);
01268 
01269     fcb.pitch_lag      = block_pitch_sh2 >> 2;
01270     fcb.pitch_fac      = 1.0;
01271     fcb.no_repeat_mask = 0;
01272     fcb.n              = 0;
01273 
01274     /* For the other frame types, this is where we apply the innovation
01275      * (fixed) codebook pulses of the speech signal. */
01276     if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01277         aw_pulse_set1(s, gb, block_idx, &fcb);
01278         aw_pulse_set2(s, gb, block_idx, &fcb);
01279     } else /* FCB_TYPE_EXC_PULSES */ {
01280         int offset_nbits = 5 - frame_desc->log_n_blocks;
01281 
01282         fcb.no_repeat_mask = -1;
01283         /* similar to ff_decode_10_pulses_35bits(), but with single pulses
01284          * (instead of double) for a subset of pulses */
01285         for (n = 0; n < 5; n++) {
01286             float sign;
01287             int pos1, pos2;
01288 
01289             sign           = get_bits1(gb) ? 1.0 : -1.0;
01290             pos1           = get_bits(gb, offset_nbits);
01291             fcb.x[fcb.n]   = n + 5 * pos1;
01292             fcb.y[fcb.n++] = sign;
01293             if (n < frame_desc->dbl_pulses) {
01294                 pos2           = get_bits(gb, offset_nbits);
01295                 fcb.x[fcb.n]   = n + 5 * pos2;
01296                 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
01297             }
01298         }
01299     }
01300     ff_set_fixed_vector(pulses, &fcb, 1.0, size);
01301 
01302     /* Calculate gain for adaptive & fixed codebook signal.
01303      * see ff_amr_set_fixed_gain(). */
01304     idx = get_bits(gb, 7);
01305     fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
01306                     5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
01307     acb_gain = wmavoice_gain_codebook_acb[idx];
01308     pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
01309                         -2.9957322736 /* log(0.05) */,
01310                          1.6094379124 /* log(5.0)  */);
01311 
01312     gain_weight = 8 >> frame_desc->log_n_blocks;
01313     memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,
01314             sizeof(*s->gain_pred_err) * (6 - gain_weight));
01315     for (n = 0; n < gain_weight; n++)
01316         s->gain_pred_err[n] = pred_err;
01317 
01318     /* Calculation of adaptive codebook */
01319     if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01320         int len;
01321         for (n = 0; n < size; n += len) {
01322             int next_idx_sh16;
01323             int abs_idx    = block_idx * size + n;
01324             int pitch_sh16 = (s->last_pitch_val << 16) +
01325                              s->pitch_diff_sh16 * abs_idx;
01326             int pitch      = (pitch_sh16 + 0x6FFF) >> 16;
01327             int idx_sh16   = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;
01328             idx            = idx_sh16 >> 16;
01329             if (s->pitch_diff_sh16) {
01330                 if (s->pitch_diff_sh16 > 0) {
01331                     next_idx_sh16 = (idx_sh16) &~ 0xFFFF;
01332                 } else
01333                     next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;
01334                 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,
01335                               1, size - n);
01336             } else
01337                 len = size;
01338 
01339             ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],
01340                                   wmavoice_ipol1_coeffs, 17,
01341                                   idx, 9, len);
01342         }
01343     } else /* ACB_TYPE_HAMMING */ {
01344         int block_pitch = block_pitch_sh2 >> 2;
01345         idx             = block_pitch_sh2 & 3;
01346         if (idx) {
01347             ff_acelp_interpolatef(excitation, &excitation[-block_pitch],
01348                                   wmavoice_ipol2_coeffs, 4,
01349                                   idx, 8, size);
01350         } else
01351             av_memcpy_backptr((uint8_t *) excitation, sizeof(float) * block_pitch,
01352                               sizeof(float) * size);
01353     }
01354 
01355     /* Interpolate ACB/FCB and use as excitation signal */
01356     ff_weighted_vector_sumf(excitation, excitation, pulses,
01357                             acb_gain, fcb_gain, size);
01358 }
01359 
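/*
 * Synthesize one block: generate the excitation (hardcoded or
 * FCB/ACB), convert the LSPs interpolated at the block's position
 * within the frame to LPCs, and run the LP synthesis filter.
 */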
01376 static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
01377                         int block_idx, int size,
01378                         int block_pitch_sh2,
01379                         const double *lsps, const double *prev_lsps,
01380                         const struct frame_type_desc *frame_desc,
01381                         float *excitation, float *synth)
01382 {
01383     double i_lsps[MAX_LSPS];
01384     float lpcs[MAX_LSPS];
01385     float fac;
01386     int n;
01387 
01388     if (frame_desc->acb_type == ACB_TYPE_NONE)
01389         synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
01390     else
01391         synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
01392                             frame_desc, excitation);
01393 
01394     /* convert interpolated LSPs to LPCs */
01395     fac = (block_idx + 0.5) / frame_desc->n_blocks;
01396     for (n = 0; n < s->lsps; n++) // LSF -> LSP
01397         i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));
01398     ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01399 
01400     /* Speech synthesis */
01401     ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);
01402 }
01403 
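/*
 * Synthesize one 160-sample frame: parse the frame type, derive
 * per-block (and per-sample) pitch values for the adaptive codebook,
 * read the silence gain or AW pulse coordinates where applicable,
 * synthesize each block, and then either run the postfilter on the two
 * 80-sample halves or copy the synthesis straight to the output.
 * Finally, cache the pitch/ACB state for the next frame. Returns 0 on
 * success, <0 on an invalid frame type code.
 */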
01419 static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
01420                        float *samples,
01421                        const double *lsps, const double *prev_lsps,
01422                        float *excitation, float *synth)
01423 {
01424     WMAVoiceContext *s = ctx->priv_data;
01425     int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;
01426     int pitch[MAX_BLOCKS], last_block_pitch;
01427 
01428     /* Parse frame type ("frame header"), see frame_descs */
01429     int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],
01430         block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
01431 
01432     if (bd_idx < 0) {
01433         av_log(ctx, AV_LOG_ERROR,
01434                "Invalid frame type VLC code, skipping\n");
01435         return -1;
01436     }
01437 
01438     /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */
01439     if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {
01440         /* Pitch is provided once per frame and is interpreted as the pitch of
01441          * the last sample of the last block of this frame. We can interpolate
01442          * the pitch of other blocks (and even per-sample pitch) by gradually
01443          * incrementing/decrementing from prev_frame_pitch to cur_pitch_val. */
01444         n_blocks_x2      = frame_descs[bd_idx].n_blocks << 1;
01445         log_n_blocks_x2  = frame_descs[bd_idx].log_n_blocks + 1;
01446         cur_pitch_val    = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
01447         cur_pitch_val    = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
01448         if (s->last_acb_type == ACB_TYPE_NONE ||
01449             20 * abs(cur_pitch_val - s->last_pitch_val) >
01450                 (cur_pitch_val + s->last_pitch_val))
01451             s->last_pitch_val = cur_pitch_val;
01452 
01453         /* pitch per block */
01454         for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01455             int fac = n * 2 + 1;
01456 
01457             pitch[n] = (MUL16(fac,                 cur_pitch_val) +
01458                         MUL16((n_blocks_x2 - fac), s->last_pitch_val) +
01459                         frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;
01460         }
01461 
01462         /* "pitch-diff-per-sample" for calculation of pitch per sample */
01463         s->pitch_diff_sh16 =
01464             ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;
01465     }
01466 
01467     /* Global gain (if silence) and pitch-adaptive window coordinates */
01468     switch (frame_descs[bd_idx].fcb_type) {
01469     case FCB_TYPE_SILENCE:
01470         s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
01471         break;
01472     case FCB_TYPE_AW_PULSES:
01473         aw_parse_coords(s, gb, pitch);
01474         break;
01475     }
01476 
01477     for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {
01478         int bl_pitch_sh2;
01479 
01480         /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */
01481         switch (frame_descs[bd_idx].acb_type) {
01482         case ACB_TYPE_HAMMING: {
01483             /* Pitch is given per block. Per-block pitches are encoded as an
01484              * absolute value for the first block and as delta values
01485              * (relative to this value) for all subsequent blocks. The scale of
01486              * this pitch value is semi-logarithmic compared to its use in the
01487              * decoder, so we convert it back to the normal scale below. */
01488             int block_pitch,
01489                 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
01490                 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,
01491                 t3 =  s->block_conv_table[3] - s->block_conv_table[2] + 1;
01492 
01493             if (n == 0) {
01494                 block_pitch = get_bits(gb, s->block_pitch_nbits);
01495             } else
01496                 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
01497                                  get_bits(gb, s->block_delta_pitch_nbits);
01498             /* Clamp the stored pitch so that the next block's delta-coded pitch stays within block_pitch_range */
01499             last_block_pitch = av_clip(block_pitch,
01500                                        s->block_delta_pitch_hrange,
01501                                        s->block_pitch_range -
01502                                            s->block_delta_pitch_hrange);
01503 
01504             /* Convert semi-log-style scale back to normal scale */
01505             if (block_pitch < t1) {
01506                 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;
01507             } else {
01508                 block_pitch -= t1;
01509                 if (block_pitch < t2) {
01510                     bl_pitch_sh2 =
01511                         (s->block_conv_table[1] << 2) + (block_pitch << 1);
01512                 } else {
01513                     block_pitch -= t2;
01514                     if (block_pitch < t3) {
01515                         bl_pitch_sh2 =
01516                             (s->block_conv_table[2] + block_pitch) << 2;
01517                     } else
01518                         bl_pitch_sh2 = s->block_conv_table[3] << 2;
01519                 }
01520             }
01521             pitch[n] = bl_pitch_sh2 >> 2;
01522             break;
01523         }
01524 
01525         case ACB_TYPE_ASYMMETRIC: {
01526             bl_pitch_sh2 = pitch[n] << 2;
01527             break;
01528         }
01529 
01530         default: // ACB_TYPE_NONE has no pitch
01531             bl_pitch_sh2 = 0;
01532             break;
01533         }
01534 
01535         synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
01536                     lsps, prev_lsps, &frame_descs[bd_idx],
01537                     &excitation[n * block_nsamples],
01538                     &synth[n * block_nsamples]);
01539     }
01540 
01541     /* Averaging projection filter, if applicable. Else, just copy samples
01542      * from synthesis buffer */
01543     if (s->do_apf) {
01544         double i_lsps[MAX_LSPS];
01545         float lpcs[MAX_LSPS];
01546 
01547         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01548             i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
01549         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01550         postfilter(s, synth, samples, 80, lpcs,
01551                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
01552                    frame_descs[bd_idx].fcb_type, pitch[0]);
01553 
01554         for (n = 0; n < s->lsps; n++) // LSF -> LSP
01555             i_lsps[n] = cos(lsps[n]);
01556         ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
01557         postfilter(s, &synth[80], &samples[80], 80, lpcs,
01558                    &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
01559                    frame_descs[bd_idx].fcb_type, pitch[0]);
01560     } else
01561         memcpy(samples, synth, 160 * sizeof(synth[0]));
01562 
01563     /* Cache values for next frame */
01564     s->frame_cntr++;
01565     if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)
01566     s->last_acb_type = frame_descs[bd_idx].acb_type;
01567     switch (frame_descs[bd_idx].acb_type) {
01568     case ACB_TYPE_NONE:
01569         s->last_pitch_val = 0;
01570         break;
01571     case ACB_TYPE_ASYMMETRIC:
01572         s->last_pitch_val = cur_pitch_val;
01573         break;
01574     case ACB_TYPE_HAMMING:
01575         s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];
01576         break;
01577     }
01578 
01579     return 0;
01580 }
01581 
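/*
 * Ensure the dequantized LSPs are stable: clamp the first and last
 * values away from 0 and pi, enforce a minimum spacing, and re-sort
 * the values if the spacing adjustment broke monotonicity.
 */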
01594 static void stabilize_lsps(double *lsps, int num)
01595 {
01596     int n, m, l;
01597 
01598     /* set minimum value for first, maximum value for last and minimum
01599      * spacing between LSF values.
01600      * Very similar to ff_set_min_dist_lsf(), but in double. */
01601     lsps[0]       = FFMAX(lsps[0],       0.0015 * M_PI);
01602     for (n = 1; n < num; n++)
01603         lsps[n]   = FFMAX(lsps[n],       lsps[n - 1] + 0.0125 * M_PI);
01604     lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);
01605 
01606     /* reorder (looks like one-time / non-recursed bubblesort).
01607      * Very similar to ff_sort_nearly_sorted_floats(), but in double. */
01608     for (n = 1; n < num; n++) {
01609         if (lsps[n] < lsps[n - 1]) {
01610             for (m = 1; m < num; m++) {
01611                 double tmp = lsps[m];
01612                 for (l = m - 1; l >= 0; l--) {
01613                     if (lsps[l] <= tmp) break;
01614                     lsps[l + 1] = lsps[l];
01615                 }
01616                 lsps[l + 1] = tmp;
01617             }
01618             break;
01619         }
01620     }
01621 }
01622 
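/*
 * Walk through the superframe layout on a private copy of the bit
 * reader to verify that enough bits are available for every frame and
 * block, without consuming any input. Returns 0 if the superframe is
 * complete, 1 if it needs more bits than are available, and <0 on
 * invalid data (a WMAPro-in-WMAVoice superframe or a bad frame type
 * code).
 */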
01632 static int check_bits_for_superframe(GetBitContext *orig_gb,
01633                                      WMAVoiceContext *s)
01634 {
01635     GetBitContext s_gb, *gb = &s_gb;
01636     int n, need_bits, bd_idx;
01637     const struct frame_type_desc *frame_desc;
01638 
01639     /* initialize a copy */
01640     init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
01641     skip_bits_long(gb, get_bits_count(orig_gb));
01642     assert(get_bits_left(gb) == get_bits_left(orig_gb));
01643 
01644     /* superframe header */
01645     if (get_bits_left(gb) < 14)
01646         return 1;
01647     if (!get_bits1(gb))
01648         return -1;                        // WMAPro-in-WMAVoice superframe
01649     if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
01650     if (s->has_residual_lsps) {           // residual LSPs (for all frames)
01651         if (get_bits_left(gb) < s->sframe_lsp_bitsize)
01652             return 1;
01653         skip_bits_long(gb, s->sframe_lsp_bitsize);
01654     }
01655 
01656     /* frames */
01657     for (n = 0; n < MAX_FRAMES; n++) {
01658         int aw_idx_is_ext = 0;
01659 
01660         if (!s->has_residual_lsps) {     // independent LSPs (per-frame)
01661            if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
01662            skip_bits_long(gb, s->frame_lsp_bitsize);
01663         }
01664         bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
01665         if (bd_idx < 0)
01666             return -1;                   // invalid frame type VLC code
01667         frame_desc = &frame_descs[bd_idx];
01668         if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
01669             if (get_bits_left(gb) < s->pitch_nbits)
01670                 return 1;
01671             skip_bits_long(gb, s->pitch_nbits);
01672         }
01673         if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
01674             skip_bits(gb, 8);
01675         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01676             int tmp = get_bits(gb, 6);
01677             if (tmp >= 0x36) {
01678                 skip_bits(gb, 2);
01679                 aw_idx_is_ext = 1;
01680             }
01681         }
01682 
01683         /* blocks */
01684         if (frame_desc->acb_type == ACB_TYPE_HAMMING) {
01685             need_bits = s->block_pitch_nbits +
01686                 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;
01687         } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
01688             need_bits = 2 * !aw_idx_is_ext;
01689         } else
01690             need_bits = 0;
01691         need_bits += frame_desc->frame_size;
01692         if (get_bits_left(gb) < need_bits)
01693             return 1;
01694         skip_bits_long(gb, need_bits);
01695     }
01696 
01697     return 0;
01698 }
01699 
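01700 /**
01701  * Synthesize the output of a whole superframe, i.e. up to 480 samples
01702  * split over three frames of 160 samples each.
01703  *
01704  * Reads the superframe header (speech/music bit and optional sample
01705  * count), the LSPs (either residual ones shared by the whole superframe
01706  * or independent per-frame ones), synthesizes each frame and finally
01707  * updates the LSP/excitation/synthesis history for the next superframe.
01708  *
01709  * @param ctx       libavcodec context
01710  * @param samples   output buffer for the floating-point samples
01711  * @param data_size pointer in which the output size (in bytes) is returned
01712  * @return 0 on success, 1 if more data is needed (so the caller can cache
01713  *         the superframe and complete it with spillover bits from the
01714  *         next packet), <0 on error
01715  */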
01720 static int synth_superframe(AVCodecContext *ctx,
01721                             float *samples, int *data_size)
01722 {
01723     WMAVoiceContext *s = ctx->priv_data;
01724     GetBitContext *gb = &s->gb, s_gb;
01725     int n, res, n_samples = 480;
01726     double lsps[MAX_FRAMES][MAX_LSPS];
01727     const double *mean_lsf = s->lsps == 16 ?
01728         wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode];
01729     float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];
01730     float synth[MAX_LSPS + MAX_SFRAMESIZE];
01731 
01732     memcpy(synth,      s->synth_history,
01733            s->lsps             * sizeof(*synth));
01734     memcpy(excitation, s->excitation_history,
01735            s->history_nsamples * sizeof(*excitation));
01736 
01737     if (s->sframe_cache_size > 0) {
01738         gb = &s_gb;
01739         init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
01740         s->sframe_cache_size = 0;
01741     }
01742 
01743     if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;
01744 
01745     /* First bit is speech/music bit, it differentiates between WMAVoice
01746      * speech samples (the actual codec) and WMAVoice music samples, which
01747      * are really WMAPro-in-WMAVoice-superframes. I've never seen those in
01748      * the wild yet. */
01749     if (!get_bits1(gb)) {
01750         av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
01751         return -1;
01752     }
01753 
01754     /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
01755     if (get_bits1(gb)) {
01756         if ((n_samples = get_bits(gb, 12)) > 480) {
01757             av_log(ctx, AV_LOG_ERROR,
01758                    "Superframe encodes >480 samples (%d), not allowed\n",
01759                    n_samples);
01760             return -1;
01761         }
01762     }
01763     /* Parse LSPs, if global for the superframe (can also be per-frame). */
01764     if (s->has_residual_lsps) {
01765         double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];
01766 
01767         for (n = 0; n < s->lsps; n++)
01768             prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
01769 
01770         if (s->lsps == 10) {
01771             dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01772         } else /* s->lsps == 16 */
01773             dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
01774 
01775         for (n = 0; n < s->lsps; n++) {
01776             lsps[0][n]  = mean_lsf[n] + (a1[n]           - a2[n * 2]);
01777             lsps[1][n]  = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);
01778             lsps[2][n] += mean_lsf[n];
01779         }
01780         for (n = 0; n < 3; n++)
01781             stabilize_lsps(lsps[n], s->lsps);
01782     }
01783 
01784     /* Parse frames, optionally preceded by per-frame (independent) LSPs. */
01785     for (n = 0; n < 3; n++) {
01786         if (!s->has_residual_lsps) {
01787             int m;
01788 
01789             if (s->lsps == 10) {
01790                 dequant_lsp10i(gb, lsps[n]);
01791             } else /* s->lsps == 16 */
01792                 dequant_lsp16i(gb, lsps[n]);
01793 
01794             for (m = 0; m < s->lsps; m++)
01795                 lsps[n][m] += mean_lsf[m];
01796             stabilize_lsps(lsps[n], s->lsps);
01797         }
01798 
01799         if ((res = synth_frame(ctx, gb, n,
01800                                &samples[n * MAX_FRAMESIZE],
01801                                lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
01802                                &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
01803                                &synth[s->lsps + n * MAX_FRAMESIZE])))
01804             return res;
01805     }
01806 
01807     /* Statistics? FIXME - we don't check for length, a slight overrun
01808      * will be caught by internal buffer padding, and anything else
01809      * will be skipped, not read. */
01810     if (get_bits1(gb)) {
01811         res = get_bits(gb, 4);
01812         skip_bits(gb, 10 * (res + 1));
01813     }
01814 
01815     /* Specify nr. of output samples */
01816     *data_size = n_samples * sizeof(float);
01817 
01818     /* Update history */
01819     memcpy(s->prev_lsps,           lsps[2],
01820            s->lsps             * sizeof(*s->prev_lsps));
01821     memcpy(s->synth_history,      &synth[MAX_SFRAMESIZE],
01822            s->lsps             * sizeof(*synth));
01823     memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
01824            s->history_nsamples * sizeof(*excitation));
01825     if (s->do_apf)
01826         memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
01827                 s->history_nsamples * sizeof(*s->zero_exc_pf));
01828 
01829     return 0;
01830 }
01831 
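01832 /**
01833  * Parse the packet header at the start of each packet (input: the s->gb
01834  * bit I/O context): packet sequence number, residual-LSP flag, and the
01835  * number of bits at the start of this packet that still belong to the
01836  * last superframe of the previous packet (spillover).
01837  * @return 0 on success, 1 if there are not enough bits in the packet
01838  */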
01839 static int parse_packet_header(WMAVoiceContext *s)
01840 {
01841     GetBitContext *gb = &s->gb;
01842     unsigned int res;
01843 
01844     if (get_bits_left(gb) < 11)
01845         return 1;
01846     skip_bits(gb, 4);          // packet sequence number
01847     s->has_residual_lsps = get_bits1(gb);
01848     do {
01849         res = get_bits(gb, 6); // number of superframes per packet
01850                                // (minus first one if there is spillover)
01851         if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
01852             return 1;
01853     } while (res == 0x3F);
01854     s->spillover_nbits   = get_bits(gb, s->spillover_bitsize);
01855 
01856     return 0;
01857 }
01858 
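01859 /**
01860  * Copy (part of) a bitstream from a reader to a writer.
01861  *
01862  * The first few (<8) bits are copied via get_bits()/put_bits() so that the
01863  * rest of the source forms whole bytes, which are then copied (up to nbits
01864  * in total) through ff_copy_bits(). If fewer than nbits bits are left in
01865  * gb, nothing is copied at all.
01866  *
01867  * @param pb    target bit I/O context (writer)
01868  * @param data  start of the buffer that gb reads from
01869  * @param size  size of that buffer, in bytes
01870  * @param gb    source bit I/O context (reader)
01871  * @param nbits number of bits to copy
01872  */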
01874 static void copy_bits(PutBitContext *pb,
01875                       const uint8_t *data, int size,
01876                       GetBitContext *gb, int nbits)
01877 {
01878     int rmn_bytes, rmn_bits;
01879 
01880     rmn_bits = rmn_bytes = get_bits_left(gb);
01881     if (rmn_bits < nbits)
01882         return;
01883     rmn_bits &= 7; rmn_bytes >>= 3;
01884     if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
01885         put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
01886     ff_copy_bits(pb, data + size - rmn_bytes,
01887                  FFMIN(nbits - rmn_bits, rmn_bytes << 3));
01888 }
01889 
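01890 /**
01891  * Decode a packet: parse the packet header (for new packets), flush any
01892  * superframe cached from the previous packet by completing it with the
01893  * spillover bits at the start of this one, then decode the next
01894  * superframe in the current packet; an incomplete trailing superframe
01895  * is cached for the next call. The output buffer must be able to hold
01896  * at least 480 floating-point samples.
01897  *
01898  * @return the number of bytes consumed from the packet on success,
01899  *         <0 on error
01900  */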
01901 static int wmavoice_decode_packet(AVCodecContext *ctx, void *data,
01902                                   int *data_size, AVPacket *avpkt)
01903 {
01904     WMAVoiceContext *s = ctx->priv_data;
01905     GetBitContext *gb = &s->gb;
01906     int size, res, pos;
01907 
01908     if (*data_size < 480 * sizeof(float)) {
01909         av_log(ctx, AV_LOG_ERROR,
01910                "Output buffer too small (%d given - %zu needed)\n",
01911                *data_size, 480 * sizeof(float));
01912         return -1;
01913     }
01914     *data_size = 0;
01915 
01916     /* Packets are sometimes a multiple of ctx->block_align, with a packet
01917      * header at each ctx->block_align bytes. However, Libav's ASF demuxer
01918      * feeds us ASF packets, which may concatenate multiple "codec" packets
01919      * in a single "muxer" packet, so we emulate per-codec-packet input by
01920      * capping the packet size at ctx->block_align. */
01921     for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);
01922     if (!size)
01923         return 0;
01924     init_get_bits(&s->gb, avpkt->data, size << 3);
01925 
01926     /* size == ctx->block_align is used to indicate whether we are dealing
01927      * with a new packet or with a packet whose header was already read in
01928      * a previous call. */
01929     if (size == ctx->block_align) { // new packet header
01930         if ((res = parse_packet_header(s)) < 0)
01931             return res;
01932 
01933         /* If the packet header specifies a nonzero s->spillover_nbits, then
01934          * we want to push out all data of the previous packet (+ spillover)
01935          * before continuing to parse new superframes in the current packet. */
01936         if (s->spillover_nbits > 0) {
01937             if (s->sframe_cache_size > 0) {
01938                 int cnt = get_bits_count(gb);
01939                 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
01940                 flush_put_bits(&s->pb);
01941                 s->sframe_cache_size += s->spillover_nbits;
01942                 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&
01943                     *data_size > 0) {
01944                     cnt += s->spillover_nbits;
01945                     s->skip_bits_next = cnt & 7;
01946                     return cnt >> 3;
01947                 } else
01948                     skip_bits_long (gb, s->spillover_nbits - cnt +
01949                                     get_bits_count(gb)); // resync
01950             } else
01951                 skip_bits_long(gb, s->spillover_nbits);  // resync
01952         }
01953     } else if (s->skip_bits_next)
01954         skip_bits(gb, s->skip_bits_next);
01955 
01956     /* Try parsing superframes in current packet */
01957     s->sframe_cache_size = 0;
01958     s->skip_bits_next = 0;
01959     pos = get_bits_left(gb);
01960     if ((res = synth_superframe(ctx, data, data_size)) < 0) {
01961         return res;
01962     } else if (*data_size > 0) {
01963         int cnt = get_bits_count(gb);
01964         s->skip_bits_next = cnt & 7;
01965         return cnt >> 3;
01966     } else if ((s->sframe_cache_size = pos) > 0) {
01967         /* rewind bit reader to start of last (incomplete) superframe... */
01968         init_get_bits(gb, avpkt->data, size << 3);
01969         skip_bits_long(gb, (size << 3) - pos);
01970         assert(get_bits_left(gb) == pos);
01971 
01972         /* ...and cache it for spillover in next packet */
01973         init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
01974         copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
01975         // FIXME bad - just copy whole bytes and use the
01976         // skip_bits_next field
01977     }
01978 
01979     return size;
01980 }
01981 
01982 static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
01983 {
01984     WMAVoiceContext *s = ctx->priv_data;
01985 
01986     if (s->do_apf) {
01987         ff_rdft_end(&s->rdft);
01988         ff_rdft_end(&s->irdft);
01989         ff_dct_end(&s->dct);
01990         ff_dct_end(&s->dst);
01991     }
01992 
01993     return 0;
01994 }
01995 
01996 static av_cold void wmavoice_flush(AVCodecContext *ctx)
01997 {
01998     WMAVoiceContext *s = ctx->priv_data;
01999     int n;
02000 
02001     s->postfilter_agc    = 0;
02002     s->sframe_cache_size = 0;
02003     s->skip_bits_next    = 0;
02004     for (n = 0; n < s->lsps; n++)
02005         s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
02006     memset(s->excitation_history, 0,
02007            sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
02008     memset(s->synth_history,      0,
02009            sizeof(*s->synth_history)      * MAX_LSPS);
02010     memset(s->gain_pred_err,      0,
02011            sizeof(s->gain_pred_err));
02012 
02013     if (s->do_apf) {
02014         memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
02015                sizeof(*s->synth_filter_out_buf) * s->lsps);
02016         memset(s->dcf_mem,              0,
02017                sizeof(*s->dcf_mem)              * 2);
02018         memset(s->zero_exc_pf,          0,
02019                sizeof(*s->zero_exc_pf)          * s->history_nsamples);
02020         memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
02021     }
02022 }
02023 
02024 AVCodec ff_wmavoice_decoder = {
02025     "wmavoice",
02026     AVMEDIA_TYPE_AUDIO,
02027     CODEC_ID_WMAVOICE,
02028     sizeof(WMAVoiceContext),
02029     wmavoice_decode_init,
02030     NULL,
02031     wmavoice_decode_end,
02032     wmavoice_decode_packet,
02033     CODEC_CAP_SUBFRAMES,
02034     .flush     = wmavoice_flush,
02035     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
02036 };