Libav 0.7.1
|
00001 /* 00002 * G.722 ADPCM audio encoder/decoder 00003 * 00004 * Copyright (c) CMU 1993 Computer Science, Speech Group 00005 * Chengxiang Lu and Alex Hauptmann 00006 * Copyright (c) 2005 Steve Underwood <steveu at coppice.org> 00007 * Copyright (c) 2009 Kenan Gillet 00008 * Copyright (c) 2010 Martin Storsjo 00009 * 00010 * This file is part of Libav. 00011 * 00012 * Libav is free software; you can redistribute it and/or 00013 * modify it under the terms of the GNU Lesser General Public 00014 * License as published by the Free Software Foundation; either 00015 * version 2.1 of the License, or (at your option) any later version. 00016 * 00017 * Libav is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00020 * Lesser General Public License for more details. 00021 * 00022 * You should have received a copy of the GNU Lesser General Public 00023 * License along with Libav; if not, write to the Free Software 00024 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00025 */ 00026 00040 #include "avcodec.h" 00041 #include "mathops.h" 00042 #include "get_bits.h" 00043 00044 #define PREV_SAMPLES_BUF_SIZE 1024 00045 00046 #define FREEZE_INTERVAL 128 00047 00048 typedef struct { 00049 int16_t prev_samples[PREV_SAMPLES_BUF_SIZE]; 00050 int prev_samples_pos; 00051 00055 struct G722Band { 00056 int16_t s_predictor; 00057 int32_t s_zero; 00058 int8_t part_reconst_mem[2]; 00059 int16_t prev_qtzd_reconst; 00060 int16_t pole_mem[2]; 00061 int32_t diff_mem[6]; 00062 int16_t zero_mem[6]; 00063 int16_t log_factor; 00064 int16_t scale_factor; 00065 } band[2]; 00066 00067 struct TrellisNode { 00068 struct G722Band state; 00069 uint32_t ssd; 00070 int path; 00071 } *node_buf[2], **nodep_buf[2]; 00072 00073 struct TrellisPath { 00074 int value; 00075 int prev; 00076 } *paths[2]; 00077 } G722Context; 00078 00079 00080 static const int8_t sign_lookup[2] = { -1, 1 }; 00081 00082 static const int16_t inv_log2_table[32] = { 00083 2048, 2093, 2139, 2186, 2233, 2282, 2332, 2383, 00084 2435, 2489, 2543, 2599, 2656, 2714, 2774, 2834, 00085 2896, 2960, 3025, 3091, 3158, 3228, 3298, 3371, 00086 3444, 3520, 3597, 3676, 3756, 3838, 3922, 4008 00087 }; 00088 static const int16_t high_log_factor_step[2] = { 798, -214 }; 00089 static const int16_t high_inv_quant[4] = { -926, -202, 926, 202 }; 00093 static const int16_t low_log_factor_step[16] = { 00094 -60, 3042, 1198, 538, 334, 172, 58, -30, 00095 3042, 1198, 538, 334, 172, 58, -30, -60 00096 }; 00097 static const int16_t low_inv_quant4[16] = { 00098 0, -2557, -1612, -1121, -786, -530, -323, -150, 00099 2557, 1612, 1121, 786, 530, 323, 150, 0 00100 }; 00101 static const int16_t low_inv_quant6[64] = { 00102 -17, -17, -17, -17, -3101, -2738, -2376, -2088, 00103 -1873, -1689, -1535, -1399, -1279, -1170, -1072, -982, 00104 -899, -822, -750, -682, -618, -558, -501, -447, 00105 -396, -347, -300, -254, -211, -170, -130, -91, 00106 3101, 2738, 2376, 2088, 1873, 1689, 1535, 1399, 00107 1279, 1170, 1072, 982, 899, 822, 750, 682, 00108 618, 558, 501, 447, 396, 347, 300, 254, 00109 211, 170, 130, 91, 54, 17, -54, -17 00110 }; 00111 00117 static const int16_t qmf_coeffs[12] = { 00118 3, -11, 12, 32, -210, 951, 3876, -805, 362, -156, 53, -11, 00119 }; 00120 00121 00128 static void do_adaptive_prediction(struct G722Band *band, const int cur_diff) 00129 { 00130 int sg[2], limit, i, cur_qtzd_reconst; 00131 00132 const int cur_part_reconst = band->s_zero + cur_diff < 0; 00133 00134 sg[0] = sign_lookup[cur_part_reconst != band->part_reconst_mem[0]]; 00135 sg[1] = sign_lookup[cur_part_reconst == band->part_reconst_mem[1]]; 00136 band->part_reconst_mem[1] = band->part_reconst_mem[0]; 00137 band->part_reconst_mem[0] = cur_part_reconst; 00138 00139 band->pole_mem[1] = av_clip((sg[0] * av_clip(band->pole_mem[0], -8191, 8191) >> 5) + 00140 (sg[1] << 7) + (band->pole_mem[1] * 127 >> 7), -12288, 12288); 00141 00142 limit = 15360 - band->pole_mem[1]; 00143 band->pole_mem[0] = av_clip(-192 * sg[0] + (band->pole_mem[0] * 255 >> 8), -limit, limit); 00144 00145 00146 if (cur_diff) { 00147 for (i = 0; i < 6; i++) 00148 band->zero_mem[i] = ((band->zero_mem[i]*255) >> 8) + 00149 ((band->diff_mem[i]^cur_diff) < 0 ? -128 : 128); 00150 } else 00151 for (i = 0; i < 6; i++) 00152 band->zero_mem[i] = (band->zero_mem[i]*255) >> 8; 00153 00154 for (i = 5; i > 0; i--) 00155 band->diff_mem[i] = band->diff_mem[i-1]; 00156 band->diff_mem[0] = av_clip_int16(cur_diff << 1); 00157 00158 band->s_zero = 0; 00159 for (i = 5; i >= 0; i--) 00160 band->s_zero += (band->zero_mem[i]*band->diff_mem[i]) >> 15; 00161 00162 00163 cur_qtzd_reconst = av_clip_int16((band->s_predictor + cur_diff) << 1); 00164 band->s_predictor = av_clip_int16(band->s_zero + 00165 (band->pole_mem[0] * cur_qtzd_reconst >> 15) + 00166 (band->pole_mem[1] * band->prev_qtzd_reconst >> 15)); 00167 band->prev_qtzd_reconst = cur_qtzd_reconst; 00168 } 00169 00170 static int inline linear_scale_factor(const int log_factor) 00171 { 00172 const int wd1 = inv_log2_table[(log_factor >> 6) & 31]; 00173 const int shift = log_factor >> 11; 00174 return shift < 0 ? wd1 >> -shift : wd1 << shift; 00175 } 00176 00177 static void update_low_predictor(struct G722Band *band, const int ilow) 00178 { 00179 do_adaptive_prediction(band, 00180 band->scale_factor * low_inv_quant4[ilow] >> 10); 00181 00182 // quantizer adaptation 00183 band->log_factor = av_clip((band->log_factor * 127 >> 7) + 00184 low_log_factor_step[ilow], 0, 18432); 00185 band->scale_factor = linear_scale_factor(band->log_factor - (8 << 11)); 00186 } 00187 00188 static void update_high_predictor(struct G722Band *band, const int dhigh, 00189 const int ihigh) 00190 { 00191 do_adaptive_prediction(band, dhigh); 00192 00193 // quantizer adaptation 00194 band->log_factor = av_clip((band->log_factor * 127 >> 7) + 00195 high_log_factor_step[ihigh&1], 0, 22528); 00196 band->scale_factor = linear_scale_factor(band->log_factor - (10 << 11)); 00197 } 00198 00199 static void apply_qmf(const int16_t *prev_samples, int *xout1, int *xout2) 00200 { 00201 int i; 00202 00203 *xout1 = 0; 00204 *xout2 = 0; 00205 for (i = 0; i < 12; i++) { 00206 MAC16(*xout2, prev_samples[2*i ], qmf_coeffs[i ]); 00207 MAC16(*xout1, prev_samples[2*i+1], qmf_coeffs[11-i]); 00208 } 00209 } 00210 00211 static av_cold int g722_init(AVCodecContext * avctx) 00212 { 00213 G722Context *c = avctx->priv_data; 00214 00215 if (avctx->channels != 1) { 00216 av_log(avctx, AV_LOG_ERROR, "Only mono tracks are allowed.\n"); 00217 return AVERROR_INVALIDDATA; 00218 } 00219 avctx->sample_fmt = AV_SAMPLE_FMT_S16; 00220 00221 switch (avctx->bits_per_coded_sample) { 00222 case 8: 00223 case 7: 00224 case 6: 00225 break; 00226 default: 00227 av_log(avctx, AV_LOG_WARNING, "Unsupported bits_per_coded_sample [%d], " 00228 "assuming 8\n", 00229 avctx->bits_per_coded_sample); 00230 case 0: 00231 avctx->bits_per_coded_sample = 8; 00232 break; 00233 } 00234 00235 c->band[0].scale_factor = 8; 00236 c->band[1].scale_factor = 2; 00237 c->prev_samples_pos = 22; 00238 00239 if (avctx->lowres) 00240 avctx->sample_rate /= 2; 00241 00242 if (avctx->trellis) { 00243 int frontier = 1 << avctx->trellis; 00244 int max_paths = frontier * FREEZE_INTERVAL; 00245 int i; 00246 for (i = 0; i < 2; i++) { 00247 c->paths[i] = av_mallocz(max_paths * sizeof(**c->paths)); 00248 c->node_buf[i] = av_mallocz(2 * frontier * sizeof(**c->node_buf)); 00249 c->nodep_buf[i] = av_mallocz(2 * frontier * sizeof(**c->nodep_buf)); 00250 } 00251 } 00252 00253 return 0; 00254 } 00255 00256 static av_cold int g722_close(AVCodecContext *avctx) 00257 { 00258 G722Context *c = avctx->priv_data; 00259 int i; 00260 for (i = 0; i < 2; i++) { 00261 av_freep(&c->paths[i]); 00262 av_freep(&c->node_buf[i]); 00263 av_freep(&c->nodep_buf[i]); 00264 } 00265 return 0; 00266 } 00267 00268 #if CONFIG_ADPCM_G722_DECODER 00269 static const int16_t low_inv_quant5[32] = { 00270 -35, -35, -2919, -2195, -1765, -1458, -1219, -1023, 00271 -858, -714, -587, -473, -370, -276, -190, -110, 00272 2919, 2195, 1765, 1458, 1219, 1023, 858, 714, 00273 587, 473, 370, 276, 190, 110, 35, -35 00274 }; 00275 00276 static const int16_t *low_inv_quants[3] = { low_inv_quant6, low_inv_quant5, 00277 low_inv_quant4 }; 00278 00279 static int g722_decode_frame(AVCodecContext *avctx, void *data, 00280 int *data_size, AVPacket *avpkt) 00281 { 00282 G722Context *c = avctx->priv_data; 00283 int16_t *out_buf = data; 00284 int j, out_len = 0; 00285 const int skip = 8 - avctx->bits_per_coded_sample; 00286 const int16_t *quantizer_table = low_inv_quants[skip]; 00287 GetBitContext gb; 00288 00289 init_get_bits(&gb, avpkt->data, avpkt->size * 8); 00290 00291 for (j = 0; j < avpkt->size; j++) { 00292 int ilow, ihigh, rlow; 00293 00294 ihigh = get_bits(&gb, 2); 00295 ilow = get_bits(&gb, 6 - skip); 00296 skip_bits(&gb, skip); 00297 00298 rlow = av_clip((c->band[0].scale_factor * quantizer_table[ilow] >> 10) 00299 + c->band[0].s_predictor, -16384, 16383); 00300 00301 update_low_predictor(&c->band[0], ilow >> (2 - skip)); 00302 00303 if (!avctx->lowres) { 00304 const int dhigh = c->band[1].scale_factor * 00305 high_inv_quant[ihigh] >> 10; 00306 const int rhigh = av_clip(dhigh + c->band[1].s_predictor, 00307 -16384, 16383); 00308 int xout1, xout2; 00309 00310 update_high_predictor(&c->band[1], dhigh, ihigh); 00311 00312 c->prev_samples[c->prev_samples_pos++] = rlow + rhigh; 00313 c->prev_samples[c->prev_samples_pos++] = rlow - rhigh; 00314 apply_qmf(c->prev_samples + c->prev_samples_pos - 24, 00315 &xout1, &xout2); 00316 out_buf[out_len++] = av_clip_int16(xout1 >> 12); 00317 out_buf[out_len++] = av_clip_int16(xout2 >> 12); 00318 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) { 00319 memmove(c->prev_samples, 00320 c->prev_samples + c->prev_samples_pos - 22, 00321 22 * sizeof(c->prev_samples[0])); 00322 c->prev_samples_pos = 22; 00323 } 00324 } else 00325 out_buf[out_len++] = rlow; 00326 } 00327 *data_size = out_len << 1; 00328 return avpkt->size; 00329 } 00330 00331 AVCodec ff_adpcm_g722_decoder = { 00332 .name = "g722", 00333 .type = AVMEDIA_TYPE_AUDIO, 00334 .id = CODEC_ID_ADPCM_G722, 00335 .priv_data_size = sizeof(G722Context), 00336 .init = g722_init, 00337 .decode = g722_decode_frame, 00338 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"), 00339 .max_lowres = 1, 00340 }; 00341 #endif 00342 00343 #if CONFIG_ADPCM_G722_ENCODER 00344 static const int16_t low_quant[33] = { 00345 35, 72, 110, 150, 190, 233, 276, 323, 00346 370, 422, 473, 530, 587, 650, 714, 786, 00347 858, 940, 1023, 1121, 1219, 1339, 1458, 1612, 00348 1765, 1980, 2195, 2557, 2919 00349 }; 00350 00351 static inline void filter_samples(G722Context *c, const int16_t *samples, 00352 int *xlow, int *xhigh) 00353 { 00354 int xout1, xout2; 00355 c->prev_samples[c->prev_samples_pos++] = samples[0]; 00356 c->prev_samples[c->prev_samples_pos++] = samples[1]; 00357 apply_qmf(c->prev_samples + c->prev_samples_pos - 24, &xout1, &xout2); 00358 *xlow = xout1 + xout2 >> 13; 00359 *xhigh = xout1 - xout2 >> 13; 00360 if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) { 00361 memmove(c->prev_samples, 00362 c->prev_samples + c->prev_samples_pos - 22, 00363 22 * sizeof(c->prev_samples[0])); 00364 c->prev_samples_pos = 22; 00365 } 00366 } 00367 00368 static inline int encode_high(const struct G722Band *state, int xhigh) 00369 { 00370 int diff = av_clip_int16(xhigh - state->s_predictor); 00371 int pred = 141 * state->scale_factor >> 8; 00372 /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */ 00373 return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0); 00374 } 00375 00376 static inline int encode_low(const struct G722Band* state, int xlow) 00377 { 00378 int diff = av_clip_int16(xlow - state->s_predictor); 00379 /* = diff >= 0 ? diff : -(diff + 1) */ 00380 int limit = diff ^ (diff >> (sizeof(diff)*8-1)); 00381 int i = 0; 00382 limit = limit + 1 << 10; 00383 if (limit > low_quant[8] * state->scale_factor) 00384 i = 9; 00385 while (i < 29 && limit > low_quant[i] * state->scale_factor) 00386 i++; 00387 return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i; 00388 } 00389 00390 static int g722_encode_trellis(AVCodecContext *avctx, 00391 uint8_t *dst, int buf_size, void *data) 00392 { 00393 G722Context *c = avctx->priv_data; 00394 const int16_t *samples = data; 00395 int i, j, k; 00396 int frontier = 1 << avctx->trellis; 00397 struct TrellisNode **nodes[2]; 00398 struct TrellisNode **nodes_next[2]; 00399 int pathn[2] = {0, 0}, froze = -1; 00400 struct TrellisPath *p[2]; 00401 00402 for (i = 0; i < 2; i++) { 00403 nodes[i] = c->nodep_buf[i]; 00404 nodes_next[i] = c->nodep_buf[i] + frontier; 00405 memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf)); 00406 nodes[i][0] = c->node_buf[i] + frontier; 00407 nodes[i][0]->ssd = 0; 00408 nodes[i][0]->path = 0; 00409 nodes[i][0]->state = c->band[i]; 00410 } 00411 00412 for (i = 0; i < buf_size >> 1; i++) { 00413 int xlow, xhigh; 00414 struct TrellisNode *next[2]; 00415 int heap_pos[2] = {0, 0}; 00416 00417 for (j = 0; j < 2; j++) { 00418 next[j] = c->node_buf[j] + frontier*(i & 1); 00419 memset(nodes_next[j], 0, frontier * sizeof(**nodes_next)); 00420 } 00421 00422 filter_samples(c, &samples[2*i], &xlow, &xhigh); 00423 00424 for (j = 0; j < frontier && nodes[0][j]; j++) { 00425 /* Only k >> 2 affects the future adaptive state, therefore testing 00426 * small steps that don't change k >> 2 is useless, the orignal 00427 * value from encode_low is better than them. Since we step k 00428 * in steps of 4, make sure range is a multiple of 4, so that 00429 * we don't miss the original value from encode_low. */ 00430 int range = j < frontier/2 ? 4 : 0; 00431 struct TrellisNode *cur_node = nodes[0][j]; 00432 00433 int ilow = encode_low(&cur_node->state, xlow); 00434 00435 for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) { 00436 int decoded, dec_diff, pos; 00437 uint32_t ssd; 00438 struct TrellisNode* node; 00439 00440 if (k < 0) 00441 continue; 00442 00443 decoded = av_clip((cur_node->state.scale_factor * 00444 low_inv_quant6[k] >> 10) 00445 + cur_node->state.s_predictor, -16384, 16383); 00446 dec_diff = xlow - decoded; 00447 00448 #define STORE_NODE(index, UPDATE, VALUE)\ 00449 ssd = cur_node->ssd + dec_diff*dec_diff;\ 00450 /* Check for wraparound. Using 64 bit ssd counters would \ 00451 * be simpler, but is slower on x86 32 bit. */\ 00452 if (ssd < cur_node->ssd)\ 00453 continue;\ 00454 if (heap_pos[index] < frontier) {\ 00455 pos = heap_pos[index]++;\ 00456 assert(pathn[index] < FREEZE_INTERVAL * frontier);\ 00457 node = nodes_next[index][pos] = next[index]++;\ 00458 node->path = pathn[index]++;\ 00459 } else {\ 00460 /* Try to replace one of the leaf nodes with the new \ 00461 * one, but not always testing the same leaf position */\ 00462 pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\ 00463 if (ssd >= nodes_next[index][pos]->ssd)\ 00464 continue;\ 00465 heap_pos[index]++;\ 00466 node = nodes_next[index][pos];\ 00467 }\ 00468 node->ssd = ssd;\ 00469 node->state = cur_node->state;\ 00470 UPDATE;\ 00471 c->paths[index][node->path].value = VALUE;\ 00472 c->paths[index][node->path].prev = cur_node->path;\ 00473 /* Sift the newly inserted node up in the heap to restore \ 00474 * the heap property */\ 00475 while (pos > 0) {\ 00476 int parent = (pos - 1) >> 1;\ 00477 if (nodes_next[index][parent]->ssd <= ssd)\ 00478 break;\ 00479 FFSWAP(struct TrellisNode*, nodes_next[index][parent],\ 00480 nodes_next[index][pos]);\ 00481 pos = parent;\ 00482 } 00483 STORE_NODE(0, update_low_predictor(&node->state, k >> 2), k); 00484 } 00485 } 00486 00487 for (j = 0; j < frontier && nodes[1][j]; j++) { 00488 int ihigh; 00489 struct TrellisNode *cur_node = nodes[1][j]; 00490 00491 /* We don't try to get any initial guess for ihigh via 00492 * encode_high - since there's only 4 possible values, test 00493 * them all. Testing all of these gives a much, much larger 00494 * gain than testing a larger range around ilow. */ 00495 for (ihigh = 0; ihigh < 4; ihigh++) { 00496 int dhigh, decoded, dec_diff, pos; 00497 uint32_t ssd; 00498 struct TrellisNode* node; 00499 00500 dhigh = cur_node->state.scale_factor * 00501 high_inv_quant[ihigh] >> 10; 00502 decoded = av_clip(dhigh + cur_node->state.s_predictor, 00503 -16384, 16383); 00504 dec_diff = xhigh - decoded; 00505 00506 STORE_NODE(1, update_high_predictor(&node->state, dhigh, ihigh), ihigh); 00507 } 00508 } 00509 00510 for (j = 0; j < 2; j++) { 00511 FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]); 00512 00513 if (nodes[j][0]->ssd > (1 << 16)) { 00514 for (k = 1; k < frontier && nodes[j][k]; k++) 00515 nodes[j][k]->ssd -= nodes[j][0]->ssd; 00516 nodes[j][0]->ssd = 0; 00517 } 00518 } 00519 00520 if (i == froze + FREEZE_INTERVAL) { 00521 p[0] = &c->paths[0][nodes[0][0]->path]; 00522 p[1] = &c->paths[1][nodes[1][0]->path]; 00523 for (j = i; j > froze; j--) { 00524 dst[j] = p[1]->value << 6 | p[0]->value; 00525 p[0] = &c->paths[0][p[0]->prev]; 00526 p[1] = &c->paths[1][p[1]->prev]; 00527 } 00528 froze = i; 00529 pathn[0] = pathn[1] = 0; 00530 memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes)); 00531 memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes)); 00532 } 00533 } 00534 00535 p[0] = &c->paths[0][nodes[0][0]->path]; 00536 p[1] = &c->paths[1][nodes[1][0]->path]; 00537 for (j = i; j > froze; j--) { 00538 dst[j] = p[1]->value << 6 | p[0]->value; 00539 p[0] = &c->paths[0][p[0]->prev]; 00540 p[1] = &c->paths[1][p[1]->prev]; 00541 } 00542 c->band[0] = nodes[0][0]->state; 00543 c->band[1] = nodes[1][0]->state; 00544 00545 return i; 00546 } 00547 00548 static int g722_encode_frame(AVCodecContext *avctx, 00549 uint8_t *dst, int buf_size, void *data) 00550 { 00551 G722Context *c = avctx->priv_data; 00552 const int16_t *samples = data; 00553 int i; 00554 00555 if (avctx->trellis) 00556 return g722_encode_trellis(avctx, dst, buf_size, data); 00557 00558 for (i = 0; i < buf_size >> 1; i++) { 00559 int xlow, xhigh, ihigh, ilow; 00560 filter_samples(c, &samples[2*i], &xlow, &xhigh); 00561 ihigh = encode_high(&c->band[1], xhigh); 00562 ilow = encode_low(&c->band[0], xlow); 00563 update_high_predictor(&c->band[1], c->band[1].scale_factor * 00564 high_inv_quant[ihigh] >> 10, ihigh); 00565 update_low_predictor(&c->band[0], ilow >> 2); 00566 *dst++ = ihigh << 6 | ilow; 00567 } 00568 return i; 00569 } 00570 00571 AVCodec ff_adpcm_g722_encoder = { 00572 .name = "g722", 00573 .type = AVMEDIA_TYPE_AUDIO, 00574 .id = CODEC_ID_ADPCM_G722, 00575 .priv_data_size = sizeof(G722Context), 00576 .init = g722_init, 00577 .close = g722_close, 00578 .encode = g722_encode_frame, 00579 .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"), 00580 .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, 00581 }; 00582 #endif 00583