00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041 #include "png.h"
00042
/* Clipping lookup table: ff_cropTbl[MAX_NEG_CROP + x] holds x clipped to
 * [0,255]; filled at init time (zeroed here). */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares lookup: ff_squareTbl[256 + x] == x*x; the +256 bias lets signed
 * pixel differences index it directly.  Filled at init time. */
uint32_t ff_squareTbl[512] = {0, };

/* Instantiate the bit-depth-templated pixel primitives for 9-, 10- and
 * 8-bit samples.  The 8-bit instantiation is last and deliberately leaves
 * BIT_DEPTH defined as 8 for the remainder of this file. */
#define BIT_DEPTH 9
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"


/* SWAR constants: every byte of the machine word is 0x7f / 0x80. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00060
/* Scan tables: map scan order (bitstream coefficient order) to raster
 * position inside an 8x8 block. */

/* Classic zigzag scan (JPEG, MPEG-1/2/4 frame coding). */
const uint8_t ff_zigzag_direct[64] = {
    0, 1, 8, 16, 9, 2, 3, 10,
    17, 24, 32, 25, 18, 11, 4, 5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13, 6, 7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};



/* Zigzag scan variant for interlaced (2-4-8) DCT blocks. */
const uint8_t ff_zigzag248_direct[64] = {
    0, 8, 1, 9, 16, 24, 2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26, 3, 11, 4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28, 5, 13, 6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30, 7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};


/* Inverse zigzag table, 16-byte aligned for SIMD use; filled at init time. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];

/* Alternate horizontal scan (MPEG-2 intra_vlc / alternate scan modes). */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0, 1, 2, 3, 8, 9, 16, 17,
    10, 11, 4, 5, 6, 7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate vertical scan (commonly used for interlaced material). */
const uint8_t ff_alternate_vertical_scan[64] = {
    0, 8, 16, 24, 1, 9, 2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18, 3, 11, 4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28, 5, 13, 6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30, 7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};


/* Coefficient permutation used by the simple/MMX IDCT. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Within-row coefficient order expected by the SSE2 IDCT. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00123
00124 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00125 int i;
00126 int end;
00127
00128 st->scantable= src_scantable;
00129
00130 for(i=0; i<64; i++){
00131 int j;
00132 j = src_scantable[i];
00133 st->permutated[i] = permutation[j];
00134 #if ARCH_PPC
00135 st->inverse[j] = i;
00136 #endif
00137 }
00138
00139 end=-1;
00140 for(i=0; i<64; i++){
00141 int j;
00142 j = st->permutated[i];
00143 if(j>end) end=j;
00144 st->raster_end[i]= end;
00145 }
00146 }
00147
00148 void ff_init_scantable_permutation(uint8_t *idct_permutation,
00149 int idct_permutation_type)
00150 {
00151 int i;
00152
00153 switch(idct_permutation_type){
00154 case FF_NO_IDCT_PERM:
00155 for(i=0; i<64; i++)
00156 idct_permutation[i]= i;
00157 break;
00158 case FF_LIBMPEG2_IDCT_PERM:
00159 for(i=0; i<64; i++)
00160 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00161 break;
00162 case FF_SIMPLE_IDCT_PERM:
00163 for(i=0; i<64; i++)
00164 idct_permutation[i]= simple_mmx_permutation[i];
00165 break;
00166 case FF_TRANSPOSE_IDCT_PERM:
00167 for(i=0; i<64; i++)
00168 idct_permutation[i]= ((i&7)<<3) | (i>>3);
00169 break;
00170 case FF_PARTTRANS_IDCT_PERM:
00171 for(i=0; i<64; i++)
00172 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
00173 break;
00174 case FF_SSE2_IDCT_PERM:
00175 for(i=0; i<64; i++)
00176 idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
00177 break;
00178 default:
00179 av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
00180 }
00181 }
00182
/* Sum of all 256 pixel values of a 16x16 block; line_size is the row stride. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
00204
/* Sum of squares of all pixels of a 16x16 block (L2 norm without the square
 * root), using the ff_squareTbl lookup (biased by 256 so signed differences
 * can share the table elsewhere).  The enabled path loads 8 pixels at a time
 * through a word pointer; byte order does not matter since every byte is
 * summed.
 * NOTE(review): the casted uint32_t/uint64_t loads technically violate
 * strict-aliasing/alignment rules — long-standing hot-path code, verify if
 * porting to a stricter compiler. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* reference byte-at-a-time version, kept disabled */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;  /* advance to the start of the next row */
    }
    return s;
}
00252
/* Byte-swap w 32-bit words from src into dst (element-wise, so dst may
 * alias src). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for (i = 0; i < w; i++)
        dst[i] = av_bswap32(src[i]);
}
00270
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00276
00277 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00278 {
00279 int s, i;
00280 uint32_t *sq = ff_squareTbl + 256;
00281
00282 s = 0;
00283 for (i = 0; i < h; i++) {
00284 s += sq[pix1[0] - pix2[0]];
00285 s += sq[pix1[1] - pix2[1]];
00286 s += sq[pix1[2] - pix2[2]];
00287 s += sq[pix1[3] - pix2[3]];
00288 pix1 += line_size;
00289 pix2 += line_size;
00290 }
00291 return s;
00292 }
00293
00294 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00295 {
00296 int s, i;
00297 uint32_t *sq = ff_squareTbl + 256;
00298
00299 s = 0;
00300 for (i = 0; i < h; i++) {
00301 s += sq[pix1[0] - pix2[0]];
00302 s += sq[pix1[1] - pix2[1]];
00303 s += sq[pix1[2] - pix2[2]];
00304 s += sq[pix1[3] - pix2[3]];
00305 s += sq[pix1[4] - pix2[4]];
00306 s += sq[pix1[5] - pix2[5]];
00307 s += sq[pix1[6] - pix2[6]];
00308 s += sq[pix1[7] - pix2[7]];
00309 pix1 += line_size;
00310 pix2 += line_size;
00311 }
00312 return s;
00313 }
00314
00315 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00316 {
00317 int s, i;
00318 uint32_t *sq = ff_squareTbl + 256;
00319
00320 s = 0;
00321 for (i = 0; i < h; i++) {
00322 s += sq[pix1[ 0] - pix2[ 0]];
00323 s += sq[pix1[ 1] - pix2[ 1]];
00324 s += sq[pix1[ 2] - pix2[ 2]];
00325 s += sq[pix1[ 3] - pix2[ 3]];
00326 s += sq[pix1[ 4] - pix2[ 4]];
00327 s += sq[pix1[ 5] - pix2[ 5]];
00328 s += sq[pix1[ 6] - pix2[ 6]];
00329 s += sq[pix1[ 7] - pix2[ 7]];
00330 s += sq[pix1[ 8] - pix2[ 8]];
00331 s += sq[pix1[ 9] - pix2[ 9]];
00332 s += sq[pix1[10] - pix2[10]];
00333 s += sq[pix1[11] - pix2[11]];
00334 s += sq[pix1[12] - pix2[12]];
00335 s += sq[pix1[13] - pix2[13]];
00336 s += sq[pix1[14] - pix2[14]];
00337 s += sq[pix1[15] - pix2[15]];
00338
00339 pix1 += line_size;
00340 pix2 += line_size;
00341 }
00342 return s;
00343 }
00344
00345 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00346 const uint8_t *s2, int stride){
00347 int i;
00348
00349
00350 for(i=0;i<8;i++) {
00351 block[0] = s1[0] - s2[0];
00352 block[1] = s1[1] - s2[1];
00353 block[2] = s1[2] - s2[2];
00354 block[3] = s1[3] - s2[3];
00355 block[4] = s1[4] - s2[4];
00356 block[5] = s1[5] - s2[5];
00357 block[6] = s1[6] - s2[6];
00358 block[7] = s1[7] - s2[7];
00359 s1 += stride;
00360 s2 += stride;
00361 block += 8;
00362 }
00363 }
00364
00365
00366 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00367 int line_size)
00368 {
00369 int i;
00370
00371
00372 for(i=0;i<8;i++) {
00373 pixels[0] = av_clip_uint8(block[0]);
00374 pixels[1] = av_clip_uint8(block[1]);
00375 pixels[2] = av_clip_uint8(block[2]);
00376 pixels[3] = av_clip_uint8(block[3]);
00377 pixels[4] = av_clip_uint8(block[4]);
00378 pixels[5] = av_clip_uint8(block[5]);
00379 pixels[6] = av_clip_uint8(block[6]);
00380 pixels[7] = av_clip_uint8(block[7]);
00381
00382 pixels += line_size;
00383 block += 8;
00384 }
00385 }
00386
00387 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00388 int line_size)
00389 {
00390 int i;
00391
00392
00393 for(i=0;i<4;i++) {
00394 pixels[0] = av_clip_uint8(block[0]);
00395 pixels[1] = av_clip_uint8(block[1]);
00396 pixels[2] = av_clip_uint8(block[2]);
00397 pixels[3] = av_clip_uint8(block[3]);
00398
00399 pixels += line_size;
00400 block += 8;
00401 }
00402 }
00403
00404 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00405 int line_size)
00406 {
00407 int i;
00408
00409
00410 for(i=0;i<2;i++) {
00411 pixels[0] = av_clip_uint8(block[0]);
00412 pixels[1] = av_clip_uint8(block[1]);
00413
00414 pixels += line_size;
00415 block += 8;
00416 }
00417 }
00418
00419 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00420 uint8_t *restrict pixels,
00421 int line_size)
00422 {
00423 int i, j;
00424
00425 for (i = 0; i < 8; i++) {
00426 for (j = 0; j < 8; j++) {
00427 if (*block < -128)
00428 *pixels = 0;
00429 else if (*block > 127)
00430 *pixels = 255;
00431 else
00432 *pixels = (uint8_t)(*block + 128);
00433 block++;
00434 pixels++;
00435 }
00436 pixels += (line_size - 8);
00437 }
00438 }
00439
00440 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00441 int line_size)
00442 {
00443 int i;
00444
00445
00446 for(i=0;i<8;i++) {
00447 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00448 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00449 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00450 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00451 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
00452 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
00453 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
00454 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
00455 pixels += line_size;
00456 block += 8;
00457 }
00458 }
00459
00460 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00461 int line_size)
00462 {
00463 int i;
00464
00465
00466 for(i=0;i<4;i++) {
00467 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00468 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00469 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
00470 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
00471 pixels += line_size;
00472 block += 8;
00473 }
00474 }
00475
00476 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00477 int line_size)
00478 {
00479 int i;
00480
00481
00482 for(i=0;i<2;i++) {
00483 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
00484 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
00485 pixels += line_size;
00486 block += 8;
00487 }
00488 }
00489
00490 static int sum_abs_dctelem_c(DCTELEM *block)
00491 {
00492 int sum=0, i;
00493 for(i=0; i<64; i++)
00494 sum+= FFABS(block[i]);
00495 return sum;
00496 }
00497
/* Fill h rows of 16 bytes each with a constant value; line_size is the
 * row stride. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00507
/* Fill h rows of 8 bytes each with a constant value; line_size is the
 * row stride. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00517
/* Round-to-nearest averages of 2 and 4 pixel values.
 * Arguments are fully parenthesized to avoid operator-precedence surprises
 * when the macros are invoked with compound expressions. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
00520
/* 1/16-pel bilinear interpolation of an 8-pixel-wide block (GMC with a
 * single motion vector).  x16/y16 are the fractional positions in 1/16ths;
 * the four weights are the standard bilinear products summing to 256. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B =       x16  * (16 - y16);
    const int C = (16 - x16) *       y16;
    const int D =       x16  *       y16;
    int y, x;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]          + B * src[x + 1] +
                      C * src[stride + x] + D * src[stride + x + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00543
/* Global motion compensation for one 8-pixel-wide stripe of height h.
 * (ox,oy) is the fixed-point start position, (dxx,dxy,dyx,dyy) the affine
 * per-pixel/per-line increments, shift the sub-pel precision, r the rounder
 * and width/height the source dimensions used for edge clipping.
 * Pixels whose source position falls outside the image are clamped to the
 * nearest valid row/column (with the corresponding interpolation taps
 * collapsed). */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;   /* one full pel in sub-pel units */

    /* convert dimensions to the largest valid coordinate */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* split the fixed-point position into integer pel and
               sub-pel fraction */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 taps */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                           + src[index +1]* frac_x )*(s-frac_y)
                                       + ( src[index+stride ]*(s-frac_x)
                                           + src[index+stride+1]* frac_x )* frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp row, interpolate only in x */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                           + src[index +1]* frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp column, interpolate only in y */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
                                           + src[index+stride ]* frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* both outside: nearest valid pixel, no interpolation */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        /* advance the start position to the next destination line */
        ox += dxy;
        oy += dyy;
    }
}
00601
/* Thirdpel MC, full-pel position: plain copy, dispatched on block width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00610
/* Thirdpel MC, 1/3 horizontal: out = (2*a + b)/3, computed as
 * (683*(2a + b + 1)) >> 11  (683/2048 ~ 1/3). */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(2*s[x] + s[x+1] + 1)) >> 11;
    }
}
00621
/* Thirdpel MC, 2/3 horizontal: out = (a + 2*b)/3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(s[x] + 2*s[x+1] + 1)) >> 11;
    }
}
00632
/* Thirdpel MC, 1/3 vertical: out = (2*a + below)/3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(2*s[x] + s[x+stride] + 1)) >> 11;
    }
}
00643
/* Thirdpel MC, (1/3,1/3): bilinear 4-tap with weights 4/3/3/2 out of 12,
 * computed as (2731*sum) >> 15  (2731/32768 ~ 1/12). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(4*s[x] + 3*s[x+1] + 3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15;
    }
}
00654
/* Thirdpel MC, (1/3,2/3): bilinear 4-tap with weights 3/2/4/3 out of 12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(3*s[x] + 2*s[x+1] + 4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
00665
/* Thirdpel MC, 2/3 vertical: out = (a + 2*below)/3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(s[x] + 2*s[x+stride] + 1)) >> 11;
    }
}
00676
/* Thirdpel MC, (2/3,1/3): bilinear 4-tap with weights 3/4/2/3 out of 12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(3*s[x] + 4*s[x+1] + 2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
00687
/* Thirdpel MC, (2/3,2/3): bilinear 4-tap with weights 2/3/3/4 out of 12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(2*s[x] + 3*s[x+1] + 3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15;
    }
}
00698
/* Thirdpel MC with averaging, full-pel position: dispatched on block width. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00707
/* Thirdpel MC with averaging, 1/3 horizontal: rounded average of dst and
 * the interpolated value (2*a + b)/3. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(2*s[x] + s[x+1] + 1)) >> 11) + 1) >> 1;
    }
}
00718
/* Thirdpel MC with averaging, 2/3 horizontal. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(s[x] + 2*s[x+1] + 1)) >> 11) + 1) >> 1;
    }
}
00729
/* Thirdpel MC with averaging, 1/3 vertical. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(2*s[x] + s[x+stride] + 1)) >> 11) + 1) >> 1;
    }
}
00740
/* Thirdpel MC with averaging, (1/3,1/3): weights 4/3/3/2 out of 12. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(4*s[x] + 3*s[x+1] + 3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
00751
/* Thirdpel MC with averaging, (1/3,2/3): weights 3/2/4/3 out of 12. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(3*s[x] + 2*s[x+1] + 4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
00762
/* Thirdpel MC with averaging, 2/3 vertical. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(s[x] + 2*s[x+stride] + 1)) >> 11) + 1) >> 1;
    }
}
00773
/* Thirdpel MC with averaging, (2/3,1/3): weights 3/4/2/3 out of 12. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(3*s[x] + 4*s[x+1] + 2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
00784
/* Thirdpel MC with averaging, (2/3,2/3): weights 2/3/3/4 out of 12. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(2*s[x] + 3*s[x+1] + 3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
00795
00796 #define QPEL_MC(r, OPNAME, RND, OP) \
00797 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00798 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00799 int i;\
00800 for(i=0; i<h; i++)\
00801 {\
00802 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00803 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00804 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00805 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00806 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00807 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00808 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00809 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00810 dst+=dstStride;\
00811 src+=srcStride;\
00812 }\
00813 }\
00814 \
00815 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00816 const int w=8;\
00817 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00818 int i;\
00819 for(i=0; i<w; i++)\
00820 {\
00821 const int src0= src[0*srcStride];\
00822 const int src1= src[1*srcStride];\
00823 const int src2= src[2*srcStride];\
00824 const int src3= src[3*srcStride];\
00825 const int src4= src[4*srcStride];\
00826 const int src5= src[5*srcStride];\
00827 const int src6= src[6*srcStride];\
00828 const int src7= src[7*srcStride];\
00829 const int src8= src[8*srcStride];\
00830 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00831 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00832 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00833 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00834 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00835 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00836 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00837 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00838 dst++;\
00839 src++;\
00840 }\
00841 }\
00842 \
00843 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00844 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00845 int i;\
00846 \
00847 for(i=0; i<h; i++)\
00848 {\
00849 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00850 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00851 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00852 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00853 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00854 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00855 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00856 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00857 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00858 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00859 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00860 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00861 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00862 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00863 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00864 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00865 dst+=dstStride;\
00866 src+=srcStride;\
00867 }\
00868 }\
00869 \
00870 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00871 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00872 int i;\
00873 const int w=16;\
00874 for(i=0; i<w; i++)\
00875 {\
00876 const int src0= src[0*srcStride];\
00877 const int src1= src[1*srcStride];\
00878 const int src2= src[2*srcStride];\
00879 const int src3= src[3*srcStride];\
00880 const int src4= src[4*srcStride];\
00881 const int src5= src[5*srcStride];\
00882 const int src6= src[6*srcStride];\
00883 const int src7= src[7*srcStride];\
00884 const int src8= src[8*srcStride];\
00885 const int src9= src[9*srcStride];\
00886 const int src10= src[10*srcStride];\
00887 const int src11= src[11*srcStride];\
00888 const int src12= src[12*srcStride];\
00889 const int src13= src[13*srcStride];\
00890 const int src14= src[14*srcStride];\
00891 const int src15= src[15*srcStride];\
00892 const int src16= src[16*srcStride];\
00893 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00894 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00895 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00896 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00897 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00898 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00899 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00900 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00901 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00902 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00903 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00904 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00905 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00906 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00907 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00908 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00909 dst++;\
00910 src++;\
00911 }\
00912 }\
00913 \
00914 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00915 uint8_t half[64];\
00916 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00917 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00918 }\
00919 \
00920 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00921 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00922 }\
00923 \
00924 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00925 uint8_t half[64];\
00926 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00927 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00928 }\
00929 \
00930 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00931 uint8_t full[16*9];\
00932 uint8_t half[64];\
00933 copy_block9(full, src, 16, stride, 9);\
00934 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00935 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00936 }\
00937 \
00938 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00939 uint8_t full[16*9];\
00940 copy_block9(full, src, 16, stride, 9);\
00941 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00942 }\
00943 \
00944 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00945 uint8_t full[16*9];\
00946 uint8_t half[64];\
00947 copy_block9(full, src, 16, stride, 9);\
00948 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00949 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00950 }\
00951 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00952 uint8_t full[16*9];\
00953 uint8_t halfH[72];\
00954 uint8_t halfV[64];\
00955 uint8_t halfHV[64];\
00956 copy_block9(full, src, 16, stride, 9);\
00957 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00958 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
00959 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00960 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00961 }\
00962 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
00963 uint8_t full[16*9];\
00964 uint8_t halfH[72];\
00965 uint8_t halfHV[64];\
00966 copy_block9(full, src, 16, stride, 9);\
00967 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00968 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
00969 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00970 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00971 }\
00972 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
00973 uint8_t full[16*9];\
00974 uint8_t halfH[72];\
00975 uint8_t halfV[64];\
00976 uint8_t halfHV[64];\
00977 copy_block9(full, src, 16, stride, 9);\
00978 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00979 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
00980 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00981 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
00982 }\
00983 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
00984 uint8_t full[16*9];\
00985 uint8_t halfH[72];\
00986 uint8_t halfHV[64];\
00987 copy_block9(full, src, 16, stride, 9);\
00988 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00989 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
00990 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00991 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
00992 }\
00993 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
00994 uint8_t full[16*9];\
00995 uint8_t halfH[72];\
00996 uint8_t halfV[64];\
00997 uint8_t halfHV[64];\
00998 copy_block9(full, src, 16, stride, 9);\
00999 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01000 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01001 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01002 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01003 }\
01004 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01005 uint8_t full[16*9];\
01006 uint8_t halfH[72];\
01007 uint8_t halfHV[64];\
01008 copy_block9(full, src, 16, stride, 9);\
01009 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01010 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01011 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01012 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01013 }\
01014 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01015 uint8_t full[16*9];\
01016 uint8_t halfH[72];\
01017 uint8_t halfV[64];\
01018 uint8_t halfHV[64];\
01019 copy_block9(full, src, 16, stride, 9);\
01020 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01021 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01022 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01023 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01024 }\
01025 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01026 uint8_t full[16*9];\
01027 uint8_t halfH[72];\
01028 uint8_t halfHV[64];\
01029 copy_block9(full, src, 16, stride, 9);\
01030 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01031 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01032 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01033 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01034 }\
01035 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01036 uint8_t halfH[72];\
01037 uint8_t halfHV[64];\
01038 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01039 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01040 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01041 }\
01042 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01043 uint8_t halfH[72];\
01044 uint8_t halfHV[64];\
01045 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01046 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01047 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01048 }\
01049 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01050 uint8_t full[16*9];\
01051 uint8_t halfH[72];\
01052 uint8_t halfV[64];\
01053 uint8_t halfHV[64];\
01054 copy_block9(full, src, 16, stride, 9);\
01055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01056 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01057 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01058 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01059 }\
01060 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01061 uint8_t full[16*9];\
01062 uint8_t halfH[72];\
01063 copy_block9(full, src, 16, stride, 9);\
01064 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01065 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01066 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01067 }\
01068 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01069 uint8_t full[16*9];\
01070 uint8_t halfH[72];\
01071 uint8_t halfV[64];\
01072 uint8_t halfHV[64];\
01073 copy_block9(full, src, 16, stride, 9);\
01074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01075 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01076 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01077 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01078 }\
01079 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01080 uint8_t full[16*9];\
01081 uint8_t halfH[72];\
01082 copy_block9(full, src, 16, stride, 9);\
01083 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01084 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01085 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01086 }\
01087 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01088 uint8_t halfH[72];\
01089 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01090 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01091 }\
01092 \
01093 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01094 uint8_t half[256];\
01095 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01096 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01097 }\
01098 \
01099 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01100 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01101 }\
01102 \
01103 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01104 uint8_t half[256];\
01105 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01106 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01107 }\
01108 \
01109 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01110 uint8_t full[24*17];\
01111 uint8_t half[256];\
01112 copy_block17(full, src, 24, stride, 17);\
01113 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01114 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01115 }\
01116 \
01117 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01118 uint8_t full[24*17];\
01119 copy_block17(full, src, 24, stride, 17);\
01120 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01121 }\
01122 \
01123 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01124 uint8_t full[24*17];\
01125 uint8_t half[256];\
01126 copy_block17(full, src, 24, stride, 17);\
01127 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01128 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01129 }\
01130 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01131 uint8_t full[24*17];\
01132 uint8_t halfH[272];\
01133 uint8_t halfV[256];\
01134 uint8_t halfHV[256];\
01135 copy_block17(full, src, 24, stride, 17);\
01136 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01137 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01138 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01139 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01140 }\
01141 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01142 uint8_t full[24*17];\
01143 uint8_t halfH[272];\
01144 uint8_t halfHV[256];\
01145 copy_block17(full, src, 24, stride, 17);\
01146 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01147 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01148 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01149 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01150 }\
01151 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01152 uint8_t full[24*17];\
01153 uint8_t halfH[272];\
01154 uint8_t halfV[256];\
01155 uint8_t halfHV[256];\
01156 copy_block17(full, src, 24, stride, 17);\
01157 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01158 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01159 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01160 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01161 }\
01162 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01163 uint8_t full[24*17];\
01164 uint8_t halfH[272];\
01165 uint8_t halfHV[256];\
01166 copy_block17(full, src, 24, stride, 17);\
01167 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01168 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01169 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01170 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01171 }\
01172 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01173 uint8_t full[24*17];\
01174 uint8_t halfH[272];\
01175 uint8_t halfV[256];\
01176 uint8_t halfHV[256];\
01177 copy_block17(full, src, 24, stride, 17);\
01178 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01179 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01180 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01181 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01182 }\
01183 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01184 uint8_t full[24*17];\
01185 uint8_t halfH[272];\
01186 uint8_t halfHV[256];\
01187 copy_block17(full, src, 24, stride, 17);\
01188 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01189 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01190 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01191 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01192 }\
01193 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01194 uint8_t full[24*17];\
01195 uint8_t halfH[272];\
01196 uint8_t halfV[256];\
01197 uint8_t halfHV[256];\
01198 copy_block17(full, src, 24, stride, 17);\
01199 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01200 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01201 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01202 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01203 }\
01204 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01205 uint8_t full[24*17];\
01206 uint8_t halfH[272];\
01207 uint8_t halfHV[256];\
01208 copy_block17(full, src, 24, stride, 17);\
01209 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01210 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01211 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01212 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01213 }\
01214 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01215 uint8_t halfH[272];\
01216 uint8_t halfHV[256];\
01217 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01218 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01219 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01220 }\
01221 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01222 uint8_t halfH[272];\
01223 uint8_t halfHV[256];\
01224 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01225 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01226 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01227 }\
01228 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01229 uint8_t full[24*17];\
01230 uint8_t halfH[272];\
01231 uint8_t halfV[256];\
01232 uint8_t halfHV[256];\
01233 copy_block17(full, src, 24, stride, 17);\
01234 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01235 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01236 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01237 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01238 }\
01239 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01240 uint8_t full[24*17];\
01241 uint8_t halfH[272];\
01242 copy_block17(full, src, 24, stride, 17);\
01243 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01244 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01245 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01246 }\
01247 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01248 uint8_t full[24*17];\
01249 uint8_t halfH[272];\
01250 uint8_t halfV[256];\
01251 uint8_t halfHV[256];\
01252 copy_block17(full, src, 24, stride, 17);\
01253 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01254 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01255 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01256 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01257 }\
01258 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01259 uint8_t full[24*17];\
01260 uint8_t halfH[272];\
01261 copy_block17(full, src, 24, stride, 17);\
01262 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01263 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01264 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01265 }\
01266 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01267 uint8_t halfH[272];\
01268 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01269 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01270 }
01271
01272 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
01273 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
01274 #define op_put(a, b) a = cm[((b) + 16)>>5]
01275 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
01276
01277 QPEL_MC(0, put_ , _ , op_put)
01278 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
01279 QPEL_MC(0, avg_ , _ , op_avg)
01280
01281 #undef op_avg
01282 #undef op_avg_no_rnd
01283 #undef op_put
01284 #undef op_put_no_rnd
01285
01286 #define put_qpel8_mc00_c ff_put_pixels8x8_c
01287 #define avg_qpel8_mc00_c ff_avg_pixels8x8_c
01288 #define put_qpel16_mc00_c ff_put_pixels16x16_c
01289 #define avg_qpel16_mc00_c ff_avg_pixels16x16_c
01290 #define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
01291 #define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
01292
01293 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01294 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01295 int i;
01296
01297 for(i=0; i<h; i++){
01298 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01299 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01300 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01301 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01302 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01303 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01304 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01305 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01306 dst+=dstStride;
01307 src+=srcStride;
01308 }
01309 }
01310
#if CONFIG_RV40_DECODER
/* RV40 (3,3) sub-pel positions: each wrapper simply delegates to the
 * corresponding 8-bit 2x2 pixel-averaging (xy2) helper for its block size. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01325
01326 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01327 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01328 int i;
01329
01330 for(i=0; i<w; i++){
01331 const int src_1= src[ -srcStride];
01332 const int src0 = src[0 ];
01333 const int src1 = src[ srcStride];
01334 const int src2 = src[2*srcStride];
01335 const int src3 = src[3*srcStride];
01336 const int src4 = src[4*srcStride];
01337 const int src5 = src[5*srcStride];
01338 const int src6 = src[6*srcStride];
01339 const int src7 = src[7*srcStride];
01340 const int src8 = src[8*srcStride];
01341 const int src9 = src[9*srcStride];
01342 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01343 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01344 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01345 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01346 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01347 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01348 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01349 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01350 src++;
01351 dst++;
01352 }
01353 }
01354
01355 static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
01356 uint8_t half[64];
01357 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
01358 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
01359 }
01360
/* WMV2 mspel: apply only the horizontal lowpass filter, straight into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01364
01365 static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
01366 uint8_t half[64];
01367 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
01368 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
01369 }
01370
/* WMV2 mspel: apply only the vertical lowpass filter, straight into dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01374
01375 static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
01376 uint8_t halfH[88];
01377 uint8_t halfV[64];
01378 uint8_t halfHV[64];
01379 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
01380 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
01381 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
01382 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
01383 }
01384 static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
01385 uint8_t halfH[88];
01386 uint8_t halfV[64];
01387 uint8_t halfHV[64];
01388 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
01389 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
01390 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
01391 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
01392 }
01393 static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
01394 uint8_t halfH[88];
01395 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
01396 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
01397 }
01398
/**
 * H.263 deblocking filter for a horizontal block edge: filters 8 pixel
 * columns across the boundary, modifying two rows on each side
 * (src points at the first row below the edge).
 * The maximum correction is taken from the strength table indexed by the
 * quantizer scale.  Compiled to a no-op unless an H.263 codec is enabled.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];   /* two rows above the edge */
        int p1= src[x-1*stride];   /* row just above the edge */
        int p2= src[x+0*stride];   /* row just below the edge */
        int p3= src[x+1*stride];   /* two rows below the edge */
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;   /* signed edge-step measure */

        /* Piecewise-linear clamp of the correction d1: proportional for
         * small |d|, ramping back to zero once |d| passes the strength, so
         * genuine image edges (large d) are left untouched. */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* Clamp to 0..255: for values in -256..511, bit 8 set means out of
         * range; ~(v>>31) yields 0 for negatives and -1 (stored as 255 in
         * the uint8_t) for positive overflow. */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        /* Secondary, weaker correction of the outer pixels, limited to
         * half the magnitude of the inner correction. */
        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
    }
}
01435
/**
 * H.263 deblocking filter for a vertical block edge: filters 8 pixel rows
 * across the boundary, modifying two columns on each side
 * (src points at the first column right of the edge).
 * Same algorithm as h263_v_loop_filter_c, transposed.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];   /* two columns left of the edge */
        int p1= src[y*stride-1];   /* column just left of the edge */
        int p2= src[y*stride+0];   /* column just right of the edge */
        int p3= src[y*stride+1];   /* two columns right of the edge */
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;   /* signed edge-step measure */

        /* Piecewise-linear clamp: proportional for small |d|, ramping back
         * to zero past the strength threshold, preserving real edges. */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* Branchless-style clamp to 0..255; see h263_v_loop_filter_c. */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        /* Outer-pixel correction, limited to half the inner correction. */
        ad1= FFABS(d1)>>1;

        d2= av_clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
    }
}
01472
/**
 * H.261 in-loop filter: separable [1 2 1] smoothing of one 8x8 block,
 * with the border rows/columns only filtered in one direction.
 * temp[] holds the vertically filtered block, scaled by 4.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    int x,y,xy,yz;
    int temp[64];

    /* Vertical pass: top and bottom rows are passed through (times 4 to
     * match the [1 2 1] gain of the interior rows). */
    for(x=0; x<8; x++){
        temp[x      ] = 4*src[x           ];
        temp[x + 7*8] = 4*src[x + 7*stride];
    }
    for(y=1; y<7; y++){
        for(x=0; x<8; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
        }
    }

    /* Horizontal pass: edge columns get only the vertical result (rounded,
     * >>2 undoes the x4 gain); interior pixels get the full separable
     * [1 2 1]x[1 2 1] kernel (rounded, >>4). */
    for(y=0; y<8; y++){
        src[  y*stride] = (temp[  y*8] + 2)>>2;
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
        for(x=1; x<7; x++){
            xy = y * stride + x;
            yz = y * 8 + x;
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
        }
    }
}
01499
01500 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01501 {
01502 int s, i;
01503
01504 s = 0;
01505 for(i=0;i<h;i++) {
01506 s += abs(pix1[0] - pix2[0]);
01507 s += abs(pix1[1] - pix2[1]);
01508 s += abs(pix1[2] - pix2[2]);
01509 s += abs(pix1[3] - pix2[3]);
01510 s += abs(pix1[4] - pix2[4]);
01511 s += abs(pix1[5] - pix2[5]);
01512 s += abs(pix1[6] - pix2[6]);
01513 s += abs(pix1[7] - pix2[7]);
01514 s += abs(pix1[8] - pix2[8]);
01515 s += abs(pix1[9] - pix2[9]);
01516 s += abs(pix1[10] - pix2[10]);
01517 s += abs(pix1[11] - pix2[11]);
01518 s += abs(pix1[12] - pix2[12]);
01519 s += abs(pix1[13] - pix2[13]);
01520 s += abs(pix1[14] - pix2[14]);
01521 s += abs(pix1[15] - pix2[15]);
01522 pix1 += line_size;
01523 pix2 += line_size;
01524 }
01525 return s;
01526 }
01527
01528 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01529 {
01530 int s, i;
01531
01532 s = 0;
01533 for(i=0;i<h;i++) {
01534 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01535 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01536 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01537 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01538 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01539 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01540 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01541 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01542 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
01543 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
01544 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
01545 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
01546 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
01547 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
01548 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
01549 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
01550 pix1 += line_size;
01551 pix2 += line_size;
01552 }
01553 return s;
01554 }
01555
01556 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01557 {
01558 int s, i;
01559 uint8_t *pix3 = pix2 + line_size;
01560
01561 s = 0;
01562 for(i=0;i<h;i++) {
01563 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01564 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01565 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01566 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01567 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01568 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01569 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01570 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01571 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
01572 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
01573 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
01574 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
01575 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
01576 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
01577 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
01578 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
01579 pix1 += line_size;
01580 pix2 += line_size;
01581 pix3 += line_size;
01582 }
01583 return s;
01584 }
01585
01586 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01587 {
01588 int s, i;
01589 uint8_t *pix3 = pix2 + line_size;
01590
01591 s = 0;
01592 for(i=0;i<h;i++) {
01593 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01594 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01595 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01596 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01597 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01598 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01599 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01600 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01601 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
01602 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
01603 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
01604 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
01605 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
01606 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
01607 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
01608 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
01609 pix1 += line_size;
01610 pix2 += line_size;
01611 pix3 += line_size;
01612 }
01613 return s;
01614 }
01615
01616 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01617 {
01618 int s, i;
01619
01620 s = 0;
01621 for(i=0;i<h;i++) {
01622 s += abs(pix1[0] - pix2[0]);
01623 s += abs(pix1[1] - pix2[1]);
01624 s += abs(pix1[2] - pix2[2]);
01625 s += abs(pix1[3] - pix2[3]);
01626 s += abs(pix1[4] - pix2[4]);
01627 s += abs(pix1[5] - pix2[5]);
01628 s += abs(pix1[6] - pix2[6]);
01629 s += abs(pix1[7] - pix2[7]);
01630 pix1 += line_size;
01631 pix2 += line_size;
01632 }
01633 return s;
01634 }
01635
01636 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01637 {
01638 int s, i;
01639
01640 s = 0;
01641 for(i=0;i<h;i++) {
01642 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01643 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01644 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01645 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01646 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01647 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01648 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01649 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01650 pix1 += line_size;
01651 pix2 += line_size;
01652 }
01653 return s;
01654 }
01655
01656 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01657 {
01658 int s, i;
01659 uint8_t *pix3 = pix2 + line_size;
01660
01661 s = 0;
01662 for(i=0;i<h;i++) {
01663 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01664 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01665 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01666 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01667 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01668 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01669 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01670 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01671 pix1 += line_size;
01672 pix2 += line_size;
01673 pix3 += line_size;
01674 }
01675 return s;
01676 }
01677
01678 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01679 {
01680 int s, i;
01681 uint8_t *pix3 = pix2 + line_size;
01682
01683 s = 0;
01684 for(i=0;i<h;i++) {
01685 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01686 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01687 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01688 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01689 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01690 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01691 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01692 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01693 pix1 += line_size;
01694 pix2 += line_size;
01695 pix3 += line_size;
01696 }
01697 return s;
01698 }
01699
/**
 * Noise-preserving SSE ("NSSE") comparison for a 16-pixel-wide block.
 * score1 is the plain sum of squared differences; score2 accumulates the
 * difference in local 2x2 gradient texture between the two blocks, so
 * distortion that matches the source texture is penalized differently
 * from distortion that changes it.
 * @param v MpegEncContext, used for avctx->nsse_weight; may be NULL, in
 *          which case a fixed weight of 8 is applied.
 */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient term needs the next row; skip on last row */
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01725
/**
 * Noise-preserving SSE ("NSSE") comparison for an 8-pixel-wide block.
 * Same algorithm as nsse16_c with halved width.
 * @param v MpegEncContext, used for avctx->nsse_weight; may be NULL, in
 *          which case a fixed weight of 8 is applied.
 */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient term needs the next row; skip on last row */
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01751
/**
 * Estimate the weighted squared error that would remain after adding
 * basis[] scaled by 'scale' to the 8x8 residual rem[].
 * Each basis contribution is rescaled (with rounding) from BASIS_SHIFT to
 * RECON_SHIFT precision before being combined with the residual and
 * weighted.  Returns the accumulated cost, downshifted by 2.
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512); /* weighted product below must not overflow */

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
01766
/* Add basis[] scaled by 'scale' to the 8x8 residual rem[], rescaling each
 * term (with rounding) from BASIS_SHIFT to RECON_SHIFT precision. */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}
01774
01783 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01784 {
01785 int i;
01786 DCTELEM temp[64];
01787
01788 if(last<=0) return;
01789
01790
01791 for(i=0; i<=last; i++){
01792 const int j= scantable[i];
01793 temp[j]= block[j];
01794 block[j]=0;
01795 }
01796
01797 for(i=0; i<=last; i++){
01798 const int j= scantable[i];
01799 const int perm_j= permutation[j];
01800 block[perm_j]= temp[j];
01801 }
01802 }
01803
/* Dummy comparison for FF_CMP_ZERO: every candidate gets the same cost. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}
01807
/**
 * Fill a 6-entry comparison-function table from the DSPContext family
 * selected by the low byte of type (an FF_CMP_* value).
 * Entries are zeroed first, so slots a family does not provide stay NULL.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        /* only the low byte selects the metric; higher bits are flags */
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}
01867
01868 static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
01869 long i;
01870 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01871 long a = *(long*)(src+i);
01872 long b = *(long*)(dst+i);
01873 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01874 }
01875 for(; i<w; i++)
01876 dst[i+0] += src[i+0];
01877 }
01878
01879 static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
01880 long i;
01881 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01882 long a = *(long*)(src1+i);
01883 long b = *(long*)(src2+i);
01884 *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
01885 }
01886 for(; i<w; i++)
01887 dst[i] = src1[i]+src2[i];
01888 }
01889
01890 static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
01891 long i;
01892 #if !HAVE_FAST_UNALIGNED
01893 if((long)src2 & (sizeof(long)-1)){
01894 for(i=0; i+7<w; i+=8){
01895 dst[i+0] = src1[i+0]-src2[i+0];
01896 dst[i+1] = src1[i+1]-src2[i+1];
01897 dst[i+2] = src1[i+2]-src2[i+2];
01898 dst[i+3] = src1[i+3]-src2[i+3];
01899 dst[i+4] = src1[i+4]-src2[i+4];
01900 dst[i+5] = src1[i+5]-src2[i+5];
01901 dst[i+6] = src1[i+6]-src2[i+6];
01902 dst[i+7] = src1[i+7]-src2[i+7];
01903 }
01904 }else
01905 #endif
01906 for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
01907 long a = *(long*)(src1+i);
01908 long b = *(long*)(src2+i);
01909 *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
01910 }
01911 for(; i<w; i++)
01912 dst[i+0] = src1[i+0]-src2[i+0];
01913 }
01914
/* HuffYUV median prediction (decode direction): reconstruct each pixel from
 * the median of left, top and (left+top-topleft), plus the coded residual. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t cur_left = *left;
    uint8_t top_left = *left_top;

    for (i = 0; i < w; i++) {
        cur_left = mid_pred(cur_left, src1[i],
                            (cur_left + src1[i] - top_left) & 0xFF) + diff[i];
        top_left = src1[i];
        dst[i]   = cur_left;
    }

    /* return the running state to the caller for the next row segment */
    *left     = cur_left;
    *left_top = top_left;
}
01931
/* HuffYUV median prediction (encode direction): emit the residual between
 * each source pixel and the median predictor. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t cur_left = *left;
    uint8_t top_left = *left_top;

    for (i = 0; i < w; i++) {
        const int pred = mid_pred(cur_left, src1[i],
                                  (cur_left + src1[i] - top_left) & 0xFF);
        top_left = src1[i];
        cur_left = src2[i];
        dst[i]   = cur_left - pred;
    }

    /* hand the running state back for the next row segment */
    *left     = cur_left;
    *left_top = top_left;
}
01949
/**
 * Left prediction: each output byte is the running sum of the input bytes
 * seeded with acc. The accumulator keeps full int precision; only the
 * byte stores truncate modulo 256.
 * @return the final (untruncated) accumulator value.
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for (i = 0; i < w; i++) {
        acc   += src[i];
        dst[i] = acc;
    }

    return acc;
}
01968
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left prediction over packed 32-bit BGRA pixels: each of the four channels
 * is an independent running byte sum, seeded from *red/*green/*blue/*alpha
 * and written back there when the row segment is done. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r = *red, g = *green, b = *blue, a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *sp = src + 4 * i;
        uint8_t       *dp = dst + 4 * i;

        b += sp[B];
        g += sp[G];
        r += sp[R];
        a += sp[A];

        dp[B] = b;
        dp[G] = g;
        dp[R] = r;
        dp[A] = a;
    }

    *red   = r;
    *green = g;
    *blue  = b;
    *alpha = a;
}
#undef B
#undef G
#undef R
#undef A
02009
/* One butterfly stage into two outputs: o1 = sum, o2 = difference. */
#define BUTTERFLY2(o1,o2,i1,i2) \
    o1= (i1)+(i2);\
    o2= (i1)-(i2);

/* In-place butterfly: x becomes x+y, y becomes x-y. */
#define BUTTERFLY1(x,y) \
    {\
        int a,b;\
        a= x;\
        b= y;\
        x= a+b;\
        y= a-b;\
    }

/* Final butterfly stage folded into a sum of absolute values. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02024
/**
 * 8x8 Hadamard SATD of the difference between src and dst: three butterfly
 * stages along each row of pixel differences, three more along each column,
 * then the sum of absolute transform coefficients.
 */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal pass over the row of differences */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass; the last stage is folded into the |.| accumulation */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02069
/**
 * Intra variant of the 8x8 Hadamard SATD: transforms the source pixels
 * directly (no reference block) and subtracts the DC coefficient at the
 * end so the metric ignores the block's mean value.
 */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* horizontal pass over the source row */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* vertical pass; last stage folded into the |.| accumulation */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* |temp[0] + temp[32]| is the DC term of the full transform; drop it */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02117
/**
 * DCT-domain SAD: forward-transform the pixel difference of the two 8x8
 * blocks and return the sum of absolute DCT coefficients.
 */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02128
02129 #if CONFIG_GPL
/* One 8-point integer DCT pass (H.264-style butterflies); reads through
 * SRC(x) and writes through DST(x,v), which the caller defines so the same
 * macro serves both the row and the column pass. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}
02156
/**
 * SAD in the H.264 8x8 integer-DCT domain: row pass in place, then a
 * column pass whose outputs are folded directly into an absolute sum
 * via the DST macro redefinition.
 */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

/* row pass: transform each row of the difference in place */
#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

/* column pass: coefficients go straight into the |.| accumulator */
#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
02180 #endif
02181
02182 static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
02183 MpegEncContext * const s= (MpegEncContext *)c;
02184 LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
02185 int sum=0, i;
02186
02187 assert(h==8);
02188
02189 s->dsp.diff_pixels(temp, src1, src2, stride);
02190 s->dsp.fdct(temp);
02191
02192 for(i=0; i<64; i++)
02193 sum= FFMAX(sum, FFABS(temp[i]));
02194
02195 return sum;
02196 }
02197
/**
 * Quantization-noise metric: quantize and dequantize the transformed
 * difference at the current qscale, inverse-transform it, and return the
 * squared error against the untouched copy. Measures how much error the
 * quantizer would introduce for this block.
 */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0; /* force inter quantization path */

    s->dsp.diff_pixels(temp, src1, src2, stride);

    /* keep an unquantized copy to compare against */
    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp);

    /* NOTE(review): bak holds pre-quantization coefficients while temp has
     * been round-tripped through the full transform; the sum is the
     * quantization error energy. */
    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02220
/**
 * Rate-distortion cost of coding this 8x8 block: estimates the bit cost
 * from the AC VLC length tables, reconstructs the block (dequantize +
 * IDCT-add), and returns SSE distortion plus the scaled bit cost.
 */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local copies so the reconstruction does not touch the frame */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1; /* DC is coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; /* table is biased by 256 */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* walk the scan, summing VLC lengths for (run, level) pairs */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64; /* bias into the table's index range */
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length; /* out of table range: escape code */
                run=0;
            }else
                run++;
        }
        /* the last coefficient uses the dedicated "last" table */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    /* reconstruct: dequantize and add the IDCT back onto the prediction */
    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* lambda-style weighting of the rate term against the SSE distortion */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02296
/**
 * Bit-cost metric: same VLC-length estimation as rd8x8_c but without the
 * reconstruction/distortion part — returns only the estimated bits needed
 * to code the quantized 8x8 difference block.
 */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    if (s->mb_intra) {
        start_i = 1; /* DC coded separately for intra blocks */
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; /* table biased by 256 */
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* accumulate VLC lengths over (run, level) pairs in scan order */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length; /* escape-coded coefficient */
                run=0;
            }else
                run++;
        }
        /* final coefficient via the "last" table */
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02355
/* Intra vertical SAD: sum of absolute differences between each row and the
 * row below it within a single block (no reference block). Instantiated
 * for 8- and 16-pixel-wide blocks below. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
\
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
\
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02373
/* Vertical SAD between two blocks: sums |(s1-s2) - (s1-s2 one row down)|
 * over a 16-pixel-wide block, i.e. how differently the two blocks change
 * from row to row. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            total += d < 0 ? -d : d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02388
/* Squared-value helper used by the vertical SSE metrics below. */
#define SQ(a) ((a)*(a))
/* Intra vertical SSE: sum of squared differences between each row and the
 * row below it within a single block. Instantiated for widths 8 and 16. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
\
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
\
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02407
/* Vertical SSE between two blocks: sums the square of
 * (s1-s2) - (s1-s2 one row down) over a 16-pixel-wide block. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            const int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            total += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return total;
}
02422
/* Sum of squared differences between an int8 vector and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, acc = 0;

    for (i = 0; i < size; i++) {
        const int d = pix1[i] - pix2[i];
        acc += d * d;
    }
    return acc;
}
02431
/* NOTE(review): WRAPPER8_16_SQ (defined elsewhere in this file) presumably
 * builds each 16x16 metric by applying the 8x8 function to the four
 * quadrants and summing — confirm against the macro definition. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02442
/* Elementwise float product: dst[i] = src0[i] * src1[i]. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    const float *end = src0 + len;

    while (src0 < end)
        *dst++ = *src0++ * *src1++;
}
02448
/* Elementwise product with the second operand read back-to-front:
 * dst[i] = src0[i] * src1[len-1-i]. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;

    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
02455
/* Vector multiply-add: dst[i] = src0[i] * src1[i] + src2[i]. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int i;

    for (i = 0; i < len; i++) {
        const float prod = src0[i] * src1[i];
        dst[i] = prod + src2[i];
    }
}
02461
/* Overlap-add windowing (MDCT-style): src0 and the window are walked from
 * the centre outwards via a negative index while src1 runs forwards; each
 * iteration produces one sample in each half of dst. */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int a, b;

    dst  += len;
    win  += len;
    src0 += len;

    for (a = -len, b = len - 1; a < 0; a++, b--) {
        const float x0 = src0[a];
        const float x1 = src1[b];
        const float w0 = win[a];
        const float w1 = win[b];

        dst[a] = x0 * w1 - x1 * w0;
        dst[b] = x0 * w0 + x1 * w1;
    }
}
02478
/* Scale a float vector by a scalar: dst[i] = src[i] * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] = src[n] * mul;
}
02486
/* Multiply-accumulate a scaled vector into dst: dst[i] += src[i] * mul. */
static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int n;

    for (n = 0; n < len; n++)
        dst[n] += src[n] * mul;
}
02494
/* In-place butterfly over two non-aliasing vectors:
 * v1[i] <- v1[i]+v2[i], v2[i] <- v1[i]-v2[i] (using the original values). */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int n;

    for (n = 0; n < len; n++) {
        const float diff = v1[n] - v2[n];
        v1[n] = v1[n] + v2[n];
        v2[n] = diff;
    }
}
02505
/* Butterfly with interleaved output: dst holds (sum, difference) pairs,
 * dst[2i] = src0[i]+src1[i], dst[2i+1] = src0[i]-src1[i]. */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    int n;

    for (n = 0; n < len; n++) {
        const float a = src0[n];
        const float b = src1[n];

        dst[2 * n]     = a + b;
        dst[2 * n + 1] = a - b;
    }
}
02517
/* Plain dot product of two float vectors, accumulated left to right. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    int n;

    for (n = 0; n < len; n++)
        acc += v1[n] * v2[n];

    return acc;
}
02528
/* Clip one float, given as raw IEEE-754 bits, against bounds with opposite
 * signs: values above mini (positive range, bit pattern compares like the
 * float) clamp to mini; values whose sign-flipped bits exceed maxisign
 * (i.e. below the negative bound) clamp to maxi; everything else passes. */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;
    return a;
}
02537
/**
 * Clip a float vector against bounds of opposite sign, operating on the
 * raw IEEE-754 bit patterns via clipf_c_one.
 *
 * Fix: the previous version reinterpreted the float pointers with casts
 * (*(uint32_t*)min, (uint32_t*)dst, ...), which violates C strict-aliasing
 * rules (undefined behavior); the bits are now moved through a union,
 * which is well-defined type punning in C. The loop also processes exactly
 * len elements instead of assuming len is a multiple of 8.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    union { float f; uint32_t u; } bits;
    uint32_t mini, maxi, maxisign;
    int i;

    bits.f = *min; mini = bits.u;
    bits.f = *max; maxi = bits.u;
    maxisign = maxi ^ (1U << 31);

    for (i = 0; i < len; i++) {
        bits.f = src[i];
        bits.u = clipf_c_one(bits.u, mini, maxi, maxisign);
        dst[i] = bits.f;
    }
}
/**
 * Clamp each float in src to [min, max] and store into dst.
 * When the bounds straddle zero the bit-pattern variant above is used;
 * otherwise a plain unrolled clip.
 * NOTE(review): the fallback loop steps 8 at a time, so len is presumably
 * required to be a multiple of 8 — confirm at the call sites.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
02573
/* Dot product of two int16 vectors with each partial product arithmetically
 * shifted right by `shift` before accumulation. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int32_t acc = 0;
    int i;

    for (i = 0; i < order; i++)
        acc += (v1[i] * v2[i]) >> shift;

    return acc;
}
02583
/* Compute the dot product v1.v2 while simultaneously updating
 * v1[i] += mul * v3[i] in the same pass (the product uses the OLD v1). */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int32_t acc = 0;
    int i;

    for (i = 0; i < order; i++) {
        acc   += v1[i] * v2[i];
        v1[i] += mul * v3[i];
    }
    return acc;
}
02593
/* Apply a symmetric Q15 window: sample i and its mirror len-1-i share
 * window[i]; products are rounded (+2^14) and scaled back down by 15 bits. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int i;

    for (i = 0; i < half; i++) {
        const int w = window[i];

        output[i]           = (input[i]           * w + (1 << 14)) >> 15;
        output[len - i - 1] = (input[len - i - 1] * w + (1 << 14)) >> 15;
    }
}
02606
/* Clamp int32 samples into [min, max], 8 per iteration; len is expected to
 * be a positive multiple of 8 (mirrors the unrolled SIMD versions). */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        unsigned int k;

        for (k = 0; k < 8; k++) {
            int32_t v = *src++;

            if (v < min)
                v = min;
            else if (v > max)
                v = max;
            *dst++ = v;
        }
        len -= 8;
    } while (len > 0);
}
02622
/* Fixed-point IDCT constants (rounded cos(k*pi/16) scaled by 2^11). */
#define W0 2048
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565

/* One horizontal (row) pass of the WMV2 inverse DCT, in place. */
static void wmv2_idct_row(short * b)
{
    int t1, t2;
    int a0, a1, a2, a3, a4, a5, a6, a7;

    /* odd coefficients */
    a1 = W1*b[1] + W7*b[7];
    a7 = W7*b[1] - W1*b[7];
    a5 = W5*b[5] + W3*b[3];
    a3 = W3*b[5] - W5*b[3];
    /* even coefficients */
    a2 = W2*b[2] + W6*b[6];
    a6 = W6*b[2] - W2*b[6];
    a0 = W0*b[0] + W0*b[4];
    a4 = W0*b[0] - W0*b[4];

    /* 181/256 approximates 1/sqrt(2) */
    t1 = (181*(a1 - a5 + a7 - a3) + 128) >> 8;
    t2 = (181*(a1 - a5 - a7 + a3) + 128) >> 8;

    b[0] = (a0 + a2 + a1 + a5 + (1 << 7)) >> 8;
    b[1] = (a4 + a6 + t1      + (1 << 7)) >> 8;
    b[2] = (a4 - a6 + t2      + (1 << 7)) >> 8;
    b[3] = (a0 - a2 + a7 + a3 + (1 << 7)) >> 8;
    b[4] = (a0 - a2 - a7 - a3 + (1 << 7)) >> 8;
    b[5] = (a4 - a6 - t2      + (1 << 7)) >> 8;
    b[6] = (a4 + a6 - t1      + (1 << 7)) >> 8;
    b[7] = (a0 + a2 - a1 - a5 + (1 << 7)) >> 8;
}
02658 static void wmv2_idct_col(short * b)
02659 {
02660 int s1,s2;
02661 int a0,a1,a2,a3,a4,a5,a6,a7;
02662
02663 a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
02664 a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
02665 a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
02666 a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
02667 a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
02668 a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
02669 a0 = (W0*b[8*0]+W0*b[8*4] )>>3;
02670 a4 = (W0*b[8*0]-W0*b[8*4] )>>3;
02671
02672 s1 = (181*(a1-a5+a7-a3)+128)>>8;
02673 s2 = (181*(a1-a5-a7+a3)+128)>>8;
02674
02675 b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
02676 b[8*1] = (a4+a6 +s1 + (1<<13))>>14;
02677 b[8*2] = (a4-a6 +s2 + (1<<13))>>14;
02678 b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
02679
02680 b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
02681 b[8*5] = (a4-a6 -s2 + (1<<13))>>14;
02682 b[8*6] = (a4+a6 -s1 + (1<<13))>>14;
02683 b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
02684 }
/* Full WMV2 8x8 inverse DCT: 8 row passes then 8 column passes in place. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
02695
02696
/* WMV2 IDCT + clamped store into the destination plane. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
/* WMV2 IDCT + clamped add onto the destination plane. */
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* Reference integer IDCT (jrevdct) + clamped put/add. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}

/* 4x4 reduced-resolution IDCT variants (used when avctx->lowres == 1). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}

/* 2x2 reduced-resolution IDCT variants (avctx->lowres == 2). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}

/* 1x1 (DC-only) IDCT variants (avctx->lowres == 3): (DC + 4) >> 3, clipped. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
02748
02749 static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02750
02751
02752 av_cold void dsputil_static_init(void)
02753 {
02754 int i;
02755
02756 for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
02757 for(i=0;i<MAX_NEG_CROP;i++) {
02758 ff_cropTbl[i] = 0;
02759 ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
02760 }
02761
02762 for(i=0;i<512;i++) {
02763 ff_squareTbl[i] = (i - 256) * (i - 256);
02764 }
02765
02766 for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
02767 }
02768
/**
 * Verify that the compiler honors 16-byte alignment for LOCAL_ALIGNED_16
 * stack variables (SIMD code depends on it).
 * @return 0 when properly aligned, -1 otherwise (logging once on the
 *         first failure when SIMD support is compiled in).
 */
int ff_check_alignment(void){
    static int did_fail=0;
    LOCAL_ALIGNED_16(int, aligned, [4]);

    if((intptr_t)aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to Libav developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02788
02789 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02790 {
02791 int i, j;
02792
02793 ff_check_alignment();
02794
02795 #if CONFIG_ENCODERS
02796 if (avctx->bits_per_raw_sample == 10) {
02797 c->fdct = ff_jpeg_fdct_islow_10;
02798 c->fdct248 = ff_fdct248_islow_10;
02799 } else {
02800 if(avctx->dct_algo==FF_DCT_FASTINT) {
02801 c->fdct = fdct_ifast;
02802 c->fdct248 = fdct_ifast248;
02803 }
02804 else if(avctx->dct_algo==FF_DCT_FAAN) {
02805 c->fdct = ff_faandct;
02806 c->fdct248 = ff_faandct248;
02807 }
02808 else {
02809 c->fdct = ff_jpeg_fdct_islow_8;
02810 c->fdct248 = ff_fdct248_islow_8;
02811 }
02812 }
02813 #endif //CONFIG_ENCODERS
02814
02815 if(avctx->lowres==1){
02816 c->idct_put= ff_jref_idct4_put;
02817 c->idct_add= ff_jref_idct4_add;
02818 c->idct = j_rev_dct4;
02819 c->idct_permutation_type= FF_NO_IDCT_PERM;
02820 }else if(avctx->lowres==2){
02821 c->idct_put= ff_jref_idct2_put;
02822 c->idct_add= ff_jref_idct2_add;
02823 c->idct = j_rev_dct2;
02824 c->idct_permutation_type= FF_NO_IDCT_PERM;
02825 }else if(avctx->lowres==3){
02826 c->idct_put= ff_jref_idct1_put;
02827 c->idct_add= ff_jref_idct1_add;
02828 c->idct = j_rev_dct1;
02829 c->idct_permutation_type= FF_NO_IDCT_PERM;
02830 }else{
02831 if (avctx->bits_per_raw_sample == 10) {
02832 c->idct_put = ff_simple_idct_put_10;
02833 c->idct_add = ff_simple_idct_add_10;
02834 c->idct = ff_simple_idct_10;
02835 c->idct_permutation_type = FF_NO_IDCT_PERM;
02836 } else {
02837 if(avctx->idct_algo==FF_IDCT_INT){
02838 c->idct_put= ff_jref_idct_put;
02839 c->idct_add= ff_jref_idct_add;
02840 c->idct = j_rev_dct;
02841 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02842 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02843 avctx->idct_algo==FF_IDCT_VP3){
02844 c->idct_put= ff_vp3_idct_put_c;
02845 c->idct_add= ff_vp3_idct_add_c;
02846 c->idct = ff_vp3_idct_c;
02847 c->idct_permutation_type= FF_NO_IDCT_PERM;
02848 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02849 c->idct_put= ff_wmv2_idct_put_c;
02850 c->idct_add= ff_wmv2_idct_add_c;
02851 c->idct = ff_wmv2_idct_c;
02852 c->idct_permutation_type= FF_NO_IDCT_PERM;
02853 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02854 c->idct_put= ff_faanidct_put;
02855 c->idct_add= ff_faanidct_add;
02856 c->idct = ff_faanidct;
02857 c->idct_permutation_type= FF_NO_IDCT_PERM;
02858 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02859 c->idct_put= ff_ea_idct_put_c;
02860 c->idct_permutation_type= FF_NO_IDCT_PERM;
02861 }else{
02862 c->idct_put = ff_simple_idct_put_8;
02863 c->idct_add = ff_simple_idct_add_8;
02864 c->idct = ff_simple_idct_8;
02865 c->idct_permutation_type= FF_NO_IDCT_PERM;
02866 }
02867 }
02868 }
02869
02870 c->diff_pixels = diff_pixels_c;
02871 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02872 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02873 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02874 c->sum_abs_dctelem = sum_abs_dctelem_c;
02875 c->gmc1 = gmc1_c;
02876 c->gmc = ff_gmc_c;
02877 c->pix_sum = pix_sum_c;
02878 c->pix_norm1 = pix_norm1_c;
02879
02880 c->fill_block_tab[0] = fill_block16_c;
02881 c->fill_block_tab[1] = fill_block8_c;
02882
02883
02884 c->pix_abs[0][0] = pix_abs16_c;
02885 c->pix_abs[0][1] = pix_abs16_x2_c;
02886 c->pix_abs[0][2] = pix_abs16_y2_c;
02887 c->pix_abs[0][3] = pix_abs16_xy2_c;
02888 c->pix_abs[1][0] = pix_abs8_c;
02889 c->pix_abs[1][1] = pix_abs8_x2_c;
02890 c->pix_abs[1][2] = pix_abs8_y2_c;
02891 c->pix_abs[1][3] = pix_abs8_xy2_c;
02892
02893 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02894 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02895 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02896 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02897 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02898 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02899 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02900 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02901 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02902
02903 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02904 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02905 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02906 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02907 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02908 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02909 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02910 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02911 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02912
02913 #define dspfunc(PFX, IDX, NUM) \
02914 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02915 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02916 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02917 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02918 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02919 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02920 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02921 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02922 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02923 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02924 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02925 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02926 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02927 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02928 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02929 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02930
02931 dspfunc(put_qpel, 0, 16);
02932 dspfunc(put_no_rnd_qpel, 0, 16);
02933
02934 dspfunc(avg_qpel, 0, 16);
02935
02936
02937 dspfunc(put_qpel, 1, 8);
02938 dspfunc(put_no_rnd_qpel, 1, 8);
02939
02940 dspfunc(avg_qpel, 1, 8);
02941
02942
02943 #undef dspfunc
02944
02945 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
02946 ff_mlp_init(c, avctx);
02947 #endif
02948 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
02949 ff_intrax8dsp_init(c,avctx);
02950 #endif
02951
02952 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
02953 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
02954 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
02955 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
02956 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
02957 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
02958 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
02959 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
02960
02961 #define SET_CMP_FUNC(name) \
02962 c->name[0]= name ## 16_c;\
02963 c->name[1]= name ## 8x8_c;
02964
02965 SET_CMP_FUNC(hadamard8_diff)
02966 c->hadamard8_diff[4]= hadamard8_intra16_c;
02967 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
02968 SET_CMP_FUNC(dct_sad)
02969 SET_CMP_FUNC(dct_max)
02970 #if CONFIG_GPL
02971 SET_CMP_FUNC(dct264_sad)
02972 #endif
02973 c->sad[0]= pix_abs16_c;
02974 c->sad[1]= pix_abs8_c;
02975 c->sse[0]= sse16_c;
02976 c->sse[1]= sse8_c;
02977 c->sse[2]= sse4_c;
02978 SET_CMP_FUNC(quant_psnr)
02979 SET_CMP_FUNC(rd)
02980 SET_CMP_FUNC(bit)
02981 c->vsad[0]= vsad16_c;
02982 c->vsad[4]= vsad_intra16_c;
02983 c->vsad[5]= vsad_intra8_c;
02984 c->vsse[0]= vsse16_c;
02985 c->vsse[4]= vsse_intra16_c;
02986 c->vsse[5]= vsse_intra8_c;
02987 c->nsse[0]= nsse16_c;
02988 c->nsse[1]= nsse8_c;
02989 #if CONFIG_DWT
02990 ff_dsputil_init_dwt(c);
02991 #endif
02992
02993 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
02994
02995 c->add_bytes= add_bytes_c;
02996 c->add_bytes_l2= add_bytes_l2_c;
02997 c->diff_bytes= diff_bytes_c;
02998 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
02999 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03000 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03001 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03002 c->bswap_buf= bswap_buf;
03003 c->bswap16_buf = bswap16_buf;
03004 #if CONFIG_PNG_DECODER
03005 c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
03006 #endif
03007
03008 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03009 c->h263_h_loop_filter= h263_h_loop_filter_c;
03010 c->h263_v_loop_filter= h263_v_loop_filter_c;
03011 }
03012
03013 if (CONFIG_VP3_DECODER) {
03014 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03015 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03016 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03017 }
03018
03019 c->h261_loop_filter= h261_loop_filter_c;
03020
03021 c->try_8x8basis= try_8x8basis_c;
03022 c->add_8x8basis= add_8x8basis_c;
03023
03024 #if CONFIG_VORBIS_DECODER
03025 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03026 #endif
03027 #if CONFIG_AC3_DECODER
03028 c->ac3_downmix = ff_ac3_downmix_c;
03029 #endif
03030 c->vector_fmul = vector_fmul_c;
03031 c->vector_fmul_reverse = vector_fmul_reverse_c;
03032 c->vector_fmul_add = vector_fmul_add_c;
03033 c->vector_fmul_window = vector_fmul_window_c;
03034 c->vector_clipf = vector_clipf_c;
03035 c->scalarproduct_int16 = scalarproduct_int16_c;
03036 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03037 c->apply_window_int16 = apply_window_int16_c;
03038 c->vector_clip_int32 = vector_clip_int32_c;
03039 c->scalarproduct_float = scalarproduct_float_c;
03040 c->butterflies_float = butterflies_float_c;
03041 c->butterflies_float_interleave = butterflies_float_interleave_c;
03042 c->vector_fmul_scalar = vector_fmul_scalar_c;
03043 c->vector_fmac_scalar = vector_fmac_scalar_c;
03044
03045 c->shrink[0]= av_image_copy_plane;
03046 c->shrink[1]= ff_shrink22;
03047 c->shrink[2]= ff_shrink44;
03048 c->shrink[3]= ff_shrink88;
03049
03050 c->prefetch= just_return;
03051
03052 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03053 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03054
03055 #undef FUNC
03056 #undef FUNCC
03057 #define FUNC(f, depth) f ## _ ## depth
03058 #define FUNCC(f, depth) f ## _ ## depth ## _c
03059
03060 #define dspfunc1(PFX, IDX, NUM, depth)\
03061 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03062 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03063 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03064 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03065
03066 #define dspfunc2(PFX, IDX, NUM, depth)\
03067 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03068 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03069 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03070 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03071 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03072 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03073 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03074 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03075 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03076 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03077 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03078 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03079 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03080 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03081 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03082 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03083
03084
03085 #define BIT_DEPTH_FUNCS(depth, dct)\
03086 c->get_pixels = FUNCC(get_pixels ## dct , depth);\
03087 c->draw_edges = FUNCC(draw_edges , depth);\
03088 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03089 c->clear_block = FUNCC(clear_block ## dct , depth);\
03090 c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
03091 c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
03092 c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
03093 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03094 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03095 \
03096 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03097 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03098 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03099 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03100 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03101 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03102 \
03103 dspfunc1(put , 0, 16, depth);\
03104 dspfunc1(put , 1, 8, depth);\
03105 dspfunc1(put , 2, 4, depth);\
03106 dspfunc1(put , 3, 2, depth);\
03107 dspfunc1(put_no_rnd, 0, 16, depth);\
03108 dspfunc1(put_no_rnd, 1, 8, depth);\
03109 dspfunc1(avg , 0, 16, depth);\
03110 dspfunc1(avg , 1, 8, depth);\
03111 dspfunc1(avg , 2, 4, depth);\
03112 dspfunc1(avg , 3, 2, depth);\
03113 dspfunc1(avg_no_rnd, 0, 16, depth);\
03114 dspfunc1(avg_no_rnd, 1, 8, depth);\
03115 \
03116 dspfunc2(put_h264_qpel, 0, 16, depth);\
03117 dspfunc2(put_h264_qpel, 1, 8, depth);\
03118 dspfunc2(put_h264_qpel, 2, 4, depth);\
03119 dspfunc2(put_h264_qpel, 3, 2, depth);\
03120 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03121 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03122 dspfunc2(avg_h264_qpel, 2, 4, depth);
03123
03124 switch (avctx->bits_per_raw_sample) {
03125 case 9:
03126 if (c->dct_bits == 32) {
03127 BIT_DEPTH_FUNCS(9, _32);
03128 } else {
03129 BIT_DEPTH_FUNCS(9, _16);
03130 }
03131 break;
03132 case 10:
03133 if (c->dct_bits == 32) {
03134 BIT_DEPTH_FUNCS(10, _32);
03135 } else {
03136 BIT_DEPTH_FUNCS(10, _16);
03137 }
03138 break;
03139 default:
03140 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
03141 case 8:
03142 BIT_DEPTH_FUNCS(8, _16);
03143 break;
03144 }
03145
03146
03147 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
03148 if (ARCH_ARM) dsputil_init_arm (c, avctx);
03149 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
03150 if (HAVE_VIS) dsputil_init_vis (c, avctx);
03151 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
03152 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
03153 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
03154 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
03155 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
03156
03157 for (i = 0; i < 4; i++) {
03158 for (j = 0; j < 16; j++) {
03159 if(!c->put_2tap_qpel_pixels_tab[i][j])
03160 c->put_2tap_qpel_pixels_tab[i][j] =
03161 c->put_h264_qpel_pixels_tab[i][j];
03162 if(!c->avg_2tap_qpel_pixels_tab[i][j])
03163 c->avg_2tap_qpel_pixels_tab[i][j] =
03164 c->avg_h264_qpel_pixels_tab[i][j];
03165 }
03166 }
03167
03168 ff_init_scantable_permutation(c->idct_permutation,
03169 c->idct_permutation_type);
03170 }