Libav 0.7.1
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "high_bit_depth.h"

static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN2P(dst, AV_RN2P(src));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void FUNC(copy_block4)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN4P(dst, AV_RN4P(src));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void FUNC(copy_block8)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN4P(dst                , AV_RN4P(src                ));
        AV_WN4P(dst+4*sizeof(pixel), AV_RN4P(src+4*sizeof(pixel)));
        dst+=dstStride;
        src+=srcStride;
    }
}

static inline void FUNC(copy_block16)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        AV_WN4P(dst                 , AV_RN4P(src                 ));
        AV_WN4P(dst+ 4*sizeof(pixel), AV_RN4P(src+ 4*sizeof(pixel)));
        AV_WN4P(dst+ 8*sizeof(pixel), AV_RN4P(src+ 8*sizeof(pixel)));
        AV_WN4P(dst+12*sizeof(pixel), AV_RN4P(src+12*sizeof(pixel)));
        dst+=dstStride;
        src+=srcStride;
    }
}

/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void FUNCC(draw_edges)(uint8_t *_buf, int _wrap, int width, int height, int w, int h, int sides)
{
    pixel *buf = (pixel*)_buf;
    int wrap = _wrap / sizeof(pixel);
    pixel *ptr, *last_line;
    int i;

    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
#if BIT_DEPTH > 8
        int j;
        for (j = 0; j < w; j++) {
            ptr[j-w] = ptr[0];
            ptr[j+width] = ptr[width-1];
        }
#else
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
#endif
        ptr += wrap;
    }

    /* top and bottom + corners */
    buf -= w;
    last_line = buf + (height - 1) * wrap;
    if (sides & EDGE_TOP)
        for(i = 0; i < h; i++)
            memcpy(buf - (i + 1) * wrap, buf, (width + w + w) * sizeof(pixel)); // top
    if (sides & EDGE_BOTTOM)
        for (i = 0; i < h; i++)
            memcpy(last_line + (i + 1) * wrap, last_line, (width + w + w) * sizeof(pixel)); // bottom
}

void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                               int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x)*sizeof(pixel);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x)*sizeof(pixel);
        src_x=1-block_w;
    }

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    w    = end_x - start_x;
    src += start_y*linesize + start_x*sizeof(pixel);
    buf += start_x*sizeof(pixel);

    //top
    for(y=0; y<start_y; y++){
        memcpy(buf, src, w*sizeof(pixel));
        buf += linesize;
    }

    // copy existing part
    for(; y<end_y; y++){
        memcpy(buf, src, w*sizeof(pixel));
        src += linesize;
        buf += linesize;
    }

    //bottom
    src -= linesize;
    for(; y<block_h; y++){
        memcpy(buf, src, w*sizeof(pixel));
        buf += linesize;
    }

    buf -= block_h * linesize + start_x*sizeof(pixel);
    while (block_h--){
        pixel *bufp = (pixel*)buf;
        //left
        for(x=0; x<start_x; x++){
            bufp[x] = bufp[start_x];
        }

        //right
        for(x=end_x; x<block_w; x++){
            bufp[x] = bufp[end_x - 1];
        }
        buf += linesize;
    }
}

static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
{
    int i;
    pixel *restrict pixels = (pixel *restrict)_pixels;
    dctcoef *block = (dctcoef*)_block;
    line_size /= sizeof(pixel);

    for(i=0;i<8;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels[4] += block[4];
        pixels[5] += block[5];
        pixels[6] += block[6];
        pixels[7] += block[7];
        pixels += line_size;
        block += 8;
    }
}

static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
{
    int i;
    pixel *restrict pixels = (pixel *restrict)_pixels;
    dctcoef *block = (dctcoef*)_block;
    line_size /= sizeof(pixel);

    for(i=0;i<4;i++) {
        pixels[0] += block[0];
        pixels[1] += block[1];
        pixels[2] += block[2];
        pixels[3] += block[3];
        pixels += line_size;
        block += 4;
    }
}

#if 0

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ##
_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00259 {\ 00260 int i;\ 00261 for(i=0; i<h; i++){\ 00262 const uint64_t a= AV_RN64(pixels );\ 00263 const uint64_t b= AV_RN64(pixels+1);\ 00264 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 00265 pixels+=line_size;\ 00266 block +=line_size;\ 00267 }\ 00268 }\ 00269 \ 00270 static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00271 {\ 00272 int i;\ 00273 for(i=0; i<h; i++){\ 00274 const uint64_t a= AV_RN64(pixels );\ 00275 const uint64_t b= AV_RN64(pixels+line_size);\ 00276 OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 00277 pixels+=line_size;\ 00278 block +=line_size;\ 00279 }\ 00280 }\ 00281 \ 00282 static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00283 {\ 00284 int i;\ 00285 for(i=0; i<h; i++){\ 00286 const uint64_t a= AV_RN64(pixels );\ 00287 const uint64_t b= AV_RN64(pixels+line_size);\ 00288 OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\ 00289 pixels+=line_size;\ 00290 block +=line_size;\ 00291 }\ 00292 }\ 00293 \ 00294 static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00295 {\ 00296 int i;\ 00297 const uint64_t a= AV_RN64(pixels );\ 00298 const uint64_t b= AV_RN64(pixels+1);\ 00299 uint64_t l0= (a&0x0303030303030303ULL)\ 00300 + (b&0x0303030303030303ULL)\ 00301 + 0x0202020202020202ULL;\ 00302 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 00303 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 00304 uint64_t l1,h1;\ 00305 \ 00306 pixels+=line_size;\ 00307 for(i=0; i<h; i+=2){\ 00308 uint64_t a= AV_RN64(pixels );\ 00309 uint64_t b= AV_RN64(pixels+1);\ 00310 l1= (a&0x0303030303030303ULL)\ 00311 + (b&0x0303030303030303ULL);\ 00312 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 00313 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 00314 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 00315 pixels+=line_size;\ 00316 block +=line_size;\ 00317 a= AV_RN64(pixels );\ 00318 b= AV_RN64(pixels+1);\ 00319 l0= (a&0x0303030303030303ULL)\ 00320 + (b&0x0303030303030303ULL)\ 00321 + 0x0202020202020202ULL;\ 00322 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 00323 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 00324 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 00325 pixels+=line_size;\ 00326 block +=line_size;\ 00327 }\ 00328 }\ 00329 \ 00330 static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00331 {\ 00332 int i;\ 00333 const uint64_t a= AV_RN64(pixels );\ 00334 const uint64_t b= AV_RN64(pixels+1);\ 00335 uint64_t l0= (a&0x0303030303030303ULL)\ 00336 + (b&0x0303030303030303ULL)\ 00337 + 0x0101010101010101ULL;\ 00338 uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 00339 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 00340 uint64_t l1,h1;\ 00341 \ 00342 pixels+=line_size;\ 00343 for(i=0; i<h; i+=2){\ 00344 uint64_t a= AV_RN64(pixels );\ 00345 uint64_t b= AV_RN64(pixels+1);\ 00346 l1= (a&0x0303030303030303ULL)\ 00347 + (b&0x0303030303030303ULL);\ 00348 h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 00349 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 00350 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 00351 pixels+=line_size;\ 00352 block +=line_size;\ 00353 a= AV_RN64(pixels );\ 00354 b= AV_RN64(pixels+1);\ 00355 l0= (a&0x0303030303030303ULL)\ 00356 + (b&0x0303030303030303ULL)\ 00357 + 0x0101010101010101ULL;\ 00358 h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\ 00359 + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\ 
00360 OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\ 00361 pixels+=line_size;\ 00362 block +=line_size;\ 00363 }\ 00364 }\ 00365 \ 00366 CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8*sizeof(pixel))\ 00367 CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8*sizeof(pixel))\ 00368 CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8*sizeof(pixel))\ 00369 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8*sizeof(pixel))\ 00370 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8*sizeof(pixel))\ 00371 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8*sizeof(pixel))\ 00372 CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8*sizeof(pixel)) 00373 00374 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) ) 00375 #else // 64 bit variant 00376 00377 #define PIXOP2(OPNAME, OP) \ 00378 static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00379 int i;\ 00380 for(i=0; i<h; i++){\ 00381 OP(*((pixel2*)(block )), AV_RN2P(pixels ));\ 00382 pixels+=line_size;\ 00383 block +=line_size;\ 00384 }\ 00385 }\ 00386 static void FUNCC(OPNAME ## _pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00387 int i;\ 00388 for(i=0; i<h; i++){\ 00389 OP(*((pixel4*)(block )), AV_RN4P(pixels ));\ 00390 pixels+=line_size;\ 00391 block +=line_size;\ 00392 }\ 00393 }\ 00394 static void FUNCC(OPNAME ## _pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00395 int i;\ 00396 for(i=0; i<h; i++){\ 00397 OP(*((pixel4*)(block )), AV_RN4P(pixels ));\ 00398 OP(*((pixel4*)(block+4*sizeof(pixel))), AV_RN4P(pixels+4*sizeof(pixel)));\ 00399 pixels+=line_size;\ 00400 block +=line_size;\ 00401 }\ 00402 }\ 00403 static inline void FUNCC(OPNAME ## _no_rnd_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00404 FUNCC(OPNAME ## _pixels8)(block, pixels, line_size, h);\ 00405 }\ 00406 \ 00407 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 00408 int src_stride1, int src_stride2, int h){\ 00409 int i;\ 00410 for(i=0; i<h; i++){\ 00411 pixel4 a,b;\ 00412 a= AV_RN4P(&src1[i*src_stride1 ]);\ 00413 b= AV_RN4P(&src2[i*src_stride2 ]);\ 00414 OP(*((pixel4*)&dst[i*dst_stride ]), no_rnd_avg_pixel4(a, b));\ 00415 a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\ 00416 b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\ 00417 OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), no_rnd_avg_pixel4(a, b));\ 00418 }\ 00419 }\ 00420 \ 00421 static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 00422 int src_stride1, int src_stride2, int h){\ 00423 int i;\ 00424 for(i=0; i<h; i++){\ 00425 pixel4 a,b;\ 00426 a= AV_RN4P(&src1[i*src_stride1 ]);\ 00427 b= AV_RN4P(&src2[i*src_stride2 ]);\ 00428 OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\ 00429 a= AV_RN4P(&src1[i*src_stride1+4*sizeof(pixel)]);\ 00430 b= AV_RN4P(&src2[i*src_stride2+4*sizeof(pixel)]);\ 00431 OP(*((pixel4*)&dst[i*dst_stride+4*sizeof(pixel)]), rnd_avg_pixel4(a, b));\ 00432 }\ 00433 }\ 00434 \ 00435 static inline void FUNC(OPNAME ## _pixels4_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 00436 int src_stride1, int src_stride2, int h){\ 00437 int i;\ 00438 for(i=0; i<h; i++){\ 00439 pixel4 a,b;\ 00440 a= 
AV_RN4P(&src1[i*src_stride1 ]);\ 00441 b= AV_RN4P(&src2[i*src_stride2 ]);\ 00442 OP(*((pixel4*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\ 00443 }\ 00444 }\ 00445 \ 00446 static inline void FUNC(OPNAME ## _pixels2_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 00447 int src_stride1, int src_stride2, int h){\ 00448 int i;\ 00449 for(i=0; i<h; i++){\ 00450 pixel4 a,b;\ 00451 a= AV_RN2P(&src1[i*src_stride1 ]);\ 00452 b= AV_RN2P(&src2[i*src_stride2 ]);\ 00453 OP(*((pixel2*)&dst[i*dst_stride ]), rnd_avg_pixel4(a, b));\ 00454 }\ 00455 }\ 00456 \ 00457 static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 00458 int src_stride1, int src_stride2, int h){\ 00459 FUNC(OPNAME ## _pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ 00460 FUNC(OPNAME ## _pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\ 00461 }\ 00462 \ 00463 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \ 00464 int src_stride1, int src_stride2, int h){\ 00465 FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\ 00466 FUNC(OPNAME ## _no_rnd_pixels8_l2)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, h);\ 00467 }\ 00468 \ 00469 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00470 FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ 00471 }\ 00472 \ 00473 static inline void FUNCC(OPNAME ## _pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00474 FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ 00475 }\ 00476 \ 00477 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00478 FUNC(OPNAME ## _no_rnd_pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 00479 }\ 00480 \ 00481 static inline void FUNCC(OPNAME ## _pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00482 FUNC(OPNAME ## _pixels8_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 00483 }\ 00484 \ 00485 static inline void FUNC(OPNAME ## _pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ 00486 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00487 /* FIXME HIGH BIT DEPTH */\ 00488 int i;\ 00489 for(i=0; i<h; i++){\ 00490 uint32_t a, b, c, d, l0, l1, h0, h1;\ 00491 a= AV_RN32(&src1[i*src_stride1]);\ 00492 b= AV_RN32(&src2[i*src_stride2]);\ 00493 c= AV_RN32(&src3[i*src_stride3]);\ 00494 d= AV_RN32(&src4[i*src_stride4]);\ 00495 l0= (a&0x03030303UL)\ 00496 + (b&0x03030303UL)\ 00497 + 0x02020202UL;\ 00498 h0= ((a&0xFCFCFCFCUL)>>2)\ 00499 + ((b&0xFCFCFCFCUL)>>2);\ 00500 l1= (c&0x03030303UL)\ 00501 + (d&0x03030303UL);\ 00502 h1= ((c&0xFCFCFCFCUL)>>2)\ 00503 + ((d&0xFCFCFCFCUL)>>2);\ 00504 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00505 a= AV_RN32(&src1[i*src_stride1+4]);\ 00506 b= AV_RN32(&src2[i*src_stride2+4]);\ 00507 c= AV_RN32(&src3[i*src_stride3+4]);\ 00508 d= AV_RN32(&src4[i*src_stride4+4]);\ 00509 l0= (a&0x03030303UL)\ 00510 + 
(b&0x03030303UL)\ 00511 + 0x02020202UL;\ 00512 h0= ((a&0xFCFCFCFCUL)>>2)\ 00513 + ((b&0xFCFCFCFCUL)>>2);\ 00514 l1= (c&0x03030303UL)\ 00515 + (d&0x03030303UL);\ 00516 h1= ((c&0xFCFCFCFCUL)>>2)\ 00517 + ((d&0xFCFCFCFCUL)>>2);\ 00518 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00519 }\ 00520 }\ 00521 \ 00522 static inline void FUNCC(OPNAME ## _pixels4_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00523 FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ 00524 }\ 00525 \ 00526 static inline void FUNCC(OPNAME ## _pixels4_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00527 FUNC(OPNAME ## _pixels4_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 00528 }\ 00529 \ 00530 static inline void FUNCC(OPNAME ## _pixels2_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00531 FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+sizeof(pixel), line_size, line_size, line_size, h);\ 00532 }\ 00533 \ 00534 static inline void FUNCC(OPNAME ## _pixels2_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 00535 FUNC(OPNAME ## _pixels2_l2)(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\ 00536 }\ 00537 \ 00538 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ 00539 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00540 /* FIXME HIGH BIT DEPTH*/\ 00541 int i;\ 00542 for(i=0; i<h; i++){\ 00543 uint32_t a, b, c, d, l0, l1, h0, h1;\ 00544 a= AV_RN32(&src1[i*src_stride1]);\ 00545 b= AV_RN32(&src2[i*src_stride2]);\ 00546 c= AV_RN32(&src3[i*src_stride3]);\ 00547 d= AV_RN32(&src4[i*src_stride4]);\ 00548 l0= (a&0x03030303UL)\ 00549 + (b&0x03030303UL)\ 00550 + 0x01010101UL;\ 00551 h0= ((a&0xFCFCFCFCUL)>>2)\ 00552 + ((b&0xFCFCFCFCUL)>>2);\ 00553 l1= (c&0x03030303UL)\ 00554 + (d&0x03030303UL);\ 00555 h1= ((c&0xFCFCFCFCUL)>>2)\ 00556 + ((d&0xFCFCFCFCUL)>>2);\ 00557 OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00558 a= AV_RN32(&src1[i*src_stride1+4]);\ 00559 b= AV_RN32(&src2[i*src_stride2+4]);\ 00560 c= AV_RN32(&src3[i*src_stride3+4]);\ 00561 d= AV_RN32(&src4[i*src_stride4+4]);\ 00562 l0= (a&0x03030303UL)\ 00563 + (b&0x03030303UL)\ 00564 + 0x01010101UL;\ 00565 h0= ((a&0xFCFCFCFCUL)>>2)\ 00566 + ((b&0xFCFCFCFCUL)>>2);\ 00567 l1= (c&0x03030303UL)\ 00568 + (d&0x03030303UL);\ 00569 h1= ((c&0xFCFCFCFCUL)>>2)\ 00570 + ((d&0xFCFCFCFCUL)>>2);\ 00571 OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00572 }\ 00573 }\ 00574 static inline void FUNC(OPNAME ## _pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ 00575 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\ 00576 FUNC(OPNAME ## _pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 00577 FUNC(OPNAME ## _pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 00578 }\ 00579 static inline void FUNC(OPNAME ## _no_rnd_pixels16_l4)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\ 00580 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int 
src_stride4, int h){\ 00581 FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 00582 FUNC(OPNAME ## _no_rnd_pixels8_l4)(dst+8*sizeof(pixel), src1+8*sizeof(pixel), src2+8*sizeof(pixel), src3+8*sizeof(pixel), src4+8*sizeof(pixel), dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\ 00583 }\ 00584 \ 00585 static inline void FUNCC(OPNAME ## _pixels2_xy2)(uint8_t *_block, const uint8_t *_pixels, int line_size, int h)\ 00586 {\ 00587 int i, a0, b0, a1, b1;\ 00588 pixel *block = (pixel*)_block;\ 00589 const pixel *pixels = (const pixel*)_pixels;\ 00590 line_size /= sizeof(pixel);\ 00591 a0= pixels[0];\ 00592 b0= pixels[1] + 2;\ 00593 a0 += b0;\ 00594 b0 += pixels[2];\ 00595 \ 00596 pixels+=line_size;\ 00597 for(i=0; i<h; i+=2){\ 00598 a1= pixels[0];\ 00599 b1= pixels[1];\ 00600 a1 += b1;\ 00601 b1 += pixels[2];\ 00602 \ 00603 block[0]= (a1+a0)>>2; /* FIXME non put */\ 00604 block[1]= (b1+b0)>>2;\ 00605 \ 00606 pixels+=line_size;\ 00607 block +=line_size;\ 00608 \ 00609 a0= pixels[0];\ 00610 b0= pixels[1] + 2;\ 00611 a0 += b0;\ 00612 b0 += pixels[2];\ 00613 \ 00614 block[0]= (a1+a0)>>2;\ 00615 block[1]= (b1+b0)>>2;\ 00616 pixels+=line_size;\ 00617 block +=line_size;\ 00618 }\ 00619 }\ 00620 \ 00621 static inline void FUNCC(OPNAME ## _pixels4_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00622 {\ 00623 /* FIXME HIGH BIT DEPTH */\ 00624 int i;\ 00625 const uint32_t a= AV_RN32(pixels );\ 00626 const uint32_t b= AV_RN32(pixels+1);\ 00627 uint32_t l0= (a&0x03030303UL)\ 00628 + (b&0x03030303UL)\ 00629 + 0x02020202UL;\ 00630 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 00631 + ((b&0xFCFCFCFCUL)>>2);\ 00632 uint32_t l1,h1;\ 00633 \ 00634 pixels+=line_size;\ 00635 for(i=0; i<h; i+=2){\ 00636 uint32_t a= AV_RN32(pixels );\ 00637 uint32_t b= AV_RN32(pixels+1);\ 00638 l1= (a&0x03030303UL)\ 00639 + (b&0x03030303UL);\ 00640 h1= ((a&0xFCFCFCFCUL)>>2)\ 00641 + ((b&0xFCFCFCFCUL)>>2);\ 00642 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00643 pixels+=line_size;\ 00644 block +=line_size;\ 00645 a= AV_RN32(pixels );\ 00646 b= AV_RN32(pixels+1);\ 00647 l0= (a&0x03030303UL)\ 00648 + (b&0x03030303UL)\ 00649 + 0x02020202UL;\ 00650 h0= ((a&0xFCFCFCFCUL)>>2)\ 00651 + ((b&0xFCFCFCFCUL)>>2);\ 00652 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00653 pixels+=line_size;\ 00654 block +=line_size;\ 00655 }\ 00656 }\ 00657 \ 00658 static inline void FUNCC(OPNAME ## _pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00659 {\ 00660 /* FIXME HIGH BIT DEPTH */\ 00661 int j;\ 00662 for(j=0; j<2; j++){\ 00663 int i;\ 00664 const uint32_t a= AV_RN32(pixels );\ 00665 const uint32_t b= AV_RN32(pixels+1);\ 00666 uint32_t l0= (a&0x03030303UL)\ 00667 + (b&0x03030303UL)\ 00668 + 0x02020202UL;\ 00669 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 00670 + ((b&0xFCFCFCFCUL)>>2);\ 00671 uint32_t l1,h1;\ 00672 \ 00673 pixels+=line_size;\ 00674 for(i=0; i<h; i+=2){\ 00675 uint32_t a= AV_RN32(pixels );\ 00676 uint32_t b= AV_RN32(pixels+1);\ 00677 l1= (a&0x03030303UL)\ 00678 + (b&0x03030303UL);\ 00679 h1= ((a&0xFCFCFCFCUL)>>2)\ 00680 + ((b&0xFCFCFCFCUL)>>2);\ 00681 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00682 pixels+=line_size;\ 00683 block +=line_size;\ 00684 a= AV_RN32(pixels );\ 00685 b= AV_RN32(pixels+1);\ 00686 l0= (a&0x03030303UL)\ 00687 + (b&0x03030303UL)\ 00688 + 0x02020202UL;\ 00689 h0= ((a&0xFCFCFCFCUL)>>2)\ 00690 + ((b&0xFCFCFCFCUL)>>2);\ 00691 
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00692 pixels+=line_size;\ 00693 block +=line_size;\ 00694 }\ 00695 pixels+=4-line_size*(h+1);\ 00696 block +=4-line_size*h;\ 00697 }\ 00698 }\ 00699 \ 00700 static inline void FUNCC(OPNAME ## _no_rnd_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)\ 00701 {\ 00702 /* FIXME HIGH BIT DEPTH */\ 00703 int j;\ 00704 for(j=0; j<2; j++){\ 00705 int i;\ 00706 const uint32_t a= AV_RN32(pixels );\ 00707 const uint32_t b= AV_RN32(pixels+1);\ 00708 uint32_t l0= (a&0x03030303UL)\ 00709 + (b&0x03030303UL)\ 00710 + 0x01010101UL;\ 00711 uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\ 00712 + ((b&0xFCFCFCFCUL)>>2);\ 00713 uint32_t l1,h1;\ 00714 \ 00715 pixels+=line_size;\ 00716 for(i=0; i<h; i+=2){\ 00717 uint32_t a= AV_RN32(pixels );\ 00718 uint32_t b= AV_RN32(pixels+1);\ 00719 l1= (a&0x03030303UL)\ 00720 + (b&0x03030303UL);\ 00721 h1= ((a&0xFCFCFCFCUL)>>2)\ 00722 + ((b&0xFCFCFCFCUL)>>2);\ 00723 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00724 pixels+=line_size;\ 00725 block +=line_size;\ 00726 a= AV_RN32(pixels );\ 00727 b= AV_RN32(pixels+1);\ 00728 l0= (a&0x03030303UL)\ 00729 + (b&0x03030303UL)\ 00730 + 0x01010101UL;\ 00731 h0= ((a&0xFCFCFCFCUL)>>2)\ 00732 + ((b&0xFCFCFCFCUL)>>2);\ 00733 OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\ 00734 pixels+=line_size;\ 00735 block +=line_size;\ 00736 }\ 00737 pixels+=4-line_size*(h+1);\ 00738 block +=4-line_size*h;\ 00739 }\ 00740 }\ 00741 \ 00742 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\ 00743 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_x2) , FUNCC(OPNAME ## _pixels8_x2) , 8*sizeof(pixel))\ 00744 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_y2) , FUNCC(OPNAME ## _pixels8_y2) , 8*sizeof(pixel))\ 00745 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16_xy2), FUNCC(OPNAME ## _pixels8_xy2), 8*sizeof(pixel))\ 00746 av_unused CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16) , FUNCC(OPNAME ## _pixels8) , 8*sizeof(pixel))\ 00747 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_x2) , FUNCC(OPNAME ## _no_rnd_pixels8_x2) , 8*sizeof(pixel))\ 00748 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_y2) , FUNCC(OPNAME ## _no_rnd_pixels8_y2) , 8*sizeof(pixel))\ 00749 CALL_2X_PIXELS(FUNCC(OPNAME ## _no_rnd_pixels16_xy2), FUNCC(OPNAME ## _no_rnd_pixels8_xy2), 8*sizeof(pixel))\ 00750 00751 #define op_avg(a, b) a = rnd_avg_pixel4(a, b) 00752 #endif 00753 #define op_put(a, b) a = b 00754 00755 PIXOP2(avg, op_avg) 00756 PIXOP2(put, op_put) 00757 #undef op_avg 00758 #undef op_put 00759 00760 #define put_no_rnd_pixels8_c put_pixels8_c 00761 #define put_no_rnd_pixels16_c put_pixels16_c 00762 00763 static void FUNCC(put_no_rnd_pixels16_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ 00764 FUNC(put_no_rnd_pixels16_l2)(dst, a, b, stride, stride, stride, h); 00765 } 00766 00767 static void FUNCC(put_no_rnd_pixels8_l2)(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){ 00768 FUNC(put_no_rnd_pixels8_l2)(dst, a, b, stride, stride, stride, h); 00769 } 00770 00771 #define H264_CHROMA_MC(OPNAME, OP)\ 00772 static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ 00773 pixel *dst = (pixel*)_dst;\ 00774 pixel *src = (pixel*)_src;\ 00775 const int A=(8-x)*(8-y);\ 00776 const int B=( x)*(8-y);\ 00777 const int C=(8-x)*( y);\ 00778 const int D=( x)*( y);\ 00779 int i;\ 00780 stride /= sizeof(pixel);\ 00781 \ 00782 assert(x<8 && y<8 && x>=0 && y>=0);\ 
00783 \ 00784 if(D){\ 00785 for(i=0; i<h; i++){\ 00786 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ 00787 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ 00788 dst+= stride;\ 00789 src+= stride;\ 00790 }\ 00791 }else{\ 00792 const int E= B+C;\ 00793 const int step= C ? stride : 1;\ 00794 for(i=0; i<h; i++){\ 00795 OP(dst[0], (A*src[0] + E*src[step+0]));\ 00796 OP(dst[1], (A*src[1] + E*src[step+1]));\ 00797 dst+= stride;\ 00798 src+= stride;\ 00799 }\ 00800 }\ 00801 }\ 00802 \ 00803 static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ 00804 pixel *dst = (pixel*)_dst;\ 00805 pixel *src = (pixel*)_src;\ 00806 const int A=(8-x)*(8-y);\ 00807 const int B=( x)*(8-y);\ 00808 const int C=(8-x)*( y);\ 00809 const int D=( x)*( y);\ 00810 int i;\ 00811 stride /= sizeof(pixel);\ 00812 \ 00813 assert(x<8 && y<8 && x>=0 && y>=0);\ 00814 \ 00815 if(D){\ 00816 for(i=0; i<h; i++){\ 00817 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ 00818 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ 00819 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ 00820 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ 00821 dst+= stride;\ 00822 src+= stride;\ 00823 }\ 00824 }else{\ 00825 const int E= B+C;\ 00826 const int step= C ? stride : 1;\ 00827 for(i=0; i<h; i++){\ 00828 OP(dst[0], (A*src[0] + E*src[step+0]));\ 00829 OP(dst[1], (A*src[1] + E*src[step+1]));\ 00830 OP(dst[2], (A*src[2] + E*src[step+2]));\ 00831 OP(dst[3], (A*src[3] + E*src[step+3]));\ 00832 dst+= stride;\ 00833 src+= stride;\ 00834 }\ 00835 }\ 00836 }\ 00837 \ 00838 static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\ 00839 pixel *dst = (pixel*)_dst;\ 00840 pixel *src = (pixel*)_src;\ 00841 const int A=(8-x)*(8-y);\ 00842 const int B=( x)*(8-y);\ 00843 const int C=(8-x)*( y);\ 00844 const int D=( x)*( y);\ 00845 int i;\ 00846 stride /= sizeof(pixel);\ 00847 \ 00848 assert(x<8 && y<8 && x>=0 && y>=0);\ 00849 \ 00850 if(D){\ 00851 for(i=0; i<h; i++){\ 00852 OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\ 00853 OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\ 00854 OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\ 00855 OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\ 00856 OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\ 00857 OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\ 00858 OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\ 00859 OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\ 00860 dst+= stride;\ 00861 src+= stride;\ 00862 }\ 00863 }else{\ 00864 const int E= B+C;\ 00865 const int step= C ? 
stride : 1;\ 00866 for(i=0; i<h; i++){\ 00867 OP(dst[0], (A*src[0] + E*src[step+0]));\ 00868 OP(dst[1], (A*src[1] + E*src[step+1]));\ 00869 OP(dst[2], (A*src[2] + E*src[step+2]));\ 00870 OP(dst[3], (A*src[3] + E*src[step+3]));\ 00871 OP(dst[4], (A*src[4] + E*src[step+4]));\ 00872 OP(dst[5], (A*src[5] + E*src[step+5]));\ 00873 OP(dst[6], (A*src[6] + E*src[step+6]));\ 00874 OP(dst[7], (A*src[7] + E*src[step+7]));\ 00875 dst+= stride;\ 00876 src+= stride;\ 00877 }\ 00878 }\ 00879 } 00880 00881 #define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) 00882 #define op_put(a, b) a = (((b) + 32)>>6) 00883 00884 H264_CHROMA_MC(put_ , op_put) 00885 H264_CHROMA_MC(avg_ , op_avg) 00886 #undef op_avg 00887 #undef op_put 00888 00889 #define H264_LOWPASS(OPNAME, OP, OP2) \ 00890 static av_unused void FUNC(OPNAME ## h264_qpel2_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ 00891 const int h=2;\ 00892 INIT_CLIP\ 00893 int i;\ 00894 pixel *dst = (pixel*)_dst;\ 00895 pixel *src = (pixel*)_src;\ 00896 dstStride /= sizeof(pixel);\ 00897 srcStride /= sizeof(pixel);\ 00898 for(i=0; i<h; i++)\ 00899 {\ 00900 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ 00901 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ 00902 dst+=dstStride;\ 00903 src+=srcStride;\ 00904 }\ 00905 }\ 00906 \ 00907 static av_unused void FUNC(OPNAME ## h264_qpel2_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ 00908 const int w=2;\ 00909 INIT_CLIP\ 00910 int i;\ 00911 pixel *dst = (pixel*)_dst;\ 00912 pixel *src = (pixel*)_src;\ 00913 dstStride /= sizeof(pixel);\ 00914 srcStride /= sizeof(pixel);\ 00915 for(i=0; i<w; i++)\ 00916 {\ 00917 const int srcB= src[-2*srcStride];\ 00918 const int srcA= src[-1*srcStride];\ 00919 const int src0= src[0 *srcStride];\ 00920 const int src1= src[1 *srcStride];\ 00921 const int src2= src[2 *srcStride];\ 00922 const int src3= src[3 *srcStride];\ 00923 const int src4= src[4 *srcStride];\ 00924 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ 00925 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ 00926 dst++;\ 00927 src++;\ 00928 }\ 00929 }\ 00930 \ 00931 static av_unused void FUNC(OPNAME ## h264_qpel2_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\ 00932 const int h=2;\ 00933 const int w=2;\ 00934 const int pad = (BIT_DEPTH > 9) ? 
(-10 * ((1<<BIT_DEPTH)-1)) : 0;\ 00935 INIT_CLIP\ 00936 int i;\ 00937 pixel *dst = (pixel*)_dst;\ 00938 pixel *src = (pixel*)_src;\ 00939 dstStride /= sizeof(pixel);\ 00940 srcStride /= sizeof(pixel);\ 00941 src -= 2*srcStride;\ 00942 for(i=0; i<h+5; i++)\ 00943 {\ 00944 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\ 00945 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\ 00946 tmp+=tmpStride;\ 00947 src+=srcStride;\ 00948 }\ 00949 tmp -= tmpStride*(h+5-2);\ 00950 for(i=0; i<w; i++)\ 00951 {\ 00952 const int tmpB= tmp[-2*tmpStride] - pad;\ 00953 const int tmpA= tmp[-1*tmpStride] - pad;\ 00954 const int tmp0= tmp[0 *tmpStride] - pad;\ 00955 const int tmp1= tmp[1 *tmpStride] - pad;\ 00956 const int tmp2= tmp[2 *tmpStride] - pad;\ 00957 const int tmp3= tmp[3 *tmpStride] - pad;\ 00958 const int tmp4= tmp[4 *tmpStride] - pad;\ 00959 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ 00960 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ 00961 dst++;\ 00962 tmp++;\ 00963 }\ 00964 }\ 00965 static void FUNC(OPNAME ## h264_qpel4_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ 00966 const int h=4;\ 00967 INIT_CLIP\ 00968 int i;\ 00969 pixel *dst = (pixel*)_dst;\ 00970 pixel *src = (pixel*)_src;\ 00971 dstStride /= sizeof(pixel);\ 00972 srcStride /= sizeof(pixel);\ 00973 for(i=0; i<h; i++)\ 00974 {\ 00975 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\ 00976 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\ 00977 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\ 00978 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\ 00979 dst+=dstStride;\ 00980 src+=srcStride;\ 00981 }\ 00982 }\ 00983 \ 00984 static void FUNC(OPNAME ## h264_qpel4_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ 00985 const int w=4;\ 00986 INIT_CLIP\ 00987 int i;\ 00988 pixel *dst = (pixel*)_dst;\ 00989 pixel *src = (pixel*)_src;\ 00990 dstStride /= sizeof(pixel);\ 00991 srcStride /= sizeof(pixel);\ 00992 for(i=0; i<w; i++)\ 00993 {\ 00994 const int srcB= src[-2*srcStride];\ 00995 const int srcA= src[-1*srcStride];\ 00996 const int src0= src[0 *srcStride];\ 00997 const int src1= src[1 *srcStride];\ 00998 const int src2= src[2 *srcStride];\ 00999 const int src3= src[3 *srcStride];\ 01000 const int src4= src[4 *srcStride];\ 01001 const int src5= src[5 *srcStride];\ 01002 const int src6= src[6 *srcStride];\ 01003 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ 01004 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ 01005 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ 01006 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ 01007 dst++;\ 01008 src++;\ 01009 }\ 01010 }\ 01011 \ 01012 static void FUNC(OPNAME ## h264_qpel4_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\ 01013 const int h=4;\ 01014 const int w=4;\ 01015 const int pad = (BIT_DEPTH > 9) ? 
(-10 * ((1<<BIT_DEPTH)-1)) : 0;\ 01016 INIT_CLIP\ 01017 int i;\ 01018 pixel *dst = (pixel*)_dst;\ 01019 pixel *src = (pixel*)_src;\ 01020 dstStride /= sizeof(pixel);\ 01021 srcStride /= sizeof(pixel);\ 01022 src -= 2*srcStride;\ 01023 for(i=0; i<h+5; i++)\ 01024 {\ 01025 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]) + pad;\ 01026 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]) + pad;\ 01027 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]) + pad;\ 01028 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]) + pad;\ 01029 tmp+=tmpStride;\ 01030 src+=srcStride;\ 01031 }\ 01032 tmp -= tmpStride*(h+5-2);\ 01033 for(i=0; i<w; i++)\ 01034 {\ 01035 const int tmpB= tmp[-2*tmpStride] - pad;\ 01036 const int tmpA= tmp[-1*tmpStride] - pad;\ 01037 const int tmp0= tmp[0 *tmpStride] - pad;\ 01038 const int tmp1= tmp[1 *tmpStride] - pad;\ 01039 const int tmp2= tmp[2 *tmpStride] - pad;\ 01040 const int tmp3= tmp[3 *tmpStride] - pad;\ 01041 const int tmp4= tmp[4 *tmpStride] - pad;\ 01042 const int tmp5= tmp[5 *tmpStride] - pad;\ 01043 const int tmp6= tmp[6 *tmpStride] - pad;\ 01044 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ 01045 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ 01046 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ 01047 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ 01048 dst++;\ 01049 tmp++;\ 01050 }\ 01051 }\ 01052 \ 01053 static void FUNC(OPNAME ## h264_qpel8_h_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ 01054 const int h=8;\ 01055 INIT_CLIP\ 01056 int i;\ 01057 pixel *dst = (pixel*)_dst;\ 01058 pixel *src = (pixel*)_src;\ 01059 dstStride /= sizeof(pixel);\ 01060 srcStride /= sizeof(pixel);\ 01061 for(i=0; i<h; i++)\ 01062 {\ 01063 OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\ 01064 OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\ 01065 OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\ 01066 OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\ 01067 OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\ 01068 OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\ 01069 OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\ 01070 OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\ 01071 dst+=dstStride;\ 01072 src+=srcStride;\ 01073 }\ 01074 }\ 01075 \ 01076 static void FUNC(OPNAME ## h264_qpel8_v_lowpass)(uint8_t *_dst, uint8_t *_src, int dstStride, int srcStride){\ 01077 const int w=8;\ 01078 INIT_CLIP\ 01079 int i;\ 01080 pixel *dst = (pixel*)_dst;\ 01081 pixel *src = (pixel*)_src;\ 01082 dstStride /= sizeof(pixel);\ 01083 srcStride /= sizeof(pixel);\ 01084 for(i=0; i<w; i++)\ 01085 {\ 01086 const int srcB= src[-2*srcStride];\ 01087 const int srcA= src[-1*srcStride];\ 01088 const int src0= src[0 *srcStride];\ 01089 const int src1= src[1 *srcStride];\ 01090 const int src2= src[2 *srcStride];\ 01091 const int src3= src[3 *srcStride];\ 01092 const int src4= src[4 *srcStride];\ 01093 const int src5= src[5 *srcStride];\ 01094 const int src6= src[6 *srcStride];\ 01095 const int src7= src[7 *srcStride];\ 01096 const int src8= src[8 *srcStride];\ 01097 const int src9= src[9 *srcStride];\ 01098 const int src10=src[10*srcStride];\ 01099 OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\ 01100 OP(dst[1*dstStride], 
(src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\ 01101 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\ 01102 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\ 01103 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\ 01104 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\ 01105 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\ 01106 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\ 01107 dst++;\ 01108 src++;\ 01109 }\ 01110 }\ 01111 \ 01112 static void FUNC(OPNAME ## h264_qpel8_hv_lowpass)(uint8_t *_dst, int16_t *tmp, uint8_t *_src, int dstStride, int tmpStride, int srcStride){\ 01113 const int h=8;\ 01114 const int w=8;\ 01115 const int pad = (BIT_DEPTH > 9) ? (-10 * ((1<<BIT_DEPTH)-1)) : 0;\ 01116 INIT_CLIP\ 01117 int i;\ 01118 pixel *dst = (pixel*)_dst;\ 01119 pixel *src = (pixel*)_src;\ 01120 dstStride /= sizeof(pixel);\ 01121 srcStride /= sizeof(pixel);\ 01122 src -= 2*srcStride;\ 01123 for(i=0; i<h+5; i++)\ 01124 {\ 01125 tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]) + pad;\ 01126 tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]) + pad;\ 01127 tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]) + pad;\ 01128 tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]) + pad;\ 01129 tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]) + pad;\ 01130 tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]) + pad;\ 01131 tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]) + pad;\ 01132 tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]) + pad;\ 01133 tmp+=tmpStride;\ 01134 src+=srcStride;\ 01135 }\ 01136 tmp -= tmpStride*(h+5-2);\ 01137 for(i=0; i<w; i++)\ 01138 {\ 01139 const int tmpB= tmp[-2*tmpStride] - pad;\ 01140 const int tmpA= tmp[-1*tmpStride] - pad;\ 01141 const int tmp0= tmp[0 *tmpStride] - pad;\ 01142 const int tmp1= tmp[1 *tmpStride] - pad;\ 01143 const int tmp2= tmp[2 *tmpStride] - pad;\ 01144 const int tmp3= tmp[3 *tmpStride] - pad;\ 01145 const int tmp4= tmp[4 *tmpStride] - pad;\ 01146 const int tmp5= tmp[5 *tmpStride] - pad;\ 01147 const int tmp6= tmp[6 *tmpStride] - pad;\ 01148 const int tmp7= tmp[7 *tmpStride] - pad;\ 01149 const int tmp8= tmp[8 *tmpStride] - pad;\ 01150 const int tmp9= tmp[9 *tmpStride] - pad;\ 01151 const int tmp10=tmp[10*tmpStride] - pad;\ 01152 OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\ 01153 OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ 01154 OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\ 01155 OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\ 01156 OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\ 01157 OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\ 01158 OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\ 01159 OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\ 01160 dst++;\ 01161 tmp++;\ 01162 }\ 01163 }\ 01164 \ 01165 static void FUNC(OPNAME ## h264_qpel16_v_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 01166 FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\ 01167 FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\ 01168 src += 8*srcStride;\ 01169 dst += 8*dstStride;\ 01170 FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst , src , dstStride, srcStride);\ 
01171 FUNC(OPNAME ## h264_qpel8_v_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\ 01172 }\ 01173 \ 01174 static void FUNC(OPNAME ## h264_qpel16_h_lowpass)(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ 01175 FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\ 01176 FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\ 01177 src += 8*srcStride;\ 01178 dst += 8*dstStride;\ 01179 FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst , src , dstStride, srcStride);\ 01180 FUNC(OPNAME ## h264_qpel8_h_lowpass)(dst+8*sizeof(pixel), src+8*sizeof(pixel), dstStride, srcStride);\ 01181 }\ 01182 \ 01183 static void FUNC(OPNAME ## h264_qpel16_hv_lowpass)(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ 01184 FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\ 01185 FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\ 01186 src += 8*srcStride;\ 01187 dst += 8*dstStride;\ 01188 FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst , tmp , src , dstStride, tmpStride, srcStride);\ 01189 FUNC(OPNAME ## h264_qpel8_hv_lowpass)(dst+8*sizeof(pixel), tmp+8, src+8*sizeof(pixel), dstStride, tmpStride, srcStride);\ 01190 }\ 01191 01192 #define H264_MC(OPNAME, SIZE) \ 01193 static av_unused void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc00)(uint8_t *dst, uint8_t *src, int stride){\ 01194 FUNCC(OPNAME ## pixels ## SIZE)(dst, src, stride, SIZE);\ 01195 }\ 01196 \ 01197 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc10)(uint8_t *dst, uint8_t *src, int stride){\ 01198 uint8_t half[SIZE*SIZE*sizeof(pixel)];\ 01199 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\ 01200 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src, half, stride, stride, SIZE*sizeof(pixel), SIZE);\ 01201 }\ 01202 \ 01203 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc20)(uint8_t *dst, uint8_t *src, int stride){\ 01204 FUNC(OPNAME ## h264_qpel ## SIZE ## _h_lowpass)(dst, src, stride, stride);\ 01205 }\ 01206 \ 01207 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc30)(uint8_t *dst, uint8_t *src, int stride){\ 01208 uint8_t half[SIZE*SIZE*sizeof(pixel)];\ 01209 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(half, src, SIZE*sizeof(pixel), stride);\ 01210 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, src+sizeof(pixel), half, stride, stride, SIZE*sizeof(pixel), SIZE);\ 01211 }\ 01212 \ 01213 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc01)(uint8_t *dst, uint8_t *src, int stride){\ 01214 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01215 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01216 uint8_t half[SIZE*SIZE*sizeof(pixel)];\ 01217 FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ 01218 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01219 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid, half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01220 }\ 01221 \ 01222 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc02)(uint8_t *dst, uint8_t *src, int stride){\ 01223 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01224 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01225 FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ 01226 FUNC(OPNAME ## h264_qpel ## SIZE ## _v_lowpass)(dst, full_mid, stride, SIZE*sizeof(pixel));\ 01227 }\ 01228 \ 
01229 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc03)(uint8_t *dst, uint8_t *src, int stride){\ 01230 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01231 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01232 uint8_t half[SIZE*SIZE*sizeof(pixel)];\ 01233 FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ 01234 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(half, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01235 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, full_mid+SIZE*sizeof(pixel), half, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01236 }\ 01237 \ 01238 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc11)(uint8_t *dst, uint8_t *src, int stride){\ 01239 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01240 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01241 uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ 01242 uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\ 01243 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\ 01244 FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ 01245 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01246 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01247 }\ 01248 \ 01249 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc31)(uint8_t *dst, uint8_t *src, int stride){\ 01250 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01251 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01252 uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ 01253 uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\ 01254 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\ 01255 FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\ 01256 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01257 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01258 }\ 01259 \ 01260 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc13)(uint8_t *dst, uint8_t *src, int stride){\ 01261 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01262 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01263 uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ 01264 uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\ 01265 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\ 01266 FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ 01267 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01268 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01269 }\ 01270 \ 01271 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc33)(uint8_t *dst, uint8_t *src, int stride){\ 01272 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01273 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01274 uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ 01275 uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\ 01276 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\ 01277 FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\ 01278 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01279 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfV, stride, 
SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01280 }\ 01281 \ 01282 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc22)(uint8_t *dst, uint8_t *src, int stride){\ 01283 int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ 01284 FUNC(OPNAME ## h264_qpel ## SIZE ## _hv_lowpass)(dst, tmp, src, stride, SIZE*sizeof(pixel), stride);\ 01285 }\ 01286 \ 01287 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc21)(uint8_t *dst, uint8_t *src, int stride){\ 01288 int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ 01289 uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ 01290 uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\ 01291 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src, SIZE*sizeof(pixel), stride);\ 01292 FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\ 01293 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01294 }\ 01295 \ 01296 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc23)(uint8_t *dst, uint8_t *src, int stride){\ 01297 int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ 01298 uint8_t halfH[SIZE*SIZE*sizeof(pixel)];\ 01299 uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\ 01300 FUNC(put_h264_qpel ## SIZE ## _h_lowpass)(halfH, src + stride, SIZE*sizeof(pixel), stride);\ 01301 FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\ 01302 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfH, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01303 }\ 01304 \ 01305 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc12)(uint8_t *dst, uint8_t *src, int stride){\ 01306 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01307 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01308 int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ 01309 uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\ 01310 uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\ 01311 FUNC(copy_block ## SIZE )(full, src - stride*2, SIZE*sizeof(pixel), stride, SIZE + 5);\ 01312 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01313 FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\ 01314 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01315 }\ 01316 \ 01317 static void FUNCC(OPNAME ## h264_qpel ## SIZE ## _mc32)(uint8_t *dst, uint8_t *src, int stride){\ 01318 uint8_t full[SIZE*(SIZE+5)*sizeof(pixel)];\ 01319 uint8_t * const full_mid= full + SIZE*2*sizeof(pixel);\ 01320 int16_t tmp[SIZE*(SIZE+5)*sizeof(pixel)];\ 01321 uint8_t halfV[SIZE*SIZE*sizeof(pixel)];\ 01322 uint8_t halfHV[SIZE*SIZE*sizeof(pixel)];\ 01323 FUNC(copy_block ## SIZE )(full, src - stride*2 + sizeof(pixel), SIZE*sizeof(pixel), stride, SIZE + 5);\ 01324 FUNC(put_h264_qpel ## SIZE ## _v_lowpass)(halfV, full_mid, SIZE*sizeof(pixel), SIZE*sizeof(pixel));\ 01325 FUNC(put_h264_qpel ## SIZE ## _hv_lowpass)(halfHV, tmp, src, SIZE*sizeof(pixel), SIZE*sizeof(pixel), stride);\ 01326 FUNC(OPNAME ## pixels ## SIZE ## _l2)(dst, halfV, halfHV, stride, SIZE*sizeof(pixel), SIZE*sizeof(pixel), SIZE);\ 01327 }\ 01328 01329 #define op_avg(a, b) a = (((a)+CLIP(((b) + 16)>>5)+1)>>1) 01330 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7) 01331 #define op_put(a, b) a = CLIP(((b) + 16)>>5) 01332 #define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1) 01333 #define op2_put(a, b) a = CLIP(((b) + 512)>>10) 01334 01335 H264_LOWPASS(put_ , op_put, op2_put) 01336 
H264_LOWPASS(avg_ , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put

#if BIT_DEPTH == 8
# define put_h264_qpel8_mc00_8_c  ff_put_pixels8x8_8_c
# define avg_h264_qpel8_mc00_8_c  ff_avg_pixels8x8_8_c
# define put_h264_qpel16_mc00_8_c ff_put_pixels16x16_8_c
# define avg_h264_qpel16_mc00_8_c ff_avg_pixels16x16_8_c
#elif BIT_DEPTH == 9
# define put_h264_qpel8_mc00_9_c  ff_put_pixels8x8_9_c
# define avg_h264_qpel8_mc00_9_c  ff_avg_pixels8x8_9_c
# define put_h264_qpel16_mc00_9_c ff_put_pixels16x16_9_c
# define avg_h264_qpel16_mc00_9_c ff_avg_pixels16x16_9_c
#elif BIT_DEPTH == 10
# define put_h264_qpel8_mc00_10_c  ff_put_pixels8x8_10_c
# define avg_h264_qpel8_mc00_10_c  ff_avg_pixels8x8_10_c
# define put_h264_qpel16_mc00_10_c ff_put_pixels16x16_10_c
# define avg_h264_qpel16_mc00_10_c ff_avg_pixels16x16_10_c
#endif

void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(put_pixels8)(dst, src, stride, 8);
}
void FUNCC(ff_avg_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels8)(dst, src, stride, 8);
}
void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(put_pixels16)(dst, src, stride, 16);
}
void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
    FUNCC(avg_pixels16)(dst, src, stride, 16);
}

static void FUNCC(clear_block)(DCTELEM *block)
{
    memset(block, 0, sizeof(dctcoef)*64);
}

static void FUNCC(clear_blocks)(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(dctcoef)*6*64);
}
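
The PIXOP2() kernels earlier in this listing (and the rnd_avg_pixel4 / no_rnd_avg_pixel4 macros they call, which are defined in the accompanying headers rather than in this file) rely on a packed-average bit trick: four samples in one word are averaged without unpacking. Below is a minimal standalone sketch of that identity on plain 32-bit words; the helper names rnd_avg32_demo and no_rnd_avg32_demo are hypothetical and are not part of the Libav source.

/* Illustrative sketch, not part of the original dsputil_template.c. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* per-byte (a+b+1)>>1: OR keeps the rounded-up average, then subtract half of
 * the per-byte difference; masking with 0xFE keeps shifted bits inside their
 * own byte lane */
static uint32_t rnd_avg32_demo(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & 0xFEFEFEFEU) >> 1);
}

/* per-byte (a+b)>>1 (no rounding): AND keeps the common part, then add half of
 * the per-byte difference */
static uint32_t no_rnd_avg32_demo(uint32_t a, uint32_t b)
{
    return (a & b) + (((a ^ b) & 0xFEFEFEFEU) >> 1);
}

int main(void)
{
    uint32_t a = 0x10FF3264, b = 0x20003365;
    int i;

    /* verify both identities against a per-byte reference */
    for (i = 0; i < 4; i++) {
        unsigned pa = (a >> (8 * i)) & 0xFF;
        unsigned pb = (b >> (8 * i)) & 0xFF;
        assert(((rnd_avg32_demo(a, b)    >> (8 * i)) & 0xFF) == ((pa + pb + 1) >> 1));
        assert(((no_rnd_avg32_demo(a, b) >> (8 * i)) & 0xFF) == ((pa + pb)     >> 1));
    }
    printf("rnd_avg32_demo = 0x%08X\n", (unsigned)rnd_avg32_demo(a, b));
    return 0;
}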
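
Similarly, the H264_CHROMA_MC() kernels above compute each output sample as a bilinear blend with weights A=(8-x)(8-y), B=x(8-y), C=(8-x)y and D=xy, which always sum to 64, and op_put then rounds with ((b)+32)>>6. The following self-contained sketch shows one such sample for 8-bit input; chroma_sample_demo is a hypothetical name, not a Libav function.

/* Illustrative sketch, not part of the original dsputil_template.c. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t chroma_sample_demo(const uint8_t *src, int stride, int x, int y)
{
    const int A = (8 - x) * (8 - y);
    const int B =      x  * (8 - y);
    const int C = (8 - x) *      y;
    const int D =      x  *      y;

    assert(A + B + C + D == 64); /* weights always sum to 64 = 1<<6 */
    return (A * src[0] + B * src[1] + C * src[stride] + D * src[stride + 1] + 32) >> 6;
}

int main(void)
{
    /* 2x2 neighbourhood, row-major, stride 2 */
    const uint8_t block[4] = { 100, 120, 140, 160 };
    /* x = y = 4 is the half-sample position, i.e. a plain average of the four
     * neighbours: (100+120+140+160)/4 rounds down to 130 here */
    printf("%d\n", chroma_sample_demo(block, 2, 4, 4));
    return 0;
}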