00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00028 #include "mathops.h"
00029
00030 #include "bit_depth_template.c"
00031
00032 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
00033 pixel *src = (pixel*)_src;
00034 int stride = _stride/sizeof(pixel);
00035 const pixel4 a= AV_RN4PA(src-stride);
00036
00037 AV_WN4PA(src+0*stride, a);
00038 AV_WN4PA(src+1*stride, a);
00039 AV_WN4PA(src+2*stride, a);
00040 AV_WN4PA(src+3*stride, a);
00041 }
00042
00043 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
00044 pixel *src = (pixel*)_src;
00045 int stride = _stride/sizeof(pixel);
00046 AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
00047 AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
00048 AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
00049 AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
00050 }
00051
00052 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00053 pixel *src = (pixel*)_src;
00054 int stride = _stride/sizeof(pixel);
00055 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
00056 + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
00057 const pixel4 a = PIXEL_SPLAT_X4(dc);
00058
00059 AV_WN4PA(src+0*stride, a);
00060 AV_WN4PA(src+1*stride, a);
00061 AV_WN4PA(src+2*stride, a);
00062 AV_WN4PA(src+3*stride, a);
00063 }
00064
00065 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00066 pixel *src = (pixel*)_src;
00067 int stride = _stride/sizeof(pixel);
00068 const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
00069 const pixel4 a = PIXEL_SPLAT_X4(dc);
00070
00071 AV_WN4PA(src+0*stride, a);
00072 AV_WN4PA(src+1*stride, a);
00073 AV_WN4PA(src+2*stride, a);
00074 AV_WN4PA(src+3*stride, a);
00075 }
00076
00077 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00078 pixel *src = (pixel*)_src;
00079 int stride = _stride/sizeof(pixel);
00080 const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
00081 const pixel4 a = PIXEL_SPLAT_X4(dc);
00082
00083 AV_WN4PA(src+0*stride, a);
00084 AV_WN4PA(src+1*stride, a);
00085 AV_WN4PA(src+2*stride, a);
00086 AV_WN4PA(src+3*stride, a);
00087 }
00088
00089 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00090 pixel *src = (pixel*)_src;
00091 int stride = _stride/sizeof(pixel);
00092 const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
00093
00094 AV_WN4PA(src+0*stride, a);
00095 AV_WN4PA(src+1*stride, a);
00096 AV_WN4PA(src+2*stride, a);
00097 AV_WN4PA(src+3*stride, a);
00098 }
00099
00100 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00101 pixel *src = (pixel*)_src;
00102 int stride = _stride/sizeof(pixel);
00103 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
00104
00105 AV_WN4PA(src+0*stride, a);
00106 AV_WN4PA(src+1*stride, a);
00107 AV_WN4PA(src+2*stride, a);
00108 AV_WN4PA(src+3*stride, a);
00109 }
00110
00111 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
00112 pixel *src = (pixel*)_src;
00113 int stride = _stride/sizeof(pixel);
00114 const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
00115
00116 AV_WN4PA(src+0*stride, a);
00117 AV_WN4PA(src+1*stride, a);
00118 AV_WN4PA(src+2*stride, a);
00119 AV_WN4PA(src+3*stride, a);
00120 }
00121
00122
/*
 * Edge loaders for the 4x4 directional predictors.
 *
 * Each macro expands to four complete local declarations (including the
 * final ';'), so it is invoked without a trailing semicolon and must sit
 * where declarations are allowed.  The expanding scope has to provide
 * `src` and `stride` (and `topright` for LOAD_TOP_RIGHT_EDGE).
 *
 *   LOAD_TOP_RIGHT_EDGE : t4..t7 = topright[0..3]
 *   LOAD_DOWN_LEFT_EDGE : l4..l7 = column -1, rows 4..7
 *   LOAD_LEFT_EDGE      : l0..l3 = column -1, rows 0..3
 *   LOAD_TOP_EDGE       : t0..t3 = row -1, columns 0..3
 *
 * The last line of each definition deliberately has no line continuation,
 * so the macro ends at its own last declaration instead of depending on a
 * blank line following it.
 */
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];

#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];

#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];

#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];
00147 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright, int _stride){
00148 pixel *src = (pixel*)_src;
00149 int stride = _stride/sizeof(pixel);
00150 const int lt= src[-1-1*stride];
00151 LOAD_TOP_EDGE
00152 LOAD_LEFT_EDGE
00153
00154 src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
00155 src[0+2*stride]=
00156 src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
00157 src[0+1*stride]=
00158 src[1+2*stride]=
00159 src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
00160 src[0+0*stride]=
00161 src[1+1*stride]=
00162 src[2+2*stride]=
00163 src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
00164 src[1+0*stride]=
00165 src[2+1*stride]=
00166 src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
00167 src[2+0*stride]=
00168 src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00169 src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
00170 }
00171
00172 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
00173 pixel *src = (pixel*)_src;
00174 const pixel *topright = (const pixel*)_topright;
00175 int stride = _stride/sizeof(pixel);
00176 LOAD_TOP_EDGE
00177 LOAD_TOP_RIGHT_EDGE
00178
00179
00180 src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
00181 src[1+0*stride]=
00182 src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
00183 src[2+0*stride]=
00184 src[1+1*stride]=
00185 src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
00186 src[3+0*stride]=
00187 src[2+1*stride]=
00188 src[1+2*stride]=
00189 src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
00190 src[3+1*stride]=
00191 src[2+2*stride]=
00192 src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
00193 src[3+2*stride]=
00194 src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
00195 src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
00196 }
00197
00198 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src, const uint8_t *topright, int _stride){
00199 pixel *src = (pixel*)_src;
00200 int stride = _stride/sizeof(pixel);
00201 const int lt= src[-1-1*stride];
00202 LOAD_TOP_EDGE
00203 LOAD_LEFT_EDGE
00204
00205 src[0+0*stride]=
00206 src[1+2*stride]=(lt + t0 + 1)>>1;
00207 src[1+0*stride]=
00208 src[2+2*stride]=(t0 + t1 + 1)>>1;
00209 src[2+0*stride]=
00210 src[3+2*stride]=(t1 + t2 + 1)>>1;
00211 src[3+0*stride]=(t2 + t3 + 1)>>1;
00212 src[0+1*stride]=
00213 src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
00214 src[1+1*stride]=
00215 src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
00216 src[2+1*stride]=
00217 src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00218 src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
00219 src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
00220 src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
00221 }
00222
00223 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src, const uint8_t *_topright, int _stride){
00224 pixel *src = (pixel*)_src;
00225 const pixel *topright = (const pixel*)_topright;
00226 int stride = _stride/sizeof(pixel);
00227 LOAD_TOP_EDGE
00228 LOAD_TOP_RIGHT_EDGE
00229
00230 src[0+0*stride]=(t0 + t1 + 1)>>1;
00231 src[1+0*stride]=
00232 src[0+2*stride]=(t1 + t2 + 1)>>1;
00233 src[2+0*stride]=
00234 src[1+2*stride]=(t2 + t3 + 1)>>1;
00235 src[3+0*stride]=
00236 src[2+2*stride]=(t3 + t4+ 1)>>1;
00237 src[3+2*stride]=(t4 + t5+ 1)>>1;
00238 src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00239 src[1+1*stride]=
00240 src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
00241 src[2+1*stride]=
00242 src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
00243 src[3+1*stride]=
00244 src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
00245 src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
00246 }
00247
00248 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright, int _stride){
00249 pixel *src = (pixel*)_src;
00250 int stride = _stride/sizeof(pixel);
00251 LOAD_LEFT_EDGE
00252
00253 src[0+0*stride]=(l0 + l1 + 1)>>1;
00254 src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
00255 src[2+0*stride]=
00256 src[0+1*stride]=(l1 + l2 + 1)>>1;
00257 src[3+0*stride]=
00258 src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
00259 src[2+1*stride]=
00260 src[0+2*stride]=(l2 + l3 + 1)>>1;
00261 src[3+1*stride]=
00262 src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
00263 src[3+2*stride]=
00264 src[1+3*stride]=
00265 src[0+3*stride]=
00266 src[2+2*stride]=
00267 src[2+3*stride]=
00268 src[3+3*stride]=l3;
00269 }
00270
00271 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src, const uint8_t *topright, int _stride){
00272 pixel *src = (pixel*)_src;
00273 int stride = _stride/sizeof(pixel);
00274 const int lt= src[-1-1*stride];
00275 LOAD_TOP_EDGE
00276 LOAD_LEFT_EDGE
00277
00278 src[0+0*stride]=
00279 src[2+1*stride]=(lt + l0 + 1)>>1;
00280 src[1+0*stride]=
00281 src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
00282 src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
00283 src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
00284 src[0+1*stride]=
00285 src[2+2*stride]=(l0 + l1 + 1)>>1;
00286 src[1+1*stride]=
00287 src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
00288 src[0+2*stride]=
00289 src[2+3*stride]=(l1 + l2+ 1)>>1;
00290 src[1+2*stride]=
00291 src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
00292 src[0+3*stride]=(l2 + l3 + 1)>>1;
00293 src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
00294 }
00295
00296 static void FUNCC(pred16x16_vertical)(uint8_t *_src, int _stride){
00297 int i;
00298 pixel *src = (pixel*)_src;
00299 int stride = _stride/sizeof(pixel);
00300 const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
00301 const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
00302 const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
00303 const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);
00304
00305 for(i=0; i<16; i++){
00306 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00307 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
00308 AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
00309 AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
00310 }
00311 }
00312
00313 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, int stride){
00314 int i;
00315 pixel *src = (pixel*)_src;
00316 stride /= sizeof(pixel);
00317
00318 for(i=0; i<16; i++){
00319 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
00320
00321 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00322 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
00323 AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
00324 AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
00325 }
00326 }
00327
/*
 * Fill a 16x16 block with the 4-pixel value `v`.
 *
 * Expands to a bare `for` statement that uses the caller's `i`, `src` and
 * `stride`; note that it advances `src` by 16 rows as a side effect.
 */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4PA(src+ 0, v);\
        AV_WN4PA(src+ 4, v);\
        AV_WN4PA(src+ 8, v);\
        AV_WN4PA(src+12, v);\
        src += stride;\
    }
00336
00337 static void FUNCC(pred16x16_dc)(uint8_t *_src, int stride){
00338 int i, dc=0;
00339 pixel *src = (pixel*)_src;
00340 pixel4 dcsplat;
00341 stride /= sizeof(pixel);
00342
00343 for(i=0;i<16; i++){
00344 dc+= src[-1+i*stride];
00345 }
00346
00347 for(i=0;i<16; i++){
00348 dc+= src[i-stride];
00349 }
00350
00351 dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
00352 PREDICT_16x16_DC(dcsplat);
00353 }
00354
00355 static void FUNCC(pred16x16_left_dc)(uint8_t *_src, int stride){
00356 int i, dc=0;
00357 pixel *src = (pixel*)_src;
00358 pixel4 dcsplat;
00359 stride /= sizeof(pixel);
00360
00361 for(i=0;i<16; i++){
00362 dc+= src[-1+i*stride];
00363 }
00364
00365 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
00366 PREDICT_16x16_DC(dcsplat);
00367 }
00368
00369 static void FUNCC(pred16x16_top_dc)(uint8_t *_src, int stride){
00370 int i, dc=0;
00371 pixel *src = (pixel*)_src;
00372 pixel4 dcsplat;
00373 stride /= sizeof(pixel);
00374
00375 for(i=0;i<16; i++){
00376 dc+= src[i-stride];
00377 }
00378
00379 dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
00380 PREDICT_16x16_DC(dcsplat);
00381 }
00382
00383 #define PRED16x16_X(n, v) \
00384 static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, int stride){\
00385 int i;\
00386 pixel *src = (pixel*)_src;\
00387 stride /= sizeof(pixel);\
00388 PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
00389 }
00390
00391 PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
00392 PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
00393 PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
00394
00395 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src, int _stride, const int svq3, const int rv40){
00396 int i, j, k;
00397 int a;
00398 INIT_CLIP
00399 pixel *src = (pixel*)_src;
00400 int stride = _stride/sizeof(pixel);
00401 const pixel * const src0 = src +7-stride;
00402 const pixel * src1 = src +8*stride-1;
00403 const pixel * src2 = src1-2*stride;
00404 int H = src0[1] - src0[-1];
00405 int V = src1[0] - src2[ 0];
00406 for(k=2; k<=8; ++k) {
00407 src1 += stride; src2 -= stride;
00408 H += k*(src0[k] - src0[-k]);
00409 V += k*(src1[0] - src2[ 0]);
00410 }
00411 if(svq3){
00412 H = ( 5*(H/4) ) / 16;
00413 V = ( 5*(V/4) ) / 16;
00414
00415
00416 i = H; H = V; V = i;
00417 }else if(rv40){
00418 H = ( H + (H>>2) ) >> 4;
00419 V = ( V + (V>>2) ) >> 4;
00420 }else{
00421 H = ( 5*H+32 ) >> 6;
00422 V = ( 5*V+32 ) >> 6;
00423 }
00424
00425 a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
00426 for(j=16; j>0; --j) {
00427 int b = a;
00428 a += V;
00429 for(i=-16; i<0; i+=4) {
00430 src[16+i] = CLIP((b ) >> 5);
00431 src[17+i] = CLIP((b+ H) >> 5);
00432 src[18+i] = CLIP((b+2*H) >> 5);
00433 src[19+i] = CLIP((b+3*H) >> 5);
00434 b += 4*H;
00435 }
00436 src += stride;
00437 }
00438 }
00439
00440 static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
00441 FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
00442 }
00443
00444 static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){
00445 int i;
00446 pixel *src = (pixel*)_src;
00447 int stride = _stride/sizeof(pixel);
00448 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
00449 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
00450
00451 for(i=0; i<8; i++){
00452 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00453 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
00454 }
00455 }
00456
00457 static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){
00458 int i;
00459 pixel *src = (pixel*)_src;
00460 int stride = _stride>>(sizeof(pixel)-1);
00461 const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
00462 const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);
00463
00464 for(i=0; i<16; i++){
00465 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00466 AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
00467 }
00468 }
00469
00470 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){
00471 int i;
00472 pixel *src = (pixel*)_src;
00473 stride /= sizeof(pixel);
00474
00475 for(i=0; i<8; i++){
00476 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
00477 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00478 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
00479 }
00480 }
00481
00482 static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){
00483 int i;
00484 pixel *src = (pixel*)_src;
00485 stride >>= sizeof(pixel)-1;
00486 for(i=0; i<16; i++){
00487 const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
00488 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
00489 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
00490 }
00491 }
00492
00493 #define PRED8x8_X(n, v)\
00494 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
00495 int i;\
00496 const pixel4 a = PIXEL_SPLAT_X4(v);\
00497 pixel *src = (pixel*)_src;\
00498 stride /= sizeof(pixel);\
00499 for(i=0; i<8; i++){\
00500 AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
00501 AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
00502 }\
00503 }
00504
00505 PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
00506 PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
00507 PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
00508
00509 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){
00510 FUNCC(pred8x8_128_dc)(_src, stride);
00511 FUNCC(pred8x8_128_dc)(_src+8*stride, stride);
00512 }
00513
00514 static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){
00515 int i;
00516 int dc0, dc2;
00517 pixel4 dc0splat, dc2splat;
00518 pixel *src = (pixel*)_src;
00519 stride /= sizeof(pixel);
00520
00521 dc0=dc2=0;
00522 for(i=0;i<4; i++){
00523 dc0+= src[-1+i*stride];
00524 dc2+= src[-1+(i+4)*stride];
00525 }
00526 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
00527 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
00528
00529 for(i=0; i<4; i++){
00530 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00531 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
00532 }
00533 for(i=4; i<8; i++){
00534 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
00535 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
00536 }
00537 }
00538
00539 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){
00540 FUNCC(pred8x8_left_dc)(_src, stride);
00541 FUNCC(pred8x8_left_dc)(_src+8*stride, stride);
00542 }
00543
00544 static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){
00545 int i;
00546 int dc0, dc1;
00547 pixel4 dc0splat, dc1splat;
00548 pixel *src = (pixel*)_src;
00549 stride /= sizeof(pixel);
00550
00551 dc0=dc1=0;
00552 for(i=0;i<4; i++){
00553 dc0+= src[i-stride];
00554 dc1+= src[4+i-stride];
00555 }
00556 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
00557 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00558
00559 for(i=0; i<4; i++){
00560 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00561 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00562 }
00563 for(i=4; i<8; i++){
00564 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00565 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00566 }
00567 }
00568
00569 static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){
00570 int i;
00571 int dc0, dc1;
00572 pixel4 dc0splat, dc1splat;
00573 pixel *src = (pixel*)_src;
00574 stride >>= sizeof(pixel)-1;
00575
00576 dc0=dc1=0;
00577 for(i=0;i<4; i++){
00578 dc0+= src[i-stride];
00579 dc1+= src[4+i-stride];
00580 }
00581 dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
00582 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00583
00584 for(i=0; i<16; i++){
00585 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00586 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00587 }
00588 }
00589
00590 static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){
00591 int i;
00592 int dc0, dc1, dc2;
00593 pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
00594 pixel *src = (pixel*)_src;
00595 stride /= sizeof(pixel);
00596
00597 dc0=dc1=dc2=0;
00598 for(i=0;i<4; i++){
00599 dc0+= src[-1+i*stride] + src[i-stride];
00600 dc1+= src[4+i-stride];
00601 dc2+= src[-1+(i+4)*stride];
00602 }
00603 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
00604 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00605 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
00606 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
00607
00608 for(i=0; i<4; i++){
00609 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00610 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00611 }
00612 for(i=4; i<8; i++){
00613 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
00614 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
00615 }
00616 }
00617
00618 static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){
00619 int i;
00620 int dc0, dc1, dc2, dc3, dc4;
00621 pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
00622 pixel *src = (pixel*)_src;
00623 stride >>= sizeof(pixel)-1;
00624
00625 dc0=dc1=dc2=dc3=dc4=0;
00626 for(i=0;i<4; i++){
00627 dc0+= src[-1+i*stride] + src[i-stride];
00628 dc1+= src[4+i-stride];
00629 dc2+= src[-1+(i+4)*stride];
00630 dc3+= src[-1+(i+8)*stride];
00631 dc4+= src[-1+(i+12)*stride];
00632 }
00633 dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
00634 dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
00635 dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
00636 dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
00637 dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
00638 dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
00639 dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
00640 dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);
00641
00642 for(i=0; i<4; i++){
00643 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
00644 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
00645 }
00646 for(i=4; i<8; i++){
00647 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
00648 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
00649 }
00650 for(i=8; i<12; i++){
00651 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
00652 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
00653 }
00654 for(i=12; i<16; i++){
00655 AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
00656 AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
00657 }
00658 }
00659
00660 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
00661 FUNCC(pred8x8_top_dc)(src, stride);
00662 FUNCC(pred4x4_dc)(src, NULL, stride);
00663 }
00664
00665 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, int stride){
00666 FUNCC(pred8x16_top_dc)(src, stride);
00667 FUNCC(pred4x4_dc)(src, NULL, stride);
00668 }
00669
00670 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
00671 FUNCC(pred8x8_dc)(src, stride);
00672 FUNCC(pred4x4_top_dc)(src, NULL, stride);
00673 }
00674
00675 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, int stride){
00676 FUNCC(pred8x16_dc)(src, stride);
00677 FUNCC(pred4x4_top_dc)(src, NULL, stride);
00678 }
00679
00680 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
00681 FUNCC(pred8x8_left_dc)(src, stride);
00682 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
00683 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
00684 }
00685
00686 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, int stride){
00687 FUNCC(pred8x16_left_dc)(src, stride);
00688 FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
00689 FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
00690 }
00691
00692 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
00693 FUNCC(pred8x8_left_dc)(src, stride);
00694 FUNCC(pred4x4_128_dc)(src , NULL, stride);
00695 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
00696 }
00697
00698 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, int stride){
00699 FUNCC(pred8x16_left_dc)(src, stride);
00700 FUNCC(pred4x4_128_dc)(src , NULL, stride);
00701 FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
00702 }
00703
00704 static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){
00705 int j, k;
00706 int a;
00707 INIT_CLIP
00708 pixel *src = (pixel*)_src;
00709 int stride = _stride/sizeof(pixel);
00710 const pixel * const src0 = src +3-stride;
00711 const pixel * src1 = src +4*stride-1;
00712 const pixel * src2 = src1-2*stride;
00713 int H = src0[1] - src0[-1];
00714 int V = src1[0] - src2[ 0];
00715 for(k=2; k<=4; ++k) {
00716 src1 += stride; src2 -= stride;
00717 H += k*(src0[k] - src0[-k]);
00718 V += k*(src1[0] - src2[ 0]);
00719 }
00720 H = ( 17*H+16 ) >> 5;
00721 V = ( 17*V+16 ) >> 5;
00722
00723 a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
00724 for(j=8; j>0; --j) {
00725 int b = a;
00726 a += V;
00727 src[0] = CLIP((b ) >> 5);
00728 src[1] = CLIP((b+ H) >> 5);
00729 src[2] = CLIP((b+2*H) >> 5);
00730 src[3] = CLIP((b+3*H) >> 5);
00731 src[4] = CLIP((b+4*H) >> 5);
00732 src[5] = CLIP((b+5*H) >> 5);
00733 src[6] = CLIP((b+6*H) >> 5);
00734 src[7] = CLIP((b+7*H) >> 5);
00735 src += stride;
00736 }
00737 }
00738
00739 static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){
00740 int j, k;
00741 int a;
00742 INIT_CLIP
00743 pixel *src = (pixel*)_src;
00744 int stride = _stride>>(sizeof(pixel)-1);
00745 const pixel * const src0 = src +3-stride;
00746 const pixel * src1 = src +8*stride-1;
00747 const pixel * src2 = src1-2*stride;
00748 int H = src0[1] - src0[-1];
00749 int V = src1[0] - src2[ 0];
00750
00751 for (k = 2; k <= 4; ++k) {
00752 src1 += stride; src2 -= stride;
00753 H += k*(src0[k] - src0[-k]);
00754 V += k*(src1[0] - src2[ 0]);
00755 }
00756 for (; k <= 8; ++k) {
00757 src1 += stride; src2 -= stride;
00758 V += k*(src1[0] - src2[0]);
00759 }
00760
00761 H = (17*H+16) >> 5;
00762 V = (5*V+32) >> 6;
00763
00764 a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
00765 for(j=16; j>0; --j) {
00766 int b = a;
00767 a += V;
00768 src[0] = CLIP((b ) >> 5);
00769 src[1] = CLIP((b+ H) >> 5);
00770 src[2] = CLIP((b+2*H) >> 5);
00771 src[3] = CLIP((b+3*H) >> 5);
00772 src[4] = CLIP((b+4*H) >> 5);
00773 src[5] = CLIP((b+5*H) >> 5);
00774 src[6] = CLIP((b+6*H) >> 5);
00775 src[7] = CLIP((b+7*H) >> 5);
00776 src += stride;
00777 }
00778 }
00779
/*
 * Helpers for the pred8x8l_* predictors.  All of them expect `src` and
 * `stride` (in pixels) in the expanding scope; the LOAD_* macros also read
 * `has_topleft` / `has_topright`.
 */

/* Pixel at column x, row y relative to the block's top-left pixel. */
#define SRC(x,y) src[(x)+(y)*stride]

/* l<y>: left neighbour y smoothed with a (1,2,1)/4 filter along the column. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;

/*
 * Declares l0..l7.  l0 substitutes SRC(-1,0) for the corner pixel when
 * has_topleft is zero; l7 weights SRC(-1,7) by 3.  The expansion ends
 * without ';' -- the caller supplies it.
 */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* t<x>: top neighbour x smoothed with a (1,2,1)/4 filter along the row. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/*
 * Declares t0..t7.  t0 substitutes SRC(0,-1) for the corner pixel when
 * has_topleft is zero; t7 substitutes SRC(7,-1) for SRC(8,-1) when
 * has_topright is zero.  The expansion ends without ';'.
 */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* Assignment form of PT, for the already-declared t8..t14. */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;

/*
 * Declares and fills t8..t15.  With has_topright they are the filtered
 * pixels 8..15 of the row above; otherwise all of them are SRC(7,-1).
 */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* Declares lt: the corner pixel filtered with its two edge neighbours. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/*
 * Fills the 8x8 block with the 4-pixel value v.  Declares `y` in the
 * caller's scope and advances `src` by 8 rows as a side effect.
 */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }
00817
00818 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00819 {
00820 pixel *src = (pixel*)_src;
00821 int stride = _stride/sizeof(pixel);
00822
00823 PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
00824 }
00825 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00826 {
00827 pixel *src = (pixel*)_src;
00828 int stride = _stride/sizeof(pixel);
00829
00830 PREDICT_8x8_LOAD_LEFT;
00831 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
00832 PREDICT_8x8_DC(dc);
00833 }
00834 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00835 {
00836 pixel *src = (pixel*)_src;
00837 int stride = _stride/sizeof(pixel);
00838
00839 PREDICT_8x8_LOAD_TOP;
00840 const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
00841 PREDICT_8x8_DC(dc);
00842 }
00843 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00844 {
00845 pixel *src = (pixel*)_src;
00846 int stride = _stride/sizeof(pixel);
00847
00848 PREDICT_8x8_LOAD_LEFT;
00849 PREDICT_8x8_LOAD_TOP;
00850 const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
00851 +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
00852 PREDICT_8x8_DC(dc);
00853 }
00854 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00855 {
00856 pixel *src = (pixel*)_src;
00857 int stride = _stride/sizeof(pixel);
00858 pixel4 a;
00859
00860 PREDICT_8x8_LOAD_LEFT;
00861 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
00862 AV_WN4PA(src+y*stride, a); \
00863 AV_WN4PA(src+y*stride+4, a);
00864 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
00865 #undef ROW
00866 }
00867 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00868 {
00869 int y;
00870 pixel *src = (pixel*)_src;
00871 int stride = _stride/sizeof(pixel);
00872 pixel4 a, b;
00873
00874 PREDICT_8x8_LOAD_TOP;
00875 src[0] = t0;
00876 src[1] = t1;
00877 src[2] = t2;
00878 src[3] = t3;
00879 src[4] = t4;
00880 src[5] = t5;
00881 src[6] = t6;
00882 src[7] = t7;
00883 a = AV_RN4PA(((pixel4*)src)+0);
00884 b = AV_RN4PA(((pixel4*)src)+1);
00885 for( y = 1; y < 8; y++ ) {
00886 AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
00887 AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
00888 }
00889 }
00890 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00891 {
00892 pixel *src = (pixel*)_src;
00893 int stride = _stride/sizeof(pixel);
00894 PREDICT_8x8_LOAD_TOP;
00895 PREDICT_8x8_LOAD_TOPRIGHT;
00896 SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
00897 SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
00898 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
00899 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
00900 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
00901 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
00902 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
00903 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
00904 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
00905 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
00906 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
00907 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
00908 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
00909 SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
00910 SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
00911 }
00912 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00913 {
00914 pixel *src = (pixel*)_src;
00915 int stride = _stride/sizeof(pixel);
00916 PREDICT_8x8_LOAD_TOP;
00917 PREDICT_8x8_LOAD_LEFT;
00918 PREDICT_8x8_LOAD_TOPLEFT;
00919 SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
00920 SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
00921 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
00922 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
00923 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
00924 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
00925 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
00926 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
00927 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
00928 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
00929 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
00930 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
00931 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
00932 SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
00933 SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
00934 }
00935 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00936 {
00937 pixel *src = (pixel*)_src;
00938 int stride = _stride/sizeof(pixel);
00939 PREDICT_8x8_LOAD_TOP;
00940 PREDICT_8x8_LOAD_LEFT;
00941 PREDICT_8x8_LOAD_TOPLEFT;
00942 SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
00943 SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
00944 SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
00945 SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
00946 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
00947 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
00948 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
00949 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
00950 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
00951 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
00952 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
00953 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
00954 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
00955 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
00956 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
00957 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
00958 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
00959 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
00960 SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
00961 SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
00962 SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
00963 SRC(7,0)= (t6 + t7 + 1) >> 1;
00964 }
00965 static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00966 {
00967 pixel *src = (pixel*)_src;
00968 int stride = _stride/sizeof(pixel);
00969 PREDICT_8x8_LOAD_TOP;
00970 PREDICT_8x8_LOAD_LEFT;
00971 PREDICT_8x8_LOAD_TOPLEFT;
00972 SRC(0,7)= (l6 + l7 + 1) >> 1;
00973 SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
00974 SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
00975 SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
00976 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
00977 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
00978 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
00979 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
00980 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
00981 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
00982 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
00983 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
00984 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
00985 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
00986 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
00987 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
00988 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
00989 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
00990 SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
00991 SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
00992 SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
00993 SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
00994 }
00995 static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
00996 {
00997 pixel *src = (pixel*)_src;
00998 int stride = _stride/sizeof(pixel);
00999 PREDICT_8x8_LOAD_TOP;
01000 PREDICT_8x8_LOAD_TOPRIGHT;
01001 SRC(0,0)= (t0 + t1 + 1) >> 1;
01002 SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
01003 SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
01004 SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
01005 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
01006 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
01007 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
01008 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
01009 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
01010 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
01011 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
01012 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
01013 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
01014 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
01015 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
01016 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
01017 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
01018 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
01019 SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
01020 SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
01021 SRC(7,6)= (t10 + t11 + 1) >> 1;
01022 SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
01023 }
01024 static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
01025 {
01026 pixel *src = (pixel*)_src;
01027 int stride = _stride/sizeof(pixel);
01028 PREDICT_8x8_LOAD_LEFT;
01029 SRC(0,0)= (l0 + l1 + 1) >> 1;
01030 SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
01031 SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
01032 SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
01033 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
01034 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
01035 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
01036 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
01037 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
01038 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
01039 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
01040 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
01041 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
01042 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
01043 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
01044 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
01045 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
01046 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
01047 }
01048 #undef PREDICT_8x8_LOAD_LEFT
01049 #undef PREDICT_8x8_LOAD_TOP
01050 #undef PREDICT_8x8_LOAD_TOPLEFT
01051 #undef PREDICT_8x8_LOAD_TOPRIGHT
01052 #undef PREDICT_8x8_DC
01053 #undef PTR
01054 #undef PT
01055 #undef PL
01056 #undef SRC
01057
01058 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
01059 int i;
01060 pixel *pix = (pixel*)_pix;
01061 const dctcoef *block = (const dctcoef*)_block;
01062 stride /= sizeof(pixel);
01063 pix -= stride;
01064 for(i=0; i<4; i++){
01065 pixel v = pix[0];
01066 pix[1*stride]= v += block[0];
01067 pix[2*stride]= v += block[4];
01068 pix[3*stride]= v += block[8];
01069 pix[4*stride]= v + block[12];
01070 pix++;
01071 block++;
01072 }
01073 }
01074
01075 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
01076 int i;
01077 pixel *pix = (pixel*)_pix;
01078 const dctcoef *block = (const dctcoef*)_block;
01079 stride /= sizeof(pixel);
01080 for(i=0; i<4; i++){
01081 pixel v = pix[-1];
01082 pix[0]= v += block[0];
01083 pix[1]= v += block[1];
01084 pix[2]= v += block[2];
01085 pix[3]= v + block[3];
01086 pix+= stride;
01087 block+= 4;
01088 }
01089 }
01090
01091 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
01092 int i;
01093 pixel *pix = (pixel*)_pix;
01094 const dctcoef *block = (const dctcoef*)_block;
01095 stride /= sizeof(pixel);
01096 pix -= stride;
01097 for(i=0; i<8; i++){
01098 pixel v = pix[0];
01099 pix[1*stride]= v += block[0];
01100 pix[2*stride]= v += block[8];
01101 pix[3*stride]= v += block[16];
01102 pix[4*stride]= v += block[24];
01103 pix[5*stride]= v += block[32];
01104 pix[6*stride]= v += block[40];
01105 pix[7*stride]= v += block[48];
01106 pix[8*stride]= v + block[56];
01107 pix++;
01108 block++;
01109 }
01110 }
01111
01112 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, const DCTELEM *_block, int stride){
01113 int i;
01114 pixel *pix = (pixel*)_pix;
01115 const dctcoef *block = (const dctcoef*)_block;
01116 stride /= sizeof(pixel);
01117 for(i=0; i<8; i++){
01118 pixel v = pix[-1];
01119 pix[0]= v += block[0];
01120 pix[1]= v += block[1];
01121 pix[2]= v += block[2];
01122 pix[3]= v += block[3];
01123 pix[4]= v += block[4];
01124 pix[5]= v += block[5];
01125 pix[6]= v += block[6];
01126 pix[7]= v + block[7];
01127 pix+= stride;
01128 block+= 8;
01129 }
01130 }
01131
01132 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01133 int i;
01134 for(i=0; i<16; i++)
01135 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01136 }
01137
01138 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01139 int i;
01140 for(i=0; i<16; i++)
01141 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01142 }
01143
01144 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01145 int i;
01146 for(i=0; i<4; i++)
01147 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01148 }
01149
01150 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01151 int i;
01152 for(i=0; i<4; i++)
01153 FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01154 for(i=4; i<8; i++)
01155 FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
01156 }
01157
01158 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01159 int i;
01160 for(i=0; i<4; i++)
01161 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01162 }
01163
01164 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
01165 int i;
01166 for(i=0; i<4; i++)
01167 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
01168 for(i=4; i<8; i++)
01169 FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
01170 }