Libav 0.7.1
|
00001 /* 00002 * Copyright (c) 2010 Jason Garrett-Glaser 00003 * 00004 * This file is part of Libav. 00005 * 00006 * Libav is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2.1 of the License, or (at your option) any later version. 00010 * 00011 * Libav is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Lesser General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Lesser General Public 00017 * License along with Libav; if not, write to the Free Software 00018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00019 */ 00020 00021 #include "libavutil/cpu.h" 00022 #include "libavcodec/h264pred.h" 00023 00024 #define PRED4x4(TYPE, DEPTH, OPT) \ 00025 void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, const uint8_t *topright, int stride); 00026 00027 PRED4x4(dc, 10, mmxext) 00028 PRED4x4(down_left, 10, sse2) 00029 PRED4x4(down_left, 10, avx) 00030 PRED4x4(down_right, 10, sse2) 00031 PRED4x4(down_right, 10, ssse3) 00032 PRED4x4(down_right, 10, avx) 00033 PRED4x4(vertical_left, 10, sse2) 00034 PRED4x4(vertical_left, 10, avx) 00035 PRED4x4(vertical_right, 10, sse2) 00036 PRED4x4(vertical_right, 10, ssse3) 00037 PRED4x4(vertical_right, 10, avx) 00038 PRED4x4(horizontal_up, 10, mmxext) 00039 PRED4x4(horizontal_down, 10, sse2) 00040 PRED4x4(horizontal_down, 10, ssse3) 00041 PRED4x4(horizontal_down, 10, avx) 00042 00043 #define PRED8x8(TYPE, DEPTH, OPT) \ 00044 void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, int stride); 00045 00046 PRED8x8(vertical, 10, sse2) 00047 PRED8x8(horizontal, 10, sse2) 00048 00049 void ff_pred16x16_vertical_mmx (uint8_t *src, int stride); 00050 void ff_pred16x16_vertical_sse (uint8_t *src, int stride); 00051 void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride); 00052 void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride); 00053 void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride); 00054 void ff_pred16x16_dc_mmxext (uint8_t *src, int stride); 00055 void ff_pred16x16_dc_sse2 (uint8_t *src, int stride); 00056 void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride); 00057 void ff_pred16x16_plane_h264_mmx (uint8_t *src, int stride); 00058 void ff_pred16x16_plane_h264_mmx2 (uint8_t *src, int stride); 00059 void ff_pred16x16_plane_h264_sse2 (uint8_t *src, int stride); 00060 void ff_pred16x16_plane_h264_ssse3 (uint8_t *src, int stride); 00061 void ff_pred16x16_plane_rv40_mmx (uint8_t *src, int stride); 00062 void ff_pred16x16_plane_rv40_mmx2 (uint8_t *src, int stride); 00063 void ff_pred16x16_plane_rv40_sse2 (uint8_t *src, int stride); 00064 void ff_pred16x16_plane_rv40_ssse3 (uint8_t *src, int stride); 00065 void ff_pred16x16_plane_svq3_mmx (uint8_t *src, int stride); 00066 void ff_pred16x16_plane_svq3_mmx2 (uint8_t *src, int stride); 00067 void ff_pred16x16_plane_svq3_sse2 (uint8_t *src, int stride); 00068 void ff_pred16x16_plane_svq3_ssse3 (uint8_t *src, int stride); 00069 void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride); 00070 void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride); 00071 void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride); 00072 void ff_pred8x8_top_dc_mmxext (uint8_t *src, int stride); 00073 void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride); 00074 void ff_pred8x8_dc_mmxext (uint8_t *src, int stride); 00075 void ff_pred8x8_vertical_mmx (uint8_t *src, int stride); 00076 void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride); 00077 void ff_pred8x8_horizontal_mmxext (uint8_t *src, int stride); 00078 void ff_pred8x8_horizontal_ssse3 (uint8_t *src, int stride); 00079 void ff_pred8x8_plane_mmx (uint8_t *src, int stride); 00080 void ff_pred8x8_plane_mmx2 (uint8_t *src, int stride); 00081 void ff_pred8x8_plane_sse2 (uint8_t *src, int stride); 00082 void ff_pred8x8_plane_ssse3 (uint8_t *src, int stride); 00083 void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride); 00084 void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride); 00085 void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride); 00086 void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride); 00087 void ff_pred8x8l_top_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); 00088 void ff_pred8x8l_top_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); 00089 void ff_pred8x8l_dc_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); 00090 void ff_pred8x8l_dc_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); 00091 void ff_pred8x8l_horizontal_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); 00092 void ff_pred8x8l_horizontal_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); 00093 void ff_pred8x8l_vertical_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); 00094 void ff_pred8x8l_vertical_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); 00095 void ff_pred8x8l_down_left_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); 00096 void ff_pred8x8l_down_left_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride); 00097 void ff_pred8x8l_down_left_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); 00098 void ff_pred8x8l_down_right_mmxext (uint8_t *src, int has_topleft, int has_topright, int stride); 00099 void ff_pred8x8l_down_right_sse2 (uint8_t *src, int has_topleft, int has_topright, int stride); 00100 void ff_pred8x8l_down_right_ssse3 (uint8_t *src, int has_topleft, int has_topright, int stride); 00101 void ff_pred8x8l_vertical_right_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); 00102 void ff_pred8x8l_vertical_right_sse2(uint8_t *src, int has_topleft, int has_topright, int stride); 00103 void ff_pred8x8l_vertical_right_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); 00104 void ff_pred8x8l_vertical_left_sse2(uint8_t *src, int has_topleft, int has_topright, int stride); 00105 void ff_pred8x8l_vertical_left_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); 00106 void ff_pred8x8l_horizontal_up_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); 00107 void ff_pred8x8l_horizontal_up_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); 00108 void ff_pred8x8l_horizontal_down_mmxext(uint8_t *src, int has_topleft, int has_topright, int stride); 00109 void ff_pred8x8l_horizontal_down_sse2(uint8_t *src, int has_topleft, int has_topright, int stride); 00110 void ff_pred8x8l_horizontal_down_ssse3(uint8_t *src, int has_topleft, int has_topright, int stride); 00111 void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); 00112 void ff_pred4x4_down_left_mmxext (uint8_t *src, const uint8_t *topright, int stride); 00113 void ff_pred4x4_down_right_mmxext (uint8_t *src, const uint8_t *topright, int stride); 00114 void ff_pred4x4_vertical_left_mmxext(uint8_t *src, const uint8_t *topright, int stride); 00115 void ff_pred4x4_vertical_right_mmxext(uint8_t *src, const uint8_t *topright, int stride); 00116 void ff_pred4x4_horizontal_up_mmxext(uint8_t *src, const uint8_t *topright, int stride); 00117 void ff_pred4x4_horizontal_down_mmxext(uint8_t *src, const uint8_t *topright, int stride); 00118 void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); 00119 void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int stride); 00120 void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); 00121 void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); 00122 00123 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) 00124 { 00125 int mm_flags = av_get_cpu_flags(); 00126 00127 #if HAVE_YASM 00128 if (bit_depth == 8) { 00129 if (mm_flags & AV_CPU_FLAG_MMX) { 00130 h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; 00131 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; 00132 h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; 00133 h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; 00134 if (codec_id == CODEC_ID_VP8) { 00135 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; 00136 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; 00137 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; 00138 } else { 00139 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; 00140 if (codec_id == CODEC_ID_SVQ3) { 00141 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; 00142 } else if (codec_id == CODEC_ID_RV40) { 00143 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_mmx; 00144 } else { 00145 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_mmx; 00146 } 00147 } 00148 } 00149 00150 if (mm_flags & AV_CPU_FLAG_MMX2) { 00151 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; 00152 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; 00153 h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; 00154 h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; 00155 h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; 00156 h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; 00157 h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_mmxext; 00158 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_mmxext; 00159 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_mmxext; 00160 h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_mmxext; 00161 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_mmxext; 00162 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_mmxext; 00163 h->pred4x4 [DIAG_DOWN_RIGHT_PRED ] = ff_pred4x4_down_right_mmxext; 00164 h->pred4x4 [VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_mmxext; 00165 h->pred4x4 [HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_mmxext; 00166 h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; 00167 if (codec_id == CODEC_ID_VP8 || codec_id == CODEC_ID_H264) { 00168 h->pred4x4 [DIAG_DOWN_LEFT_PRED] = ff_pred4x4_down_left_mmxext; 00169 } 00170 if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { 00171 h->pred4x4 [VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_mmxext; 00172 } 00173 if (codec_id != CODEC_ID_RV40) { 00174 h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; 00175 } 00176 if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { 00177 h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; 00178 h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; 00179 } 00180 if (codec_id == CODEC_ID_VP8) { 00181 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; 00182 h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext; 00183 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmxext; 00184 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; 00185 h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; 00186 } else { 00187 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; 00188 if (codec_id == CODEC_ID_SVQ3) { 00189 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; 00190 } else if (codec_id == CODEC_ID_RV40) { 00191 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_rv40_mmx2; 00192 } else { 00193 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_h264_mmx2; 00194 } 00195 } 00196 } 00197 00198 if (mm_flags & AV_CPU_FLAG_SSE) { 00199 h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_sse; 00200 } 00201 00202 if (mm_flags & AV_CPU_FLAG_SSE2) { 00203 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_sse2; 00204 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_sse2; 00205 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_sse2; 00206 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_sse2; 00207 h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_sse2; 00208 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_sse2; 00209 if (codec_id == CODEC_ID_VP8) { 00210 h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; 00211 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; 00212 } else { 00213 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_plane_sse2; 00214 if (codec_id == CODEC_ID_SVQ3) { 00215 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; 00216 } else if (codec_id == CODEC_ID_RV40) { 00217 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_sse2; 00218 } else { 00219 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_sse2; 00220 } 00221 } 00222 } 00223 00224 if (mm_flags & AV_CPU_FLAG_SSSE3) { 00225 h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; 00226 h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; 00227 h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; 00228 h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; 00229 h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; 00230 h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; 00231 h->pred8x8l [VERT_PRED ] = ff_pred8x8l_vertical_ssse3; 00232 h->pred8x8l [DIAG_DOWN_LEFT_PRED ] = ff_pred8x8l_down_left_ssse3; 00233 h->pred8x8l [DIAG_DOWN_RIGHT_PRED ] = ff_pred8x8l_down_right_ssse3; 00234 h->pred8x8l [VERT_RIGHT_PRED ] = ff_pred8x8l_vertical_right_ssse3; 00235 h->pred8x8l [VERT_LEFT_PRED ] = ff_pred8x8l_vertical_left_ssse3; 00236 h->pred8x8l [HOR_UP_PRED ] = ff_pred8x8l_horizontal_up_ssse3; 00237 h->pred8x8l [HOR_DOWN_PRED ] = ff_pred8x8l_horizontal_down_ssse3; 00238 if (codec_id == CODEC_ID_VP8) { 00239 h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; 00240 h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; 00241 } else { 00242 h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; 00243 if (codec_id == CODEC_ID_SVQ3) { 00244 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; 00245 } else if (codec_id == CODEC_ID_RV40) { 00246 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_rv40_ssse3; 00247 } else { 00248 h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_h264_ssse3; 00249 } 00250 } 00251 } 00252 } else if (bit_depth == 10) { 00253 if (mm_flags & AV_CPU_FLAG_MMX2) { 00254 h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; 00255 h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; 00256 } 00257 if (mm_flags & AV_CPU_FLAG_SSE2) { 00258 h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_sse2; 00259 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_sse2; 00260 h->pred4x4[VERT_LEFT_PRED ] = ff_pred4x4_vertical_left_10_sse2; 00261 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; 00262 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; 00263 00264 h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; 00265 h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; 00266 } 00267 if (mm_flags & AV_CPU_FLAG_SSSE3) { 00268 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_ssse3; 00269 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_ssse3; 00270 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_ssse3; 00271 } 00272 #if HAVE_AVX 00273 if (mm_flags & AV_CPU_FLAG_AVX) { 00274 h->pred4x4[DIAG_DOWN_LEFT_PRED ] = ff_pred4x4_down_left_10_avx; 00275 h->pred4x4[DIAG_DOWN_RIGHT_PRED] = ff_pred4x4_down_right_10_avx; 00276 h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_avx; 00277 h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_avx; 00278 } 00279 #endif /* HAVE_AVX */ 00280 } 00281 #endif /* HAVE_YASM */ 00282 }