dct-test.c
Go to the documentation of this file.
1 /*
2  * (c) 2001 Fabrice Bellard
3  * 2007 Marc Hoffman <marc.hoffman@analog.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
28 #include "config.h"
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #if HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <math.h>
36 
37 #include "libavutil/cpu.h"
38 #include "libavutil/common.h"
39 #include "libavutil/lfg.h"
40 #include "libavutil/time.h"
41 
42 #include "simple_idct.h"
43 #include "aandcttab.h"
44 #include "faandct.h"
45 #include "faanidct.h"
46 #include "x86/idct_xvid.h"
47 #include "dctref.h"
48 
49 #undef printf
50 
51 // BFIN
54 
55 // ALTIVEC
57 
58 // ARM
64 
66 
67 struct algo {
68  const char *name;
73  int nonspec;
74 };
75 
76 static int cpu_flags;
77 
78 static const struct algo fdct_tab[] = {
79  { "REF-DBL", ff_ref_fdct, NO_PERM },
80  { "FAAN", ff_faandct, NO_PERM },
81  { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM },
82  { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
83 
84 #if HAVE_MMX_INLINE
85  { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },
88 #endif
89 
90 #if HAVE_ALTIVEC
91  { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC },
92 #endif
93 
94 #if ARCH_BFIN
95  { "BFINfdct", ff_bfin_fdct, NO_PERM },
96 #endif
97 
98  { 0 }
99 };
100 
101 static const struct algo idct_tab[] = {
102  { "FAANI", ff_faanidct, NO_PERM },
103  { "REF-DBL", ff_ref_idct, NO_PERM },
104  { "INT", ff_j_rev_dct, MMX_PERM },
105  { "SIMPLE-C", ff_simple_idct_8, NO_PERM },
106 
107 #if HAVE_MMX_INLINE
109  { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 },
110  { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 },
111  { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
112 #endif
113 
114 #if ARCH_BFIN
115  { "BFINidct", ff_bfin_idct, NO_PERM },
116 #endif
117 
118 #if ARCH_ARM
119  { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM },
120  { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM },
121 #endif
122 #if HAVE_ARMV5TE
123  { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE },
124 #endif
125 #if HAVE_ARMV6
126  { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 },
127 #endif
128 #if HAVE_NEON
130 #endif
131 
132 #if ARCH_ALPHA
133  { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM },
134 #endif
135 
136  { 0 }
137 };
138 
139 #define AANSCALE_BITS 12
140 
141 #define NB_ITS 20000
142 #define NB_ITS_SPEED 50000
143 
144 static short idct_mmx_perm[64];
145 
146 static short idct_simple_mmx_perm[64] = {
147  0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
148  0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
149  0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
150  0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
151  0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
152  0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
153  0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
154  0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
155 };
156 
157 static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
158 
159 static void idct_mmx_init(void)
160 {
161  int i;
162 
163  /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
164  for (i = 0; i < 64; i++) {
165  idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
166  }
167 }
168 
169 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
171 
172 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng)
173 {
174  int i, j;
175 
176  memset(block, 0, 64 * sizeof(*block));
177 
178  switch (test) {
179  case 0:
180  for (i = 0; i < 64; i++)
181  block[i] = (av_lfg_get(prng) % 512) - 256;
182  if (is_idct) {
183  ff_ref_fdct(block);
184  for (i = 0; i < 64; i++)
185  block[i] >>= 3;
186  }
187  break;
188  case 1:
189  j = av_lfg_get(prng) % 10 + 1;
190  for (i = 0; i < j; i++)
191  block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
192  break;
193  case 2:
194  block[ 0] = av_lfg_get(prng) % 4096 - 2048;
195  block[63] = (block[0] & 1) ^ 1;
196  break;
197  }
198 }
199 
200 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
201 {
202  int i;
203 
204  if (perm == MMX_PERM) {
205  for (i = 0; i < 64; i++)
206  dst[idct_mmx_perm[i]] = src[i];
207  } else if (perm == MMX_SIMPLE_PERM) {
208  for (i = 0; i < 64; i++)
209  dst[idct_simple_mmx_perm[i]] = src[i];
210  } else if (perm == SSE2_PERM) {
211  for (i = 0; i < 64; i++)
212  dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
213  } else if (perm == PARTTRANS_PERM) {
214  for (i = 0; i < 64; i++)
215  dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
216  } else {
217  for (i = 0; i < 64; i++)
218  dst[i] = src[i];
219  }
220 }
221 
222 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
223 {
224  void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
225  int it, i, scale;
226  int err_inf, v;
227  int64_t err2, ti, ti1, it1, err_sum = 0;
228  int64_t sysErr[64], sysErrMax = 0;
229  int maxout = 0;
230  int blockSumErrMax = 0, blockSumErr;
231  AVLFG prng;
232  double omse, ome;
233  int spec_err;
234 
235  av_lfg_init(&prng, 1);
236 
237  err_inf = 0;
238  err2 = 0;
239  for (i = 0; i < 64; i++)
240  sysErr[i] = 0;
241  for (it = 0; it < NB_ITS; it++) {
242  init_block(block1, test, is_idct, &prng);
243  permute(block, block1, dct->format);
244 
245  dct->func(block);
246  emms_c();
247 
248  if (dct->format == SCALE_PERM) {
249  for (i = 0; i < 64; i++) {
250  scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
251  block[i] = (block[i] * scale) >> AANSCALE_BITS;
252  }
253  }
254 
255  ref(block1);
256 
257  blockSumErr = 0;
258  for (i = 0; i < 64; i++) {
259  int err = block[i] - block1[i];
260  err_sum += err;
261  v = abs(err);
262  if (v > err_inf)
263  err_inf = v;
264  err2 += v * v;
265  sysErr[i] += block[i] - block1[i];
266  blockSumErr += v;
267  if (abs(block[i]) > maxout)
268  maxout = abs(block[i]);
269  }
270  if (blockSumErrMax < blockSumErr)
271  blockSumErrMax = blockSumErr;
272  }
273  for (i = 0; i < 64; i++)
274  sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
275 
276  for (i = 0; i < 64; i++) {
277  if (i % 8 == 0)
278  printf("\n");
279  printf("%7d ", (int) sysErr[i]);
280  }
281  printf("\n");
282 
283  omse = (double) err2 / NB_ITS / 64;
284  ome = (double) err_sum / NB_ITS / 64;
285 
286  spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
287 
288  printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
289  is_idct ? "IDCT" : "DCT", dct->name, err_inf,
290  omse, ome, (double) sysErrMax / NB_ITS,
291  maxout, blockSumErrMax);
292 
293  if (spec_err && !dct->nonspec)
294  return 1;
295 
296  if (!speed)
297  return 0;
298 
299  /* speed test */
300  init_block(block, test, is_idct, &prng);
301  permute(block1, block, dct->format);
302 
303  ti = av_gettime();
304  it1 = 0;
305  do {
306  for (it = 0; it < NB_ITS_SPEED; it++) {
307  memcpy(block, block1, sizeof(block));
308  dct->func(block);
309  }
310  it1 += NB_ITS_SPEED;
311  ti1 = av_gettime() - ti;
312  } while (ti1 < 1000000);
313  emms_c();
314 
315  printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
316  (double) it1 * 1000.0 / (double) ti1);
317 
318  return 0;
319 }
320 
323 
324 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
325 {
326  static int init;
327  static double c8[8][8];
328  static double c4[4][4];
329  double block1[64], block2[64], block3[64];
330  double s, sum, v;
331  int i, j, k;
332 
333  if (!init) {
334  init = 1;
335 
336  for (i = 0; i < 8; i++) {
337  sum = 0;
338  for (j = 0; j < 8; j++) {
339  s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
340  c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
341  sum += c8[i][j] * c8[i][j];
342  }
343  }
344 
345  for (i = 0; i < 4; i++) {
346  sum = 0;
347  for (j = 0; j < 4; j++) {
348  s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
349  c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
350  sum += c4[i][j] * c4[i][j];
351  }
352  }
353  }
354 
355  /* butterfly */
356  s = 0.5 * sqrt(2.0);
357  for (i = 0; i < 4; i++) {
358  for (j = 0; j < 8; j++) {
359  block1[8 * (2 * i) + j] =
360  (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
361  block1[8 * (2 * i + 1) + j] =
362  (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
363  }
364  }
365 
366  /* idct8 on lines */
367  for (i = 0; i < 8; i++) {
368  for (j = 0; j < 8; j++) {
369  sum = 0;
370  for (k = 0; k < 8; k++)
371  sum += c8[k][j] * block1[8 * i + k];
372  block2[8 * i + j] = sum;
373  }
374  }
375 
376  /* idct4 */
377  for (i = 0; i < 8; i++) {
378  for (j = 0; j < 4; j++) {
379  /* top */
380  sum = 0;
381  for (k = 0; k < 4; k++)
382  sum += c4[k][j] * block2[8 * (2 * k) + i];
383  block3[8 * (2 * j) + i] = sum;
384 
385  /* bottom */
386  sum = 0;
387  for (k = 0; k < 4; k++)
388  sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
389  block3[8 * (2 * j + 1) + i] = sum;
390  }
391  }
392 
393  /* clamp and store the result */
394  for (i = 0; i < 8; i++) {
395  for (j = 0; j < 8; j++) {
396  v = block3[8 * i + j];
397  if (v < 0) v = 0;
398  else if (v > 255) v = 255;
399  dest[i * linesize + j] = (int) rint(v);
400  }
401  }
402 }
403 
404 static void idct248_error(const char *name,
405  void (*idct248_put)(uint8_t *dest, int line_size,
406  int16_t *block),
407  int speed)
408 {
409  int it, i, it1, ti, ti1, err_max, v;
410  AVLFG prng;
411 
412  av_lfg_init(&prng, 1);
413 
414  /* just one test to see if code is correct (precision is less
415  important here) */
416  err_max = 0;
417  for (it = 0; it < NB_ITS; it++) {
418  /* XXX: use forward transform to generate values */
419  for (i = 0; i < 64; i++)
420  block1[i] = av_lfg_get(&prng) % 256 - 128;
421  block1[0] += 1024;
422 
423  for (i = 0; i < 64; i++)
424  block[i] = block1[i];
425  idct248_ref(img_dest1, 8, block);
426 
427  for (i = 0; i < 64; i++)
428  block[i] = block1[i];
429  idct248_put(img_dest, 8, block);
430 
431  for (i = 0; i < 64; i++) {
432  v = abs((int) img_dest[i] - (int) img_dest1[i]);
433  if (v == 255)
434  printf("%d %d\n", img_dest[i], img_dest1[i]);
435  if (v > err_max)
436  err_max = v;
437  }
438  }
439  printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
440 
441  if (!speed)
442  return;
443 
444  ti = av_gettime();
445  it1 = 0;
446  do {
447  for (it = 0; it < NB_ITS_SPEED; it++) {
448  for (i = 0; i < 64; i++)
449  block[i] = block1[i];
450  idct248_put(img_dest, 8, block);
451  }
452  it1 += NB_ITS_SPEED;
453  ti1 = av_gettime() - ti;
454  } while (ti1 < 1000000);
455  emms_c();
456 
457  printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
458  (double) it1 * 1000.0 / (double) ti1);
459 }
460 
461 static void help(void)
462 {
463  printf("dct-test [-i] [<test-number>]\n"
464  "test-number 0 -> test with random matrixes\n"
465  " 1 -> test with random sparse matrixes\n"
466  " 2 -> do 3. test from mpeg4 std\n"
467  "-i test IDCT implementations\n"
468  "-4 test IDCT248 implementations\n"
469  "-t speed test\n");
470 }
471 
472 #if !HAVE_GETOPT
473 #include "compat/getopt.c"
474 #endif
475 
476 int main(int argc, char **argv)
477 {
478  int test_idct = 0, test_248_dct = 0;
479  int c, i;
480  int test = 1;
481  int speed = 0;
482  int err = 0;
483 
485 
486  ff_ref_dct_init();
487  idct_mmx_init();
488 
489  for (;;) {
490  c = getopt(argc, argv, "ih4t");
491  if (c == -1)
492  break;
493  switch (c) {
494  case 'i':
495  test_idct = 1;
496  break;
497  case '4':
498  test_248_dct = 1;
499  break;
500  case 't':
501  speed = 1;
502  break;
503  default:
504  case 'h':
505  help();
506  return 0;
507  }
508  }
509 
510  if (optind < argc)
511  test = atoi(argv[optind]);
512 
513  printf("Libav DCT/IDCT test\n");
514 
515  if (test_248_dct) {
516  idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
517  } else {
518  const struct algo *algos = test_idct ? idct_tab : fdct_tab;
519  for (i = 0; algos[i].name; i++)
520  if (!(~cpu_flags & algos[i].mm_support)) {
521  err |= dct_error(&algos[i], test, test_idct, speed);
522  }
523  }
524 
525  return err;
526 }