Libav 0.7.1
libavcodec/snow.c
Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of Libav.
00005  *
00006  * Libav is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * Libav is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with Libav; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 #include "libavutil/intmath.h"
00022 #include "avcodec.h"
00023 #include "dsputil.h"
00024 #include "dwt.h"
00025 #include "snow.h"
00026 
00027 #include "rangecoder.h"
00028 #include "mathops.h"
00029 
00030 #include "mpegvideo.h"
00031 #include "h263.h"
00032 
00033 #undef NDEBUG
00034 #include <assert.h>
00035 
/*
 * 3-level lookup table (results -1, 0, +1) with a dead zone:
 * indices 0 and 1 give 0, indices 2..127 give +1, indices 128..254 give -1
 * and index 255 gives 0.
 * NOTE(review): presumably indexed with a signed difference reduced to 8 bits
 * (e.g. `d & 0xFF`), so the upper half stands for negative inputs - confirm
 * against the call sites, which are outside this view.
 */
00036 static const int8_t quant3[256]={
00037  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00038  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00039  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00040  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00041  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00042  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00043  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00044  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00045 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00046 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00047 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00048 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00049 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00050 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00051 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00052 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
00053 };
/*
 * 3-level lookup table without a dead zone: index 0 gives 0,
 * indices 1..127 give +1 and indices 128..255 give -1.
 * NOTE(review): presumably the sign of a difference that was reduced to
 * 8 bits before the lookup - confirm against the call sites.
 */
00054 static const int8_t quant3b[256]={
00055  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00056  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00057  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00058  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00059  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00060  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00061  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00062  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00063 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00064 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00065 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00066 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00067 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00068 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00069 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00070 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
00071 };
/*
 * 3-level lookup table keyed on the parity of the index rather than on its
 * half: indices 0 and 1 give 0, every other even index gives +1 and every
 * other odd index gives -1.
 * NOTE(review): the input encoding this table expects (sign apparently held
 * in the low bit) is not visible in this chunk - confirm at the call sites.
 */
00072 static const int8_t quant3bA[256]={
00073  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00074  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00075  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00076  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00077  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00078  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00079  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00080  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00081  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00082  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00083  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00084  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00085  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00086  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00087  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00088  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
00089 };
/*
 * 5-level lookup table (results -2..+2): index 0 gives 0, indices 1..3
 * give 1, indices 4..127 give 2; the upper half mirrors this with negative
 * results (128..252 give -2, 253..255 give -1).
 * NOTE(review): presumably indexed with a signed difference reduced to
 * 8 bits - confirm against the call sites.
 */
00090 static const int8_t quant5[256]={
00091  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00092  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00093  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00094  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00095  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00096  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00097  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00098  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00099 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
00107 };
/*
 * 7-level lookup table (results -3..+3): index 0 gives 0, indices 1..2
 * give 1, 3..39 give 2 and 40..127 give 3; the upper half (128..255) holds
 * the mirrored negative results.
 * NOTE(review): presumably indexed with a signed difference reduced to
 * 8 bits - confirm against the call sites.
 */
00108 static const int8_t quant7[256]={
00109  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00110  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
00111  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
00112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00113  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00114  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00115  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00116  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
00122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
00123 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
00124 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
00125 };
/*
 * 9-level lookup table (results -4..+4): index 0 gives 0, indices 1..127
 * give positive results that never decrease with the index, and indices
 * 128..255 give the negative results.
 * NOTE(review): presumably indexed with a signed difference reduced to
 * 8 bits - confirm against the call sites.
 */
00126 static const int8_t quant9[256]={
00127  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
00128  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00131  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00132  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00133  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00134  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
00142 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
00143 };
/*
 * 11-level lookup table (results -5..+5): index 0 gives 0, indices 1..127
 * give positive results that never decrease with the index, and indices
 * 128..255 give the negative results.
 * NOTE(review): presumably indexed with a signed difference reduced to
 * 8 bits - confirm against the call sites.
 */
00144 static const int8_t quant11[256]={
00145  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
00146  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
00147  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00149  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00150  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00151  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00152  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
00159 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
00160 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
00161 };
/*
 * 13-level lookup table (results -6..+6): index 0 gives 0, indices 1..127
 * give positive results that never decrease with the index, and indices
 * 128..255 give the negative results.
 * NOTE(review): presumably indexed with a signed difference reduced to
 * 8 bits - confirm against the call sites.
 */
00162 static const int8_t quant13[256]={
00163  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
00164  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00165  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
00166  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00167  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00168  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00169  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00170  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
00171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00173 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
00175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
00176 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00177 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
00178 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
00179 };
00180 
/*
 * Window weight tables for 32x32 and 16x16 blocks, stored row by row
 * (obmc32[y*32 + x], obmc16[y*16 + x]).
 *
 * These are the "64*linear" windows. The listing used to carry a cubic and
 * a cosine variant as well, but they sat in the `#if 0` and `#else` arms of
 * a conditional whose middle arm was `#elif 1`, so they could never be
 * compiled; only the tables that were actually built are kept.
 *
 * Both tables are symmetric in x and y. The four entries that are half a
 * block apart add up to 256, e.g.
 * obmc16[0] + obmc16[8] + obmc16[8*16] + obmc16[8*16 + 8] == 256.
 * NOTE(review): "obmc" presumably stands for overlapped block motion
 * compensation; the code that applies the weights is outside this view.
 */
static const uint8_t obmc32[1024]={
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 //error:0.000020
};
static const uint8_t obmc16[256]={
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
//error:0.000015
};
00347 
/*
 * Window weights for 8x8 blocks, stored row by row (obmc8[y*8 + x]).
 * Every entry is 4*a*b with a,b taken from {1,3,5,7,7,5,3,1}, so the four
 * entries half a block apart - (y,x), (y,x+4), (y+4,x), (y+4,x+4) - always
 * add up to 256.
 */
00348 //linear *64
00349 static const uint8_t obmc8[64]={
00350   4, 12, 20, 28, 28, 20, 12,  4,
00351  12, 36, 60, 84, 84, 60, 36, 12,
00352  20, 60,100,140,140,100, 60, 20,
00353  28, 84,140,196,196,140, 84, 28,
00354  28, 84,140,196,196,140, 84, 28,
00355  20, 60,100,140,140,100, 60, 20,
00356  12, 36, 60, 84, 84, 60, 36, 12,
00357   4, 12, 20, 28, 28, 20, 12,  4,
00358 //error:0.000000
00359 };
00360 
/*
 * Window weights for 4x4 blocks, stored row by row (obmc4[y*4 + x]).
 * Every entry is 16*a*b with a,b taken from {1,3,3,1}, so the four entries
 * half a block apart always add up to 256.
 */
00361 //linear *64
00362 static const uint8_t obmc4[16]={
00363  16, 48, 48, 16,
00364  48,144,144, 48,
00365  48,144,144, 48,
00366  16, 48, 48, 16,
00367 //error:0.000000
00368 };
00369 
/*
 * Window tables ordered from largest to smallest block: entry k points at
 * the table for (32 >> k) x (32 >> k) blocks.
 */
00370 static const uint8_t * const obmc_tab[4]={
00371     obmc32, obmc16, obmc8, obmc4
00372 };
00373 
/*
 * Mutable MAX_REF_FRAMES x MAX_REF_FRAMES table of ints; it has no
 * initialiser here, so it starts zeroed and is filled in elsewhere.
 * NOTE(review): presumably a motion-vector scale factor per pair of
 * reference indices - confirm where it is populated.
 */
00374 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
00375 
/*
 * Per-block record: a 16-bit (mx, my) pair, a reference index, three colour
 * bytes, a type bit mask (BLOCK_* flags below) and a level.
 * NOTE(review): the member meanings below are inferred from the names; the
 * original member comments are not present in this listing.
 */
00376 typedef struct BlockNode{
00377     int16_t mx;       /* presumably motion vector, horizontal component */
00378     int16_t my;       /* presumably motion vector, vertical component */
00379     uint8_t ref;      /* presumably index of the reference frame */
00380     uint8_t color[3]; /* three 8-bit values; null_block uses 128 for each */
00381     uint8_t type;     /* bit mask built from the BLOCK_* flags below */
00382 //#define TYPE_SPLIT    1
00383 #define BLOCK_INTRA   1
00384 #define BLOCK_OPT     2
00385 //#define TYPE_NOCOLOR  4
00386     uint8_t level; //FIXME merge into type?
00387 }BlockNode;
00388 
00389 static const BlockNode null_block= { //FIXME add border maybe
00390     .color= {128,128,128},
00391     .mx= 0,
00392     .my= 0,
00393     .ref= 0,
00394     .type= 0,
00395     .level= 0,
00396 };
00397 
/* Block geometry and filter limits. MB_SIZE expands to 1<<4, i.e. 16. */
00398 #define LOG2_MB_SIZE 4
00399 #define MB_SIZE (1<<LOG2_MB_SIZE)
00400 #define ENCODER_EXTRA_BITS 4
/* Plane.hcoeff / Plane.last_hcoeff hold HTAPS_MAX/2 coefficients each. */
00401 #define HTAPS_MAX 8
00402 
/*
 * Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
 * NOTE(review): presumably one sparse (position, value) entry of a decoded
 * coefficient row, as stored through SubBand.x_coeff - confirm in the
 * unpacking code.
 */
00403 typedef struct x_and_coeff{
00404     int16_t x;
00405     uint16_t coeff;
00406 } x_and_coeff;
00407 
/*
 * Description of one sub-band: its dimensions, quantiser log, the buffers
 * holding its coefficients and the adaptive coder state used for it.
 * NOTE(review): member meanings marked "presumably" are inferred from the
 * names; the original member comments are not present in this listing.
 */
00408 typedef struct SubBand{
00409     int level;
00410     int stride;
00411     int width;
00412     int height;
00413     int qlog;         /* presumably the quantiser on a log scale - confirm */
00414     DWTELEM *buf;
00415     IDWTELEM *ibuf;
00416     int buf_x_offset;
00417     int buf_y_offset;
00418     int stride_line;  /* presumably a stride counted in lines - confirm */
00419     x_and_coeff * x_coeff;
00420     struct SubBand *parent;
    /* 7+512 context rows of 32 bytes; 32 is also the span of state offsets
     * (0..31) touched by put_symbol()/get_symbol(). */
00421     uint8_t state[/*7*2*/ 7 + 512][32];
00422 }SubBand;
00423 
/*
 * Per-plane data: plane dimensions, the sub-bands of every decomposition
 * level (4 per level), and a set of filter parameters (htaps, hcoeff,
 * diag_mc, fast_mc) together with "last_" copies of three of them.
 * NOTE(review): the "last_" members presumably hold the values from the
 * previous frame so changes can be detected - confirm in the header code.
 */
00424 typedef struct Plane{
00425     int width;
00426     int height;
00427     SubBand band[MAX_DECOMPOSITIONS][4];
00428 
00429     int htaps;
00430     int8_t hcoeff[HTAPS_MAX/2];
00431     int diag_mc;
00432     int fast_mc;
00433 
00434     int last_htaps;
00435     int8_t last_hcoeff[HTAPS_MAX/2];
00436     int last_diag_mc;
00437 }Plane;
00438 
/*
 * Codec state: the range coder and DSP/DWT helpers, the picture buffers,
 * adaptive coder state for headers and blocks, the stream parameters (many
 * of them paired with a "last_" copy), per-plane data, the block array and
 * a motion-estimation cache.
 * NOTE(review): which members are encoder-only or decoder-only cannot be
 * told from this chunk; member notes below are hedged accordingly.
 */
00439 typedef struct SnowContext{
00440 
00441     AVCodecContext *avctx;
00442     RangeCoder c;
00443     DSPContext dsp;
00444     DWTContext dwt;
00445     AVFrame new_picture;
00446     AVFrame input_picture;
00447     AVFrame current_picture;
00448     AVFrame last_picture[MAX_REF_FRAMES];
00449     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
00450     AVFrame mconly_picture;
00451 //     uint8_t q_context[16];
    /* 32 bytes: the same span of state offsets (0..31) that
     * put_symbol()/get_symbol() index. */
00452     uint8_t header_state[32];
00453     uint8_t block_state[128 + 32*128];
00454     int keyframe;
00455     int always_reset;
00456     int version;
00457     int spatial_decomposition_type;
00458     int last_spatial_decomposition_type;
00459     int temporal_decomposition_type;
00460     int spatial_decomposition_count;
00461     int last_spatial_decomposition_count;
00462     int temporal_decomposition_count;
00463     int max_ref_frames;
00464     int ref_frames;
00465     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
00466     uint32_t *ref_scores[MAX_REF_FRAMES];
00467     DWTELEM *spatial_dwt_buffer;
00468     IDWTELEM *spatial_idwt_buffer;
00469     int colorspace_type;
00470     int chroma_h_shift;
00471     int chroma_v_shift;
00472     int spatial_scalability;
00473     int qlog;
00474     int last_qlog;
00475     int lambda;
00476     int lambda2;
00477     int pass1_rc;
00478     int mv_scale;
00479     int last_mv_scale;
00480     int qbias;
00481     int last_qbias;
00482 #define QBIAS_SHIFT 3
00483     int b_width;
00484     int b_height;
00485     int block_max_depth;
00486     int last_block_max_depth;
00487     Plane plane[MAX_PLANES];
00488     BlockNode *block;
00489 #define ME_CACHE_SIZE 1024
00490     int me_cache[ME_CACHE_SIZE];
00491     int me_cache_generation;
00492     slice_buffer sb;
00493 
00494     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
00495 
00496     uint8_t *scratchbuf;
00497 }SnowContext;
00498 
00499 #ifdef __sgi
00500 // Avoid a name clash on SGI IRIX
00501 #undef qexp
00502 #endif
/* Shift that goes with the qexp[] values; depends on FRAC_BITS, which is
 * defined outside this file section. */
00503 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* Mutable table of QROOT bytes with no initialiser here, so it is filled in
 * elsewhere. NOTE(review): presumably one step of the quantiser exponential
 * per entry - confirm in the init code. */
00504 static uint8_t qexp[QROOT];
00505 
00506 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
00507     int i;
00508 
00509     if(v){
00510         const int a= FFABS(v);
00511         const int e= av_log2(a);
00512 #if 1
00513         const int el= FFMIN(e, 10);
00514         put_rac(c, state+0, 0);
00515 
00516         for(i=0; i<el; i++){
00517             put_rac(c, state+1+i, 1);  //1..10
00518         }
00519         for(; i<e; i++){
00520             put_rac(c, state+1+9, 1);  //1..10
00521         }
00522         put_rac(c, state+1+FFMIN(i,9), 0);
00523 
00524         for(i=e-1; i>=el; i--){
00525             put_rac(c, state+22+9, (a>>i)&1); //22..31
00526         }
00527         for(; i>=0; i--){
00528             put_rac(c, state+22+i, (a>>i)&1); //22..31
00529         }
00530 
00531         if(is_signed)
00532             put_rac(c, state+11 + el, v < 0); //11..21
00533 #else
00534 
00535         put_rac(c, state+0, 0);
00536         if(e<=9){
00537             for(i=0; i<e; i++){
00538                 put_rac(c, state+1+i, 1);  //1..10
00539             }
00540             put_rac(c, state+1+i, 0);
00541 
00542             for(i=e-1; i>=0; i--){
00543                 put_rac(c, state+22+i, (a>>i)&1); //22..31
00544             }
00545 
00546             if(is_signed)
00547                 put_rac(c, state+11 + e, v < 0); //11..21
00548         }else{
00549             for(i=0; i<e; i++){
00550                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
00551             }
00552             put_rac(c, state+1+9, 0);
00553 
00554             for(i=e-1; i>=0; i--){
00555                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
00556             }
00557 
00558             if(is_signed)
00559                 put_rac(c, state+11 + 10, v < 0); //11..21
00560         }
00561 #endif /* 1 */
00562     }else{
00563         put_rac(c, state+0, 1);
00564     }
00565 }
00566 
00567 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
00568     if(get_rac(c, state+0))
00569         return 0;
00570     else{
00571         int i, e, a;
00572         e= 0;
00573         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
00574             e++;
00575         }
00576 
00577         a= 1;
00578         for(i=e-1; i>=0; i--){
00579             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
00580         }
00581 
00582         e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
00583         return (a^e)-e;
00584     }
00585 }
00586 
00587 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
00588     int i;
00589     int r= log2>=0 ? 1<<log2 : 1;
00590 
00591     assert(v>=0);
00592     assert(log2>=-4);
00593 
00594     while(v >= r){
00595         put_rac(c, state+4+log2, 1);
00596         v -= r;
00597         log2++;
00598         if(log2>0) r+=r;
00599     }
00600     put_rac(c, state+4+log2, 0);
00601 
00602     for(i=log2-1; i>=0; i--){
00603         put_rac(c, state+31-i, (v>>i)&1);
00604     }
00605 }
00606 
00607 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
00608     int i;
00609     int r= log2>=0 ? 1<<log2 : 1;
00610     int v=0;
00611 
00612     assert(log2>=-4);
00613 
00614     while(get_rac(c, state+4+log2)){
00615         v+= r;
00616         log2++;
00617         if(log2>0) r+=r;
00618     }
00619 
00620     for(i=log2-1; i>=0; i--){
00621         v+= get_rac(c, state+31-i)<<i;
00622     }
00623 
00624     return v;
00625 }
00626 
/**
 * Entropy-decode one subband into the sparse list b->x_coeff.
 *
 * Each non-zero coefficient is stored as an (x, coeff) pair, row after row.
 * Every row is closed with an entry whose x is w+1, and one extra such end
 * marker is appended after the last row. The stored coeff is twice the
 * decoded magnitude plus one extra decoded bit in bit 0; the consumer in
 * decode_subband_slice_buffered() treats bit 0 as the sign and coeff>>1 as
 * the magnitude.
 *
 * For every position the already decoded neighbours are looked up: left (l),
 * top-left (lt), top (t) and top-right (rt) from the previous row of this
 * band, and p from the parent band at x>>1. If any of them is non-zero, a
 * significance bit is read with a context derived from them. Otherwise the
 * position is covered by run-length coding of zeros.
 *
 * @param s           decoder context, provides the range coder s->c
 * @param b           subband to decode; b->x_coeff receives the output
 * @param parent      parent subband whose x_coeff list was filled before, or NULL
 * @param orientation not referenced by this function
 */
00627 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
00628     const int w= b->width;
00629     const int h= b->height;
00630     int x,y;
00631 
00632     int run, runs;
00633     x_and_coeff *xc= b->x_coeff;
00634     x_and_coeff *prev_xc= NULL;
00635     x_and_coeff *prev2_xc= xc;
00636     x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
00637     x_and_coeff *prev_parent_xc= parent_xc;
00638 
          /* number of coded runs, then the first run; INT_MAX means no run ends anymore */
00639     runs= get_symbol2(&s->c, b->state[30], 0);
00640     if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
00641     else           run= INT_MAX;
00642 
00643     for(y=0; y<h; y++){
00644         int v=0;
00645         int lt=0, t=0, rt=0;
00646 
              /* prev_xc points at the entries of the previous row (set at the end of each row) */
00647         if(y && prev_xc->x == 0){
00648             rt= prev_xc->coeff;
00649         }
00650         for(x=0; x<w; x++){
00651             int p=0;
00652             const int l= v;
00653 
00654             lt= t; t= rt;
00655 
00656             if(y){
                      /* advance to the first previous-row entry right of x and fetch the top-right value */
00657                 if(prev_xc->x <= x)
00658                     prev_xc++;
00659                 if(prev_xc->x == x + 1)
00660                     rt= prev_xc->coeff;
00661                 else
00662                     rt=0;
00663             }
00664             if(parent_xc){
                      /* the parent band is addressed at half the horizontal position */
00665                 if(x>>1 > parent_xc->x){
00666                     parent_xc++;
00667                 }
00668                 if(x>>1 == parent_xc->x){
00669                     p= parent_xc->coeff;
00670                 }
00671             }
00672             if(/*ll|*/l|lt|t|rt|p){
00673                 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
00674 
                      /* significance bit, then magnitude and the bit stored in bit 0 */
00675                 v=get_rac(&s->c, &b->state[0][context]);
00676                 if(v){
00677                     v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
00678                     v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
00679 
00680                     xc->x=x;
00681                     (xc++)->coeff= v;
00682                 }
00683             }else{
00684                 if(!run){
                          /* the current run ended: fetch the next run length and decode a coefficient here */
00685                     if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
00686                     else           run= INT_MAX;
00687                     v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
00688                     v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
00689 
00690                     xc->x=x;
00691                     (xc++)->coeff= v;
00692                 }else{
00693                     int max_run;
00694                     run--;
00695                     v=0;
00696 
                          /* skip ahead in one step, limited by the row end or the next previous-row
                           * entry and by the current parent entry */
00697                     if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
00698                     else  max_run= FFMIN(run, w-x-1);
00699                     if(parent_xc)
00700                         max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
00701                     x+= max_run;
00702                     run-= max_run;
00703                 }
00704             }
00705         }
00706         (xc++)->x= w+1; //end marker
00707         prev_xc= prev2_xc;
00708         prev2_xc= xc;
00709 
00710         if(parent_xc){
                  /* one parent row serves two rows of this band: after an odd row move past the
                   * parent's end marker, after an even row rewind to the start of the same parent row */
00711             if(y&1){
00712                 while(parent_xc->x != parent->width+1)
00713                     parent_xc++;
00714                 parent_xc++;
00715                 prev_parent_xc= parent_xc;
00716             }else{
00717                 parent_xc= prev_parent_xc;
00718             }
00719         }
00720     }
00721 
00722     (xc++)->x= w+1; //end marker
00723 }
00724 
00725 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
00726     const int w= b->width;
00727     int y;
00728     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
00729     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
00730     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
00731     int new_index = 0;
00732 
00733     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
00734         qadd= 0;
00735         qmul= 1<<QEXPSHIFT;
00736     }
00737 
00738     /* If we are on the second or later slice, restore our index. */
00739     if (start_y != 0)
00740         new_index = save_state[0];
00741 
00742 
00743     for(y=start_y; y<h; y++){
00744         int x = 0;
00745         int v;
00746         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
00747         memset(line, 0, b->width*sizeof(IDWTELEM));
00748         v = b->x_coeff[new_index].coeff;
00749         x = b->x_coeff[new_index++].x;
00750         while(x < w){
00751             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
00752             register int u= -(v&1);
00753             line[x] = (t^u) - u;
00754 
00755             v = b->x_coeff[new_index].coeff;
00756             x = b->x_coeff[new_index++].x;
00757         }
00758     }
00759 
00760     /* Save our variables for the next slice. */
00761     save_state[0] = new_index;
00762 
00763     return;
00764 }
00765 
00766 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
00767     int plane_index, level, orientation;
00768 
00769     for(plane_index=0; plane_index<3; plane_index++){
00770         for(level=0; level<MAX_DECOMPOSITIONS; level++){
00771             for(orientation=level ? 1:0; orientation<4; orientation++){
00772                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
00773             }
00774         }
00775     }
00776     memset(s->header_state, MID_STATE, sizeof(s->header_state));
00777     memset(s->block_state, MID_STATE, sizeof(s->block_state));
00778 }
00779 
00780 static int alloc_blocks(SnowContext *s){
00781     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
00782     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
00783 
00784     s->b_width = w;
00785     s->b_height= h;
00786 
00787     av_free(s->block);
00788     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
00789     return 0;
00790 }
00791 
00792 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
00793     uint8_t *bytestream= d->bytestream;
00794     uint8_t *bytestream_start= d->bytestream_start;
00795     *d= *s;
00796     d->bytestream= bytestream;
00797     d->bytestream_start= bytestream_start;
00798 }
00799 
00800 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
00801     const int w= s->b_width << s->block_max_depth;
00802     const int rem_depth= s->block_max_depth - level;
00803     const int index= (x + y*w) << rem_depth;
00804     const int block_w= 1<<rem_depth;
00805     BlockNode block;
00806     int i,j;
00807 
00808     block.color[0]= l;
00809     block.color[1]= cb;
00810     block.color[2]= cr;
00811     block.mx= mx;
00812     block.my= my;
00813     block.ref= ref;
00814     block.type= type;
00815     block.level= level;
00816 
00817     for(j=0; j<block_w; j++){
00818         for(i=0; i<block_w; i++){
00819             s->block[index + i + j*w]= block;
00820         }
00821     }
00822 }
00823 
00824 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
00825     const int offset[3]= {
00826           y*c->  stride + x,
00827         ((y*c->uvstride + x)>>1),
00828         ((y*c->uvstride + x)>>1),
00829     };
00830     int i;
00831     for(i=0; i<3; i++){
00832         c->src[0][i]= src [i];
00833         c->ref[0][i]= ref [i] + offset[i];
00834     }
00835     assert(!ref_index);
00836 }
00837 
00838 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
00839                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
00840     if(s->ref_frames == 1){
00841         *mx = mid_pred(left->mx, top->mx, tr->mx);
00842         *my = mid_pred(left->my, top->my, tr->my);
00843     }else{
00844         const int *scale = scale_mv_ref[ref];
00845         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
00846                        (top ->mx * scale[top ->ref] + 128) >>8,
00847                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
00848         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
00849                        (top ->my * scale[top ->ref] + 128) >>8,
00850                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
00851     }
00852 }
00853 
00854 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
00855     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
00856         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
00857     }else{
00858         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
00859     }
00860 }
00861 
00862 static void decode_q_branch(SnowContext *s, int level, int x, int y){
00863     const int w= s->b_width << s->block_max_depth;
00864     const int rem_depth= s->block_max_depth - level;
00865     const int index= (x + y*w) << rem_depth;
00866     int trx= (x+1)<<rem_depth;
00867     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
00868     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
00869     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
00870     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
00871     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
00872 
00873     if(s->keyframe){
00874         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
00875         return;
00876     }
00877 
00878     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
00879         int type, mx, my;
00880         int l = left->color[0];
00881         int cb= left->color[1];
00882         int cr= left->color[2];
00883         int ref = 0;
00884         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
00885         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
00886         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
00887 
00888         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
00889 
00890         if(type){
00891             pred_mv(s, &mx, &my, 0, left, top, tr);
00892             l += get_symbol(&s->c, &s->block_state[32], 1);
00893             cb+= get_symbol(&s->c, &s->block_state[64], 1);
00894             cr+= get_symbol(&s->c, &s->block_state[96], 1);
00895         }else{
00896             if(s->ref_frames > 1)
00897                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
00898             pred_mv(s, &mx, &my, ref, left, top, tr);
00899             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
00900             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
00901         }
00902         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
00903     }else{
00904         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
00905         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
00906         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
00907         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
00908     }
00909 }
00910 
00911 static void decode_blocks(SnowContext *s){
00912     int x, y;
00913     int w= s->b_width;
00914     int h= s->b_height;
00915 
00916     for(y=0; y<h; y++){
00917         for(x=0; x<w; x++){
00918             decode_q_branch(s, 0, x, y);
00919         }
00920     }
00921 }
00922 
/**
 * Sub-pel motion compensation of one block.
 *
 * Up to three half-pel planes are built in temporary buffers (horizontally
 * filtered, vertically filtered, and filtered in both directions) and the
 * result is a weighted blend of either two or four of the full-pel/half-pel
 * planes, chosen from the fractional position (dx, dy).
 *
 * If p is NULL or p->fast_mc is set, the fixed 6-tap filter (20, -5, 1) is
 * used, otherwise the 8-tap filter given by p->hcoeff[0..3].
 *
 * @param p      plane parameters (diag_mc, fast_mc, hcoeff are read), may be NULL
 * @param dst    destination block
 * @param src    source; the filters read it starting at this address, the
 *               unfiltered sample for dst[0] lies HTAPS_MAX/2-1 rows and
 *               columns further
 * @param stride line size shared by src, dst and the temporary planes
 * @param b_w    block width
 * @param b_h    block height
 * @param dx,dy  fractional position, each must be < 16
 */
00923 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
          /* weight of the first plane (out of 8) for the two-plane blend, indexed by (dx&7) + 8*(dy&7) */
00924     static const uint8_t weight[64]={
00925     8,7,6,5,4,3,2,1,
00926     7,7,0,0,0,0,0,1,
00927     6,0,6,0,0,0,2,0,
00928     5,0,0,5,0,3,0,0,
00929     4,0,0,0,4,0,0,0,
00930     3,0,0,5,0,3,0,0,
00931     2,0,6,0,0,0,2,0,
00932     1,7,0,0,0,0,0,1,
00933     };
00934 
          /* indexed by dx + 16*dy: the high and low nibble are used as hpel[] indices of the two planes to blend */
00935     static const uint8_t brane[256]={
00936     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
00937     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
00938     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
00939     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
00940     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
00941     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
00942     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
00943     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
00944     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
00945     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
00946     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
00947     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
00948     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
00949     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
00950     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
00951     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
00952     };
00953 
          /* bit mask of filter passes required by each plane index: 1 (tested together with 4 as b&5)
           * horizontal, 2 vertical, 4 both */
00954     static const uint8_t needs[16]={
00955     0,1,0,0,
00956     2,4,2,0,
00957     0,1,0,0,
00958     15
00959     };
00960 
00961     int x, y, b, r, l;
          /* tmpIt keeps unrounded horizontal filter output in rows of 64 entries;
           * tmp2t holds three 8-bit planes and is a variable length array sized by stride */
00962     int16_t tmpIt   [64*(32+HTAPS_MAX)];
00963     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
00964     int16_t *tmpI= tmpIt;
00965     uint8_t *tmp2= tmp2t[0];
00966     const uint8_t *hpel[11];
00967     assert(dx<16 && dy<16);
00968     r= brane[dx + 16*dy]&15;
00969     l= brane[dx + 16*dy]>>4;
00970 
00971     b= needs[l] | needs[r];
          /* without diag_mc all planes are computed and the four-plane blend below is used */
00972     if(p && !p->diag_mc)
00973         b= 15;
00974 
          /* horizontal pass over b_h+HTAPS_MAX-1 rows, so that the later vertical pass on tmpI has its taps */
00975     if(b&5){
00976         for(y=0; y < b_h+HTAPS_MAX-1; y++){
00977             for(x=0; x < b_w; x++){
00978                 int a_1=src[x + HTAPS_MAX/2-4];
00979                 int a0= src[x + HTAPS_MAX/2-3];
00980                 int a1= src[x + HTAPS_MAX/2-2];
00981                 int a2= src[x + HTAPS_MAX/2-1];
00982                 int a3= src[x + HTAPS_MAX/2+0];
00983                 int a4= src[x + HTAPS_MAX/2+1];
00984                 int a5= src[x + HTAPS_MAX/2+2];
00985                 int a6= src[x + HTAPS_MAX/2+3];
00986                 int am=0;
00987                 if(!p || p->fast_mc){
00988                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
00989                     tmpI[x]= am;
00990                     am= (am+16)>>5;
00991                 }else{
00992                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
00993                     tmpI[x]= am;
00994                     am= (am+32)>>6;
00995                 }
00996 
                      /* clamp to 0..255: negative values become 0, values above 255 become ~0, stored as 255 */
00997                 if(am&(~255)) am= ~(am>>31);
00998                 tmp2[x]= am;
00999             }
01000             tmpI+= 64;
01001             tmp2+= stride;
01002             src += stride;
01003         }
              /* y equals the number of rows processed: rewind src to where it was */
01004         src -= stride*y;
01005     }
01006     src += HTAPS_MAX/2 - 1;
01007     tmp2= tmp2t[1];
01008 
          /* vertical pass on the source, one extra column because hpel[6] is this plane shifted by one */
01009     if(b&2){
01010         for(y=0; y < b_h; y++){
01011             for(x=0; x < b_w+1; x++){
01012                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
01013                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
01014                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
01015                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
01016                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
01017                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
01018                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
01019                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
01020                 int am=0;
01021                 if(!p || p->fast_mc)
01022                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
01023                 else
01024                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
01025 
01026                 if(am&(~255)) am= ~(am>>31);
01027                 tmp2[x]= am;
01028             }
01029             src += stride;
01030             tmp2+= stride;
01031         }
01032         src -= stride*y;
01033     }
01034     src += stride*(HTAPS_MAX/2 - 1);
01035     tmp2= tmp2t[2];
01036     tmpI= tmpIt;
          /* vertical pass on the unrounded horizontal output; the larger rounding shift covers both passes */
01037     if(b&4){
01038         for(y=0; y < b_h; y++){
01039             for(x=0; x < b_w; x++){
01040                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
01041                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
01042                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
01043                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
01044                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
01045                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
01046                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
01047                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
01048                 int am=0;
01049                 if(!p || p->fast_mc)
01050                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
01051                 else
01052                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
01053                 if(am&(~255)) am= ~(am>>31);
01054                 tmp2[x]= am;
01055             }
01056             tmpI+= 64;
01057             tmp2+= stride;
01058         }
01059     }
01060 
          /* 3x3 grid of half-pel planes with a row pitch of 4 entries; entries 3 and 7 are never set
           * and none of the index combinations used below reaches them */
01061     hpel[ 0]= src;
01062     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
01063     hpel[ 2]= src + 1;
01064 
01065     hpel[ 4]= tmp2t[1];
01066     hpel[ 5]= tmp2t[2];
01067     hpel[ 6]= tmp2t[1] + 1;
01068 
01069     hpel[ 8]= src + stride;
01070     hpel[ 9]= hpel[1] + stride;
01071     hpel[10]= hpel[8] + 1;
01072 
01073     if(b==15){
              /* blend the four planes surrounding the position with bilinear weights in 1/8 units */
01074         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
01075         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
01076         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
01077         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
01078         dx&=7;
01079         dy&=7;
01080         for(y=0; y < b_h; y++){
01081             for(x=0; x < b_w; x++){
01082                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
01083                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
01084             }
01085             src1+=stride;
01086             src2+=stride;
01087             src3+=stride;
01088             src4+=stride;
01089             dst +=stride;
01090         }
01091     }else{
              /* blend the two planes selected through brane[]; note that the local b below
               * shadows the pass mask b of the enclosing scope */
01092         const uint8_t *src1= hpel[l];
01093         const uint8_t *src2= hpel[r];
01094         int a= weight[((dx&7) + (8*(dy&7)))];
01095         int b= 8-a;
01096         for(y=0; y < b_h; y++){
01097             for(x=0; x < b_w; x++){
01098                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
01099             }
01100             src1+=stride;
01101             src2+=stride;
01102             dst +=stride;
01103         }
01104     }
01105 }
01106 
/*
 * mca(dx, dy, b_w) defines a static function mc_block_hpel<dx><dy><b_w>()
 * that runs mc_block() without plane parameters on a square b_w x b_w block
 * at the fixed fractional position (dx, dy). The source pointer is moved
 * back by HTAPS_MAX/2-1 columns and rows, the same amount mc_block() moves
 * it forward again after its filter passes. The h argument is only checked
 * against b_w by an assert.
 */
01107 #define mca(dx,dy,b_w)\
01108 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
01109     assert(h==b_w);\
01110     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
01111 }
01112 
      /* full-pel and half-pel positions for block sizes 16 and 8 */
01113 mca( 0, 0,16)
01114 mca( 8, 0,16)
01115 mca( 0, 8,16)
01116 mca( 8, 8,16)
01117 mca( 0, 0,8)
01118 mca( 8, 0,8)
01119 mca( 0, 8,8)
01120 mca( 8, 8,8)
01121 
/**
 * Produce the prediction of one block into dst.
 *
 * Intra blocks are filled with block->color[plane_index]. Inter blocks are
 * motion compensated from s->last_picture[block->ref], either with
 * mc_block() or, for supported sizes and positions when fast_mc is set,
 * with the H.264 quarter-pel functions of s->dsp.
 *
 * @param s           codec context
 * @param dst         destination
 * @param tmp         scratch memory used when the reference area crosses the
 *                    picture border
 * @param stride      line size of dst and of the reference picture
 * @param sx,sy       block position in the plane
 * @param b_w,b_h     block size, both must be > 1
 * @param block       block parameters (type, colour, motion vector, reference)
 * @param plane_index plane to predict
 * @param w,h         plane dimensions
 */
01122 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
01123     if(block->type & BLOCK_INTRA){
01124         int x, y;
01125         const int color = block->color[plane_index];
              /* colour replicated into all four bytes so that four pixels are written per store */
01126         const int color4= color*0x01010101;
01127         if(b_w==32){
01128             for(y=0; y < b_h; y++){
01129                 *(uint32_t*)&dst[0 + y*stride]= color4;
01130                 *(uint32_t*)&dst[4 + y*stride]= color4;
01131                 *(uint32_t*)&dst[8 + y*stride]= color4;
01132                 *(uint32_t*)&dst[12+ y*stride]= color4;
01133                 *(uint32_t*)&dst[16+ y*stride]= color4;
01134                 *(uint32_t*)&dst[20+ y*stride]= color4;
01135                 *(uint32_t*)&dst[24+ y*stride]= color4;
01136                 *(uint32_t*)&dst[28+ y*stride]= color4;
01137             }
01138         }else if(b_w==16){
01139             for(y=0; y < b_h; y++){
01140                 *(uint32_t*)&dst[0 + y*stride]= color4;
01141                 *(uint32_t*)&dst[4 + y*stride]= color4;
01142                 *(uint32_t*)&dst[8 + y*stride]= color4;
01143                 *(uint32_t*)&dst[12+ y*stride]= color4;
01144             }
01145         }else if(b_w==8){
01146             for(y=0; y < b_h; y++){
01147                 *(uint32_t*)&dst[0 + y*stride]= color4;
01148                 *(uint32_t*)&dst[4 + y*stride]= color4;
01149             }
01150         }else if(b_w==4){
01151             for(y=0; y < b_h; y++){
01152                 *(uint32_t*)&dst[0 + y*stride]= color4;
01153             }
01154         }else{
                  /* any other width: plain per-pixel fill */
01155             for(y=0; y < b_h; y++){
01156                 for(x=0; x < b_w; x++){
01157                     dst[x + y*stride]= color;
01158                 }
01159             }
01160         }
01161     }else{
01162         uint8_t *src= s->last_picture[block->ref].data[plane_index];
              /* plane 0 uses twice the vector scale of the other planes */
01163         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
01164         int mx= block->mx*scale;
01165         int my= block->my*scale;
              /* the low 4 bits are the fractional position, the rest the full-pel offset */
01166         const int dx= mx&15;
01167         const int dy= my&15;
              /* 0, 1, 2, 3 for b_w of 16, 8, 4, 2 */
01168         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
01169         sx += (mx>>4) - (HTAPS_MAX/2-1);
01170         sy += (my>>4) - (HTAPS_MAX/2-1);
01171         src += sx + sy*stride;
              /* the filter footprint is not fully inside the plane: build a padded copy in tmp;
               * the unsigned casts send negative sx/sy down this path as well (as long as the
               * right-hand sides are not negative) */
01172         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
01173            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
01174             s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
01175             src= tmp + MB_SIZE;
01176         }
01177 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
01178 //        assert(!(b_w&(b_w-1)));
01179         assert(b_w>1 && b_h>1);
01180         assert((tab_index>=0 && tab_index<4) || b_w==32);
              /* generic path: position not on the quarter-pel grid, unsupported block shape,
               * or fast_mc not enabled for this plane */
01181         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
01182             mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
01183         else if(b_w==32){
                  /* 32 wide: two 16x16 calls per 16 rows */
01184             int y;
01185             for(y=0; y<b_h; y+=16){
01186                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
01187                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
01188             }
01189         }else if(b_w==b_h)
01190             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
01191         else if(b_w==2*b_h){
                  /* wide block: two square halves side by side */
01192             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
01193             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
01194         }else{
                  /* tall block: two square halves on top of each other */
01195             assert(2*b_w==b_h);
01196             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
01197             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
01198         }
01199     }
01200 }
01201 
01202 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
01203                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
01204     int y, x;
01205     IDWTELEM * dst;
01206     for(y=0; y<b_h; y++){
01207         //FIXME ugly misuse of obmc_stride
01208         const uint8_t *obmc1= obmc + y*obmc_stride;
01209         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
01210         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
01211         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
01212         dst = slice_buffer_get_line(sb, src_y + y);
01213         for(x=0; x<b_w; x++){
01214             int v=   obmc1[x] * block[3][x + y*src_stride]
01215                     +obmc2[x] * block[2][x + y*src_stride]
01216                     +obmc3[x] * block[1][x + y*src_stride]
01217                     +obmc4[x] * block[0][x + y*src_stride];
01218 
01219             v <<= 8 - LOG2_OBMC_MAX;
01220             if(FRAC_BITS != 8){
01221                 v >>= 8 - FRAC_BITS;
01222             }
01223             if(add){
01224                 v += dst[x + src_x];
01225                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
01226                 if(v&(~255)) v= ~(v>>31);
01227                 dst8[x + y*src_stride] = v;
01228             }else{
01229                 dst[x + src_x] -= v;
01230             }
01231         }
01232     }
01233 }
01234 
01235 //FIXME name cleanup (b_w, block_w, b_width stuff)
01236 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
01237     const int b_width = s->b_width  << s->block_max_depth;
01238     const int b_height= s->b_height << s->block_max_depth;
01239     const int b_stride= b_width;
01240     BlockNode *lt= &s->block[b_x + b_y*b_stride];
01241     BlockNode *rt= lt+1;
01242     BlockNode *lb= lt+b_stride;
01243     BlockNode *rb= lb+1;
01244     uint8_t *block[4];
01245     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
01246     uint8_t *tmp = s->scratchbuf;
01247     uint8_t *ptmp;
01248     int x,y;
01249 
01250     if(b_x<0){
01251         lt= rt;
01252         lb= rb;
01253     }else if(b_x + 1 >= b_width){
01254         rt= lt;
01255         rb= lb;
01256     }
01257     if(b_y<0){
01258         lt= lb;
01259         rt= rb;
01260     }else if(b_y + 1 >= b_height){
01261         lb= lt;
01262         rb= rt;
01263     }
01264 
01265     if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
01266         obmc -= src_x;
01267         b_w += src_x;
01268         if(!sliced && !offset_dst)
01269             dst -= src_x;
01270         src_x=0;
01271     }else if(src_x + b_w > w){
01272         b_w = w - src_x;
01273     }
01274     if(src_y<0){
01275         obmc -= src_y*obmc_stride;
01276         b_h += src_y;
01277         if(!sliced && !offset_dst)
01278             dst -= src_y*dst_stride;
01279         src_y=0;
01280     }else if(src_y + b_h> h){
01281         b_h = h - src_y;
01282     }
01283 
01284     if(b_w<=0 || b_h<=0) return;
01285 
01286     assert(src_stride > 2*MB_SIZE + 5);
01287 
01288     if(!sliced && offset_dst)
01289         dst += src_x + src_y*dst_stride;
01290     dst8+= src_x + src_y*src_stride;
01291 //    src += src_x + src_y*src_stride;
01292 
01293     ptmp= tmp + 3*tmp_step;
01294     block[0]= ptmp;
01295     ptmp+=tmp_step;
01296     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
01297 
01298     if(same_block(lt, rt)){
01299         block[1]= block[0];
01300     }else{
01301         block[1]= ptmp;
01302         ptmp+=tmp_step;
01303         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
01304     }
01305 
01306     if(same_block(lt, lb)){
01307         block[2]= block[0];
01308     }else if(same_block(rt, lb)){
01309         block[2]= block[1];
01310     }else{
01311         block[2]= ptmp;
01312         ptmp+=tmp_step;
01313         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
01314     }
01315 
01316     if(same_block(lt, rb) ){
01317         block[3]= block[0];
01318     }else if(same_block(rt, rb)){
01319         block[3]= block[1];
01320     }else if(same_block(lb, rb)){
01321         block[3]= block[2];
01322     }else{
01323         block[3]= ptmp;
01324         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
01325     }
01326 #if 0
01327     for(y=0; y<b_h; y++){
01328         for(x=0; x<b_w; x++){
01329             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
01330             if(add) dst[x + y*dst_stride] += v;
01331             else    dst[x + y*dst_stride] -= v;
01332         }
01333     }
01334     for(y=0; y<b_h; y++){
01335         uint8_t *obmc2= obmc + (obmc_stride>>1);
01336         for(x=0; x<b_w; x++){
01337             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
01338             if(add) dst[x + y*dst_stride] += v;
01339             else    dst[x + y*dst_stride] -= v;
01340         }
01341     }
01342     for(y=0; y<b_h; y++){
01343         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
01344         for(x=0; x<b_w; x++){
01345             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
01346             if(add) dst[x + y*dst_stride] += v;
01347             else    dst[x + y*dst_stride] -= v;
01348         }
01349     }
01350     for(y=0; y<b_h; y++){
01351         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
01352         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
01353         for(x=0; x<b_w; x++){
01354             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
01355             if(add) dst[x + y*dst_stride] += v;
01356             else    dst[x + y*dst_stride] -= v;
01357         }
01358     }
01359 #else
01360     if(sliced){
01361         s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
01362     }else{
01363         for(y=0; y<b_h; y++){
01364             //FIXME ugly misuse of obmc_stride
01365             const uint8_t *obmc1= obmc + y*obmc_stride;
01366             const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
01367             const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
01368             const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
01369             for(x=0; x<b_w; x++){
01370                 int v=   obmc1[x] * block[3][x + y*src_stride]
01371                         +obmc2[x] * block[2][x + y*src_stride]
01372                         +obmc3[x] * block[1][x + y*src_stride]
01373                         +obmc4[x] * block[0][x + y*src_stride];
01374 
01375                 v <<= 8 - LOG2_OBMC_MAX;
01376                 if(FRAC_BITS != 8){
01377                     v >>= 8 - FRAC_BITS;
01378                 }
01379                 if(add){
01380                     v += dst[x + y*dst_stride];
01381                     v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
01382                     if(v&(~255)) v= ~(v>>31);
01383                     dst8[x + y*src_stride] = v;
01384                 }else{
01385                     dst[x + y*dst_stride] -= v;
01386                 }
01387             }
01388         }
01389     }
01390 #endif /* 0 */
01391 }
01392 
01393 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
01394     Plane *p= &s->plane[plane_index];
01395     const int mb_w= s->b_width  << s->block_max_depth;
01396     const int mb_h= s->b_height << s->block_max_depth;
01397     int x, y, mb_x;
01398     int block_size = MB_SIZE >> s->block_max_depth;
01399     int block_w    = plane_index ? block_size/2 : block_size;
01400     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
01401     int obmc_stride= plane_index ? block_size : 2*block_size;
01402     int ref_stride= s->current_picture.linesize[plane_index];
01403     uint8_t *dst8= s->current_picture.data[plane_index];
01404     int w= p->width;
01405     int h= p->height;
01406 
01407     if(s->keyframe || (s->avctx->debug&512)){
01408         if(mb_y==mb_h)
01409             return;
01410 
01411         if(add){
01412             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
01413 //                DWTELEM * line = slice_buffer_get_line(sb, y);
01414                 IDWTELEM * line = sb->line[y];
01415                 for(x=0; x<w; x++){
01416 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
01417                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
01418                     v >>= FRAC_BITS;
01419                     if(v&(~255)) v= ~(v>>31);
01420                     dst8[x + y*ref_stride]= v;
01421                 }
01422             }
01423         }else{
01424             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
01425 //                DWTELEM * line = slice_buffer_get_line(sb, y);
01426                 IDWTELEM * line = sb->line[y];
01427                 for(x=0; x<w; x++){
01428                     line[x] -= 128 << FRAC_BITS;
01429 //                    buf[x + y*w]-= 128<<FRAC_BITS;
01430                 }
01431             }
01432         }
01433 
01434         return;
01435     }
01436 
01437     for(mb_x=0; mb_x<=mb_w; mb_x++){
01438         add_yblock(s, 1, sb, old_buffer, dst8, obmc,
01439                    block_w*mb_x - block_w/2,
01440                    block_w*mb_y - block_w/2,
01441                    block_w, block_w,
01442                    w, h,
01443                    w, ref_stride, obmc_stride,
01444                    mb_x - 1, mb_y - 1,
01445                    add, 0, plane_index);
01446     }
01447 }
01448 
01449 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
01450     Plane *p= &s->plane[plane_index];
01451     const int mb_w= s->b_width  << s->block_max_depth;
01452     const int mb_h= s->b_height << s->block_max_depth;
01453     int x, y, mb_x;
01454     int block_size = MB_SIZE >> s->block_max_depth;
01455     int block_w    = plane_index ? block_size/2 : block_size;
01456     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
01457     const int obmc_stride= plane_index ? block_size : 2*block_size;
01458     int ref_stride= s->current_picture.linesize[plane_index];
01459     uint8_t *dst8= s->current_picture.data[plane_index];
01460     int w= p->width;
01461     int h= p->height;
01462 
01463     if(s->keyframe || (s->avctx->debug&512)){
01464         if(mb_y==mb_h)
01465             return;
01466 
01467         if(add){
01468             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
01469                 for(x=0; x<w; x++){
01470                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
01471                     v >>= FRAC_BITS;
01472                     if(v&(~255)) v= ~(v>>31);
01473                     dst8[x + y*ref_stride]= v;
01474                 }
01475             }
01476         }else{
01477             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
01478                 for(x=0; x<w; x++){
01479                     buf[x + y*w]-= 128<<FRAC_BITS;
01480                 }
01481             }
01482         }
01483 
01484         return;
01485     }
01486 
01487     for(mb_x=0; mb_x<=mb_w; mb_x++){
01488         add_yblock(s, 0, NULL, buf, dst8, obmc,
01489                    block_w*mb_x - block_w/2,
01490                    block_w*mb_y - block_w/2,
01491                    block_w, block_w,
01492                    w, h,
01493                    w, ref_stride, obmc_stride,
01494                    mb_x - 1, mb_y - 1,
01495                    add, 1, plane_index);
01496     }
01497 }
01498 
01499 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
01500     const int mb_h= s->b_height << s->block_max_depth;
01501     int mb_y;
01502     for(mb_y=0; mb_y<=mb_h; mb_y++)
01503         predict_slice(s, buf, plane_index, add, mb_y);
01504 }
01505 
01506 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
01507     const int w= b->width;
01508     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
01509     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
01510     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
01511     int x,y;
01512 
01513     if(s->qlog == LOSSLESS_QLOG) return;
01514 
01515     for(y=start_y; y<end_y; y++){
01516 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
01517         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
01518         for(x=0; x<w; x++){
01519             int i= line[x];
01520             if(i<0){
01521                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
01522             }else if(i>0){
01523                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
01524             }
01525         }
01526     }
01527 }
01528 
01529 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
01530     const int w= b->width;
01531     int x,y;
01532 
01533     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
01534     IDWTELEM * prev;
01535 
01536     if (start_y != 0)
01537         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
01538 
01539     for(y=start_y; y<end_y; y++){
01540         prev = line;
01541 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
01542         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
01543         for(x=0; x<w; x++){
01544             if(x){
01545                 if(use_median){
01546                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
01547                     else  line[x] += line[x - 1];
01548                 }else{
01549                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
01550                     else  line[x] += line[x - 1];
01551                 }
01552             }else{
01553                 if(y) line[x] += prev[x];
01554             }
01555         }
01556     }
01557 }
01558 
01559 static void decode_qlogs(SnowContext *s){
01560     int plane_index, level, orientation;
01561 
01562     for(plane_index=0; plane_index<3; plane_index++){
01563         for(level=0; level<s->spatial_decomposition_count; level++){
01564             for(orientation=level ? 1:0; orientation<4; orientation++){
01565                 int q;
01566                 if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
01567                 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
01568                 else                    q= get_symbol(&s->c, s->header_state, 1);
01569                 s->plane[plane_index].band[level][orientation].qlog= q;
01570             }
01571         }
01572     }
01573 }
01574 
01575 #define GET_S(dst, check) \
01576     tmp= get_symbol(&s->c, s->header_state, 0);\
01577     if(!(check)){\
01578         av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
01579         return -1;\
01580     }\
01581     dst= tmp;
01582 
01583 static int decode_header(SnowContext *s){
01584     int plane_index, tmp;
01585     uint8_t kstate[32];
01586 
01587     memset(kstate, MID_STATE, sizeof(kstate));
01588 
01589     s->keyframe= get_rac(&s->c, kstate);
01590     if(s->keyframe || s->always_reset){
01591         reset_contexts(s);
01592         s->spatial_decomposition_type=
01593         s->qlog=
01594         s->qbias=
01595         s->mv_scale=
01596         s->block_max_depth= 0;
01597     }
01598     if(s->keyframe){
01599         GET_S(s->version, tmp <= 0U)
01600         s->always_reset= get_rac(&s->c, s->header_state);
01601         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
01602         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
01603         GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
01604         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
01605         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
01606         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
01607         s->spatial_scalability= get_rac(&s->c, s->header_state);
01608 //        s->rate_scalability= get_rac(&s->c, s->header_state);
01609         GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
01610         s->max_ref_frames++;
01611 
01612         decode_qlogs(s);
01613     }
01614 
01615     if(!s->keyframe){
01616         if(get_rac(&s->c, s->header_state)){
01617             for(plane_index=0; plane_index<2; plane_index++){
01618                 int htaps, i, sum=0;
01619                 Plane *p= &s->plane[plane_index];
01620                 p->diag_mc= get_rac(&s->c, s->header_state);
01621                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
01622                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
01623                     return -1;
01624                 p->htaps= htaps;
01625                 for(i= htaps/2; i; i--){
01626                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
01627                     sum += p->hcoeff[i];
01628                 }
01629                 p->hcoeff[0]= 32-sum;
01630             }
01631             s->plane[2].diag_mc= s->plane[1].diag_mc;
01632             s->plane[2].htaps  = s->plane[1].htaps;
01633             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
01634         }
01635         if(get_rac(&s->c, s->header_state)){
01636             GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
01637             decode_qlogs(s);
01638         }
01639     }
01640 
01641     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
01642     if(s->spatial_decomposition_type > 1U){
01643         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
01644         return -1;
01645     }
01646     if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
01647              s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
01648         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
01649         return -1;
01650     }
01651 
01652     s->qlog           += get_symbol(&s->c, s->header_state, 1);
01653     s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
01654     s->qbias          += get_symbol(&s->c, s->header_state, 1);
01655     s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
01656     if(s->block_max_depth > 1 || s->block_max_depth < 0){
01657         av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
01658         s->block_max_depth= 0;
01659         return -1;
01660     }
01661 
01662     return 0;
01663 }
01664 
01665 static void init_qexp(void){
01666     int i;
01667     double v=128;
01668 
01669     for(i=0; i<QROOT; i++){
01670         qexp[i]= lrintf(v);
01671         v *= pow(2, 1.0 / QROOT);
01672     }
01673 }
01674 
01675 static av_cold int common_init(AVCodecContext *avctx){
01676     SnowContext *s = avctx->priv_data;
01677     int width, height;
01678     int i, j;
01679 
01680     s->avctx= avctx;
01681     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
01682 
01683     dsputil_init(&s->dsp, avctx);
01684     ff_dwt_init(&s->dwt);
01685 
01686 #define mcf(dx,dy)\
01687     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
01688     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
01689         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
01690     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
01691     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
01692         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
01693 
01694     mcf( 0, 0)
01695     mcf( 4, 0)
01696     mcf( 8, 0)
01697     mcf(12, 0)
01698     mcf( 0, 4)
01699     mcf( 4, 4)
01700     mcf( 8, 4)
01701     mcf(12, 4)
01702     mcf( 0, 8)
01703     mcf( 4, 8)
01704     mcf( 8, 8)
01705     mcf(12, 8)
01706     mcf( 0,12)
01707     mcf( 4,12)
01708     mcf( 8,12)
01709     mcf(12,12)
01710 
01711 #define mcfh(dx,dy)\
01712     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
01713     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
01714         mc_block_hpel ## dx ## dy ## 16;\
01715     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
01716     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
01717         mc_block_hpel ## dx ## dy ## 8;
01718 
01719     mcfh(0, 0)
01720     mcfh(8, 0)
01721     mcfh(0, 8)
01722     mcfh(8, 8)
01723 
01724     if(!qexp[0])
01725         init_qexp();
01726 
01727 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
01728 
01729     width= s->avctx->width;
01730     height= s->avctx->height;
01731 
01732     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
01733     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
01734 
01735     for(i=0; i<MAX_REF_FRAMES; i++)
01736         for(j=0; j<MAX_REF_FRAMES; j++)
01737             scale_mv_ref[i][j] = 256*(i+1)/(j+1);
01738 
01739     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
01740     s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
01741 
01742     return 0;
01743 }
01744 
01745 static int common_init_after_header(AVCodecContext *avctx){
01746     SnowContext *s = avctx->priv_data;
01747     int plane_index, level, orientation;
01748 
01749     for(plane_index=0; plane_index<3; plane_index++){
01750         int w= s->avctx->width;
01751         int h= s->avctx->height;
01752 
01753         if(plane_index){
01754             w>>= s->chroma_h_shift;
01755             h>>= s->chroma_v_shift;
01756         }
01757         s->plane[plane_index].width = w;
01758         s->plane[plane_index].height= h;
01759 
01760         for(level=s->spatial_decomposition_count-1; level>=0; level--){
01761             for(orientation=level ? 1 : 0; orientation<4; orientation++){
01762                 SubBand *b= &s->plane[plane_index].band[level][orientation];
01763 
01764                 b->buf= s->spatial_dwt_buffer;
01765                 b->level= level;
01766                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
01767                 b->width = (w + !(orientation&1))>>1;
01768                 b->height= (h + !(orientation>1))>>1;
01769 
01770                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
01771                 b->buf_x_offset = 0;
01772                 b->buf_y_offset = 0;
01773 
01774                 if(orientation&1){
01775                     b->buf += (w+1)>>1;
01776                     b->buf_x_offset = (w+1)>>1;
01777                 }
01778                 if(orientation>1){
01779                     b->buf += b->stride>>1;
01780                     b->buf_y_offset = b->stride_line >> 1;
01781                 }
01782                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
01783 
01784                 if(level)
01785                     b->parent= &s->plane[plane_index].band[level-1][orientation];
01786                 //FIXME avoid this realloc
01787                 av_freep(&b->x_coeff);
01788                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
01789             }
01790             w= (w+1)>>1;
01791             h= (h+1)>>1;
01792         }
01793     }
01794 
01795     return 0;
01796 }
01797 
01798 #define QUANTIZE2 0
01799 
01800 #if QUANTIZE2==1
01801 #define Q2_STEP 8
01802 
01803 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
01804     SubBand *b= &p->band[level][orientation];
01805     int x, y;
01806     int xo=0;
01807     int yo=0;
01808     int step= 1 << (s->spatial_decomposition_count - level);
01809 
01810     if(orientation&1)
01811         xo= step>>1;
01812     if(orientation&2)
01813         yo= step>>1;
01814 
01815     //FIXME bias for nonzero ?
01816     //FIXME optimize
01817     memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
01818     for(y=0; y<p->height; y++){
01819         for(x=0; x<p->width; x++){
01820             int sx= (x-xo + step/2) / step / Q2_STEP;
01821             int sy= (y-yo + step/2) / step / Q2_STEP;
01822             int v= r0[x + y*p->width] - r1[x + y*p->width];
01823             assert(sx>=0 && sy>=0 && sx < score_stride);
01824             v= ((v+8)>>4)<<4;
01825             score[sx + sy*score_stride] += v*v;
01826             assert(score[sx + sy*score_stride] >= 0);
01827         }
01828     }
01829 }
01830 
01831 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
01832     int level, orientation;
01833 
01834     for(level=0; level<s->spatial_decomposition_count; level++){
01835         for(orientation=level ? 1 : 0; orientation<4; orientation++){
01836             SubBand *b= &p->band[level][orientation];
01837             IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
01838 
01839             dequantize(s, b, dst, b->stride);
01840         }
01841     }
01842 }
01843 
01844 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
01845     int level, orientation, ys, xs, x, y, pass;
01846     IDWTELEM best_dequant[height * stride];
01847     IDWTELEM idwt2_buffer[height * stride];
01848     const int score_stride= (width + 10)/Q2_STEP;
01849     int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
01850     int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
01851     int threshold= (s->m.lambda * s->m.lambda) >> 6;
01852 
01853     //FIXME pass the copy cleanly ?
01854 
01855 //    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
01856     ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
01857 
01858     for(level=0; level<s->spatial_decomposition_count; level++){
01859         for(orientation=level ? 1 : 0; orientation<4; orientation++){
01860             SubBand *b= &p->band[level][orientation];
01861             IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
01862              DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
01863             assert(src == b->buf); // code does not depend on this but it is true currently
01864 
01865             quantize(s, b, dst, src, b->stride, s->qbias);
01866         }
01867     }
01868     for(pass=0; pass<1; pass++){
01869         if(s->qbias == 0) //keyframe
01870             continue;
01871         for(level=0; level<s->spatial_decomposition_count; level++){
01872             for(orientation=level ? 1 : 0; orientation<4; orientation++){
01873                 SubBand *b= &p->band[level][orientation];
01874                 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
01875                 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
01876 
01877                 for(ys= 0; ys<Q2_STEP; ys++){
01878                     for(xs= 0; xs<Q2_STEP; xs++){
01879                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
01880                         dequantize_all(s, p, idwt2_buffer, width, height);
01881                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
01882                         find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
01883                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
01884                         for(y=ys; y<b->height; y+= Q2_STEP){
01885                             for(x=xs; x<b->width; x+= Q2_STEP){
01886                                 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
01887                                 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
01888                                 //FIXME try more than just --
01889                             }
01890                         }
01891                         dequantize_all(s, p, idwt2_buffer, width, height);
01892                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
01893                         find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
01894                         for(y=ys; y<b->height; y+= Q2_STEP){
01895                             for(x=xs; x<b->width; x+= Q2_STEP){
01896                                 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
01897                                 if(score[score_idx] <= best_score[score_idx] + threshold){
01898                                     best_score[score_idx]= score[score_idx];
01899                                     if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
01900                                     if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
01901                                     //FIXME copy instead
01902                                 }
01903                             }
01904                         }
01905                     }
01906                 }
01907             }
01908         }
01909     }
01910     memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
01911 }
01912 
01913 #endif /* QUANTIZE2==1 */
01914 
01915 #define USE_HALFPEL_PLANE 0
01916 
01917 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
01918     int p,x,y;
01919 
01920     assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
01921 
01922     for(p=0; p<3; p++){
01923         int is_chroma= !!p;
01924         int w= s->avctx->width  >>is_chroma;
01925         int h= s->avctx->height >>is_chroma;
01926         int ls= frame->linesize[p];
01927         uint8_t *src= frame->data[p];
01928 
01929         halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
01930         halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
01931         halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
01932 
01933         halfpel[0][p]= src;
01934         for(y=0; y<h; y++){
01935             for(x=0; x<w; x++){
01936                 int i= y*ls + x;
01937 
01938                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
01939             }
01940         }
01941         for(y=0; y<h; y++){
01942             for(x=0; x<w; x++){
01943                 int i= y*ls + x;
01944 
01945                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
01946             }
01947         }
01948         src= halfpel[1][p];
01949         for(y=0; y<h; y++){
01950             for(x=0; x<w; x++){
01951                 int i= y*ls + x;
01952 
01953                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
01954             }
01955         }
01956 
01957 //FIXME border!
01958     }
01959 }
01960 
01961 static void release_buffer(AVCodecContext *avctx){
01962     SnowContext *s = avctx->priv_data;
01963     int i;
01964 
01965     if(s->last_picture[s->max_ref_frames-1].data[0]){
01966         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
01967         for(i=0; i<9; i++)
01968             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
01969                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
01970     }
01971 }
01972 
01973 static int frame_start(SnowContext *s){
01974    AVFrame tmp;
01975    int w= s->avctx->width; //FIXME round up to x16 ?
01976    int h= s->avctx->height;
01977 
01978     if(s->current_picture.data[0]){
01979         s->dsp.draw_edges(s->current_picture.data[0],
01980                           s->current_picture.linesize[0], w   , h   ,
01981                           EDGE_WIDTH  , EDGE_WIDTH  , EDGE_TOP | EDGE_BOTTOM);
01982         s->dsp.draw_edges(s->current_picture.data[1],
01983                           s->current_picture.linesize[1], w>>1, h>>1,
01984                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
01985         s->dsp.draw_edges(s->current_picture.data[2],
01986                           s->current_picture.linesize[2], w>>1, h>>1,
01987                           EDGE_WIDTH/2, EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
01988     }
01989 
01990     release_buffer(s->avctx);
01991 
01992     tmp= s->last_picture[s->max_ref_frames-1];
01993     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
01994     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
01995     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
01996         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
01997     s->last_picture[0]= s->current_picture;
01998     s->current_picture= tmp;
01999 
02000     if(s->keyframe){
02001         s->ref_frames= 0;
02002     }else{
02003         int i;
02004         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
02005             if(i && s->last_picture[i-1].key_frame)
02006                 break;
02007         s->ref_frames= i;
02008         if(s->ref_frames==0){
02009             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
02010             return -1;
02011         }
02012     }
02013 
02014     s->current_picture.reference= 1;
02015     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
02016         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
02017         return -1;
02018     }
02019 
02020     s->current_picture.key_frame= s->keyframe;
02021 
02022     return 0;
02023 }
02024 
02025 static av_cold void common_end(SnowContext *s){
02026     int plane_index, level, orientation, i;
02027 
02028     av_freep(&s->spatial_dwt_buffer);
02029     av_freep(&s->spatial_idwt_buffer);
02030 
02031     s->m.me.temp= NULL;
02032     av_freep(&s->m.me.scratchpad);
02033     av_freep(&s->m.me.map);
02034     av_freep(&s->m.me.score_map);
02035     av_freep(&s->m.obmc_scratchpad);
02036 
02037     av_freep(&s->block);
02038     av_freep(&s->scratchbuf);
02039 
02040     for(i=0; i<MAX_REF_FRAMES; i++){
02041         av_freep(&s->ref_mvs[i]);
02042         av_freep(&s->ref_scores[i]);
02043         if(s->last_picture[i].data[0])
02044             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
02045     }
02046 
02047     for(plane_index=0; plane_index<3; plane_index++){
02048         for(level=s->spatial_decomposition_count-1; level>=0; level--){
02049             for(orientation=level ? 1 : 0; orientation<4; orientation++){
02050                 SubBand *b= &s->plane[plane_index].band[level][orientation];
02051 
02052                 av_freep(&b->x_coeff);
02053             }
02054         }
02055     }
02056     if (s->mconly_picture.data[0])
02057         s->avctx->release_buffer(s->avctx, &s->mconly_picture);
02058     if (s->current_picture.data[0])
02059         s->avctx->release_buffer(s->avctx, &s->current_picture);
02060 }
02061 
02062 static av_cold int decode_init(AVCodecContext *avctx)
02063 {
02064     avctx->pix_fmt= PIX_FMT_YUV420P;
02065 
02066     common_init(avctx);
02067 
02068     return 0;
02069 }
02070 
/**
 * Decode one Snow packet into a frame.
 *
 * Sets up the range decoder on the packet payload, parses the frame header,
 * decodes the block data and then, for each of the three planes, unpacks the
 * subband coefficients and reconstructs the plane slice by slice through the
 * buffered inverse DWT followed by predict_slice_buffered().
 *
 * @param avctx     codec context; avctx->priv_data is the SnowContext
 * @param data      output AVFrame, filled by struct copy from the decoder's
 *                  current picture (or the mconly picture when debug&2048)
 * @param data_size set to sizeof(AVFrame) before returning successfully
 * @param avpkt     input packet; only data and size are read here
 * @return number of bytes consumed by the range coder, or -1 if
 *         decode_header() or frame_start() fails
 */
02071 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
02072     const uint8_t *buf = avpkt->data;
02073     int buf_size = avpkt->size;
02074     SnowContext *s = avctx->priv_data;
02075     RangeCoder * const c= &s->c;
02076     int bytes_read;
02077     AVFrame *picture = data;
02078     int level, orientation, plane_index;
02079 
02080     ff_init_range_decoder(c, buf, buf_size);
02081     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
02082 
02083     s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
02084     if(decode_header(s)<0)
02085         return -1;
02086     common_init_after_header(avctx);
02087 
02088     // realloc slice buffer for the case that spatial_decomposition_count changed
02089     ff_slice_buffer_destroy(&s->sb);
02090     ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
02091 
          /* fast_mc is enabled only for the exact filter configuration
           * diag_mc, 6 taps, coefficients {40, -10, 2}. */
02092     for(plane_index=0; plane_index<3; plane_index++){
02093         Plane *p= &s->plane[plane_index];
02094         p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
02095                                               && p->hcoeff[1]==-10
02096                                               && p->hcoeff[2]==2;
02097     }
02098 
02099     alloc_blocks(s);
02100 
02101     if(frame_start(s) < 0)
02102         return -1;
02103     //keyframe flag duplication mess FIXME
02104     if(avctx->debug&FF_DEBUG_PICT_INFO)
02105         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
02106 
02107     decode_blocks(s);
02108 
02109     for(plane_index=0; plane_index<3; plane_index++){
02110         Plane *p= &s->plane[plane_index];
02111         int w= p->width;
02112         int h= p->height;
02113         int x, y;
02114         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
02115 
              /* Debug mode 2048: run the prediction alone and copy the
               * resulting current_picture plane into mconly_picture, which
               * is what gets returned to the caller in this mode. */
02116         if(s->avctx->debug&2048){
02117             memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
02118             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
02119 
02120             for(y=0; y<h; y++){
02121                 for(x=0; x<w; x++){
02122                     int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
02123                     s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
02124                 }
02125             }
02126         }
02127 
              /* Unpack the coefficients of every subband of this plane;
               * level 0 has four orientations, higher levels skip orientation 0. */
02128         {
02129         for(level=0; level<s->spatial_decomposition_count; level++){
02130             for(orientation=level ? 1 : 0; orientation<4; orientation++){
02131                 SubBand *b= &p->band[level][orientation];
02132                 unpack_coeffs(s, b, b->parent, orientation);
02133             }
02134         }
02135         }
02136 
              /* Slice-based reconstruction: one iteration per block row
               * (plus one extra, note the <= in the loop bound). */
02137         {
02138         const int mb_h= s->b_height << s->block_max_depth;
02139         const int block_size = MB_SIZE >> s->block_max_depth;
02140         const int block_w    = plane_index ? block_size/2 : block_size;
02141         int mb_y;
02142         DWTCompose cs[MAX_DECOMPOSITIONS];
02143         int yd=0, yq=0;
              /* NOTE: this y shadows the plane-level y declared above. */
02144         int y;
02145         int end_y;
02146 
02147         ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
02148         for(mb_y=0; mb_y<=mb_h; mb_y++){
02149 
02150             int slice_starty = block_w*mb_y;
02151             int slice_h = block_w*(mb_y+1);
                  /* Unless this is a keyframe (or debug&512 is set), the
                   * slice window is moved up by half a block. */
02152             if (!(s->keyframe || s->avctx->debug&512)){
02153                 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
02154                 slice_h -= (block_w >> 1);
02155             }
02156 
                  /* Decode the rows of each subband that fall into this slice. */
02157             for(level=0; level<s->spatial_decomposition_count; level++){
02158                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
02159                     SubBand *b= &p->band[level][orientation];
02160                     int start_y;
                          /* NOTE: shadows the end_y of the enclosing block. */
02161                     int end_y;
02162                     int our_mb_start = mb_y;
02163                     int our_mb_end = (mb_y + 1);
02164                     const int extra= 3;
02165                     start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
02166                     end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
02167                     if (!(s->keyframe || s->avctx->debug&512)){
02168                         start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
02169                         end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
02170                     }
                          /* Clamp the row range to the subband height. */
02171                     start_y = FFMIN(b->height, start_y);
02172                     end_y = FFMIN(b->height, end_y);
02173 
02174                     if (start_y != end_y){
                              /* Orientation 0 only occurs at level 0 (see the
                               * loop start); that band is additionally
                               * correlated and dequantized here. */
02175                         if (orientation == 0){
02176                             SubBand * correlate_band = &p->band[0][0];
02177                             int correlate_end_y = FFMIN(b->height, end_y + 1);
02178                             int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
02179                             decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
02180                             correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
02181                             dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
02182                         }
02183                         else
02184                             decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
02185                     }
02186                 }
02187             }
02188 
                  /* Advance the inverse DWT in steps of 4 lines up to the
                   * end of the current slice. */
02189             for(; yd<slice_h; yd+=4){
02190                 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
02191             }
02192 
                  /* At LOSSLESS_QLOG the lines are scaled up by FRAC_BITS
                   * before prediction is applied. */
02193             if(s->qlog == LOSSLESS_QLOG){
02194                 for(; yq<slice_h && yq<h; yq++){
02195                     IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
02196                     for(x=0; x<w; x++){
02197                         line[x] <<= FRAC_BITS;
02198                     }
02199                 }
02200             }
02201 
02202             predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
02203 
                  /* Release the slice-buffer lines belonging to this slice. */
02204             y = FFMIN(p->height, slice_starty);
02205             end_y = FFMIN(p->height, slice_h);
02206             while(y < end_y)
02207                 ff_slice_buffer_release(&s->sb, y++);
02208         }
02209 
02210         ff_slice_buffer_flush(&s->sb);
02211         }
02212 
02213     }
02214 
02215     emms_c();
02216 
02217     release_buffer(avctx);
02218 
          /* Hand out a struct copy of the decoded picture (or of the
           * prediction-only picture in debug mode 2048). */
02219     if(!(s->avctx->debug&2048))
02220         *picture= s->current_picture;
02221     else
02222         *picture= s->mconly_picture;
02223 
02224     *data_size = sizeof(AVFrame);
02225 
          /* Consumed size is how far the range coder's read pointer moved. */
02226     bytes_read= c->bytestream - c->bytestream_start;
02227     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
02228 
02229     return bytes_read;
02230 }
02231 
/**
 * Decoder close callback.
 *
 * Destroys the slice buffer and then runs the shared common_end() teardown.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 * @return always 0
 */
02232 static av_cold int decode_end(AVCodecContext *avctx)
02233 {
02234     SnowContext *s = avctx->priv_data;
02235 
02236     ff_slice_buffer_destroy(&s->sb);
02237 
02238     common_end(s);
02239 
02240     return 0;
02241 }
02242 
/**
 * Codec registration entry for the Snow decoder.
 *
 * The initializer is positional: its meaning depends on the field order of
 * AVCodec, which is declared outside this file. It wires in decode_init,
 * decode_end and decode_frame from above and sizes the private context as
 * sizeof(SnowContext); the two NULL slots are left unset.
 * NOTE(review): confirm the slot-to-field mapping against the AVCodec
 * declaration before reordering anything here.
 */
02243 AVCodec ff_snow_decoder = {
02244     "snow",
02245     AVMEDIA_TYPE_VIDEO,
02246     CODEC_ID_SNOW,
02247     sizeof(SnowContext),
02248     decode_init,
02249     NULL,
02250     decode_end,
02251     decode_frame,
02252     CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
02253     NULL,
02254     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
02255 };
02256 
02257 #if CONFIG_SNOW_ENCODER
02258 static av_cold int encode_init(AVCodecContext *avctx)
02259 {
02260     SnowContext *s = avctx->priv_data;
02261     int plane_index;
02262 
02263     if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
02264         av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
02265                "Use vstrict=-2 / -strict -2 to use it anyway.\n");
02266         return -1;
02267     }
02268 
02269     if(avctx->prediction_method == DWT_97
02270        && (avctx->flags & CODEC_FLAG_QSCALE)
02271        && avctx->global_quality == 0){
02272         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
02273         return -1;
02274     }
02275 
02276     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
02277 
02278     s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
02279     s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
02280 
02281     for(plane_index=0; plane_index<3; plane_index++){
02282         s->plane[plane_index].diag_mc= 1;
02283         s->plane[plane_index].htaps= 6;
02284         s->plane[plane_index].hcoeff[0]=  40;
02285         s->plane[plane_index].hcoeff[1]= -10;
02286         s->plane[plane_index].hcoeff[2]=   2;
02287         s->plane[plane_index].fast_mc= 1;
02288     }
02289 
02290     common_init(avctx);
02291     alloc_blocks(s);
02292 
02293     s->version=0;
02294 
02295     s->m.avctx   = avctx;
02296     s->m.flags   = avctx->flags;
02297     s->m.bit_rate= avctx->bit_rate;
02298 
02299     s->m.me.temp      =
02300     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
02301     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
02302     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
02303     s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
02304     h263_encode_init(&s->m); //mv_penalty
02305 
02306     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
02307 
02308     if(avctx->flags&CODEC_FLAG_PASS1){
02309         if(!avctx->stats_out)
02310             avctx->stats_out = av_mallocz(256);
02311     }
02312     if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
02313         if(ff_rate_control_init(&s->m) < 0)
02314             return -1;
02315     }
02316     s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
02317 
02318     avctx->coded_frame= &s->current_picture;
02319     switch(avctx->pix_fmt){
02320 //    case PIX_FMT_YUV444P:
02321 //    case PIX_FMT_YUV422P:
02322     case PIX_FMT_YUV420P:
02323     case PIX_FMT_GRAY8:
02324 //    case PIX_FMT_YUV411P:
02325 //    case PIX_FMT_YUV410P:
02326         s->colorspace_type= 0;
02327         break;
02328 /*    case PIX_FMT_RGB32:
02329         s->colorspace= 1;
02330         break;*/
02331     default:
02332         av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
02333         return -1;
02334     }
02335 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
02336     s->chroma_h_shift= 1;
02337     s->chroma_v_shift= 1;
02338 
02339     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
02340     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
02341 
02342     s->avctx->get_buffer(s->avctx, &s->input_picture);
02343 
02344     if(s->avctx->me_method == ME_ITER){
02345         int i;
02346         int size= s->b_width * s->b_height << 2*s->block_max_depth;
02347         for(i=0; i<s->max_ref_frames; i++){
02348             s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
02349             s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
02350         }
02351     }
02352 
02353     return 0;
02354 }
02355 
/**
 * Sum all pixel values of a w x w block.
 *
 * Near copy & paste from dsputil, FIXME.
 *
 * @param pix       top-left pixel of the block (only read, hence const)
 * @param line_size distance in bytes between the starts of two rows
 * @param w         width and height of the block in pixels
 * @return sum of the w*w pixel values
 */
static int pix_sum(const uint8_t * pix, int line_size, int w)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < w; i++) {
        for (j = 0; j < w; j++) {
            s += pix[0];
            pix ++;
        }
        /* skip the part of the row that lies outside the block */
        pix += line_size - w;
    }
    return s;
}
02371 
02372 //near copy & paste from dsputil, FIXME
02373 static int pix_norm1(uint8_t * pix, int line_size, int w)
02374 {
02375     int s, i, j;
02376     uint32_t *sq = ff_squareTbl + 256;
02377 
02378     s = 0;
02379     for (i = 0; i < w; i++) {
02380         for (j = 0; j < w; j ++) {
02381             s += sq[pix[0]];
02382             pix ++;
02383         }
02384         pix += line_size - w;
02385     }
02386     return s;
02387 }
02388 
02389 //FIXME copy&paste
/* Named rows of the local predictor table `P` (declared as int P[10][2] in
 * encode_q_branch); each row holds an x component at [0] and a y component
 * at [1]. These macros only work where a variable called P is in scope. */
02390 #define P_LEFT P[1]
02391 #define P_TOP P[2]
02392 #define P_TOPRIGHT P[3]
02393 #define P_MEDIAN P[4]
02394 #define P_MV1 P[9]
/* NOTE(review): no use of FLAG_QPEL is visible in this part of the file;
 * the "must be 1" constraint presumably comes from the motion estimation
 * code this was copied from. */
02395 #define FLAG_QPEL   1 //must be 1
02396 
/**
 * Choose and code the best representation of one block of the block quadtree.
 *
 * For non-keyframes the block is trial-coded twice into scratch buffers: once
 * as an inter block (motion search over all reference frames, coder pc /
 * p_buffer) and once as an intra block with a flat colour per plane (coder
 * ic / i_buffer). If the block can still be split (level != block_max_depth)
 * the four children are coded recursively into the real bitstream. The
 * cheapest of split / intra / inter wins; for intra and inter the bitstream
 * position saved on entry is restored and the winning scratch buffer is
 * copied in.
 *
 * @param s     encoder context
 * @param level quadtree depth of this block; block width is
 *              1<<(LOG2_MB_SIZE - level)
 * @param x     block column at this level
 * @param y     block row at this level
 * @return score of the chosen alternative; 0 for keyframes
 */
02397 static int encode_q_branch(SnowContext *s, int level, int x, int y){
02398     uint8_t p_buffer[1024];
02399     uint8_t i_buffer[1024];
02400     uint8_t p_state[sizeof(s->block_state)];
02401     uint8_t i_state[sizeof(s->block_state)];
02402     RangeCoder pc, ic;
          /* Bitstream position on entry, used to rewind when the split
           * written by the recursion is rejected. */
02403     uint8_t *pbbak= s->c.bytestream;
02404     uint8_t *pbbak_start= s->c.bytestream_start;
02405     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
02406     const int w= s->b_width  << s->block_max_depth;
02407     const int h= s->b_height << s->block_max_depth;
02408     const int rem_depth= s->block_max_depth - level;
02409     const int index= (x + y*w) << rem_depth;
02410     const int block_w= 1<<(LOG2_MB_SIZE - level);
02411     int trx= (x+1)<<rem_depth;
02412     int try= (y+1)<<rem_depth;
          /* Neighbouring blocks; null_block stands in at the picture edges. */
02413     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02414     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
02415     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
02416     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
02417     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
02418     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
          /* Colour predictors are taken from the left neighbour. */
02419     int pl = left->color[0];
02420     int pcb= left->color[1];
02421     int pcr= left->color[2];
02422     int pmx, pmy;
02423     int mx=0, my=0;
02424     int l,cr,cb;
02425     const int stride= s->current_picture.linesize[0];
02426     const int uvstride= s->current_picture.linesize[1];
02427     uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
02428                                 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
02429                                 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
02430     int P[10][2];
02431     int16_t last_mv[3][2];
          /* NOTE: despite the "unused" remark, qpel feeds `shift` below. */
02432     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
02433     const int shift= 1+qpel;
02434     MotionEstContext *c= &s->m.me;
          /* Coding contexts derived from the left/top neighbours. */
02435     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02436     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
02437     int my_context= av_log2(2*FFABS(left->my - top->my));
02438     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02439     int ref, best_ref, ref_score, ref_mx, ref_my;
02440 
02441     assert(sizeof(s->block_state) >= 256);
          /* Keyframes: no search, the block becomes intra with the
           * predicted colours and costs nothing. */
02442     if(s->keyframe){
02443         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
02444         return 0;
02445     }
02446 
02447 //    clip predictors / edge ?
02448 
          /* Motion vector predictors from the spatial neighbours. */
02449     P_LEFT[0]= left->mx;
02450     P_LEFT[1]= left->my;
02451     P_TOP [0]= top->mx;
02452     P_TOP [1]= top->my;
02453     P_TOPRIGHT[0]= tr->mx;
02454     P_TOPRIGHT[1]= tr->my;
02455 
          /* Candidates from the blocks currently stored at this position
           * and to its right and bottom. */
02456     last_mv[0][0]= s->block[index].mx;
02457     last_mv[0][1]= s->block[index].my;
02458     last_mv[1][0]= right->mx;
02459     last_mv[1][1]= right->my;
02460     last_mv[2][0]= bottom->mx;
02461     last_mv[2][1]= bottom->my;
02462 
02463     s->m.mb_stride=2;
02464     s->m.mb_x=
02465     s->m.mb_y= 0;
02466     c->skip= 0;
02467 
02468     assert(c->  stride ==   stride);
02469     assert(c->uvstride == uvstride);
02470 
02471     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
02472     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
02473     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
02474     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
02475 
          /* Search window relative to this block, in full pixels. */
02476     c->xmin = - x*block_w - 16+3;
02477     c->ymin = - y*block_w - 16+3;
02478     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
02479     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
02480 
          /* Clip the predictors to the window; only the bounds listed
           * below are applied. */
02481     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
02482     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
02483     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
02484     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
02485     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
02486     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
02487     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
02488 
02489     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
02490     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
02491 
          /* First block row: predict from the left neighbour only. */
02492     if (!y) {
02493         c->pred_x= P_LEFT[0];
02494         c->pred_y= P_LEFT[1];
02495     } else {
02496         c->pred_x = P_MEDIAN[0];
02497         c->pred_y = P_MEDIAN[1];
02498     }
02499 
          /* Motion search over every available reference frame; keep the
           * best one. */
02500     score= INT_MAX;
02501     best_ref= 0;
02502     for(ref=0; ref<s->ref_frames; ref++){
02503         init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
02504 
02505         ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
02506                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
02507 
02508         assert(ref_mx >= c->xmin);
02509         assert(ref_mx <= c->xmax);
02510         assert(ref_my >= c->ymin);
02511         assert(ref_my <= c->ymax);
02512 
02513         ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
02514         ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
              /* Penalize higher reference indices. */
02515         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
              /* Per-reference results are stored only when ref_mvs was
               * allocated; ref_scores is written under the same test. */
02516         if(s->ref_mvs[ref]){
02517             s->ref_mvs[ref][index][0]= ref_mx;
02518             s->ref_mvs[ref][index][1]= ref_my;
02519             s->ref_scores[ref][index]= ref_score;
02520         }
02521         if(score > ref_score){
02522             score= ref_score;
02523             best_ref= ref;
02524             mx= ref_mx;
02525             my= ref_my;
02526         }
02527     }
02528     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
02529 
02530   //  subpel search
          /* Trial-code the inter variant into p_buffer with a copy of the
           * coder and of the block contexts; base_bits is the bit position
           * before this block. */
02531     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
02532     pc= s->c;
02533     pc.bytestream_start=
02534     pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
02535     memcpy(p_state, s->block_state, sizeof(s->block_state));
02536 
02537     if(level!=s->block_max_depth)
02538         put_rac(&pc, &p_state[4 + s_context], 1);
02539     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
02540     if(s->ref_frames > 1)
02541         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
02542     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
02543     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
02544     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
02545     p_len= pc.bytestream - pc.bytestream_start;
02546     score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
02547 
          /* Intra variant: rounded mean of each plane. Only luma
           * contributes to iscore; the chroma terms are commented out. */
02548     block_s= block_w*block_w;
02549     sum = pix_sum(current_data[0], stride, block_w);
02550     l= (sum + block_s/2)/block_s;
02551     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
02552 
02553     block_s= block_w*block_w>>2;
02554     sum = pix_sum(current_data[1], uvstride, block_w>>1);
02555     cb= (sum + block_s/2)/block_s;
02556 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
02557     sum = pix_sum(current_data[2], uvstride, block_w>>1);
02558     cr= (sum + block_s/2)/block_s;
02559 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
02560 
          /* Trial-code the intra variant into i_buffer the same way. */
02561     ic= s->c;
02562     ic.bytestream_start=
02563     ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
02564     memcpy(i_state, s->block_state, sizeof(s->block_state));
02565     if(level!=s->block_max_depth)
02566         put_rac(&ic, &i_state[4 + s_context], 1);
02567     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
02568     put_symbol(&ic, &i_state[32],  l-pl , 1);
02569     put_symbol(&ic, &i_state[64], cb-pcb, 1);
02570     put_symbol(&ic, &i_state[96], cr-pcr, 1);
02571     i_len= ic.bytestream - ic.bytestream_start;
02572     iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
02573 
02574 //    assert(score==256*256*256*64-1);
02575     assert(iscore < 255*255*256 + s->lambda2*10);
02576     assert(iscore >= 0);
02577     assert(l>=0 && l<=255);
02578     assert(pl>=0 && pl<=255);
02579 
          /* Top-level blocks feed the scene change score. */
02580     if(level==0){
02581         int varc= iscore >> 8;
02582         int vard= score >> 8;
02583         if (vard <= 64 || vard < varc)
02584             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
02585         else
02586             c->scene_change_score+= s->m.qscale;
02587     }
02588 
          /* Try the split: it is written straight into the real bitstream
           * and kept only if it beats both unsplit variants. */
02589     if(level!=s->block_max_depth){
02590         put_rac(&s->c, &s->block_state[4 + s_context], 0);
02591         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
02592         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
02593         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
02594         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
02595         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
02596 
02597         if(score2 < score && score2 < iscore)
02598             return score2;
02599     }
02600 
          /* Unsplit variant wins: rewind to the saved position, copy the
           * winning scratch bytes in and adopt that coder and context state. */
02601     if(iscore < score){
02602         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02603         memcpy(pbbak, i_buffer, i_len);
02604         s->c= ic;
02605         s->c.bytestream_start= pbbak_start;
02606         s->c.bytestream= pbbak + i_len;
02607         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
02608         memcpy(s->block_state, i_state, sizeof(s->block_state));
02609         return iscore;
02610     }else{
02611         memcpy(pbbak, p_buffer, p_len);
02612         s->c= pc;
02613         s->c.bytestream_start= pbbak_start;
02614         s->c.bytestream= pbbak + p_len;
02615         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
02616         memcpy(s->block_state, p_state, sizeof(s->block_state));
02617         return score;
02618     }
02619 }
02620 
02621 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
02622     const int w= s->b_width  << s->block_max_depth;
02623     const int rem_depth= s->block_max_depth - level;
02624     const int index= (x + y*w) << rem_depth;
02625     int trx= (x+1)<<rem_depth;
02626     BlockNode *b= &s->block[index];
02627     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02628     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
02629     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
02630     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
02631     int pl = left->color[0];
02632     int pcb= left->color[1];
02633     int pcr= left->color[2];
02634     int pmx, pmy;
02635     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
02636     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
02637     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
02638     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
02639 
02640     if(s->keyframe){
02641         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
02642         return;
02643     }
02644 
02645     if(level!=s->block_max_depth){
02646         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
02647             put_rac(&s->c, &s->block_state[4 + s_context], 1);
02648         }else{
02649             put_rac(&s->c, &s->block_state[4 + s_context], 0);
02650             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
02651             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
02652             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
02653             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
02654             return;
02655         }
02656     }
02657     if(b->type & BLOCK_INTRA){
02658         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
02659         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
02660         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
02661         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
02662         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
02663         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
02664     }else{
02665         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
02666         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
02667         if(s->ref_frames > 1)
02668             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
02669         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
02670         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
02671         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
02672     }
02673 }
02674 
/**
 * Estimate the colour value to use for block (mb_x, mb_y) of one plane.
 *
 * The block is temporarily flagged intra with colour 0 for this plane and
 * add_yblock() is run on the four quadrants around it into the OBMC
 * scratchpad. Over all in-picture pixels of those quadrants two sums are
 * accumulated: ab (source pixel minus the scratchpad-derived value, times the
 * OBMC weight) and aa (squared OBMC weight). The result is ab scaled by
 * LOG2_OBMC_MAX, divided by aa with rounding and clipped to 0..255. The block
 * entry is restored before returning.
 * NOTE(review): presumably add_yblock() accumulates the prediction
 * contributed by the other blocks, so the remainder is what this block
 * has to supply; confirm against add_yblock().
 *
 * @param s           encoder context
 * @param mb_x        block column (in units of the smallest block size)
 * @param mb_y        block row
 * @param plane_index plane to evaluate; non-zero planes use half the block width
 * @return colour estimate in the range 0..255
 */
02675 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
02676     int i, x2, y2;
02677     Plane *p= &s->plane[plane_index];
02678     const int block_size = MB_SIZE >> s->block_max_depth;
02679     const int block_w    = plane_index ? block_size/2 : block_size;
02680     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02681     const int obmc_stride= plane_index ? block_size : 2*block_size;
02682     const int ref_stride= s->current_picture.linesize[plane_index];
02683     uint8_t *src= s-> input_picture.data[plane_index];
02684     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
02685     const int b_stride = s->b_width << s->block_max_depth;
02686     const int w= p->width;
02687     const int h= p->height;
02688     int index= mb_x + mb_y*b_stride;
02689     BlockNode *b= &s->block[index];
          /* Saved so the temporary changes below can be undone. */
02690     BlockNode backup= *b;
02691     int ab=0;
02692     int aa=0;
02693 
02694     b->type|= BLOCK_INTRA;
02695     b->color[plane_index]= 0;
02696     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
02697 
          /* Visit the four quadrants of the window centred on this block. */
02698     for(i=0; i<4; i++){
02699         int mb_x2= mb_x + (i &1) - 1;
02700         int mb_y2= mb_y + (i>>1) - 1;
02701         int x= block_w*mb_x2 + block_w/2;
02702         int y= block_w*mb_y2 + block_w/2;
02703 
02704         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
02705                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
02706 
              /* Only pixels inside the picture are accumulated. */
02707         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
02708             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
                      /* This index (shadowing the block index above) is the
                       * pixel position inside the OBMC window. */
02709                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
02710                 int obmc_v= obmc[index];
02711                 int d;
                      /* Quadrants crossing a picture edge also take the
                       * weight of the window entry one block further in
                       * the direction of that edge. */
02712                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
02713                 if(x<0) obmc_v += obmc[index + block_w];
02714                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
02715                 if(x+block_w>w) obmc_v += obmc[index - block_w];
02716                 //FIXME precalculate this or simplify it somehow else
02717 
02718                 d = -dst[index] + (1<<(FRAC_BITS-1));
02719                 dst[index] = d;
02720                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
02721                 aa += obmc_v * obmc_v; //FIXME precalculate this
02722             }
02723         }
02724     }
02725     *b= backup;
02726 
02727     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
02728 }
02729 
02730 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
02731     const int b_stride = s->b_width << s->block_max_depth;
02732     const int b_height = s->b_height<< s->block_max_depth;
02733     int index= x + y*b_stride;
02734     const BlockNode *b     = &s->block[index];
02735     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
02736     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
02737     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
02738     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
02739     int dmx, dmy;
02740 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
02741 //  int my_context= av_log2(2*FFABS(left->my - top->my));
02742 
02743     if(x<0 || x>=b_stride || y>=b_height)
02744         return 0;
02745 /*
02746 1            0      0
02747 01X          1-2    1
02748 001XX        3-6    2-3
02749 0001XXX      7-14   4-7
02750 00001XXXX   15-30   8-15
02751 */
02752 //FIXME try accurate rate
02753 //FIXME intra and inter predictors if surrounding blocks are not the same type
02754     if(b->type & BLOCK_INTRA){
02755         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
02756                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
02757                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
02758     }else{
02759         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
02760         dmx-= b->mx;
02761         dmy-= b->my;
02762         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
02763                     + av_log2(2*FFABS(dmy))
02764                     + av_log2(2*b->ref));
02765     }
02766 }
02767 
02768 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
02769     Plane *p= &s->plane[plane_index];
02770     const int block_size = MB_SIZE >> s->block_max_depth;
02771     const int block_w    = plane_index ? block_size/2 : block_size;
02772     const int obmc_stride= plane_index ? block_size : 2*block_size;
02773     const int ref_stride= s->current_picture.linesize[plane_index];
02774     uint8_t *dst= s->current_picture.data[plane_index];
02775     uint8_t *src= s->  input_picture.data[plane_index];
02776     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
02777     uint8_t *cur = s->scratchbuf;
02778     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
02779     const int b_stride = s->b_width << s->block_max_depth;
02780     const int b_height = s->b_height<< s->block_max_depth;
02781     const int w= p->width;
02782     const int h= p->height;
02783     int distortion;
02784     int rate= 0;
02785     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02786     int sx= block_w*mb_x - block_w/2;
02787     int sy= block_w*mb_y - block_w/2;
02788     int x0= FFMAX(0,-sx);
02789     int y0= FFMAX(0,-sy);
02790     int x1= FFMIN(block_w*2, w-sx);
02791     int y1= FFMIN(block_w*2, h-sy);
02792     int i,x,y;
02793 
02794     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
02795 
02796     for(y=y0; y<y1; y++){
02797         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
02798         const IDWTELEM *pred1 = pred + y*obmc_stride;
02799         uint8_t *cur1 = cur + y*ref_stride;
02800         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
02801         for(x=x0; x<x1; x++){
02802 #if FRAC_BITS >= LOG2_OBMC_MAX
02803             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
02804 #else
02805             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
02806 #endif
02807             v = (v + pred1[x]) >> FRAC_BITS;
02808             if(v&(~255)) v= ~(v>>31);
02809             dst1[x] = v;
02810         }
02811     }
02812 
02813     /* copy the regions where obmc[] = (uint8_t)256 */
02814     if(LOG2_OBMC_MAX == 8
02815         && (mb_x == 0 || mb_x == b_stride-1)
02816         && (mb_y == 0 || mb_y == b_height-1)){
02817         if(mb_x == 0)
02818             x1 = block_w;
02819         else
02820             x0 = block_w;
02821         if(mb_y == 0)
02822             y1 = block_w;
02823         else
02824             y0 = block_w;
02825         for(y=y0; y<y1; y++)
02826             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
02827     }
02828 
02829     if(block_w==16){
02830         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
02831         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
02832         /* FIXME cmps overlap but do not cover the wavelet's whole support.
02833          * So improving the score of one block is not strictly guaranteed
02834          * to improve the score of the whole frame, thus iterative motion
02835          * estimation does not always converge. */
02836         if(s->avctx->me_cmp == FF_CMP_W97)
02837             distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02838         else if(s->avctx->me_cmp == FF_CMP_W53)
02839             distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
02840         else{
02841             distortion = 0;
02842             for(i=0; i<4; i++){
02843                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
02844                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
02845             }
02846         }
02847     }else{
02848         assert(block_w==8);
02849         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
02850     }
02851 
02852     if(plane_index==0){
02853         for(i=0; i<4; i++){
02854 /* ..RRr
02855  * .RXx.
02856  * rxx..
02857  */
02858             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
02859         }
02860         if(mb_x == b_stride-2)
02861             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
02862     }
02863     return distortion + rate*penalty_factor;
02864 }
02865 
02866 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
02867     int i, y2;
02868     Plane *p= &s->plane[plane_index];
02869     const int block_size = MB_SIZE >> s->block_max_depth;
02870     const int block_w    = plane_index ? block_size/2 : block_size;
02871     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
02872     const int obmc_stride= plane_index ? block_size : 2*block_size;
02873     const int ref_stride= s->current_picture.linesize[plane_index];
02874     uint8_t *dst= s->current_picture.data[plane_index];
02875     uint8_t *src= s-> input_picture.data[plane_index];
02876     //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
02877     // const has only been removed from zero_dst to suppress a warning
02878     static IDWTELEM zero_dst[4096]; //FIXME
02879     const int b_stride = s->b_width << s->block_max_depth;
02880     const int w= p->width;
02881     const int h= p->height;
02882     int distortion= 0;
02883     int rate= 0;
02884     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
02885 
02886     for(i=0; i<9; i++){
02887         int mb_x2= mb_x + (i%3) - 1;
02888         int mb_y2= mb_y + (i/3) - 1;
02889         int x= block_w*mb_x2 + block_w/2;
02890         int y= block_w*mb_y2 + block_w/2;
02891 
02892         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
02893                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
02894 
02895         //FIXME find a cleaner/simpler way to skip the outside stuff
02896         for(y2= y; y2<0; y2++)
02897             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02898         for(y2= h; y2<y+block_w; y2++)
02899             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
02900         if(x<0){
02901             for(y2= y; y2<y+block_w; y2++)
02902                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
02903         }
02904         if(x+block_w > w){
02905             for(y2= y; y2<y+block_w; y2++)
02906                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
02907         }
02908 
02909         assert(block_w== 8 || block_w==16);
02910         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
02911     }
02912 
02913     if(plane_index==0){
02914         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
02915         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
02916 
02917 /* ..RRRr
02918  * .RXXx.
02919  * .RXXx.
02920  * rxxx.
02921  */
02922         if(merged)
02923             rate = get_block_bits(s, mb_x, mb_y, 2);
02924         for(i=merged?4:0; i<9; i++){
02925             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
02926             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
02927         }
02928     }
02929     return distortion + rate*penalty_factor;
02930 }
02931 
02932 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
02933     const int w= b->width;
02934     const int h= b->height;
02935     int x, y;
02936 
02937     if(1){
02938         int run=0;
02939         int runs[w*h];
02940         int run_index=0;
02941         int max_index;
02942 
02943         for(y=0; y<h; y++){
02944             for(x=0; x<w; x++){
02945                 int v, p=0;
02946                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
02947                 v= src[x + y*stride];
02948 
02949                 if(y){
02950                     t= src[x + (y-1)*stride];
02951                     if(x){
02952                         lt= src[x - 1 + (y-1)*stride];
02953                     }
02954                     if(x + 1 < w){
02955                         rt= src[x + 1 + (y-1)*stride];
02956                     }
02957                 }
02958                 if(x){
02959                     l= src[x - 1 + y*stride];
02960                     /*if(x > 1){
02961                         if(orientation==1) ll= src[y + (x-2)*stride];
02962                         else               ll= src[x - 2 + y*stride];
02963                     }*/
02964                 }
02965                 if(parent){
02966                     int px= x>>1;
02967                     int py= y>>1;
02968                     if(px<b->parent->width && py<b->parent->height)
02969                         p= parent[px + py*2*stride];
02970                 }
02971                 if(!(/*ll|*/l|lt|t|rt|p)){
02972                     if(v){
02973                         runs[run_index++]= run;
02974                         run=0;
02975                     }else{
02976                         run++;
02977                     }
02978                 }
02979             }
02980         }
02981         max_index= run_index;
02982         runs[run_index++]= run;
02983         run_index=0;
02984         run= runs[run_index++];
02985 
02986         put_symbol2(&s->c, b->state[30], max_index, 0);
02987         if(run_index <= max_index)
02988             put_symbol2(&s->c, b->state[1], run, 3);
02989 
02990         for(y=0; y<h; y++){
02991             if(s->c.bytestream_end - s->c.bytestream < w*40){
02992                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
02993                 return -1;
02994             }
02995             for(x=0; x<w; x++){
02996                 int v, p=0;
02997                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
02998                 v= src[x + y*stride];
02999 
03000                 if(y){
03001                     t= src[x + (y-1)*stride];
03002                     if(x){
03003                         lt= src[x - 1 + (y-1)*stride];
03004                     }
03005                     if(x + 1 < w){
03006                         rt= src[x + 1 + (y-1)*stride];
03007                     }
03008                 }
03009                 if(x){
03010                     l= src[x - 1 + y*stride];
03011                     /*if(x > 1){
03012                         if(orientation==1) ll= src[y + (x-2)*stride];
03013                         else               ll= src[x - 2 + y*stride];
03014                     }*/
03015                 }
03016                 if(parent){
03017                     int px= x>>1;
03018                     int py= y>>1;
03019                     if(px<b->parent->width && py<b->parent->height)
03020                         p= parent[px + py*2*stride];
03021                 }
03022                 if(/*ll|*/l|lt|t|rt|p){
03023                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
03024 
03025                     put_rac(&s->c, &b->state[0][context], !!v);
03026                 }else{
03027                     if(!run){
03028                         run= runs[run_index++];
03029 
03030                         if(run_index <= max_index)
03031                             put_symbol2(&s->c, b->state[1], run, 3);
03032                         assert(v);
03033                     }else{
03034                         run--;
03035                         assert(!v);
03036                     }
03037                 }
03038                 if(v){
03039                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
03040                     int l2= 2*FFABS(l) + (l<0);
03041                     int t2= 2*FFABS(t) + (t<0);
03042 
03043                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
03044                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
03045                 }
03046             }
03047         }
03048     }
03049     return 0;
03050 }
03051 
03052 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
03053 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
03054 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
03055     return encode_subband_c0run(s, b, src, parent, stride, orientation);
03056 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
03057 }
03058 
03059 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
03060     const int b_stride= s->b_width << s->block_max_depth;
03061     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03062     BlockNode backup= *block;
03063     int rd, index, value;
03064 
03065     assert(mb_x>=0 && mb_y>=0);
03066     assert(mb_x<b_stride);
03067 
03068     if(intra){
03069         block->color[0] = p[0];
03070         block->color[1] = p[1];
03071         block->color[2] = p[2];
03072         block->type |= BLOCK_INTRA;
03073     }else{
03074         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
03075         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
03076         if(s->me_cache[index] == value)
03077             return 0;
03078         s->me_cache[index]= value;
03079 
03080         block->mx= p[0];
03081         block->my= p[1];
03082         block->type &= ~BLOCK_INTRA;
03083     }
03084 
03085     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
03086 
03087 //FIXME chroma
03088     if(rd < *best_rd){
03089         *best_rd= rd;
03090         return 1;
03091     }else{
03092         *block= backup;
03093         return 0;
03094     }
03095 }
03096 
03097 /* special case for int[2] args we discard afterwards,
03098  * fixes compilation problem with gcc 2.95 */
03099 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
03100     int p[2] = {p0, p1};
03101     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
03102 }
03103 
03104 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
03105     const int b_stride= s->b_width << s->block_max_depth;
03106     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
03107     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
03108     int rd, index, value;
03109 
03110     assert(mb_x>=0 && mb_y>=0);
03111     assert(mb_x<b_stride);
03112     assert(((mb_x|mb_y)&1) == 0);
03113 
03114     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
03115     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
03116     if(s->me_cache[index] == value)
03117         return 0;
03118     s->me_cache[index]= value;
03119 
03120     block->mx= p0;
03121     block->my= p1;
03122     block->ref= ref;
03123     block->type &= ~BLOCK_INTRA;
03124     block[1]= block[b_stride]= block[b_stride+1]= *block;
03125 
03126     rd= get_4block_rd(s, mb_x, mb_y, 0);
03127 
03128 //FIXME chroma
03129     if(rd < *best_rd){
03130         *best_rd= rd;
03131         return 1;
03132     }else{
03133         block[0]= backup[0];
03134         block[1]= backup[1];
03135         block[b_stride]= backup[2];
03136         block[b_stride+1]= backup[3];
03137         return 0;
03138     }
03139 }
03140 
03141 static void iterative_me(SnowContext *s){
03142     int pass, mb_x, mb_y;
03143     const int b_width = s->b_width  << s->block_max_depth;
03144     const int b_height= s->b_height << s->block_max_depth;
03145     const int b_stride= b_width;
03146     int color[3];
03147 
03148     {
03149         RangeCoder r = s->c;
03150         uint8_t state[sizeof(s->block_state)];
03151         memcpy(state, s->block_state, sizeof(s->block_state));
03152         for(mb_y= 0; mb_y<s->b_height; mb_y++)
03153             for(mb_x= 0; mb_x<s->b_width; mb_x++)
03154                 encode_q_branch(s, 0, mb_x, mb_y);
03155         s->c = r;
03156         memcpy(s->block_state, state, sizeof(s->block_state));
03157     }
03158 
03159     for(pass=0; pass<25; pass++){
03160         int change= 0;
03161 
03162         for(mb_y= 0; mb_y<b_height; mb_y++){
03163             for(mb_x= 0; mb_x<b_width; mb_x++){
03164                 int dia_change, i, j, ref;
03165                 int best_rd= INT_MAX, ref_rd;
03166                 BlockNode backup, ref_b;
03167                 const int index= mb_x + mb_y * b_stride;
03168                 BlockNode *block= &s->block[index];
03169                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
03170                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
03171                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
03172                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
03173                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
03174                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
03175                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
03176                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
03177                 const int b_w= (MB_SIZE >> s->block_max_depth);
03178                 uint8_t obmc_edged[b_w*2][b_w*2];
03179 
03180                 if(pass && (block->type & BLOCK_OPT))
03181                     continue;
03182                 block->type |= BLOCK_OPT;
03183 
03184                 backup= *block;
03185 
03186                 if(!s->me_cache_generation)
03187                     memset(s->me_cache, 0, sizeof(s->me_cache));
03188                 s->me_cache_generation += 1<<22;
03189 
03190                 //FIXME precalculate
03191                 {
03192                     int x, y;
03193                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
03194                     if(mb_x==0)
03195                         for(y=0; y<b_w*2; y++)
03196                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
03197                     if(mb_x==b_stride-1)
03198                         for(y=0; y<b_w*2; y++)
03199                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
03200                     if(mb_y==0){
03201                         for(x=0; x<b_w*2; x++)
03202                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
03203                         for(y=1; y<b_w; y++)
03204                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
03205                     }
03206                     if(mb_y==b_height-1){
03207                         for(x=0; x<b_w*2; x++)
03208                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
03209                         for(y=b_w; y<b_w*2-1; y++)
03210                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
03211                     }
03212                 }
03213 
03214                 //skip stuff outside the picture
03215                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
03216                     uint8_t *src= s->  input_picture.data[0];
03217                     uint8_t *dst= s->current_picture.data[0];
03218                     const int stride= s->current_picture.linesize[0];
03219                     const int block_w= MB_SIZE >> s->block_max_depth;
03220                     const int sx= block_w*mb_x - block_w/2;
03221                     const int sy= block_w*mb_y - block_w/2;
03222                     const int w= s->plane[0].width;
03223                     const int h= s->plane[0].height;
03224                     int y;
03225 
03226                     for(y=sy; y<0; y++)
03227                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03228                     for(y=h; y<sy+block_w*2; y++)
03229                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
03230                     if(sx<0){
03231                         for(y=sy; y<sy+block_w*2; y++)
03232                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
03233                     }
03234                     if(sx+block_w*2 > w){
03235                         for(y=sy; y<sy+block_w*2; y++)
03236                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
03237                     }
03238                 }
03239 
03240                 // intra(black) = neighbors' contribution to the current block
03241                 for(i=0; i<3; i++)
03242                     color[i]= get_dc(s, mb_x, mb_y, i);
03243 
03244                 // get previous score (cannot be cached due to OBMC)
03245                 if(pass > 0 && (block->type&BLOCK_INTRA)){
03246                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
03247                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
03248                 }else
03249                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
03250 
03251                 ref_b= *block;
03252                 ref_rd= best_rd;
03253                 for(ref=0; ref < s->ref_frames; ref++){
03254                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
03255                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
03256                         continue;
03257                     block->ref= ref;
03258                     best_rd= INT_MAX;
03259 
03260                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
03261                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
03262                     if(tb)
03263                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
03264                     if(lb)
03265                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
03266                     if(rb)
03267                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
03268                     if(bb)
03269                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
03270 
03271                     /* fullpel ME */
03272                     //FIXME avoid subpel interpolation / round to nearest integer
03273                     do{
03274                         dia_change=0;
03275                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
03276                             for(j=0; j<i; j++){
03277                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03278                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03279                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
03280                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
03281                             }
03282                         }
03283                     }while(dia_change);
03284                     /* subpel ME */
03285                     do{
03286                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
03287                         dia_change=0;
03288                         for(i=0; i<8; i++)
03289                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
03290                     }while(dia_change);
03291                     //FIXME or try the standard 2 pass qpel or similar
03292 
03293                     mvr[0][0]= block->mx;
03294                     mvr[0][1]= block->my;
03295                     if(ref_rd > best_rd){
03296                         ref_rd= best_rd;
03297                         ref_b= *block;
03298                     }
03299                 }
03300                 best_rd= ref_rd;
03301                 *block= ref_b;
03302                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
03303                 //FIXME RD style color selection
03304                 if(!same_block(block, &backup)){
03305                     if(tb ) tb ->type &= ~BLOCK_OPT;
03306                     if(lb ) lb ->type &= ~BLOCK_OPT;
03307                     if(rb ) rb ->type &= ~BLOCK_OPT;
03308                     if(bb ) bb ->type &= ~BLOCK_OPT;
03309                     if(tlb) tlb->type &= ~BLOCK_OPT;
03310                     if(trb) trb->type &= ~BLOCK_OPT;
03311                     if(blb) blb->type &= ~BLOCK_OPT;
03312                     if(brb) brb->type &= ~BLOCK_OPT;
03313                     change ++;
03314                 }
03315             }
03316         }
03317         av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
03318         if(!change)
03319             break;
03320     }
03321 
03322     if(s->block_max_depth == 1){
03323         int change= 0;
03324         for(mb_y= 0; mb_y<b_height; mb_y+=2){
03325             for(mb_x= 0; mb_x<b_width; mb_x+=2){
03326                 int i;
03327                 int best_rd, init_rd;
03328                 const int index= mb_x + mb_y * b_stride;
03329                 BlockNode *b[4];
03330 
03331                 b[0]= &s->block[index];
03332                 b[1]= b[0]+1;
03333                 b[2]= b[0]+b_stride;
03334                 b[3]= b[2]+1;
03335                 if(same_block(b[0], b[1]) &&
03336                    same_block(b[0], b[2]) &&
03337                    same_block(b[0], b[3]))
03338                     continue;
03339 
03340                 if(!s->me_cache_generation)
03341                     memset(s->me_cache, 0, sizeof(s->me_cache));
03342                 s->me_cache_generation += 1<<22;
03343 
03344                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
03345 
03346                 //FIXME more multiref search?
03347                 check_4block_inter(s, mb_x, mb_y,
03348                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
03349                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
03350 
03351                 for(i=0; i<4; i++)
03352                     if(!(b[i]->type&BLOCK_INTRA))
03353                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
03354 
03355                 if(init_rd != best_rd)
03356                     change++;
03357             }
03358         }
03359         av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
03360     }
03361 }
03362 
03363 static void encode_blocks(SnowContext *s, int search){
03364     int x, y;
03365     int w= s->b_width;
03366     int h= s->b_height;
03367 
03368     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
03369         iterative_me(s);
03370 
03371     for(y=0; y<h; y++){
03372         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
03373             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
03374             return;
03375         }
03376         for(x=0; x<w; x++){
03377             if(s->avctx->me_method == ME_ITER || !search)
03378                 encode_q_branch2(s, 0, x, y);
03379             else
03380                 encode_q_branch (s, 0, x, y);
03381         }
03382     }
03383 }
03384 
03385 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
03386     const int w= b->width;
03387     const int h= b->height;
03388     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03389     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
03390     int x,y, thres1, thres2;
03391 
03392     if(s->qlog == LOSSLESS_QLOG){
03393         for(y=0; y<h; y++)
03394             for(x=0; x<w; x++)
03395                 dst[x + y*stride]= src[x + y*stride];
03396         return;
03397     }
03398 
03399     bias= bias ? 0 : (3*qmul)>>3;
03400     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
03401     thres2= 2*thres1;
03402 
03403     if(!bias){
03404         for(y=0; y<h; y++){
03405             for(x=0; x<w; x++){
03406                 int i= src[x + y*stride];
03407 
03408                 if((unsigned)(i+thres1) > thres2){
03409                     if(i>=0){
03410                         i<<= QEXPSHIFT;
03411                         i/= qmul; //FIXME optimize
03412                         dst[x + y*stride]=  i;
03413                     }else{
03414                         i= -i;
03415                         i<<= QEXPSHIFT;
03416                         i/= qmul; //FIXME optimize
03417                         dst[x + y*stride]= -i;
03418                     }
03419                 }else
03420                     dst[x + y*stride]= 0;
03421             }
03422         }
03423     }else{
03424         for(y=0; y<h; y++){
03425             for(x=0; x<w; x++){
03426                 int i= src[x + y*stride];
03427 
03428                 if((unsigned)(i+thres1) > thres2){
03429                     if(i>=0){
03430                         i<<= QEXPSHIFT;
03431                         i= (i + bias) / qmul; //FIXME optimize
03432                         dst[x + y*stride]=  i;
03433                     }else{
03434                         i= -i;
03435                         i<<= QEXPSHIFT;
03436                         i= (i + bias) / qmul; //FIXME optimize
03437                         dst[x + y*stride]= -i;
03438                     }
03439                 }else
03440                     dst[x + y*stride]= 0;
03441             }
03442         }
03443     }
03444 }
03445 
03446 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
03447     const int w= b->width;
03448     const int h= b->height;
03449     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
03450     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03451     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
03452     int x,y;
03453 
03454     if(s->qlog == LOSSLESS_QLOG) return;
03455 
03456     for(y=0; y<h; y++){
03457         for(x=0; x<w; x++){
03458             int i= src[x + y*stride];
03459             if(i<0){
03460                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
03461             }else if(i>0){
03462                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
03463             }
03464         }
03465     }
03466 }
03467 
03468 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03469     const int w= b->width;
03470     const int h= b->height;
03471     int x,y;
03472 
03473     for(y=h-1; y>=0; y--){
03474         for(x=w-1; x>=0; x--){
03475             int i= x + y*stride;
03476 
03477             if(x){
03478                 if(use_median){
03479                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03480                     else  src[i] -= src[i - 1];
03481                 }else{
03482                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03483                     else  src[i] -= src[i - 1];
03484                 }
03485             }else{
03486                 if(y) src[i] -= src[i - stride];
03487             }
03488         }
03489     }
03490 }
03491 
03492 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
03493     const int w= b->width;
03494     const int h= b->height;
03495     int x,y;
03496 
03497     for(y=0; y<h; y++){
03498         for(x=0; x<w; x++){
03499             int i= x + y*stride;
03500 
03501             if(x){
03502                 if(use_median){
03503                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
03504                     else  src[i] += src[i - 1];
03505                 }else{
03506                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
03507                     else  src[i] += src[i - 1];
03508                 }
03509             }else{
03510                 if(y) src[i] += src[i - stride];
03511             }
03512         }
03513     }
03514 }
03515 
03516 static void encode_qlogs(SnowContext *s){
03517     int plane_index, level, orientation;
03518 
03519     for(plane_index=0; plane_index<2; plane_index++){
03520         for(level=0; level<s->spatial_decomposition_count; level++){
03521             for(orientation=level ? 1:0; orientation<4; orientation++){
03522                 if(orientation==2) continue;
03523                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
03524             }
03525         }
03526     }
03527 }
03528 
03529 static void encode_header(SnowContext *s){
03530     int plane_index, i;
03531     uint8_t kstate[32];
03532 
03533     memset(kstate, MID_STATE, sizeof(kstate));
03534 
03535     put_rac(&s->c, kstate, s->keyframe);
03536     if(s->keyframe || s->always_reset){
03537         reset_contexts(s);
03538         s->last_spatial_decomposition_type=
03539         s->last_qlog=
03540         s->last_qbias=
03541         s->last_mv_scale=
03542         s->last_block_max_depth= 0;
03543         for(plane_index=0; plane_index<2; plane_index++){
03544             Plane *p= &s->plane[plane_index];
03545             p->last_htaps=0;
03546             p->last_diag_mc=0;
03547             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
03548         }
03549     }
03550     if(s->keyframe){
03551         put_symbol(&s->c, s->header_state, s->version, 0);
03552         put_rac(&s->c, s->header_state, s->always_reset);
03553         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
03554         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
03555         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03556         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
03557         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
03558         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
03559         put_rac(&s->c, s->header_state, s->spatial_scalability);
03560 //        put_rac(&s->c, s->header_state, s->rate_scalability);
03561         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
03562 
03563         encode_qlogs(s);
03564     }
03565 
03566     if(!s->keyframe){
03567         int update_mc=0;
03568         for(plane_index=0; plane_index<2; plane_index++){
03569             Plane *p= &s->plane[plane_index];
03570             update_mc |= p->last_htaps   != p->htaps;
03571             update_mc |= p->last_diag_mc != p->diag_mc;
03572             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03573         }
03574         put_rac(&s->c, s->header_state, update_mc);
03575         if(update_mc){
03576             for(plane_index=0; plane_index<2; plane_index++){
03577                 Plane *p= &s->plane[plane_index];
03578                 put_rac(&s->c, s->header_state, p->diag_mc);
03579                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
03580                 for(i= p->htaps/2; i; i--)
03581                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
03582             }
03583         }
03584         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03585             put_rac(&s->c, s->header_state, 1);
03586             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
03587             encode_qlogs(s);
03588         }else
03589             put_rac(&s->c, s->header_state, 0);
03590     }
03591 
03592     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
03593     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
03594     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
03595     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
03596     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
03597 
03598 }
03599 
03600 static void update_last_header_values(SnowContext *s){
03601     int plane_index;
03602 
03603     if(!s->keyframe){
03604         for(plane_index=0; plane_index<2; plane_index++){
03605             Plane *p= &s->plane[plane_index];
03606             p->last_diag_mc= p->diag_mc;
03607             p->last_htaps  = p->htaps;
03608             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
03609         }
03610     }
03611 
03612     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
03613     s->last_qlog                        = s->qlog;
03614     s->last_qbias                       = s->qbias;
03615     s->last_mv_scale                    = s->mv_scale;
03616     s->last_block_max_depth             = s->block_max_depth;
03617     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
03618 }
03619 
03620 static int qscale2qlog(int qscale){
03621     return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
03622            + 61*QROOT/8; //<64 >60
03623 }
03624 
03625 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
03626 {
03627     /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
03628      * FIXME we know exact mv bits at this point,
03629      * but ratecontrol isn't set up to include them. */
03630     uint32_t coef_sum= 0;
03631     int level, orientation, delta_qlog;
03632 
03633     for(level=0; level<s->spatial_decomposition_count; level++){
03634         for(orientation=level ? 1 : 0; orientation<4; orientation++){
03635             SubBand *b= &s->plane[0].band[level][orientation];
03636             IDWTELEM *buf= b->ibuf;
03637             const int w= b->width;
03638             const int h= b->height;
03639             const int stride= b->stride;
03640             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
03641             const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
03642             const int qdiv= (1<<16)/qmul;
03643             int x, y;
03644             //FIXME this is ugly
03645             for(y=0; y<h; y++)
03646                 for(x=0; x<w; x++)
03647                     buf[x+y*stride]= b->buf[x+y*stride];
03648             if(orientation==0)
03649                 decorrelate(s, b, buf, stride, 1, 0);
03650             for(y=0; y<h; y++)
03651                 for(x=0; x<w; x++)
03652                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
03653         }
03654     }
03655 
03656     /* ugly, ratecontrol just takes a sqrt again */
03657     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
03658     assert(coef_sum < INT_MAX);
03659 
03660     if(pict->pict_type == AV_PICTURE_TYPE_I){
03661         s->m.current_picture.mb_var_sum= coef_sum;
03662         s->m.current_picture.mc_mb_var_sum= 0;
03663     }else{
03664         s->m.current_picture.mc_mb_var_sum= coef_sum;
03665         s->m.current_picture.mb_var_sum= 0;
03666     }
03667 
03668     pict->quality= ff_rate_estimate_qscale(&s->m, 1);
03669     if (pict->quality < 0)
03670         return INT_MIN;
03671     s->lambda= pict->quality * 3/2;
03672     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
03673     s->qlog+= delta_qlog;
03674     return delta_qlog;
03675 }
03676 
03677 static void calculate_visual_weight(SnowContext *s, Plane *p){
03678     int width = p->width;
03679     int height= p->height;
03680     int level, orientation, x, y;
03681 
03682     for(level=0; level<s->spatial_decomposition_count; level++){
03683         for(orientation=level ? 1 : 0; orientation<4; orientation++){
03684             SubBand *b= &p->band[level][orientation];
03685             IDWTELEM *ibuf= b->ibuf;
03686             int64_t error=0;
03687 
03688             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
03689             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
03690             ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
03691             for(y=0; y<height; y++){
03692                 for(x=0; x<width; x++){
03693                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
03694                     error += d*d;
03695                 }
03696             }
03697 
03698             b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
03699         }
03700     }
03701 }
03702 
03703 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
03704     SnowContext *s = avctx->priv_data;
03705     RangeCoder * const c= &s->c;
03706     AVFrame *pict = data;
03707     const int width= s->avctx->width;
03708     const int height= s->avctx->height;
03709     int level, orientation, plane_index, i, y;
03710     uint8_t rc_header_bak[sizeof(s->header_state)];
03711     uint8_t rc_block_bak[sizeof(s->block_state)];
03712 
03713     ff_init_range_encoder(c, buf, buf_size);
03714     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
03715 
03716     for(i=0; i<3; i++){
03717         int shift= !!i;
03718         for(y=0; y<(height>>shift); y++)
03719             memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
03720                    &pict->data[i][y * pict->linesize[i]],
03721                    width>>shift);
03722     }
03723     s->new_picture = *pict;
03724 
03725     s->m.picture_number= avctx->frame_number;
03726     if(avctx->flags&CODEC_FLAG_PASS2){
03727         s->m.pict_type =
03728         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
03729         s->keyframe= pict->pict_type==AV_PICTURE_TYPE_I;
03730         if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
03731             pict->quality= ff_rate_estimate_qscale(&s->m, 0);
03732             if (pict->quality < 0)
03733                 return -1;
03734         }
03735     }else{
03736         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
03737         s->m.pict_type=
03738         pict->pict_type= s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
03739     }
03740 
03741     if(s->pass1_rc && avctx->frame_number == 0)
03742         pict->quality= 2*FF_QP2LAMBDA;
03743     if(pict->quality){
03744         s->qlog= qscale2qlog(pict->quality);
03745         s->lambda = pict->quality * 3/2;
03746     }
03747     if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
03748         s->qlog= LOSSLESS_QLOG;
03749         s->lambda = 0;
03750     }//else keep previous frame's qlog until after motion estimation
03751 
03752     frame_start(s);
03753 
03754     s->m.current_picture_ptr= &s->m.current_picture;
03755     s->m.last_picture.pts= s->m.current_picture.pts;
03756     s->m.current_picture.pts= pict->pts;
03757     if(pict->pict_type == AV_PICTURE_TYPE_P){
03758         int block_width = (width +15)>>4;
03759         int block_height= (height+15)>>4;
03760         int stride= s->current_picture.linesize[0];
03761 
03762         assert(s->current_picture.data[0]);
03763         assert(s->last_picture[0].data[0]);
03764 
03765         s->m.avctx= s->avctx;
03766         s->m.current_picture.data[0]= s->current_picture.data[0];
03767         s->m.   last_picture.data[0]= s->last_picture[0].data[0];
03768         s->m.    new_picture.data[0]= s->  input_picture.data[0];
03769         s->m.   last_picture_ptr= &s->m.   last_picture;
03770         s->m.linesize=
03771         s->m.   last_picture.linesize[0]=
03772         s->m.    new_picture.linesize[0]=
03773         s->m.current_picture.linesize[0]= stride;
03774         s->m.uvlinesize= s->current_picture.linesize[1];
03775         s->m.width = width;
03776         s->m.height= height;
03777         s->m.mb_width = block_width;
03778         s->m.mb_height= block_height;
03779         s->m.mb_stride=   s->m.mb_width+1;
03780         s->m.b8_stride= 2*s->m.mb_width+1;
03781         s->m.f_code=1;
03782         s->m.pict_type= pict->pict_type;
03783         s->m.me_method= s->avctx->me_method;
03784         s->m.me.scene_change_score=0;
03785         s->m.flags= s->avctx->flags;
03786         s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
03787         s->m.out_format= FMT_H263;
03788         s->m.unrestricted_mv= 1;
03789 
03790         s->m.lambda = s->lambda;
03791         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
03792         s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
03793 
03794         s->m.dsp= s->dsp; //move
03795         ff_init_me(&s->m);
03796         s->dsp= s->m.dsp;
03797     }
03798 
03799     if(s->pass1_rc){
03800         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
03801         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
03802     }
03803 
03804 redo_frame:
03805 
03806     if(pict->pict_type == AV_PICTURE_TYPE_I)
03807         s->spatial_decomposition_count= 5;
03808     else
03809         s->spatial_decomposition_count= 5;
03810 
03811     s->m.pict_type = pict->pict_type;
03812     s->qbias= pict->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
03813 
03814     common_init_after_header(avctx);
03815 
03816     if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
03817         for(plane_index=0; plane_index<3; plane_index++){
03818             calculate_visual_weight(s, &s->plane[plane_index]);
03819         }
03820     }
03821 
03822     encode_header(s);
03823     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
03824     encode_blocks(s, 1);
03825     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
03826 
03827     for(plane_index=0; plane_index<3; plane_index++){
03828         Plane *p= &s->plane[plane_index];
03829         int w= p->width;
03830         int h= p->height;
03831         int x, y;
03832 //        int bits= put_bits_count(&s->c.pb);
03833 
03834         if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
03835             //FIXME optimize
03836             if(pict->data[plane_index]) //FIXME gray hack
03837                 for(y=0; y<h; y++){
03838                     for(x=0; x<w; x++){
03839                         s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
03840                     }
03841                 }
03842             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
03843 
03844             if(   plane_index==0
03845                && pict->pict_type == AV_PICTURE_TYPE_P
03846                && !(avctx->flags&CODEC_FLAG_PASS2)
03847                && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
03848                 ff_init_range_encoder(c, buf, buf_size);
03849                 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
03850                 pict->pict_type= AV_PICTURE_TYPE_I;
03851                 s->keyframe=1;
03852                 s->current_picture.key_frame=1;
03853                 goto redo_frame;
03854             }
03855 
03856             if(s->qlog == LOSSLESS_QLOG){
03857                 for(y=0; y<h; y++){
03858                     for(x=0; x<w; x++){
03859                         s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
03860                     }
03861                 }
03862             }else{
03863                 for(y=0; y<h; y++){
03864                     for(x=0; x<w; x++){
03865                         s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
03866                     }
03867                 }
03868             }
03869 
03870             /*  if(QUANTIZE2)
03871                 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
03872             else*/
03873                 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
03874 
03875             if(s->pass1_rc && plane_index==0){
03876                 int delta_qlog = ratecontrol_1pass(s, pict);
03877                 if (delta_qlog <= INT_MIN)
03878                     return -1;
03879                 if(delta_qlog){
03880                     //reordering qlog in the bitstream would eliminate this reset
03881                     ff_init_range_encoder(c, buf, buf_size);
03882                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
03883                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
03884                     encode_header(s);
03885                     encode_blocks(s, 0);
03886                 }
03887             }
03888 
03889             for(level=0; level<s->spatial_decomposition_count; level++){
03890                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03891                     SubBand *b= &p->band[level][orientation];
03892 
03893                     if(!QUANTIZE2)
03894                         quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
03895                     if(orientation==0)
03896                         decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == AV_PICTURE_TYPE_P, 0);
03897                     encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
03898                     assert(b->parent==NULL || b->parent->stride == b->stride*2);
03899                     if(orientation==0)
03900                         correlate(s, b, b->ibuf, b->stride, 1, 0);
03901                 }
03902             }
03903 
03904             for(level=0; level<s->spatial_decomposition_count; level++){
03905                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
03906                     SubBand *b= &p->band[level][orientation];
03907 
03908                     dequantize(s, b, b->ibuf, b->stride);
03909                 }
03910             }
03911 
03912             ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
03913             if(s->qlog == LOSSLESS_QLOG){
03914                 for(y=0; y<h; y++){
03915                     for(x=0; x<w; x++){
03916                         s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
03917                     }
03918                 }
03919             }
03920             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
03921         }else{
03922             //ME/MC only
03923             if(pict->pict_type == AV_PICTURE_TYPE_I){
03924                 for(y=0; y<h; y++){
03925                     for(x=0; x<w; x++){
03926                         s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
03927                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
03928                     }
03929                 }
03930             }else{
03931                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
03932                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
03933             }
03934         }
03935         if(s->avctx->flags&CODEC_FLAG_PSNR){
03936             int64_t error= 0;
03937 
03938             if(pict->data[plane_index]) //FIXME gray hack
03939                 for(y=0; y<h; y++){
03940                     for(x=0; x<w; x++){
03941                         int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
03942                         error += d*d;
03943                     }
03944                 }
03945             s->avctx->error[plane_index] += error;
03946             s->current_picture.error[plane_index] = error;
03947         }
03948 
03949     }
03950 
03951     update_last_header_values(s);
03952 
03953     release_buffer(avctx);
03954 
03955     s->current_picture.coded_picture_number = avctx->frame_number;
03956     s->current_picture.pict_type = pict->pict_type;
03957     s->current_picture.quality = pict->quality;
03958     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
03959     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
03960     s->m.current_picture.display_picture_number =
03961     s->m.current_picture.coded_picture_number = avctx->frame_number;
03962     s->m.current_picture.quality = pict->quality;
03963     s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
03964     if(s->pass1_rc)
03965         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
03966             return -1;
03967     if(avctx->flags&CODEC_FLAG_PASS1)
03968         ff_write_pass1_stats(&s->m);
03969     s->m.last_pict_type = s->m.pict_type;
03970     avctx->frame_bits = s->m.frame_bits;
03971     avctx->mv_bits = s->m.mv_bits;
03972     avctx->misc_bits = s->m.misc_bits;
03973     avctx->p_tex_bits = s->m.p_tex_bits;
03974 
03975     emms_c();
03976 
03977     return ff_rac_terminate(c);
03978 }
03979 
03980 static av_cold int encode_end(AVCodecContext *avctx)
03981 {
03982     SnowContext *s = avctx->priv_data;
03983 
03984     common_end(s);
03985     if (s->input_picture.data[0])
03986         avctx->release_buffer(avctx, &s->input_picture);
03987     av_free(avctx->stats_out);
03988 
03989     return 0;
03990 }
03991 
03992 AVCodec ff_snow_encoder = {
03993     "snow",
03994     AVMEDIA_TYPE_VIDEO,
03995     CODEC_ID_SNOW,
03996     sizeof(SnowContext),
03997     encode_init,
03998     encode_frame,
03999     encode_end,
04000     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
04001 };
04002 #endif
04003 
04004 
04005 #ifdef TEST
04006 #undef malloc
04007 #undef free
04008 #undef printf
04009 
04010 #include "libavutil/lfg.h"
04011 
04012 int main(void){
04013     int width=256;
04014     int height=256;
04015     int buffer[2][width*height];
04016     SnowContext s;
04017     int i;
04018     AVLFG prng;
04019     s.spatial_decomposition_count=6;
04020     s.spatial_decomposition_type=1;
04021 
04022     av_lfg_init(&prng, 1);
04023 
04024     printf("testing 5/3 DWT\n");
04025     for(i=0; i<width*height; i++)
04026         buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
04027 
04028     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04029     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04030 
04031     for(i=0; i<width*height; i++)
04032         if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
04033 
04034     printf("testing 9/7 DWT\n");
04035     s.spatial_decomposition_type=0;
04036     for(i=0; i<width*height; i++)
04037         buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
04038 
04039     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04040     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04041 
04042     for(i=0; i<width*height; i++)
04043         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
04044 
04045 #if 0
04046     printf("testing AC coder\n");
04047     memset(s.header_state, 0, sizeof(s.header_state));
04048     ff_init_range_encoder(&s.c, buffer[0], 256*256);
04049     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
04050 
04051     for(i=-256; i<256; i++){
04052         put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
04053     }
04054     ff_rac_terminate(&s.c);
04055 
04056     memset(s.header_state, 0, sizeof(s.header_state));
04057     ff_init_range_decoder(&s.c, buffer[0], 256*256);
04058     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
04059 
04060     for(i=-256; i<256; i++){
04061         int j;
04062         j= get_symbol(&s.c, s.header_state, 1);
04063         if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
04064     }
04065 #endif
04066     {
04067     int level, orientation, x, y;
04068     int64_t errors[8][4];
04069     int64_t g=0;
04070 
04071         memset(errors, 0, sizeof(errors));
04072         s.spatial_decomposition_count=3;
04073         s.spatial_decomposition_type=0;
04074         for(level=0; level<s.spatial_decomposition_count; level++){
04075             for(orientation=level ? 1 : 0; orientation<4; orientation++){
04076                 int w= width  >> (s.spatial_decomposition_count-level);
04077                 int h= height >> (s.spatial_decomposition_count-level);
04078                 int stride= width  << (s.spatial_decomposition_count-level);
04079                 DWTELEM *buf= buffer[0];
04080                 int64_t error=0;
04081 
04082                 if(orientation&1) buf+=w;
04083                 if(orientation>1) buf+=stride>>1;
04084 
04085                 memset(buffer[0], 0, sizeof(int)*width*height);
04086                 buf[w/2 + h/2*stride]= 256*256;
04087                 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04088                 for(y=0; y<height; y++){
04089                     for(x=0; x<width; x++){
04090                         int64_t d= buffer[0][x + y*width];
04091                         error += d*d;
04092                         if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
04093                     }
04094                     if(FFABS(height/2-y)<9 && level==2) printf("\n");
04095                 }
04096                 error= (int)(sqrt(error)+0.5);
04097                 errors[level][orientation]= error;
04098                 if(g) g=av_gcd(g, error);
04099                 else g= error;
04100             }
04101         }
04102         printf("static int const visual_weight[][4]={\n");
04103         for(level=0; level<s.spatial_decomposition_count; level++){
04104             printf("  {");
04105             for(orientation=0; orientation<4; orientation++){
04106                 printf("%8"PRId64",", errors[level][orientation]/g);
04107             }
04108             printf("},\n");
04109         }
04110         printf("};\n");
04111         {
04112             int level=2;
04113             int w= width  >> (s.spatial_decomposition_count-level);
04114             //int h= height >> (s.spatial_decomposition_count-level);
04115             int stride= width  << (s.spatial_decomposition_count-level);
04116             DWTELEM *buf= buffer[0];
04117             int64_t error=0;
04118 
04119             buf+=w;
04120             buf+=stride>>1;
04121 
04122             memset(buffer[0], 0, sizeof(int)*width*height);
04123 #if 1
04124             for(y=0; y<height; y++){
04125                 for(x=0; x<width; x++){
04126                     int tab[4]={0,2,3,1};
04127                     buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
04128                 }
04129             }
04130             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04131 #else
04132             for(y=0; y<h; y++){
04133                 for(x=0; x<w; x++){
04134                     buf[x + y*stride  ]=169;
04135                     buf[x + y*stride-w]=64;
04136                 }
04137             }
04138             ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
04139 #endif
04140             for(y=0; y<height; y++){
04141                 for(x=0; x<width; x++){
04142                     int64_t d= buffer[0][x + y*width];
04143                     error += d*d;
04144                     if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
04145                 }
04146                 if(FFABS(height/2-y)<9) printf("\n");
04147             }
04148         }
04149 
04150     }
04151     return 0;
04152 }
04153 #endif /* TEST */