FFmpeg: libswscale/swscale.c Source File

00001 /*
00002  * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of FFmpeg.
00005  *
00006  * FFmpeg is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * FFmpeg is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with FFmpeg; if not, write to the Free Software
00018  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00019  */
00020 
00021 /*
00022   supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
00023   supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
00024   {BGR,RGB}{1,4,8,15,16} support dithering
00025 
00026   unscaled special converters (YV12=I420=IYUV, Y800=Y8)
00027   YV12 -> {BGR,RGB}{1,4,8,12,15,16,24,32}
00028   x -> x
00029   YUV9 -> YV12
00030   YUV9/YV12 -> Y800
00031   Y800 -> YUV9/YV12
00032   BGR24 -> BGR32 & RGB24 -> RGB32
00033   BGR32 -> BGR24 & RGB32 -> RGB24
00034   BGR15 -> BGR16
00035 */
00036 
00037 /*
00038 tested special converters (most are tested actually, but I did not write it down ...)
00039  YV12 -> BGR12/BGR16
00040  YV12 -> YV12
00041  BGR15 -> BGR16
00042  BGR16 -> BGR16
00043  YVU9 -> YV12
00044 
00045 untested special converters
00046   YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
00047   YV12/I420 -> YV12/I420
00048   YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
00049   BGR24 -> BGR32 & RGB24 -> RGB32
00050   BGR32 -> BGR24 & RGB32 -> RGB24
00051   BGR24 -> YV12
00052 */
00053 
00054 #include <inttypes.h>
00055 #include <string.h>
00056 #include <math.h>
00057 #include <stdio.h>
00058 #include "config.h"
00059 #include <assert.h>
00060 #include "swscale.h"
00061 #include "swscale_internal.h"
00062 #include "rgb2rgb.h"
00063 #include "libavutil/avassert.h"
00064 #include "libavutil/intreadwrite.h"
00065 #include "libavutil/cpu.h"
00066 #include "libavutil/avutil.h"
00067 #include "libavutil/mathematics.h"
00068 #include "libavutil/bswap.h"
00069 #include "libavutil/pixdesc.h"
00070 
00071 
/*
 * Integer RGB -> YUV coefficients.
 *
 * Each value is a floating-point coefficient multiplied by range/255 and by
 * 2^RGB2YUV_SHIFT, then rounded to the nearest integer. The Y coefficients
 * use a range of 219 and the U/V coefficients a range of 224. Negative
 * coefficients are built from their magnitude and negated afterwards, so the
 * rounding is always applied to a positive number.
 */
#define RGB2YUV_SHIFT 15
#define RGB2YUV_FIX(coef, range) \
    ((int)((coef)*(range)/255*(1<<RGB2YUV_SHIFT)+0.5))

/* luma */
#define RY ( RGB2YUV_FIX(0.299, 219))
#define GY ( RGB2YUV_FIX(0.587, 219))
#define BY ( RGB2YUV_FIX(0.114, 219))
/* chroma U */
#define RU (-RGB2YUV_FIX(0.169, 224))
#define GU (-RGB2YUV_FIX(0.331, 224))
#define BU ( RGB2YUV_FIX(0.500, 224))
/* chroma V */
#define RV ( RGB2YUV_FIX(0.500, 224))
#define GV (-RGB2YUV_FIX(0.419, 224))
#define BV (-RGB2YUV_FIX(0.081, 224))
00082 
/*
 * Floating-point RGB -> YUV coefficient sets: eight rows of nine values.
 * The trailing comment on each row names the standard the row encodes.
 *
 * In the ITU601 rows the nine values match the magnitudes used by the
 * GY, BY, RY, GU, BU, RU, GV, BV, RV macros (in that order), which suggests
 * the column layout is {G,B,R} for Y, then {G,B,R} for U, then {G,B,R} for V.
 *
 * NOTE(review): what the row index stands for is not visible in this part
 * of the file -- confirm against the code that indexes the table.
 */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5}, //ITU709
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //DEFAULT / ITU601 / ITU624 / SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
00093 
00094 /*
00095 NOTES
00096 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
00097 
00098 TODO
00099 more intelligent misalignment avoidance for the horizontal scaler
00100 write special vertical cubic upscale version
00101 optimize C code (YV12 / minmax)
00102 add support for packed pixel YUV input & output
00103 add support for Y8 output
00104 optimize BGR24 & BGR32
00105 add BGR4 output support
00106 write special BGR->BGR scaler
00107 */
00108 
/*
 * File-local dither rows: 2 rows of 8 bytes, each row repeating a 2-value
 * pattern, all values in 0..3.
 * Declared through DECLARE_ALIGNED(8, ...); presumably this requests 8-byte
 * alignment -- the macro is defined outside this file.
 */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4)[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
};
00113 
/*
 * File-local dither rows: 2 rows of 8 bytes, each row repeating a 2-value
 * pattern, with the even values 0, 2, 4, 6.
 */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8)[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
};
00118 
/*
 * Dither rows with external linkage: 4 rows of 8 bytes. Each row is a
 * 4-value pattern written twice; across the four rows every value in 0..15
 * appears exactly once per 4x4 tile.
 */
DECLARE_ALIGNED(8, const uint8_t, dither_4x4_16)[4][8]={
{  8,   4,  11,   7,   8,   4,  11,   7, },
{  2,  14,   1,  13,   2,  14,   1,  13, },
{ 10,   6,   9,   5,  10,   6,   9,   5, },
{  0,  12,   3,  15,   0,  12,   3,  15, },
};
00125 
/*
 * Dither matrix with external linkage: 8 rows of 8 bytes, values in 0..31.
 */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32)[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
};
00136 
/*
 * Dither matrix with external linkage: 8 rows of 8 bytes, values in 0..72.
 */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73)[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
};
00147 
/*
 * 8x8 dither matrix used by the yuv2mono_*_c_template() functions in this
 * file, which pick a row with (y & 7) and add one entry to each luma value.
 *
 * Four alternative tables are kept in the source. Because the first
 * condition is "#if 1", only the first table (values 0..217) is ever
 * compiled; the gamma-corrected variants below it are dead code retained
 * for reference.
 */
#if 1
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
};
#elif 1
// tries to correct a gamma of 1.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#elif 1
// tries to correct a gamma of 2.0
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#else
// tries to correct a gamma of 2.5
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
00196 
/*
 * Eight 8x8 dither matrices of increasing amplitude. The largest entry of
 * the successive matrices is 1, 3, 7, 15, 31, 63, 63 and 126; matrices
 * [5] and [6] are identical.
 *
 * NOTE(review): how the first index is chosen is not visible in this part
 * of the file -- confirm against the code that selects a matrix.
 */
DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
{
  {   0,  1,  0,  1,  0,  1,  0,  1,},
  {   1,  0,  1,  0,  1,  0,  1,  0,},
  {   0,  1,  0,  1,  0,  1,  0,  1,},
  {   1,  0,  1,  0,  1,  0,  1,  0,},
  {   0,  1,  0,  1,  0,  1,  0,  1,},
  {   1,  0,  1,  0,  1,  0,  1,  0,},
  {   0,  1,  0,  1,  0,  1,  0,  1,},
  {   1,  0,  1,  0,  1,  0,  1,  0,},
},{
  {   1,  2,  1,  2,  1,  2,  1,  2,},
  {   3,  0,  3,  0,  3,  0,  3,  0,},
  {   1,  2,  1,  2,  1,  2,  1,  2,},
  {   3,  0,  3,  0,  3,  0,  3,  0,},
  {   1,  2,  1,  2,  1,  2,  1,  2,},
  {   3,  0,  3,  0,  3,  0,  3,  0,},
  {   1,  2,  1,  2,  1,  2,  1,  2,},
  {   3,  0,  3,  0,  3,  0,  3,  0,},
},{
  {   2,  4,  3,  5,  2,  4,  3,  5,},
  {   6,  0,  7,  1,  6,  0,  7,  1,},
  {   3,  5,  2,  4,  3,  5,  2,  4,},
  {   7,  1,  6,  0,  7,  1,  6,  0,},
  {   2,  4,  3,  5,  2,  4,  3,  5,},
  {   6,  0,  7,  1,  6,  0,  7,  1,},
  {   3,  5,  2,  4,  3,  5,  2,  4,},
  {   7,  1,  6,  0,  7,  1,  6,  0,},
},{
  {   4,  8,  7, 11,  4,  8,  7, 11,},
  {  12,  0, 15,  3, 12,  0, 15,  3,},
  {   6, 10,  5,  9,  6, 10,  5,  9,},
  {  14,  2, 13,  1, 14,  2, 13,  1,},
  {   4,  8,  7, 11,  4,  8,  7, 11,},
  {  12,  0, 15,  3, 12,  0, 15,  3,},
  {   6, 10,  5,  9,  6, 10,  5,  9,},
  {  14,  2, 13,  1, 14,  2, 13,  1,},
},{
  {   9, 17, 15, 23,  8, 16, 14, 22,},
  {  25,  1, 31,  7, 24,  0, 30,  6,},
  {  13, 21, 11, 19, 12, 20, 10, 18,},
  {  29,  5, 27,  3, 28,  4, 26,  2,},
  {   8, 16, 14, 22,  9, 17, 15, 23,},
  {  24,  0, 30,  6, 25,  1, 31,  7,},
  {  12, 20, 10, 18, 13, 21, 11, 19,},
  {  28,  4, 26,  2, 29,  5, 27,  3,},
},{
  {  18, 34, 30, 46, 17, 33, 29, 45,},
  {  50,  2, 62, 14, 49,  1, 61, 13,},
  {  26, 42, 22, 38, 25, 41, 21, 37,},
  {  58, 10, 54,  6, 57,  9, 53,  5,},
  {  16, 32, 28, 44, 19, 35, 31, 47,},
  {  48,  0, 60, 12, 51,  3, 63, 15,},
  {  24, 40, 20, 36, 27, 43, 23, 39,},
  {  56,  8, 52,  4, 59, 11, 55,  7,},
},{
  {  18, 34, 30, 46, 17, 33, 29, 45,},
  {  50,  2, 62, 14, 49,  1, 61, 13,},
  {  26, 42, 22, 38, 25, 41, 21, 37,},
  {  58, 10, 54,  6, 57,  9, 53,  5,},
  {  16, 32, 28, 44, 19, 35, 31, 47,},
  {  48,  0, 60, 12, 51,  3, 63, 15,},
  {  24, 40, 20, 36, 27, 43, 23, 39,},
  {  56,  8, 52,  4, 59, 11, 55,  7,},
},{
  {  36, 68, 60, 92, 34, 66, 58, 90,},
  { 100,  4,124, 28, 98,  2,122, 26,},
  {  52, 84, 44, 76, 50, 82, 42, 74,},
  { 116, 20,108, 12,114, 18,106, 10,},
  {  32, 64, 56, 88, 38, 70, 62, 94,},
  {  96,  0,120, 24,102,  6,126, 30,},
  {  48, 80, 40, 72, 54, 86, 46, 78,},
  { 112, 16,104,  8,118, 22,110, 14,},
}};
00271 
/*
 * Constant 8-entry row in which every value is 64.
 * NOTE(review): presumably passed as lumDither/chrDither when no real
 * dithering is wanted. 64 is half of (1 << 7), the divisor used by
 * yuv2yuv1_c(), and 64 << 12 is half of (1 << 19), the divisor used by
 * yuv2yuvX_c(), so it would act as a plain rounding term -- confirm at the
 * call sites, which are not visible here.
 */
static const uint8_t flat64[8]={64,64,64,64,64,64,64,64};
00273 
/*
 * 15 x 16 table of 16-bit scale factors with external linkage.
 * NOTE(review): neither the meaning of the two indices nor the way the
 * values are applied is visible in this part of the file -- confirm against
 * the code that reads the table before relying on any interpretation.
 */
const uint16_t dither_scale[15][16]={
{    2,    3,    3,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,    5,},
{    2,    3,    7,    7,   13,   13,   25,   25,   25,   25,   25,   25,   25,   25,   25,   25,},
{    3,    3,    4,   15,   15,   29,   57,   57,   57,  113,  113,  113,  113,  113,  113,  113,},
{    3,    4,    4,    5,   31,   31,   61,  121,  241,  241,  241,  241,  481,  481,  481,  481,},
{    3,    4,    5,    5,    6,   63,   63,  125,  249,  497,  993,  993,  993,  993,  993, 1985,},
{    3,    5,    6,    6,    6,    7,  127,  127,  253,  505, 1009, 2017, 4033, 4033, 4033, 4033,},
{    3,    5,    6,    7,    7,    7,    8,  255,  255,  509, 1017, 2033, 4065, 8129,16257,16257,},
{    3,    5,    6,    8,    8,    8,    8,    9,  511,  511, 1021, 2041, 4081, 8161,16321,32641,},
{    3,    5,    7,    8,    9,    9,    9,    9,   10, 1023, 1023, 2045, 4089, 8177,16353,32705,},
{    3,    5,    7,    8,   10,   10,   10,   10,   10,   11, 2047, 2047, 4093, 8185,16369,32737,},
{    3,    5,    7,    8,   10,   11,   11,   11,   11,   11,   12, 4095, 4095, 8189,16377,32753,},
{    3,    5,    7,    9,   10,   12,   12,   12,   12,   12,   12,   13, 8191, 8191,16381,32761,},
{    3,    5,    7,    9,   10,   12,   13,   13,   13,   13,   13,   13,   14,16383,16383,32765,},
{    3,    5,    7,    9,   10,   12,   14,   14,   14,   14,   14,   14,   14,   15,32767,32767,},
{    3,    5,    7,    9,   11,   12,   14,   15,   15,   15,   15,   15,   15,   15,   16,65535,},
};
00291 
00292 static av_always_inline void
00293 yuv2yuvX16_c_template(const int16_t *lumFilter, const int16_t **lumSrc,
00294                       int lumFilterSize, const int16_t *chrFilter,
00295                       const int16_t **chrUSrc, const int16_t **chrVSrc,
00296                       int chrFilterSize, const int16_t **alpSrc,
00297                       uint16_t *dest, uint16_t *uDest, uint16_t *vDest,
00298                       uint16_t *aDest, int dstW, int chrDstW,
00299                       int big_endian, int output_bits)
00300 {
00301     //FIXME Optimize (just quickly written not optimized..)
00302     int i;
00303     int shift = 11 + 16 - output_bits;
00304 
00305 #define output_pixel(pos, val) \
00306     if (big_endian) { \
00307         if (output_bits == 16) { \
00308             AV_WB16(pos, av_clip_uint16(val >> shift)); \
00309         } else { \
00310             AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \
00311         } \
00312     } else { \
00313         if (output_bits == 16) { \
00314             AV_WL16(pos, av_clip_uint16(val >> shift)); \
00315         } else { \
00316             AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \
00317         } \
00318     }
00319     for (i = 0; i < dstW; i++) {
00320         int val = 1 << (26-output_bits);
00321         int j;
00322 
00323         for (j = 0; j < lumFilterSize; j++)
00324             val += lumSrc[j][i] * lumFilter[j];
00325 
00326         output_pixel(&dest[i], val);
00327     }
00328 
00329     if (uDest) {
00330         for (i = 0; i < chrDstW; i++) {
00331             int u = 1 << (26-output_bits);
00332             int v = 1 << (26-output_bits);
00333             int j;
00334 
00335             for (j = 0; j < chrFilterSize; j++) {
00336                 u += chrUSrc[j][i] * chrFilter[j];
00337                 v += chrVSrc[j][i] * chrFilter[j];
00338             }
00339 
00340             output_pixel(&uDest[i], u);
00341             output_pixel(&vDest[i], v);
00342         }
00343     }
00344 
00345     if (CONFIG_SWSCALE_ALPHA && aDest) {
00346         for (i = 0; i < dstW; i++) {
00347             int val = 1 << (26-output_bits);
00348             int j;
00349 
00350             for (j = 0; j < lumFilterSize; j++)
00351                 val += alpSrc[j][i] * lumFilter[j];
00352 
00353             output_pixel(&aDest[i], val);
00354         }
00355     }
00356 #undef output_pixel
00357 }
00358 
00359 #define yuv2NBPS(bits, BE_LE, is_be) \
00360 static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \
00361                               const int16_t **lumSrc, int lumFilterSize, \
00362                               const int16_t *chrFilter, const int16_t **chrUSrc, \
00363                               const int16_t **chrVSrc, \
00364                               int chrFilterSize, const int16_t **alpSrc, \
00365                               uint8_t *_dest, uint8_t *_uDest, uint8_t *_vDest, \
00366                               uint8_t *_aDest, int dstW, int chrDstW) \
00367 { \
00368     uint16_t *dest  = (uint16_t *) _dest,  *uDest = (uint16_t *) _uDest, \
00369              *vDest = (uint16_t *) _vDest, *aDest = (uint16_t *) _aDest; \
00370     yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \
00371                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00372                           alpSrc, \
00373                           dest, uDest, vDest, aDest, \
00374                           dstW, chrDstW, is_be, bits); \
00375 }
00376 yuv2NBPS( 9, BE, 1);
00377 yuv2NBPS( 9, LE, 0);
00378 yuv2NBPS(10, BE, 1);
00379 yuv2NBPS(10, LE, 0);
00380 yuv2NBPS(16, BE, 1);
00381 yuv2NBPS(16, LE, 0);
00382 
00383 static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
00384                        const int16_t **lumSrc, int lumFilterSize,
00385                        const int16_t *chrFilter, const int16_t **chrUSrc,
00386                        const int16_t **chrVSrc,
00387                        int chrFilterSize, const int16_t **alpSrc,
00388                        uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00389                        uint8_t *aDest, int dstW, int chrDstW,
00390                        const uint8_t *lumDither, const uint8_t *chrDither)
00391 {
00392     //FIXME Optimize (just quickly written not optimized..)
00393     int i;
00394     for (i=0; i<dstW; i++) {
00395         int val = lumDither[i&7] << 12;
00396         int j;
00397         for (j=0; j<lumFilterSize; j++)
00398             val += lumSrc[j][i] * lumFilter[j];
00399 
00400         dest[i]= av_clip_uint8(val>>19);
00401     }
00402 
00403     if (uDest)
00404         for (i=0; i<chrDstW; i++) {
00405             int u = chrDither[i&7] << 12;
00406             int v = chrDither[(i+3)&7] << 12;
00407             int j;
00408             for (j=0; j<chrFilterSize; j++) {
00409                 u += chrUSrc[j][i] * chrFilter[j];
00410                 v += chrVSrc[j][i] * chrFilter[j];
00411             }
00412 
00413             uDest[i]= av_clip_uint8(u>>19);
00414             vDest[i]= av_clip_uint8(v>>19);
00415         }
00416 
00417     if (CONFIG_SWSCALE_ALPHA && aDest)
00418         for (i=0; i<dstW; i++) {
00419             int val = lumDither[i&7] << 12;
00420             int j;
00421             for (j=0; j<lumFilterSize; j++)
00422                 val += alpSrc[j][i] * lumFilter[j];
00423 
00424             aDest[i]= av_clip_uint8(val>>19);
00425         }
00426 }
00427 
00428 static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
00429                               const int16_t *chrUSrc, const int16_t *chrVSrc,
00430                               const int16_t *alpSrc,
00431                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest,
00432                               uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither)
00433 {
00434     int i;
00435 
00436     for (i=0; i<dstW; i++) {
00437         int val= (lumSrc[i]+lumDither[i&7])>>7;
00438         dest[i]= av_clip_uint8(val);
00439     }
00440 
00441     if (uDest)
00442         for (i=0; i<chrDstW; i++) {
00443             int u=(chrUSrc[i]+chrDither[i&7])>>7;
00444             int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7;
00445             uDest[i]= av_clip_uint8(u);
00446             vDest[i]= av_clip_uint8(v);
00447         }
00448 
00449     if (CONFIG_SWSCALE_ALPHA && aDest)
00450         for (i=0; i<dstW; i++) {
00451             int val= (alpSrc[i]+lumDither[i&7])>>7;
00452             aDest[i]= av_clip_uint8(val);
00453         }
00454 }
00455 
00456 static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
00457                         const int16_t **lumSrc, int lumFilterSize,
00458                         const int16_t *chrFilter, const int16_t **chrUSrc,
00459                         const int16_t **chrVSrc, int chrFilterSize,
00460                         const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest,
00461                         uint8_t *vDest, uint8_t *aDest,
00462                         int dstW, int chrDstW,
00463                         const uint8_t *lumDither, const uint8_t *chrDither)
00464 {
00465     enum PixelFormat dstFormat = c->dstFormat;
00466 
00467     //FIXME Optimize (just quickly written not optimized..)
00468     int i;
00469     for (i=0; i<dstW; i++) {
00470         int val = lumDither[i&7]<<12;
00471         int j;
00472         for (j=0; j<lumFilterSize; j++)
00473             val += lumSrc[j][i] * lumFilter[j];
00474 
00475         dest[i]= av_clip_uint8(val>>19);
00476     }
00477 
00478     if (!uDest)
00479         return;
00480 
00481     if (dstFormat == PIX_FMT_NV12)
00482         for (i=0; i<chrDstW; i++) {
00483             int u = chrDither[i&7]<<12;
00484             int v = chrDither[(i+3)&7]<<12;
00485             int j;
00486             for (j=0; j<chrFilterSize; j++) {
00487                 u += chrUSrc[j][i] * chrFilter[j];
00488                 v += chrVSrc[j][i] * chrFilter[j];
00489             }
00490 
00491             uDest[2*i]= av_clip_uint8(u>>19);
00492             uDest[2*i+1]= av_clip_uint8(v>>19);
00493         }
00494     else
00495         for (i=0; i<chrDstW; i++) {
00496             int u = chrDither[i&7]<<12;
00497             int v = chrDither[(i+3)&7]<<12;
00498             int j;
00499             for (j=0; j<chrFilterSize; j++) {
00500                 u += chrUSrc[j][i] * chrFilter[j];
00501                 v += chrVSrc[j][i] * chrFilter[j];
00502             }
00503 
00504             uDest[2*i]= av_clip_uint8(v>>19);
00505             uDest[2*i+1]= av_clip_uint8(u>>19);
00506         }
00507 }
00508 
/*
 * Store one 16-bit gray sample at byte address pos. The byte order follows
 * the `target` variable of the enclosing function: big-endian for
 * PIX_FMT_GRAY16BE, little-endian for anything else.
 */
#define output_pixel(pos, val) \
    do { \
        if (target == PIX_FMT_GRAY16BE) { \
            AV_WB16(pos, val); \
        } else { \
            AV_WL16(pos, val); \
        } \
    } while (0)
00515 
00516 static av_always_inline void
00517 yuv2gray16_X_c_template(SwsContext *c, const int16_t *lumFilter,
00518                         const int16_t **lumSrc, int lumFilterSize,
00519                         const int16_t *chrFilter, const int16_t **chrUSrc,
00520                         const int16_t **chrVSrc, int chrFilterSize,
00521                         const int16_t **alpSrc, uint8_t *dest, int dstW,
00522                         int y, enum PixelFormat target)
00523 {
00524     int i;
00525 
00526     for (i = 0; i < (dstW >> 1); i++) {
00527         int j;
00528         int Y1 = 1 << 18;
00529         int Y2 = 1 << 18;
00530         const int i2 = 2 * i;
00531 
00532         for (j = 0; j < lumFilterSize; j++) {
00533             Y1 += lumSrc[j][i2]   * lumFilter[j];
00534             Y2 += lumSrc[j][i2+1] * lumFilter[j];
00535         }
00536         Y1 >>= 11;
00537         Y2 >>= 11;
00538         if ((Y1 | Y2) & 0x10000) {
00539             Y1 = av_clip_uint16(Y1);
00540             Y2 = av_clip_uint16(Y2);
00541         }
00542         output_pixel(&dest[2 * i2 + 0], Y1);
00543         output_pixel(&dest[2 * i2 + 2], Y2);
00544     }
00545 }
00546 
00547 static av_always_inline void
00548 yuv2gray16_2_c_template(SwsContext *c, const uint16_t *buf0,
00549                         const uint16_t *buf1, const uint16_t *ubuf0,
00550                         const uint16_t *ubuf1, const uint16_t *vbuf0,
00551                         const uint16_t *vbuf1, const uint16_t *abuf0,
00552                         const uint16_t *abuf1, uint8_t *dest, int dstW,
00553                         int yalpha, int uvalpha, int y,
00554                         enum PixelFormat target)
00555 {
00556     int  yalpha1 = 4095 - yalpha; \
00557     int i;
00558 
00559     for (i = 0; i < (dstW >> 1); i++) {
00560         const int i2 = 2 * i;
00561         int Y1 = (buf0[i2  ] * yalpha1 + buf1[i2  ] * yalpha) >> 11;
00562         int Y2 = (buf0[i2+1] * yalpha1 + buf1[i2+1] * yalpha) >> 11;
00563 
00564         output_pixel(&dest[2 * i2 + 0], Y1);
00565         output_pixel(&dest[2 * i2 + 2], Y2);
00566     }
00567 }
00568 
00569 static av_always_inline void
00570 yuv2gray16_1_c_template(SwsContext *c, const uint16_t *buf0,
00571                         const uint16_t *ubuf0, const uint16_t *ubuf1,
00572                         const uint16_t *vbuf0, const uint16_t *vbuf1,
00573                         const uint16_t *abuf0, uint8_t *dest, int dstW,
00574                         int uvalpha, enum PixelFormat dstFormat,
00575                         int flags, int y, enum PixelFormat target)
00576 {
00577     int i;
00578 
00579     for (i = 0; i < (dstW >> 1); i++) {
00580         const int i2 = 2 * i;
00581         int Y1 = buf0[i2  ] << 1;
00582         int Y2 = buf0[i2+1] << 1;
00583 
00584         output_pixel(&dest[2 * i2 + 0], Y1);
00585         output_pixel(&dest[2 * i2 + 2], Y2);
00586     }
00587 }
00588 
00589 #undef output_pixel
00590 
00591 #define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
00592 static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
00593                         const int16_t **lumSrc, int lumFilterSize, \
00594                         const int16_t *chrFilter, const int16_t **chrUSrc, \
00595                         const int16_t **chrVSrc, int chrFilterSize, \
00596                         const int16_t **alpSrc, uint8_t *dest, int dstW, \
00597                         int y) \
00598 { \
00599     name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
00600                           chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00601                           alpSrc, dest, dstW, y, fmt); \
00602 } \
00603  \
00604 static void name ## ext ## _2_c(SwsContext *c, const uint16_t *buf0, \
00605                         const uint16_t *buf1, const uint16_t *ubuf0, \
00606                         const uint16_t *ubuf1, const uint16_t *vbuf0, \
00607                         const uint16_t *vbuf1, const uint16_t *abuf0, \
00608                         const uint16_t *abuf1, uint8_t *dest, int dstW, \
00609                         int yalpha, int uvalpha, int y) \
00610 { \
00611     name ## base ## _2_c_template(c, buf0, buf1, ubuf0, ubuf1, \
00612                           vbuf0, vbuf1, abuf0, abuf1, \
00613                           dest, dstW, yalpha, uvalpha, y, fmt); \
00614 } \
00615  \
00616 static void name ## ext ## _1_c(SwsContext *c, const uint16_t *buf0, \
00617                         const uint16_t *ubuf0, const uint16_t *ubuf1, \
00618                         const uint16_t *vbuf0, const uint16_t *vbuf1, \
00619                         const uint16_t *abuf0, uint8_t *dest, int dstW, \
00620                         int uvalpha, enum PixelFormat dstFormat, \
00621                         int flags, int y) \
00622 { \
00623     name ## base ## _1_c_template(c, buf0, ubuf0, ubuf1, vbuf0, \
00624                           vbuf1, abuf0, dest, dstW, uvalpha, \
00625                           dstFormat, flags, y, fmt); \
00626 }
00627 
00628 YUV2PACKEDWRAPPER(yuv2gray16,, LE, PIX_FMT_GRAY16LE);
00629 YUV2PACKEDWRAPPER(yuv2gray16,, BE, PIX_FMT_GRAY16BE);
00630 
/*
 * Store one packed byte of eight 1-bit pixels into the lvalue pos. The
 * accumulated bits are written as they are when the enclosing function's
 * `target` is PIX_FMT_MONOBLACK and bitwise inverted for anything else.
 */
#define output_pixel(pos, acc) \
    do { \
        if (target == PIX_FMT_MONOBLACK) { \
            pos = acc; \
        } else { \
            pos = ~acc; \
        } \
    } while (0)
00637 
00638 static av_always_inline void
00639 yuv2mono_X_c_template(SwsContext *c, const int16_t *lumFilter,
00640                       const int16_t **lumSrc, int lumFilterSize,
00641                       const int16_t *chrFilter, const int16_t **chrUSrc,
00642                       const int16_t **chrVSrc, int chrFilterSize,
00643                       const int16_t **alpSrc, uint8_t *dest, int dstW,
00644                       int y, enum PixelFormat target)
00645 {
00646     const uint8_t * const d128=dither_8x8_220[y&7];
00647     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00648     int i;
00649     int acc = 0;
00650 
00651     for (i = 0; i < dstW - 1; i += 2) {
00652         int j;
00653         int Y1 = 1 << 18;
00654         int Y2 = 1 << 18;
00655 
00656         for (j = 0; j < lumFilterSize; j++) {
00657             Y1 += lumSrc[j][i]   * lumFilter[j];
00658             Y2 += lumSrc[j][i+1] * lumFilter[j];
00659         }
00660         Y1 >>= 19;
00661         Y2 >>= 19;
00662         if ((Y1 | Y2) & 0x100) {
00663             Y1 = av_clip_uint8(Y1);
00664             Y2 = av_clip_uint8(Y2);
00665         }
00666         acc += acc + g[Y1 + d128[(i + 0) & 7]];
00667         acc += acc + g[Y2 + d128[(i + 1) & 7]];
00668         if ((i & 7) == 6) {
00669             output_pixel(*dest++, acc);
00670         }
00671     }
00672 }
00673 
00674 static av_always_inline void
00675 yuv2mono_2_c_template(SwsContext *c, const uint16_t *buf0,
00676                       const uint16_t *buf1, const uint16_t *ubuf0,
00677                       const uint16_t *ubuf1, const uint16_t *vbuf0,
00678                       const uint16_t *vbuf1, const uint16_t *abuf0,
00679                       const uint16_t *abuf1, uint8_t *dest, int dstW,
00680                       int yalpha, int uvalpha, int y,
00681                       enum PixelFormat target)
00682 {
00683     const uint8_t * const d128 = dither_8x8_220[y & 7];
00684     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00685     int  yalpha1 = 4095 - yalpha;
00686     int i;
00687 
00688     for (i = 0; i < dstW - 7; i += 8) {
00689         int acc =    g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];
00690         acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
00691         acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
00692         acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
00693         acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
00694         acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
00695         acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
00696         acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
00697         output_pixel(*dest++, acc);
00698     }
00699 }
00700 
00701 static av_always_inline void
00702 yuv2mono_1_c_template(SwsContext *c, const uint16_t *buf0,
00703                       const uint16_t *ubuf0, const uint16_t *ubuf1,
00704                       const uint16_t *vbuf0, const uint16_t *vbuf1,
00705                       const uint16_t *abuf0, uint8_t *dest, int dstW,
00706                       int uvalpha, enum PixelFormat dstFormat,
00707                       int flags, int y, enum PixelFormat target)
00708 {
00709     const uint8_t * const d128 = dither_8x8_220[y & 7];
00710     uint8_t *g = c->table_gU[128] + c->table_gV[128];
00711     int i;
00712 
00713     for (i = 0; i < dstW - 7; i += 8) {
00714         int acc =    g[(buf0[i    ] >> 7) + d128[0]];
00715         acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
00716         acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
00717         acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
00718         acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
00719         acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
00720         acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
00721         acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
00722         output_pixel(*dest++, acc);
00723     }
00724 }
00725 
00726 #undef output_pixel
00727 
00728 YUV2PACKEDWRAPPER(yuv2mono,, white, PIX_FMT_MONOWHITE);
00729 YUV2PACKEDWRAPPER(yuv2mono,, black, PIX_FMT_MONOBLACK);
00730 
/*
 * Write one packed 4:2:2 group (two luma samples sharing one U and one V)
 * to the enclosing function's `dest`, starting at byte offset pos.
 * Byte order is Y1 U Y2 V when `target` is PIX_FMT_YUYV422 and
 * U Y1 V Y2 for anything else.
 */
#define output_pixels(pos, Y1, U, Y2, V) \
    do { \
        if (target == PIX_FMT_YUYV422) { \
            dest[(pos) + 0] = Y1; \
            dest[(pos) + 1] = U;  \
            dest[(pos) + 2] = Y2; \
            dest[(pos) + 3] = V;  \
        } else { \
            dest[(pos) + 0] = U;  \
            dest[(pos) + 1] = Y1; \
            dest[(pos) + 2] = V;  \
            dest[(pos) + 3] = Y2; \
        } \
    } while (0)
00743 
00744 static av_always_inline void
00745 yuv2422_X_c_template(SwsContext *c, const int16_t *lumFilter,
00746                      const int16_t **lumSrc, int lumFilterSize,
00747                      const int16_t *chrFilter, const int16_t **chrUSrc,
00748                      const int16_t **chrVSrc, int chrFilterSize,
00749                      const int16_t **alpSrc, uint8_t *dest, int dstW,
00750                      int y, enum PixelFormat target)
00751 {
00752     int i;
00753 
00754     for (i = 0; i < (dstW >> 1); i++) {
00755         int j;
00756         int Y1 = 1 << 18;
00757         int Y2 = 1 << 18;
00758         int U  = 1 << 18;
00759         int V  = 1 << 18;
00760 
00761         for (j = 0; j < lumFilterSize; j++) {
00762             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00763             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00764         }
00765         for (j = 0; j < chrFilterSize; j++) {
00766             U += chrUSrc[j][i] * chrFilter[j];
00767             V += chrVSrc[j][i] * chrFilter[j];
00768         }
00769         Y1 >>= 19;
00770         Y2 >>= 19;
00771         U  >>= 19;
00772         V  >>= 19;
00773         if ((Y1 | Y2 | U | V) & 0x100) {
00774             Y1 = av_clip_uint8(Y1);
00775             Y2 = av_clip_uint8(Y2);
00776             U  = av_clip_uint8(U);
00777             V  = av_clip_uint8(V);
00778         }
00779         output_pixels(4*i, Y1, U, Y2, V);
00780     }
00781 }
00782 
00783 static av_always_inline void
00784 yuv2422_2_c_template(SwsContext *c, const uint16_t *buf0,
00785                      const uint16_t *buf1, const uint16_t *ubuf0,
00786                      const uint16_t *ubuf1, const uint16_t *vbuf0,
00787                      const uint16_t *vbuf1, const uint16_t *abuf0,
00788                      const uint16_t *abuf1, uint8_t *dest, int dstW,
00789                      int yalpha, int uvalpha, int y,
00790                      enum PixelFormat target)
00791 {
00792     int  yalpha1 = 4095 - yalpha;
00793     int uvalpha1 = 4095 - uvalpha;
00794     int i;
00795 
00796     for (i = 0; i < (dstW >> 1); i++) {
00797         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00798         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00799         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00800         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00801 
00802         output_pixels(i * 4, Y1, U, Y2, V);
00803     }
00804 }
00805 
00806 static av_always_inline void
00807 yuv2422_1_c_template(SwsContext *c, const uint16_t *buf0,
00808                      const uint16_t *ubuf0, const uint16_t *ubuf1,
00809                      const uint16_t *vbuf0, const uint16_t *vbuf1,
00810                      const uint16_t *abuf0, uint8_t *dest, int dstW,
00811                      int uvalpha, enum PixelFormat dstFormat,
00812                      int flags, int y, enum PixelFormat target)
00813 {
00814     int i;
00815 
00816     if (uvalpha < 2048) {
00817         for (i = 0; i < (dstW >> 1); i++) {
00818             int Y1 = buf0[i * 2]     >> 7;
00819             int Y2 = buf0[i * 2 + 1] >> 7;
00820             int U  = ubuf1[i]        >> 7;
00821             int V  = vbuf1[i]        >> 7;
00822 
00823             output_pixels(i * 4, Y1, U, Y2, V);
00824         }
00825     } else {
00826         for (i = 0; i < (dstW >> 1); i++) {
00827             int Y1 =  buf0[i * 2]          >> 7;
00828             int Y2 =  buf0[i * 2 + 1]      >> 7;
00829             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00830             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00831 
00832             output_pixels(i * 4, Y1, U, Y2, V);
00833         }
00834     }
00835 }
00836 
/* The 4:2:2 writers above are the last users of output_pixels. */
00837 #undef output_pixels
00838 
/* Instantiate the yuv2422 templates for both packed byte orders.
 * NOTE(review): YUV2PACKEDWRAPPER is defined outside this section; presumably
 * it emits entry points that pass the given PIX_FMT as target. */
00839 YUV2PACKEDWRAPPER(yuv2, 422, yuyv422, PIX_FMT_YUYV422);
00840 YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, PIX_FMT_UYVY422);
00841 
/* Component-order selectors for the 48-bit writers below. They expect locals
 * named r and b plus `target` in scope: for the RGB48 targets r_b is r and
 * b_r is b; for every other target the two are swapped. */
00842 #define r_b ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? r : b)
00843 #define b_r ((target == PIX_FMT_RGB48LE || target == PIX_FMT_RGB48BE) ? b : r)
00844 
00845 static av_always_inline void
00846 yuv2rgb48_X_c_template(SwsContext *c, const int16_t *lumFilter,
00847                        const int16_t **lumSrc, int lumFilterSize,
00848                        const int16_t *chrFilter, const int16_t **chrUSrc,
00849                        const int16_t **chrVSrc, int chrFilterSize,
00850                        const int16_t **alpSrc, uint8_t *dest, int dstW,
00851                        int y, enum PixelFormat target)
00852 {
00853     int i;
00854 
00855     for (i = 0; i < (dstW >> 1); i++) {
00856         int j;
00857         int Y1 = 1 << 18;
00858         int Y2 = 1 << 18;
00859         int U  = 1 << 18;
00860         int V  = 1 << 18;
00861         const uint8_t *r, *g, *b;
00862 
00863         for (j = 0; j < lumFilterSize; j++) {
00864             Y1 += lumSrc[j][i * 2]     * lumFilter[j];
00865             Y2 += lumSrc[j][i * 2 + 1] * lumFilter[j];
00866         }
00867         for (j = 0; j < chrFilterSize; j++) {
00868             U += chrUSrc[j][i] * chrFilter[j];
00869             V += chrVSrc[j][i] * chrFilter[j];
00870         }
00871         Y1 >>= 19;
00872         Y2 >>= 19;
00873         U  >>= 19;
00874         V  >>= 19;
00875         if ((Y1 | Y2 | U | V) & 0x100) {
00876             Y1 = av_clip_uint8(Y1);
00877             Y2 = av_clip_uint8(Y2);
00878             U  = av_clip_uint8(U);
00879             V  = av_clip_uint8(V);
00880         }
00881 
00882         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/
00883         r = (const uint8_t *) c->table_rV[V];
00884         g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]);
00885         b = (const uint8_t *) c->table_bU[U];
00886 
00887         dest[ 0] = dest[ 1] = r_b[Y1];
00888         dest[ 2] = dest[ 3] =   g[Y1];
00889         dest[ 4] = dest[ 5] = b_r[Y1];
00890         dest[ 6] = dest[ 7] = r_b[Y2];
00891         dest[ 8] = dest[ 9] =   g[Y2];
00892         dest[10] = dest[11] = b_r[Y2];
00893         dest += 12;
00894     }
00895 }
00896 
00897 static av_always_inline void
00898 yuv2rgb48_2_c_template(SwsContext *c, const uint16_t *buf0,
00899                        const uint16_t *buf1, const uint16_t *ubuf0,
00900                        const uint16_t *ubuf1, const uint16_t *vbuf0,
00901                        const uint16_t *vbuf1, const uint16_t *abuf0,
00902                        const uint16_t *abuf1, uint8_t *dest, int dstW,
00903                        int yalpha, int uvalpha, int y,
00904                        enum PixelFormat target)
00905 {
00906     int  yalpha1 = 4095 - yalpha;
00907     int uvalpha1 = 4095 - uvalpha;
00908     int i;
00909 
00910     for (i = 0; i < (dstW >> 1); i++) {
00911         int Y1 = (buf0[i * 2]     * yalpha1  + buf1[i * 2]     * yalpha)  >> 19;
00912         int Y2 = (buf0[i * 2 + 1] * yalpha1  + buf1[i * 2 + 1] * yalpha)  >> 19;
00913         int U  = (ubuf0[i]        * uvalpha1 + ubuf1[i]        * uvalpha) >> 19;
00914         int V  = (vbuf0[i]        * uvalpha1 + vbuf1[i]        * uvalpha) >> 19;
00915         const uint8_t *r = (const uint8_t *) c->table_rV[V],
00916                       *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
00917                       *b = (const uint8_t *) c->table_bU[U];
00918 
00919         dest[ 0] = dest[ 1] = r_b[Y1];
00920         dest[ 2] = dest[ 3] =   g[Y1];
00921         dest[ 4] = dest[ 5] = b_r[Y1];
00922         dest[ 6] = dest[ 7] = r_b[Y2];
00923         dest[ 8] = dest[ 9] =   g[Y2];
00924         dest[10] = dest[11] = b_r[Y2];
00925         dest += 12;
00926     }
00927 }
00928 
00929 static av_always_inline void
00930 yuv2rgb48_1_c_template(SwsContext *c, const uint16_t *buf0,
00931                        const uint16_t *ubuf0, const uint16_t *ubuf1,
00932                        const uint16_t *vbuf0, const uint16_t *vbuf1,
00933                        const uint16_t *abuf0, uint8_t *dest, int dstW,
00934                        int uvalpha, enum PixelFormat dstFormat,
00935                        int flags, int y, enum PixelFormat target)
00936 {
00937     int i;
00938 
00939     if (uvalpha < 2048) {
00940         for (i = 0; i < (dstW >> 1); i++) {
00941             int Y1 = buf0[i * 2]     >> 7;
00942             int Y2 = buf0[i * 2 + 1] >> 7;
00943             int U  = ubuf1[i]        >> 7;
00944             int V  = vbuf1[i]        >> 7;
00945             const uint8_t *r = (const uint8_t *) c->table_rV[V],
00946                           *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
00947                           *b = (const uint8_t *) c->table_bU[U];
00948 
00949             dest[ 0] = dest[ 1] = r_b[Y1];
00950             dest[ 2] = dest[ 3] =   g[Y1];
00951             dest[ 4] = dest[ 5] = b_r[Y1];
00952             dest[ 6] = dest[ 7] = r_b[Y2];
00953             dest[ 8] = dest[ 9] =   g[Y2];
00954             dest[10] = dest[11] = b_r[Y2];
00955             dest += 12;
00956         }
00957     } else {
00958         for (i = 0; i < (dstW >> 1); i++) {
00959             int Y1 =  buf0[i * 2]          >> 7;
00960             int Y2 =  buf0[i * 2 + 1]      >> 7;
00961             int U  = (ubuf0[i] + ubuf1[i]) >> 8;
00962             int V  = (vbuf0[i] + vbuf1[i]) >> 8;
00963             const uint8_t *r = (const uint8_t *) c->table_rV[V],
00964                           *g = (const uint8_t *)(c->table_gU[U] + c->table_gV[V]),
00965                           *b = (const uint8_t *) c->table_bU[U];
00966 
00967             dest[ 0] = dest[ 1] = r_b[Y1];
00968             dest[ 2] = dest[ 3] =   g[Y1];
00969             dest[ 4] = dest[ 5] = b_r[Y1];
00970             dest[ 6] = dest[ 7] = r_b[Y2];
00971             dest[ 8] = dest[ 9] =   g[Y2];
00972             dest[10] = dest[11] = b_r[Y2];
00973             dest += 12;
00974         }
00975     }
00976 }
00977 
/* The 48-bit writers above are the last users of the component selectors. */
00978 #undef r_b
00979 #undef b_r
00980 
/* Instantiate the yuv2rgb48 templates. Only the big-endian RGB48 and BGR48
 * variants are generated; the little-endian instantiations are commented out.
 * NOTE(review): YUV2PACKEDWRAPPER is defined outside this section. */
00981 YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE);
00982 //YUV2PACKEDWRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE);
00983 YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE);
00984 //YUV2PACKEDWRAPPER(yuv2, rgb48, bgr48le, PIX_FMT_BGR48LE);
00985 
/*
 * Loop prologue for the table-based RGB writers, vertical-filter variant.
 *
 * Expands to the OPENING of a for loop over pixel pairs (i < dstW>>1); the
 * closing brace must be supplied by the code following the invocation.
 * Inside the loop the following are in scope:
 *   Y1, Y2, U, V - filtered sums scaled down by 19 bits and passed through
 *                  av_clip_uint8() when bit 8 is set in any of them
 *   A1, A2       - filtered alpha, assigned only when `alpha` is non-zero
 *   r, g, b      - `type *` pointers taken from c->table_rV[V],
 *                  c->table_gU[U] + c->table_gV[V] and c->table_bU[U]
 *   i2           - 2*i, index of the first pixel of the pair
 * The expansion site must provide i, dstW, c, lumSrc, lumFilter,
 * lumFilterSize, chrUSrc, chrVSrc, chrFilter, chrFilterSize and (when alpha
 * is used) alpSrc.
 */
00986 #define YSCALE_YUV_2_RGBX_C(type,alpha) \
00987     for (i=0; i<(dstW>>1); i++) {\
00988         int j;\
00989         int Y1 = 1<<18;\
00990         int Y2 = 1<<18;\
00991         int U  = 1<<18;\
00992         int V  = 1<<18;\
00993         int av_unused A1, A2;\
00994         type av_unused *r, *b, *g;\
00995         const int i2= 2*i;\
00996         \
00997         for (j=0; j<lumFilterSize; j++) {\
00998             Y1 += lumSrc[j][i2] * lumFilter[j];\
00999             Y2 += lumSrc[j][i2+1] * lumFilter[j];\
01000         }\
01001         for (j=0; j<chrFilterSize; j++) {\
01002             U += chrUSrc[j][i] * chrFilter[j];\
01003             V += chrVSrc[j][i] * chrFilter[j];\
01004         }\
01005         Y1>>=19;\
01006         Y2>>=19;\
01007         U >>=19;\
01008         V >>=19;\
01009         if ((Y1|Y2|U|V)&0x100) {\
01010             Y1 = av_clip_uint8(Y1); \
01011             Y2 = av_clip_uint8(Y2); \
01012             U  = av_clip_uint8(U); \
01013             V  = av_clip_uint8(V); \
01014         }\
01015         if (alpha) {\
01016             A1 = 1<<18;\
01017             A2 = 1<<18;\
01018             for (j=0; j<lumFilterSize; j++) {\
01019                 A1 += alpSrc[j][i2  ] * lumFilter[j];\
01020                 A2 += alpSrc[j][i2+1] * lumFilter[j];\
01021             }\
01022             A1>>=19;\
01023             A2>>=19;\
01024             if ((A1|A2)&0x100) {\
01025                 A1 = av_clip_uint8(A1); \
01026                 A2 = av_clip_uint8(A2); \
01027             }\
01028         }\
01029         /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
01030     r = (type *)c->table_rV[V];   \
01031     g = (type *)(c->table_gU[U] + c->table_gV[V]); \
01032     b = (type *)c->table_bU[U];
01033 
/*
 * Loop prologue for the coefficient-based (non-table) RGB writers.
 *
 * Expands to the OPENING of a for loop over every output pixel (i < dstW);
 * the closing brace must be supplied by the code following the invocation.
 * Per pixel it filters Y, U and V vertically (U and V start with an extra
 * -(128<<19) bias), scales them down by 10 bits, then computes R, G and B
 * from c->yuv2rgb_y_offset / yuv2rgb_y_coeff and the yuv2rgb_*2*_coeff
 * members, adding `rnd` to the scaled luma. R, G and B are passed through
 * av_clip_uintp2(x, 30) when either of their top two bits is set.
 * When `alpha` is non-zero, A is filtered from alpSrc (starting at rnd>>3),
 * scaled down by 19 bits and clipped when bit 8 is set.
 * The expansion site must provide i, dstW, c and the filter/source arrays.
 */
01034 #define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
01035     for (i=0; i<dstW; i++) {\
01036         int j;\
01037         int Y = 1<<9;\
01038         int U = (1<<9)-(128<<19);\
01039         int V = (1<<9)-(128<<19);\
01040         int av_unused A;\
01041         int R,G,B;\
01042         \
01043         for (j=0; j<lumFilterSize; j++) {\
01044             Y += lumSrc[j][i     ] * lumFilter[j];\
01045         }\
01046         for (j=0; j<chrFilterSize; j++) {\
01047             U += chrUSrc[j][i] * chrFilter[j];\
01048             V += chrVSrc[j][i] * chrFilter[j];\
01049         }\
01050         Y >>=10;\
01051         U >>=10;\
01052         V >>=10;\
01053         if (alpha) {\
01054             A = rnd>>3;\
01055             for (j=0; j<lumFilterSize; j++)\
01056                 A += alpSrc[j][i     ] * lumFilter[j];\
01057             A >>=19;\
01058             if (A&0x100)\
01059                 A = av_clip_uint8(A);\
01060         }\
01061         Y-= c->yuv2rgb_y_offset;\
01062         Y*= c->yuv2rgb_y_coeff;\
01063         Y+= rnd;\
01064         R= Y + V*c->yuv2rgb_v2r_coeff;\
01065         G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
01066         B= Y +                          U*c->yuv2rgb_u2b_coeff;\
01067         if ((R|G|B)&(0xC0000000)) {\
01068             R = av_clip_uintp2(R, 30); \
01069             G = av_clip_uintp2(G, 30); \
01070             B = av_clip_uintp2(B, 30); \
01071         }
01072 
/*
 * Loop prologue for the table-based RGB writers, two-line blend variant.
 *
 * Expands to the OPENING of a for loop over pixel pairs; the closing brace
 * must be supplied by the code following the invocation. Y1, Y2, U, V (and
 * A1, A2 when `alpha` is non-zero) are weighted sums of the *0 and *1
 * buffers using yalpha1/yalpha and uvalpha1/uvalpha, scaled down by 19 bits.
 * No clipping is performed. r, g, b point into the lookup tables of c.
 * The expansion site must provide i, dstW, c, the buffers and the weights.
 */
01073 #define YSCALE_YUV_2_RGB2_C(type,alpha) \
01074     for (i=0; i<(dstW>>1); i++) { \
01075         const int i2= 2*i;       \
01076         int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
01077         int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
01078         int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;              \
01079         int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;              \
01080         type av_unused *r, *b, *g;                                    \
01081         int av_unused A1, A2;                                         \
01082         if (alpha) {\
01083             A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
01084             A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
01085         }\
01086     r = (type *)c->table_rV[V];\
01087     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
01088     b = (type *)c->table_bU[U];
01089 
/*
 * Loop prologue for the table-based RGB writers, single-line variant that
 * takes chroma from ubuf1/vbuf1 only.
 *
 * Expands to the OPENING of a for loop over pixel pairs; the closing brace
 * must be supplied by the code following the invocation. Y1, Y2 come from
 * buf0, U/V from ubuf1/vbuf1 and A1, A2 (when `alpha` is non-zero) from
 * abuf0, each shifted down by 7 bits. r, g, b point into the lookup tables
 * of c.
 */
01090 #define YSCALE_YUV_2_RGB1_C(type,alpha) \
01091     for (i=0; i<(dstW>>1); i++) {\
01092         const int i2= 2*i;\
01093         int Y1= buf0[i2  ]>>7;\
01094         int Y2= buf0[i2+1]>>7;\
01095         int U= (ubuf1[i])>>7;\
01096         int V= (vbuf1[i])>>7;\
01097         type av_unused *r, *b, *g;\
01098         int av_unused A1, A2;\
01099         if (alpha) {\
01100             A1= abuf0[i2  ]>>7;\
01101             A2= abuf0[i2+1]>>7;\
01102         }\
01103     r = (type *)c->table_rV[V];\
01104     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
01105     b = (type *)c->table_bU[U];
01106 
/*
 * Loop prologue for the table-based RGB writers, single-line variant that
 * averages the two chroma lines.
 *
 * Same as YSCALE_YUV_2_RGB1_C except that U and V are the sum of the *0 and
 * *1 chroma buffers shifted down by 8 bits. Expands to the OPENING of a for
 * loop; the closing brace must be supplied by the code following the
 * invocation.
 */
01107 #define YSCALE_YUV_2_RGB1B_C(type,alpha) \
01108     for (i=0; i<(dstW>>1); i++) {\
01109         const int i2= 2*i;\
01110         int Y1= buf0[i2  ]>>7;\
01111         int Y2= buf0[i2+1]>>7;\
01112         int U= (ubuf0[i] + ubuf1[i])>>8;\
01113         int V= (vbuf0[i] + vbuf1[i])>>8;\
01114         type av_unused *r, *b, *g;\
01115         int av_unused A1, A2;\
01116         if (alpha) {\
01117             A1= abuf0[i2  ]>>7;\
01118             A2= abuf0[i2+1]>>7;\
01119         }\
01120     r = (type *)c->table_rV[V];\
01121     g = (type *)(c->table_gU[U] + c->table_gV[V]);\
01122     b = (type *)c->table_bU[U];
01123 
/*
 * Per-format store code shared by the table-based packed RGB writers.
 *
 * Expands to a switch on c->dstFormat. `func` is one of the loop-prologue
 * macros taking (type, alpha); it opens the per-pixel-pair loop and provides
 * Y1, Y2, A1, A2, r, g, b and i2, and each case here supplies the stores and
 * the closing brace of that loop.
 *
 *  - RGBA/BGRA and ARGB/ABGR: 32-bit stores of r[Y]+g[Y]+b[Y]; when alpha is
 *    available (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) it is added shifted
 *    left by 24 (RGBA/BGRA) or unshifted (ARGB/ABGR). With CONFIG_SMALL a
 *    single loop with a runtime needAlpha flag is emitted instead of two.
 *  - RGB24/BGR24: six byte stores per pair, dest advanced by 6.
 *  - 565/555/444 and 8/4/4_BYTE formats: per-line dither offsets (taken from
 *    the dither_* tables, indexed by y) are added to Y before the lookup.
 *  - RGB4/BGR4: both pixels of a pair are packed into one byte, the second
 *    one shifted left by 4.
 * Formats not listed fall through the switch without writing anything.
 * The expansion site must provide c, dest, y, i and whatever `func` needs.
 */
01124 #define YSCALE_YUV_2_ANYRGB_C(func)\
01125     switch(c->dstFormat) {\
01126     case PIX_FMT_RGBA:\
01127     case PIX_FMT_BGRA:\
01128         if (CONFIG_SMALL) {\
01129             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
01130             func(uint32_t,needAlpha)\
01131                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
01132                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
01133             }\
01134         } else {\
01135             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
01136                 func(uint32_t,1)\
01137                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
01138                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
01139                 }\
01140             } else {\
01141                 func(uint32_t,0)\
01142                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
01143                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
01144                 }\
01145             }\
01146         }\
01147         break;\
01148     case PIX_FMT_ARGB:\
01149     case PIX_FMT_ABGR:\
01150         if (CONFIG_SMALL) {\
01151             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
01152             func(uint32_t,needAlpha)\
01153                 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
01154                 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
01155             }\
01156         } else {\
01157             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
01158                 func(uint32_t,1)\
01159                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
01160                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
01161                 }\
01162             } else {\
01163                 func(uint32_t,0)\
01164                     ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
01165                     ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
01166                 }\
01167             }\
01168         }                \
01169         break;\
01170     case PIX_FMT_RGB24:\
01171         func(uint8_t,0)\
01172             ((uint8_t*)dest)[0]= r[Y1];\
01173             ((uint8_t*)dest)[1]= g[Y1];\
01174             ((uint8_t*)dest)[2]= b[Y1];\
01175             ((uint8_t*)dest)[3]= r[Y2];\
01176             ((uint8_t*)dest)[4]= g[Y2];\
01177             ((uint8_t*)dest)[5]= b[Y2];\
01178             dest+=6;\
01179         }\
01180         break;\
01181     case PIX_FMT_BGR24:\
01182         func(uint8_t,0)\
01183             ((uint8_t*)dest)[0]= b[Y1];\
01184             ((uint8_t*)dest)[1]= g[Y1];\
01185             ((uint8_t*)dest)[2]= r[Y1];\
01186             ((uint8_t*)dest)[3]= b[Y2];\
01187             ((uint8_t*)dest)[4]= g[Y2];\
01188             ((uint8_t*)dest)[5]= r[Y2];\
01189             dest+=6;\
01190         }\
01191         break;\
01192     case PIX_FMT_RGB565:\
01193     case PIX_FMT_BGR565:\
01194         {\
01195             const int dr1= dither_2x2_8[y&1    ][0];\
01196             const int dg1= dither_2x2_4[y&1    ][0];\
01197             const int db1= dither_2x2_8[(y&1)^1][0];\
01198             const int dr2= dither_2x2_8[y&1    ][1];\
01199             const int dg2= dither_2x2_4[y&1    ][1];\
01200             const int db2= dither_2x2_8[(y&1)^1][1];\
01201             func(uint16_t,0)\
01202                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
01203                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
01204             }\
01205         }\
01206         break;\
01207     case PIX_FMT_RGB555:\
01208     case PIX_FMT_BGR555:\
01209         {\
01210             const int dr1= dither_2x2_8[y&1    ][0];\
01211             const int dg1= dither_2x2_8[y&1    ][1];\
01212             const int db1= dither_2x2_8[(y&1)^1][0];\
01213             const int dr2= dither_2x2_8[y&1    ][1];\
01214             const int dg2= dither_2x2_8[y&1    ][0];\
01215             const int db2= dither_2x2_8[(y&1)^1][1];\
01216             func(uint16_t,0)\
01217                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
01218                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
01219             }\
01220         }\
01221         break;\
01222     case PIX_FMT_RGB444:\
01223     case PIX_FMT_BGR444:\
01224         {\
01225             const int dr1= dither_4x4_16[y&3    ][0];\
01226             const int dg1= dither_4x4_16[y&3    ][1];\
01227             const int db1= dither_4x4_16[(y&3)^3][0];\
01228             const int dr2= dither_4x4_16[y&3    ][1];\
01229             const int dg2= dither_4x4_16[y&3    ][0];\
01230             const int db2= dither_4x4_16[(y&3)^3][1];\
01231             func(uint16_t,0)\
01232                 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
01233                 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
01234             }\
01235         }\
01236         break;\
01237     case PIX_FMT_RGB8:\
01238     case PIX_FMT_BGR8:\
01239         {\
01240             const uint8_t * const d64= dither_8x8_73[y&7];\
01241             const uint8_t * const d32= dither_8x8_32[y&7];\
01242             func(uint8_t,0)\
01243                 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
01244                 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
01245             }\
01246         }\
01247         break;\
01248     case PIX_FMT_RGB4:\
01249     case PIX_FMT_BGR4:\
01250         {\
01251             const uint8_t * const d64= dither_8x8_73 [y&7];\
01252             const uint8_t * const d128=dither_8x8_220[y&7];\
01253             func(uint8_t,0)\
01254                 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
01255                                  + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
01256             }\
01257         }\
01258         break;\
01259     case PIX_FMT_RGB4_BYTE:\
01260     case PIX_FMT_BGR4_BYTE:\
01261         {\
01262             const uint8_t * const d64= dither_8x8_73 [y&7];\
01263             const uint8_t * const d128=dither_8x8_220[y&7];\
01264             func(uint8_t,0)\
01265                 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
01266                 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
01267             }\
01268         }\
01269         break;\
01270     }
01271 
/**
 * Vertically filter the given source lines and write one line of packed RGB
 * in the format selected by c->dstFormat.
 *
 * The whole body is the YSCALE_YUV_2_ANYRGB_C switch instantiated with the
 * YSCALE_YUV_2_RGBX_C loop prologue; destination formats that the switch
 * does not list result in nothing being written.
 */
01272 static void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
01273                           const int16_t **lumSrc, int lumFilterSize,
01274                           const int16_t *chrFilter, const int16_t **chrUSrc,
01275                           const int16_t **chrVSrc, int chrFilterSize,
01276                           const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
01277 {
01278     int i;
01279     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C)
01280 }
01281 
/**
 * Vertically filter the given source lines and write one line of 24/32-bit
 * packed RGB using the yuv2rgb coefficients of c (via
 * YSCALE_YUV_2_RGBX_FULL_C), one chroma sample per output pixel.
 *
 * Handles ARGB, RGB24, RGBA, ABGR, BGR24 and BGRA; any other c->dstFormat
 * hits assert(0). The case labels fall through on purpose to set up `dest`
 * and `aidx` (index of the alpha byte relative to the colour triple):
 *   - A-first formats: dest is advanced by one and aidx ends up as -1, so
 *     alpha is written to the byte in front of the colour triple;
 *   - 24-bit formats: aidx ends up as 2; the byte written through it is
 *     overwritten by the third colour component stored afterwards;
 *   - A-last formats: aidx stays 3.
 * dest advances by c->dstFormatBpp/8 bytes per pixel.
 */
01282 static void yuv2rgbX_c_full(SwsContext *c, const int16_t *lumFilter,
01283                             const int16_t **lumSrc, int lumFilterSize,
01284                             const int16_t *chrFilter, const int16_t **chrUSrc,
01285                             const int16_t **chrVSrc, int chrFilterSize,
01286                             const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
01287 {
01288     int i;
01289     int step= c->dstFormatBpp/8;
01290     int aidx= 3;
01291 
01292     switch(c->dstFormat) {
01293     case PIX_FMT_ARGB:
01294         dest++;
01295         aidx= 0;
        /* fall through: the decrement below turns aidx into -1 */
01296     case PIX_FMT_RGB24:
01297         aidx--;
        /* fall through: shared R,G,B store code */
01298     case PIX_FMT_RGBA:
01299         if (CONFIG_SMALL) {
01300             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
01301             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
01302                 dest[aidx]= needAlpha ? A : 255;
01303                 dest[0]= R>>22;
01304                 dest[1]= G>>22;
01305                 dest[2]= B>>22;
01306                 dest+= step;
01307             }
01308         } else {
01309             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01310                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
01311                     dest[aidx]= A;
01312                     dest[0]= R>>22;
01313                     dest[1]= G>>22;
01314                     dest[2]= B>>22;
01315                     dest+= step;
01316                 }
01317             } else {
01318                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
01319                     dest[aidx]= 255;
01320                     dest[0]= R>>22;
01321                     dest[1]= G>>22;
01322                     dest[2]= B>>22;
01323                     dest+= step;
01324                 }
01325             }
01326         }
01327         break;
01328     case PIX_FMT_ABGR:
01329         dest++;
01330         aidx= 0;
        /* fall through: the decrement below turns aidx into -1 */
01331     case PIX_FMT_BGR24:
01332         aidx--;
        /* fall through: shared B,G,R store code */
01333     case PIX_FMT_BGRA:
01334         if (CONFIG_SMALL) {
01335             int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
01336             YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
01337                 dest[aidx]= needAlpha ? A : 255;
01338                 dest[0]= B>>22;
01339                 dest[1]= G>>22;
01340                 dest[2]= R>>22;
01341                 dest+= step;
01342             }
01343         } else {
01344             if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
01345                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
01346                     dest[aidx]= A;
01347                     dest[0]= B>>22;
01348                     dest[1]= G>>22;
01349                     dest[2]= R>>22;
01350                     dest+= step;
01351                 }
01352             } else {
01353                 YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
01354                     dest[aidx]= 255;
01355                     dest[0]= B>>22;
01356                     dest[1]= G>>22;
01357                     dest[2]= R>>22;
01358                     dest+= step;
01359                 }
01360             }
01361         }
01362         break;
01363     default:
01364         assert(0);
01365     }
01366 }
01367 
/**
 * Blend two source lines and write one line of packed RGB in the format
 * selected by c->dstFormat.
 *
 * The *1 buffers are weighted by yalpha/uvalpha and the *0 buffers by
 * 4095 minus that weight (see YSCALE_YUV_2_RGB2_C). Destination formats not
 * listed in YSCALE_YUV_2_ANYRGB_C result in nothing being written.
 */
01371 static void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
01372                           const uint16_t *buf1, const uint16_t *ubuf0,
01373                           const uint16_t *ubuf1, const uint16_t *vbuf0,
01374                           const uint16_t *vbuf1, const uint16_t *abuf0,
01375                           const uint16_t *abuf1, uint8_t *dest, int dstW,
01376                           int yalpha, int uvalpha, int y)
01377 {
01378     int  yalpha1=4095- yalpha;
01379     int uvalpha1=4095-uvalpha;
01380     int i;
01381 
01382     YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C)
01383 }
01384 
/**
 * Write one line of packed RGB, in the format selected by c->dstFormat,
 * from a single luma line.
 *
 * For uvalpha < 2048 chroma is taken from ubuf1/vbuf1 alone
 * (YSCALE_YUV_2_RGB1_C); otherwise the two chroma lines are averaged
 * (YSCALE_YUV_2_RGB1B_C). The dstFormat and flags parameters are not
 * referenced in this body; the format switch reads c->dstFormat.
 */
01388 static void yuv2packed1_c(SwsContext *c, const uint16_t *buf0,
01389                           const uint16_t *ubuf0, const uint16_t *ubuf1,
01390                           const uint16_t *vbuf0, const uint16_t *vbuf1,
01391                           const uint16_t *abuf0, uint8_t *dest, int dstW,
01392                           int uvalpha, enum PixelFormat dstFormat,
01393                           int flags, int y)
01394 {
01395     int i;
01396 
01397     if (uvalpha < 2048) {
01398         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C)
01399     } else {
01400         YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C)
01401     }
01402 }
01403 
01404 static av_always_inline void fillPlane(uint8_t* plane, int stride,
01405                                        int width, int height,
01406                                        int y, uint8_t val)
01407 {
01408     int i;
01409     uint8_t *ptr = plane + stride*y;
01410     for (i=0; i<height; i++) {
01411         memset(ptr, val, width);
01412         ptr += stride;
01413     }
01414 }
01415 
/* Read one 16-bit sample at pos, big-endian when isBE(origin) is true and
 * little-endian otherwise. Expects `origin` in scope. */
01416 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01417 
/* Map the locals r_b (first component read from a pixel) and b_r (third
 * component) onto red and blue: for the BGR48 origins red is b_r and blue is
 * r_b, for every other origin the other way round. */
01418 #define r ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? b_r : r_b)
01419 #define b ((origin == PIX_FMT_BGR48BE || origin == PIX_FMT_BGR48LE) ? r_b : b_r)
01420 
01421 static av_always_inline void
01422 rgb48ToY_c_template(int16_t *dst, const uint16_t *src, int width,
01423                     enum PixelFormat origin)
01424 {
01425     int i;
01426     for (i = 0; i < width; i++) {
01427         int r_b = input_pixel(&src[i*3+0]);
01428         int   g = input_pixel(&src[i*3+1]);
01429         int b_r = input_pixel(&src[i*3+2]);
01430 
01431         dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
01432     }
01433 }
01434 
01435 static av_always_inline void
01436 rgb48ToUV_c_template(int16_t *dstU, int16_t *dstV,
01437                     const uint16_t *src1, const uint16_t *src2,
01438                     int width, enum PixelFormat origin)
01439 {
01440     int i;
01441     assert(src1==src2);
01442     for (i = 0; i < width; i++) {
01443         int r_b = input_pixel(&src1[i*3+0]);
01444         int   g = input_pixel(&src1[i*3+1]);
01445         int b_r = input_pixel(&src1[i*3+2]);
01446 
01447         dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
01448         dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1+8)) + (1<<(RGB2YUV_SHIFT-7+8))) >> (RGB2YUV_SHIFT-6+8);
01449     }
01450 }
01451 
01452 static av_always_inline void
01453 rgb48ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
01454                           const uint16_t *src1, const uint16_t *src2,
01455                           int width, enum PixelFormat origin)
01456 {
01457     int i;
01458     assert(src1==src2);
01459     for (i = 0; i < width; i++) {
01460         int r_b = (input_pixel(&src1[6*i + 0])) + (input_pixel(&src1[6*i + 3]));
01461         int   g = (input_pixel(&src1[6*i + 1])) + (input_pixel(&src1[6*i + 4]));
01462         int b_r = (input_pixel(&src1[6*i + 2])) + (input_pixel(&src1[6*i + 5]));
01463 
01464         dstU[i]= (RU*r + GU*g + BU*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);
01465         dstV[i]= (RV*r + GV*g + BV*b + (256U<<(RGB2YUV_SHIFT+8)) + (1<<(RGB2YUV_SHIFT-6+8))) >> (RGB2YUV_SHIFT-5+8);
01466     }
01467 }
01468 
/* The 48-bit readers above are the last users of these helper macros; r and
 * b in particular must not stay defined, since later code uses those names
 * as ordinary identifiers. */
01469 #undef r
01470 #undef b
01471 #undef input_pixel
01472 
01473 #define rgb48funcs(pattern, BE_LE, origin) \
01474 static void pattern ## 48 ## BE_LE ## ToY_c(uint8_t *dst, const uint8_t *src, \
01475                                     int width, uint32_t *unused) \
01476 { \
01477     rgb48ToY_c_template(dst, src, width, origin); \
01478 } \
01479  \
01480 static void pattern ## 48 ## BE_LE ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01481                                     const uint8_t *src1, const uint8_t *src2, \
01482                                     int width, uint32_t *unused) \
01483 { \
01484     rgb48ToUV_c_template(dstU, dstV, src1, src2, width, origin); \
01485 } \
01486  \
01487 static void pattern ## 48 ## BE_LE ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
01488                                     const uint8_t *src1, const uint8_t *src2, \
01489                                     int width, uint32_t *unused) \
01490 { \
01491     rgb48ToUV_half_c_template(dstU, dstV, src1, src2, width, origin); \
01492 }
01493 
01494 rgb48funcs(rgb, LE, PIX_FMT_RGB48LE);
01495 rgb48funcs(rgb, BE, PIX_FMT_RGB48BE);
01496 rgb48funcs(bgr, LE, PIX_FMT_BGR48LE);
01497 rgb48funcs(bgr, BE, PIX_FMT_BGR48BE);
01498 
/* Read pixel number i from `src`: a 32-bit read through AV_RN32A for the
 * four 32-bit RGBA-type origins, otherwise a 16-bit read that is big-endian
 * when isBE(origin) is true and little-endian when it is not. Expects `src`
 * and `origin` in scope. */
01499 #define input_pixel(i) ((origin == PIX_FMT_RGBA || origin == PIX_FMT_BGRA || \
01500                          origin == PIX_FMT_ARGB || origin == PIX_FMT_ABGR) ? AV_RN32A(&src[(i)*4]) : \
01501                         (isBE(origin) ? AV_RB16(&src[(i)*2]) : AV_RL16(&src[(i)*2])))
01502 
01503 static av_always_inline void
01504 rgb16_32ToY_c_template(int16_t *dst, const uint8_t *src,
01505                        int width, enum PixelFormat origin,
01506                        int shr,   int shg,   int shb, int shp,
01507                        int maskr, int maskg, int maskb,
01508                        int rsh,   int gsh,   int bsh, int S)
01509 {
01510     const int ry = RY << rsh, gy = GY << gsh, by = BY << bsh,
01511               rnd = (32<<((S)-1)) + (1<<(S-7));
01512     int i;
01513 
01514     for (i = 0; i < width; i++) {
01515         int px = input_pixel(i) >> shp;
01516         int b = (px & maskb) >> shb;
01517         int g = (px & maskg) >> shg;
01518         int r = (px & maskr) >> shr;
01519 
01520         dst[i] = (ry * r + gy * g + by * b + rnd) >> ((S)-6);
01521     }
01522 }
01523 
01524 static av_always_inline void
01525 rgb16_32ToUV_c_template(int16_t *dstU, int16_t *dstV,
01526                         const uint8_t *src, int width,
01527                         enum PixelFormat origin,
01528                         int shr,   int shg,   int shb, int shp,
01529                         int maskr, int maskg, int maskb,
01530                         int rsh,   int gsh,   int bsh, int S)
01531 {
01532     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01533               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01534               rnd = (256<<((S)-1)) + (1<<(S-7));
01535     int i;
01536 
01537     for (i = 0; i < width; i++) {
01538         int px = input_pixel(i) >> shp;
01539         int b = (px & maskb) >> shb;
01540         int g = (px & maskg) >> shg;
01541         int r = (px & maskr) >> shr;
01542 
01543         dstU[i] = (ru * r + gu * g + bu * b + rnd) >> ((S)-6);
01544         dstV[i] = (rv * r + gv * g + bv * b + rnd) >> ((S)-6);
01545     }
01546 }
01547 
01548 static av_always_inline void
01549 rgb16_32ToUV_half_c_template(int16_t *dstU, int16_t *dstV,
01550                              const uint8_t *src, int width,
01551                              enum PixelFormat origin,
01552                              int shr,   int shg,   int shb, int shp,
01553                              int maskr, int maskg, int maskb,
01554                              int rsh,   int gsh,   int bsh, int S)
01555 {
01556     const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
01557               rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
01558               rnd = (256U<<(S)) + (1<<(S-6)), maskgx = ~(maskr | maskb);
01559     int i;
01560 
01561     maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
01562     for (i = 0; i < width; i++) {
01563         int px0 = input_pixel(2 * i + 0) >> shp;
01564         int px1 = input_pixel(2 * i + 1) >> shp;
01565         int b, r, g = (px0 & maskgx) + (px1 & maskgx);
01566         int rb = px0 + px1 - g;
01567 
01568         b = (rb & maskb) >> shb;
01569         if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
01570             origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
01571             g >>= shg;
01572         } else {
01573             g = (g  & maskg) >> shg;
01574         }
01575         r = (rb & maskr) >> shr;
01576 
01577         dstU[i] = (ru * r + gu * g + bu * b + (unsigned)rnd) >> ((S)-6+1);
01578         dstV[i] = (rv * r + gv * g + bv * b + (unsigned)rnd) >> ((S)-6+1);
01579     }
01580 }
01581 
01582 #undef input_pixel
01583 
01584 #define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
01585                          maskg, maskb, rsh, gsh, bsh, S) \
01586 static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
01587                           int width, uint32_t *unused) \
01588 { \
01589     rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
01590                            maskr, maskg, maskb, rsh, gsh, bsh, S); \
01591 } \
01592  \
01593 static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01594                            const uint8_t *src, const uint8_t *dummy, \
01595                            int width, uint32_t *unused) \
01596 { \
01597     rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
01598                             maskr, maskg, maskb, rsh, gsh, bsh, S); \
01599 } \
01600  \
01601 static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
01602                                 const uint8_t *src, const uint8_t *dummy, \
01603                                 int width, uint32_t *unused) \
01604 { \
01605     rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
01606                                  maskr, maskg, maskb, rsh, gsh, bsh, S); \
01607 }
01608 
01609 rgb16_32_wrapper(PIX_FMT_BGR32,    bgr32,  16, 0,  0, 0, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
01610 rgb16_32_wrapper(PIX_FMT_BGR32_1,  bgr321, 16, 0,  0, 8, 0xFF0000, 0xFF00,   0x00FF,  8, 0,  8, RGB2YUV_SHIFT+8);
01611 rgb16_32_wrapper(PIX_FMT_RGB32,    rgb32,   0, 0, 16, 0,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
01612 rgb16_32_wrapper(PIX_FMT_RGB32_1,  rgb321,  0, 0, 16, 8,   0x00FF, 0xFF00, 0xFF0000,  8, 0,  8, RGB2YUV_SHIFT+8);
01613 rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
01614 rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
01615 rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
01616 rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
01617 rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0,  0, 0,   0x001F, 0x07E0,   0xF800, 11, 5,  0, RGB2YUV_SHIFT+8);
01618 rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0,  0, 0,   0x001F, 0x03E0,   0x7C00, 10, 5,  0, RGB2YUV_SHIFT+7);
01619 rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0,  0, 0,   0xF800, 0x07E0,   0x001F,  0, 5, 11, RGB2YUV_SHIFT+8);
01620 rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0,  0, 0,   0x7C00, 0x03E0,   0x001F,  0, 5, 10, RGB2YUV_SHIFT+7);
01621 
/**
 * Extract the alpha line from 4-byte pixels whose alpha is the first byte.
 * Each output sample is that byte shifted left by 6.
 *
 * @param dst    output line, one int16_t per pixel
 * @param src    packed input, 4 bytes per pixel
 * @param width  number of pixels
 * @param unused ignored
 */
static void abgrToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int px;

    for (px = 0; px < width; px++) {
        const int alpha = src[4 * px];
        dst[px] = alpha << 6;
    }
}
01629 
/**
 * Extract the alpha line from 4-byte pixels whose alpha is the last byte.
 * Each output sample is that byte shifted left by 6.
 *
 * @param dst    output line, one int16_t per pixel
 * @param src    packed input, 4 bytes per pixel
 * @param width  number of pixels
 * @param unused ignored
 */
static void rgbaToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    int px;

    for (px = 0; px < width; px++) {
        const int alpha = src[4 * px + 3];
        dst[px] = alpha << 6;
    }
}
01637 
/**
 * Produce the alpha line of a paletted image: every source byte indexes
 * "pal", and bits 24..31 of the palette entry, shifted left by 6, become
 * the output sample.
 *
 * @param dst   output line, one int16_t per pixel
 * @param src   palette indices, one byte per pixel
 * @param width number of pixels
 * @param pal   palette of 32-bit entries
 */
static void palToA_c(int16_t *dst, const uint8_t *src, int width, uint32_t *pal)
{
    int px;

    for (px = 0; px < width; px++) {
        const uint32_t entry = pal[src[px]];
        dst[px] = (entry >> 24) << 6;
    }
}
01647 
/**
 * Produce the luma line of a paletted image: every source byte indexes
 * "pal", and the low 8 bits of the palette entry, shifted left by 6, become
 * the output sample.
 *
 * NOTE(review): width is declared long here while the sibling converters
 * take int -- confirm this matches the function pointer it is assigned to.
 *
 * @param dst   output line, one int16_t per pixel
 * @param src   palette indices, one byte per pixel
 * @param width number of pixels
 * @param pal   palette of 32-bit entries
 */
static void palToY_c(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
    int px;

    for (px = 0; px < width; px++) {
        const uint32_t entry = pal[src[px]];
        dst[px] = (entry & 0xFF) << 6;
    }
}
01657 
/**
 * Produce the chroma lines of a paletted image: every source byte indexes
 * "pal"; bits 8..15 of the entry give U and bits 16..23 give V, each
 * shifted left by 6.
 *
 * NOTE(review): dstU is uint16_t * while dstV is int16_t * -- the stored
 * values fit either type, but confirm the asymmetry is intended.
 *
 * @param dstU,dstV output lines, one sample per pixel
 * @param src1      palette indices, one byte per pixel
 * @param src2      must equal src1 (asserted)
 * @param width     number of pixels
 * @param pal       palette of 32-bit entries
 */
static void palToUV_c(uint16_t *dstU, int16_t *dstV,
                           const uint8_t *src1, const uint8_t *src2,
                           int width, uint32_t *pal)
{
    int px;

    assert(src1 == src2);
    for (px = 0; px < width; px++) {
        const int entry = pal[src1[px]];
        const uint8_t u = (uint8_t)(entry >> 8);
        const uint8_t v = (uint8_t)(entry >> 16);

        dstU[px] = u << 6;
        dstV[px] = v << 6;
    }
}
01671 
/**
 * Expand a 1-bit-per-pixel line to luma samples, inverting the input bits:
 * a 0 bit yields 16383 and a 1 bit yields 0. Bits are consumed from the
 * most significant bit of each byte downwards.
 *
 * @param dst    output line, one int16_t per pixel
 * @param src    packed bits, 8 pixels per byte
 * @param width  number of pixels
 * @param unused ignored
 */
static void monowhite2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const int whole = width / 8;   /* bytes that are fully used      */
    const int rest  = width & 7;   /* pixels in the trailing partial byte */
    int byte, bit;

    for (byte = 0; byte < whole; byte++) {
        const int inv = ~src[byte];
        for (bit = 0; bit < 8; bit++)
            dst[8 * byte + bit] = ((inv >> (7 - bit)) & 1) * 16383;
    }
    if (rest) {
        const int inv = ~src[byte];
        for (bit = 0; bit < rest; bit++)
            dst[8 * byte + bit] = ((inv >> (7 - bit)) & 1) * 16383;
    }
}
01686 
/**
 * Expand a 1-bit-per-pixel line to luma samples: a 1 bit yields 16383 and
 * a 0 bit yields 0. Bits are consumed from the most significant bit of
 * each byte downwards.
 *
 * @param dst    output line, one int16_t per pixel
 * @param src    packed bits, 8 pixels per byte
 * @param width  number of pixels
 * @param unused ignored
 */
static void monoblack2Y_c(int16_t *dst, const uint8_t *src, int width, uint32_t *unused)
{
    const int whole = width / 8;   /* bytes that are fully used      */
    const int rest  = width & 7;   /* pixels in the trailing partial byte */
    int byte, bit;

    for (byte = 0; byte < whole; byte++) {
        const int bits = src[byte];
        for (bit = 0; bit < 8; bit++)
            dst[8 * byte + bit] = ((bits >> (7 - bit)) & 1) * 16383;
    }
    if (rest) {
        const int bits = src[byte];
        for (bit = 0; bit < rest; bit++)
            dst[8 * byte + bit] = ((bits >> (7 - bit)) & 1) * 16383;
    }
}
01701 
//FIXME yuy2* can read up to 7 samples too much

/**
 * Extract luma from a packed line where luma occupies every even byte
 * (byte 2*i for pixel i).
 *
 * @param dst    output line, one byte per pixel
 * @param src    packed input line
 * @param width  number of pixels
 * @param unused ignored
 */
static void yuy2ToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int px;

    for (px = 0; px < width; px++) {
        const uint8_t *pair = src + 2 * px;
        dst[px] = pair[0];
    }
}
01711 
/**
 * Extract chroma from a packed line made of 4-byte groups: byte 1 of each
 * group goes to U and byte 3 to V.
 *
 * @param dstU,dstV output lines, one byte per 4-byte group
 * @param src1      packed input line
 * @param src2      must equal src1 (asserted after the copy)
 * @param width     number of 4-byte groups
 * @param unused    ignored
 */
static void yuy2ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[1];
        dstV[n] = quad[3];
    }
    assert(src1 == src2);
}
01722 
/**
 * Reduce two planes of 2-byte samples to 8 bits by keeping the second byte
 * of every sample (byte 2*i+1): src1 feeds U, src2 feeds V.
 *
 * @param dstU,dstV output lines, one byte per sample
 * @param src1,src2 input planes, 2 bytes per sample
 * @param width     number of samples
 * @param unused    ignored
 */
static void LEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const int hi = 2 * n + 1;
        dstU[n] = src1[hi];
        dstV[n] = src2[hi];
    }
}
01732 
/* This is almost identical to yuy2ToY_c and exists only because calling
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses.
 *
 * Luma is taken from every odd byte (byte 2*i+1 for pixel i). */
static void uyvyToY_c(uint8_t *dst, const uint8_t *src, int width,
                      uint32_t *unused)
{
    int px;

    for (px = 0; px < width; px++) {
        const uint8_t *pair = src + 2 * px;
        dst[px] = pair[1];
    }
}
01742 
/**
 * Extract chroma from a packed line made of 4-byte groups: byte 0 of each
 * group goes to U and byte 2 to V.
 *
 * @param dstU,dstV output lines, one byte per 4-byte group
 * @param src1      packed input line
 * @param src2      must equal src1 (asserted after the copy)
 * @param width     number of 4-byte groups
 * @param unused    ignored
 */
static void uyvyToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                       const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const uint8_t *quad = src1 + 4 * n;
        dstU[n] = quad[0];
        dstV[n] = quad[2];
    }
    assert(src1 == src2);
}
01753 
/**
 * Reduce two planes of 2-byte samples to 8 bits by keeping the first byte
 * of every sample (byte 2*i): src1 feeds U, src2 feeds V.
 *
 * @param dstU,dstV output lines, one byte per sample
 * @param src1,src2 input planes, 2 bytes per sample
 * @param width     number of samples
 * @param unused    ignored
 */
static void BEToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
                     const uint8_t *src2, int width, uint32_t *unused)
{
    int n;

    for (n = 0; n < width; n++) {
        const int hi = 2 * n;
        dstU[n] = src1[hi];
        dstV[n] = src2[hi];
    }
}
01763 
01764 static av_always_inline void nvXXtoUV_c(uint8_t *dst1, uint8_t *dst2,
01765                                         const uint8_t *src, int width)
01766 {
01767     int i;
01768     for (i = 0; i < width; i++) {
01769         dst1[i] = src[2*i+0];
01770         dst2[i] = src[2*i+1];
01771     }
01772 }
01773 
/**
 * Split an interleaved chroma line into planes: the first byte of each
 * pair in src1 goes to dstU, the second to dstV. src2 and unused are
 * ignored.
 */
static void nv12ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *first  = dstU;
    uint8_t *second = dstV;

    nvXXtoUV_c(first, second, src1, width);
}
01780 
/**
 * Split an interleaved chroma line into planes with the opposite order to
 * nv12ToUV_c: the first byte of each pair in src1 goes to dstV, the second
 * to dstU. src2 and unused are ignored.
 */
static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
                       const uint8_t *src1, const uint8_t *src2,
                       int width, uint32_t *unused)
{
    uint8_t *first  = dstV;
    uint8_t *second = dstU;

    nvXXtoUV_c(first, second, src1, width);
}
01787 
01788 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
01789 
01790 // FIXME Maybe dither instead.
01791 static av_always_inline void
01792 yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
01793                           const uint8_t *_srcU, const uint8_t *_srcV,
01794                           int width, enum PixelFormat origin, int depth)
01795 {
01796     int i;
01797     const uint16_t *srcU = (const uint16_t *) _srcU;
01798     const uint16_t *srcV = (const uint16_t *) _srcV;
01799 
01800     for (i = 0; i < width; i++) {
01801         dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
01802         dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
01803     }
01804 }
01805 
01806 static av_always_inline void
01807 yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
01808                          int width, enum PixelFormat origin, int depth)
01809 {
01810     int i;
01811     const uint16_t *srcY = (const uint16_t*)_srcY;
01812 
01813     for (i = 0; i < width; i++)
01814         dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
01815 }
01816 
01817 #undef input_pixel
01818 
01819 #define YUV_NBPS(depth, BE_LE, origin) \
01820 static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
01821                                      const uint8_t *srcU, const uint8_t *srcV, \
01822                                      int width, uint32_t *unused) \
01823 { \
01824     yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
01825 } \
01826 static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
01827                                     int width, uint32_t *unused) \
01828 { \
01829     yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
01830 }
01831 
01832 YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
01833 YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
01834 YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
01835 YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
01836 
01837 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
01838                        int width, uint32_t *unused)
01839 {
01840     int i;
01841     for (i=0; i<width; i++) {
01842         int b= src[i*3+0];
01843         int g= src[i*3+1];
01844         int r= src[i*3+2];
01845 
01846         dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
01847     }
01848 }
01849 
01850 static void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
01851                         const uint8_t *src2, int width, uint32_t *unused)
01852 {
01853     int i;
01854     for (i=0; i<width; i++) {
01855         int b= src1[3*i + 0];
01856         int g= src1[3*i + 1];
01857         int r= src1[3*i + 2];
01858 
01859         dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01860         dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01861     }
01862     assert(src1 == src2);
01863 }
01864 
01865 static void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
01866                              const uint8_t *src2, int width, uint32_t *unused)
01867 {
01868     int i;
01869     for (i=0; i<width; i++) {
01870         int b= src1[6*i + 0] + src1[6*i + 3];
01871         int g= src1[6*i + 1] + src1[6*i + 4];
01872         int r= src1[6*i + 2] + src1[6*i + 5];
01873 
01874         dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01875         dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01876     }
01877     assert(src1 == src2);
01878 }
01879 
01880 static void rgb24ToY_c(int16_t *dst, const uint8_t *src, int width,
01881                        uint32_t *unused)
01882 {
01883     int i;
01884     for (i=0; i<width; i++) {
01885         int r= src[i*3+0];
01886         int g= src[i*3+1];
01887         int b= src[i*3+2];
01888 
01889         dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
01890     }
01891 }
01892 
01893 static void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
01894                         const uint8_t *src2, int width, uint32_t *unused)
01895 {
01896     int i;
01897     assert(src1==src2);
01898     for (i=0; i<width; i++) {
01899         int r= src1[3*i + 0];
01900         int g= src1[3*i + 1];
01901         int b= src1[3*i + 2];
01902 
01903         dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01904         dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
01905     }
01906 }
01907 
01908 static void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
01909                                     const uint8_t *src2, int width, uint32_t *unused)
01910 {
01911     int i;
01912     assert(src1==src2);
01913     for (i=0; i<width; i++) {
01914         int r= src1[6*i + 0] + src1[6*i + 3];
01915         int g= src1[6*i + 1] + src1[6*i + 4];
01916         int b= src1[6*i + 2] + src1[6*i + 5];
01917 
01918         dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01919         dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
01920     }
01921 }
01922 
01923 
// bilinear / bicubic scaling
/**
 * Horizontally filter one line of 8-bit samples.
 *
 * Output sample x is the dot product of filterSize coefficients, starting
 * at filter[filterSize * x], with the input samples starting at
 * src[filterPos[x]]. The sum is shifted right by 7 and capped at 32767.
 *
 * @param dst        output line, dstW samples
 * @param dstW       number of output samples
 * @param src        input line
 * @param srcW       unused here
 * @param xInc       unused here
 * @param filter     coefficients, filterSize per output sample
 * @param filterPos  first input index for every output sample
 * @param filterSize number of taps
 */
static void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
                     int srcW, int xInc,
                     const int16_t *filter, const int16_t *filterPos,
                     int filterSize)
{
    int x;

    for (x = 0; x < dstW; x++) {
        const int first = filterPos[x];
        const int coef0 = filterSize * x;
        int acc = 0;
        int tap;

        for (tap = 0; tap < filterSize; tap++)
            acc += ((int)src[first + tap]) * filter[coef0 + tap];

        /* only the upper bound is clamped: the cubic filter can overshoot */
        dst[x] = FFMIN(acc >> 7, (1 << 15) - 1);
    }
}
01943 
/**
 * Horizontally filter one line of 16-bit samples read in native order.
 *
 * Output sample x is the dot product of filterSize coefficients, starting
 * at filter[filterSize * x], with the input samples starting at
 * src[filterPos[x]]. The sum is shifted right by "shift" and capped at
 * 32767.
 *
 * @param srcW,xInc unused here
 */
static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                                    const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
    int x, tap;

    for (x = 0; x < dstW; x++) {
        const int first = filterPos[x];
        int acc = 0;

        for (tap = 0; tap < filterSize; tap++)
            acc += ((int)src[first + tap]) * filter[filterSize * x + tap];

        /* only the upper bound is clamped: the cubic filter can overshoot */
        dst[x] = FFMIN(acc >> shift, (1 << 15) - 1);
    }
}
01958 
/**
 * Horizontally filter one line of 16-bit samples, byte-swapping every
 * input sample with av_bswap16() before use.
 *
 * Output sample x is the dot product of filterSize coefficients, starting
 * at filter[filterSize * x], with the swapped input samples starting at
 * src[filterPos[x]]. The sum is shifted right by "shift" and capped at
 * 32767.
 *
 * @param srcW,xInc unused here
 */
static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
                                    const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
    int x, tap;

    for (x = 0; x < dstW; x++) {
        const int first = filterPos[x];
        int acc = 0;

        for (tap = 0; tap < filterSize; tap++)
            acc += ((int)av_bswap16(src[first + tap])) * filter[filterSize * x + tap];

        /* only the upper bound is clamped: the cubic filter can overshoot */
        dst[x] = FFMIN(acc >> shift, (1 << 15) - 1);
    }
}
01972 
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
/**
 * In-place range conversion of two chroma lines (per the name, towards
 * JPEG range): every sample is capped at 30775, multiplied by 4663,
 * reduced by 9289992 and shifted right by 12.
 *
 * @param dstU,dstV lines to convert in place
 * @param width     number of samples per line
 */
static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        const int u = FFMIN(dstU[n], 30775);
        const int v = FFMIN(dstV[n], 30775);

        dstU[n] = (u * 4663 - 9289992) >> 12; //-264
        dstV[n] = (v * 4663 - 9289992) >> 12; //-264
    }
}
/**
 * In-place range conversion of two chroma lines (per the name, away from
 * JPEG range): every sample is multiplied by 1799, increased by 4081085
 * and shifted right by 11.
 *
 * @param dstU,dstV lines to convert in place
 * @param width     number of samples per line
 */
static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        const int u = dstU[n];
        const int v = dstV[n];

        dstU[n] = (u * 1799 + 4081085) >> 11; //1469
        dstV[n] = (v * 1799 + 4081085) >> 11; //1469
    }
}
/**
 * In-place range conversion of a luma line (per the name, towards JPEG
 * range): every sample is capped at 30189, multiplied by 19077, reduced by
 * 39057361 and shifted right by 14.
 *
 * @param dst   line to convert in place
 * @param width number of samples
 */
static void lumRangeToJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        const int y = FFMIN(dst[n], 30189);
        dst[n] = (y * 19077 - 39057361) >> 14;
    }
}
/**
 * In-place range conversion of a luma line (per the name, away from JPEG
 * range): every sample is multiplied by 14071, increased by 33561947 and
 * shifted right by 14.
 *
 * @param dst   line to convert in place
 * @param width number of samples
 */
static void lumRangeFromJpeg_c(int16_t *dst, int width)
{
    int n;

    for (n = 0; n < width; n++) {
        const int y = dst[n];
        dst[n] = (y * 14071 + 33561947) >> 14;
    }
}
02003 
02004 static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth,
02005                            const uint8_t *src, int srcW, int xInc)
02006 {
02007     int i;
02008     unsigned int xpos=0;
02009     for (i=0;i<dstWidth;i++) {
02010         register unsigned int xx=xpos>>16;
02011         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02012         dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
02013         xpos+=xInc;
02014     }
02015     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
02016         dst[i] = src[srcW-1]*128;
02017 }
02018 
02019 // *** horizontal scale Y line to temp buffer
02020 static av_always_inline void hyscale(SwsContext *c, uint16_t *dst, int dstWidth,
02021                                      const uint8_t *src, int srcW, int xInc,
02022                                      const int16_t *hLumFilter,
02023                                      const int16_t *hLumFilterPos, int hLumFilterSize,
02024                                      uint8_t *formatConvBuffer,
02025                                      uint32_t *pal, int isAlpha)
02026 {
02027     void (*toYV12)(uint8_t *, const uint8_t *, int, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
02028     void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
02029 
02030     if (toYV12) {
02031         toYV12(formatConvBuffer, src, srcW, pal);
02032         src= formatConvBuffer;
02033     }
02034 
02035     if (c->hScale16) {
02036         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02037         c->hScale16(dst, dstWidth, (const uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
02038     } else if (!c->hyscale_fast) {
02039         c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
02040     } else { // fast bilinear upscale / crap downscale
02041         c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
02042     }
02043 
02044     if (convertRange)
02045         convertRange(dst, dstWidth);
02046 }
02047 
02048 static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2,
02049                            int dstWidth, const uint8_t *src1,
02050                            const uint8_t *src2, int srcW, int xInc)
02051 {
02052     int i;
02053     unsigned int xpos=0;
02054     for (i=0;i<dstWidth;i++) {
02055         register unsigned int xx=xpos>>16;
02056         register unsigned int xalpha=(xpos&0xFFFF)>>9;
02057         dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
02058         dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
02059         xpos+=xInc;
02060     }
02061     for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
02062         dst1[i] = src1[srcW-1]*128;
02063         dst2[i] = src2[srcW-1]*128;
02064     }
02065 }
02066 
02067 static av_always_inline void hcscale(SwsContext *c, uint16_t *dst1, uint16_t *dst2, int dstWidth,
02068                                      const uint8_t *src1, const uint8_t *src2,
02069                                      int srcW, int xInc, const int16_t *hChrFilter,
02070                                      const int16_t *hChrFilterPos, int hChrFilterSize,
02071                                      uint8_t *formatConvBuffer, uint32_t *pal)
02072 {
02073     if (c->chrToYV12) {
02074         uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW*2+78, 16);
02075         c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
02076         src1= formatConvBuffer;
02077         src2= buf2;
02078     }
02079 
02080     if (c->hScale16) {
02081         int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
02082         c->hScale16(dst1, dstWidth, (const uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
02083         c->hScale16(dst2, dstWidth, (const uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
02084     } else if (!c->hcscale_fast) {
02085         c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02086         c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
02087     } else { // fast bilinear upscale / crap downscale
02088         c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
02089     }
02090 
02091     if (c->chrConvertRange)
02092         c->chrConvertRange(dst1, dst2, dstWidth);
02093 }
02094 
02095 static av_always_inline void
02096 find_c_packed_planar_out_funcs(SwsContext *c,
02097                                yuv2planar1_fn *yuv2yuv1,    yuv2planarX_fn *yuv2yuvX,
02098                                yuv2packed1_fn *yuv2packed1, yuv2packed2_fn *yuv2packed2,
02099                                yuv2packedX_fn *yuv2packedX)
02100 {
02101     enum PixelFormat dstFormat = c->dstFormat;
02102 
02103     if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
02104         *yuv2yuvX     = yuv2nv12X_c;
02105     } else if (is16BPS(dstFormat)) {
02106         *yuv2yuvX     = isBE(dstFormat) ? yuv2yuvX16BE_c  : yuv2yuvX16LE_c;
02107     } else if (is9_OR_10BPS(dstFormat)) {
02108         if (av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1 == 8) {
02109             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX9BE_c :  yuv2yuvX9LE_c;
02110         } else {
02111             *yuv2yuvX = isBE(dstFormat) ? yuv2yuvX10BE_c : yuv2yuvX10LE_c;
02112         }
02113     } else {
02114         *yuv2yuv1     = yuv2yuv1_c;
02115         *yuv2yuvX     = yuv2yuvX_c;
02116     }
02117     if(c->flags & SWS_FULL_CHR_H_INT) {
02118         *yuv2packedX = yuv2rgbX_c_full;
02119     } else {
02120         switch (dstFormat) {
02121         case PIX_FMT_GRAY16BE:
02122             *yuv2packed1 = yuv2gray16BE_1_c;
02123             *yuv2packed2 = yuv2gray16BE_2_c;
02124             *yuv2packedX = yuv2gray16BE_X_c;
02125             break;
02126         case PIX_FMT_GRAY16LE:
02127             *yuv2packed1 = yuv2gray16LE_1_c;
02128             *yuv2packed2 = yuv2gray16LE_2_c;
02129             *yuv2packedX = yuv2gray16LE_X_c;
02130             break;
02131         case PIX_FMT_MONOWHITE:
02132             *yuv2packed1 = yuv2monowhite_1_c;
02133             *yuv2packed2 = yuv2monowhite_2_c;
02134             *yuv2packedX = yuv2monowhite_X_c;
02135             break;
02136         case PIX_FMT_MONOBLACK:
02137             *yuv2packed1 = yuv2monoblack_1_c;
02138             *yuv2packed2 = yuv2monoblack_2_c;
02139             *yuv2packedX = yuv2monoblack_X_c;
02140             break;
02141         case PIX_FMT_YUYV422:
02142             *yuv2packed1 = yuv2yuyv422_1_c;
02143             *yuv2packed2 = yuv2yuyv422_2_c;
02144             *yuv2packedX = yuv2yuyv422_X_c;
02145             break;
02146         case PIX_FMT_UYVY422:
02147             *yuv2packed1 = yuv2uyvy422_1_c;
02148             *yuv2packed2 = yuv2uyvy422_2_c;
02149             *yuv2packedX = yuv2uyvy422_X_c;
02150             break;
02151         case PIX_FMT_RGB48LE:
02152             //*yuv2packed1 = yuv2rgb48le_1_c;
02153             //*yuv2packed2 = yuv2rgb48le_2_c;
02154             //*yuv2packedX = yuv2rgb48le_X_c;
02155             //break;
02156         case PIX_FMT_RGB48BE:
02157             *yuv2packed1 = yuv2rgb48be_1_c;
02158             *yuv2packed2 = yuv2rgb48be_2_c;
02159             *yuv2packedX = yuv2rgb48be_X_c;
02160             break;
02161         case PIX_FMT_BGR48LE:
02162             //*yuv2packed1 = yuv2bgr48le_1_c;
02163             //*yuv2packed2 = yuv2bgr48le_2_c;
02164             //*yuv2packedX = yuv2bgr48le_X_c;
02165             //break;
02166         case PIX_FMT_BGR48BE:
02167             *yuv2packed1 = yuv2bgr48be_1_c;
02168             *yuv2packed2 = yuv2bgr48be_2_c;
02169             *yuv2packedX = yuv2bgr48be_X_c;
02170             break;
02171         default:
02172             *yuv2packed1 = yuv2packed1_c;
02173             *yuv2packed2 = yuv2packed2_c;
02174             *yuv2packedX = yuv2packedX_c;
02175             break;
02176         }
02177     }
02178 }
02179 
/* Compile-time switch: set to a non-zero value to make DEBUG_BUFFERS() log. */
#define DEBUG_SWSCALE_BUFFERS 0
/*
 * Log at AV_LOG_DEBUG level when DEBUG_SWSCALE_BUFFERS is non-zero. Expects
 * a variable named "c" to be in scope as the logging context.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and cannot capture a following "else" when used inside an if/else.
 */
#define DEBUG_BUFFERS(...) \
    do { \
        if (DEBUG_SWSCALE_BUFFERS) \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__); \
    } while (0)
02182 
02183 static int swScale(SwsContext *c, const uint8_t* src[],
02184                    int srcStride[], int srcSliceY,
02185                    int srcSliceH, uint8_t* dst[], int dstStride[])
02186 {
02187     /* load a few things into local vars to make the code more readable? and faster */
02188     const int srcW= c->srcW;
02189     const int dstW= c->dstW;
02190     const int dstH= c->dstH;
02191     const int chrDstW= c->chrDstW;
02192     const int chrSrcW= c->chrSrcW;
02193     const int lumXInc= c->lumXInc;
02194     const int chrXInc= c->chrXInc;
02195     const enum PixelFormat dstFormat= c->dstFormat;
02196     const int flags= c->flags;
02197     int16_t *vLumFilterPos= c->vLumFilterPos;
02198     int16_t *vChrFilterPos= c->vChrFilterPos;
02199     int16_t *hLumFilterPos= c->hLumFilterPos;
02200     int16_t *hChrFilterPos= c->hChrFilterPos;
02201     int16_t *vLumFilter= c->vLumFilter;
02202     int16_t *vChrFilter= c->vChrFilter;
02203     int16_t *hLumFilter= c->hLumFilter;
02204     int16_t *hChrFilter= c->hChrFilter;
02205     int32_t *lumMmxFilter= c->lumMmxFilter;
02206     int32_t *chrMmxFilter= c->chrMmxFilter;
02207     int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
02208     const int vLumFilterSize= c->vLumFilterSize;
02209     const int vChrFilterSize= c->vChrFilterSize;
02210     const int hLumFilterSize= c->hLumFilterSize;
02211     const int hChrFilterSize= c->hChrFilterSize;
02212     int16_t **lumPixBuf= c->lumPixBuf;
02213     int16_t **chrUPixBuf= c->chrUPixBuf;
02214     int16_t **chrVPixBuf= c->chrVPixBuf;
02215     int16_t **alpPixBuf= c->alpPixBuf;
02216     const int vLumBufSize= c->vLumBufSize;
02217     const int vChrBufSize= c->vChrBufSize;
02218     uint8_t *formatConvBuffer= c->formatConvBuffer;
02219     const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
02220     const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
02221     int lastDstY;
02222     uint32_t *pal=c->pal_yuv;
02223     int should_dither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat);
02224     yuv2planar1_fn yuv2yuv1 = c->yuv2yuv1;
02225     yuv2planarX_fn yuv2yuvX = c->yuv2yuvX;
02226     yuv2packed1_fn yuv2packed1 = c->yuv2packed1;
02227     yuv2packed2_fn yuv2packed2 = c->yuv2packed2;
02228     yuv2packedX_fn yuv2packedX = c->yuv2packedX;
02229 
02230     /* vars which will change and which we need to store back in the context */
02231     int dstY= c->dstY;
02232     int lumBufIndex= c->lumBufIndex;
02233     int chrBufIndex= c->chrBufIndex;
02234     int lastInLumBuf= c->lastInLumBuf;
02235     int lastInChrBuf= c->lastInChrBuf;
02236 
02237     if (isPacked(c->srcFormat)) {
02238         src[0]=
02239         src[1]=
02240         src[2]=
02241         src[3]= src[0];
02242         srcStride[0]=
02243         srcStride[1]=
02244         srcStride[2]=
02245         srcStride[3]= srcStride[0];
02246     }
02247     srcStride[1]<<= c->vChrDrop;
02248     srcStride[2]<<= c->vChrDrop;
02249 
02250     DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
02251                   src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
02252                   dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
02253     DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
02254                    srcSliceY,    srcSliceH,    dstY,    dstH);
02255     DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
02256                    vLumFilterSize,    vLumBufSize,    vChrFilterSize,    vChrBufSize);
02257 
02258     if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
02259         static int warnedAlready=0; //FIXME move this into the context perhaps
02260         if (flags & SWS_PRINT_INFO && !warnedAlready) {
02261             av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
02262                    "         ->cannot do aligned memory accesses anymore\n");
02263             warnedAlready=1;
02264         }
02265     }
02266 
02267     /* Note the user might start scaling the picture in the middle so this
02268        will not get executed. This is not really intended but works
02269        currently, so people might do it. */
02270     if (srcSliceY ==0) {
02271         lumBufIndex=-1;
02272         chrBufIndex=-1;
02273         dstY=0;
02274         lastInLumBuf= -1;
02275         lastInChrBuf= -1;
02276     }
02277 
02278     lastDstY= dstY;
02279 
02280     for (;dstY < dstH; dstY++) {
02281         unsigned char *dest =dst[0]+dstStride[0]*dstY;
02282         const int chrDstY= dstY>>c->chrDstVSubSample;
02283         unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
02284         unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
02285         unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
02286         const uint8_t *lumDither= should_dither ? dithers[7][dstY   &7] : flat64;
02287         const uint8_t *chrDither= should_dither ? dithers[7][chrDstY&7] : flat64;
02288 
02289         const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
02290         const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
02291         const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
02292         int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
02293         int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
02294         int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
02295         int enough_lines;
02296 
02297         //handle holes (FAST_BILINEAR & weird filters)
02298         if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
02299         if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
02300         assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
02301         assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
02302 
02303         DEBUG_BUFFERS("dstY: %d\n", dstY);
02304         DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
02305                          firstLumSrcY,    lastLumSrcY,    lastInLumBuf);
02306         DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
02307                          firstChrSrcY,    lastChrSrcY,    lastInChrBuf);
02308 
02309         // Do we have enough lines in this slice to output the dstY line
02310         enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
02311 
02312         if (!enough_lines) {
02313             lastLumSrcY = srcSliceY + srcSliceH - 1;
02314             lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
02315             DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
02316                                             lastLumSrcY, lastChrSrcY);
02317         }
02318 
02319         //Do horizontal scaling
02320         while(lastInLumBuf < lastLumSrcY) {
02321             const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
02322             const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
02323             lumBufIndex++;
02324             assert(lumBufIndex < 2*vLumBufSize);
02325             assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
02326             assert(lastInLumBuf + 1 - srcSliceY >= 0);
02327             hyscale(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
02328                     hLumFilter, hLumFilterPos, hLumFilterSize,
02329                     formatConvBuffer,
02330                     pal, 0);
02331             if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
02332                 hyscale(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW,
02333                         lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
02334                         formatConvBuffer,
02335                         pal, 1);
02336             lastInLumBuf++;
02337             DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
02338                                lumBufIndex,    lastInLumBuf);
02339         }
02340         while(lastInChrBuf < lastChrSrcY) {
02341             const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
02342             const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
02343             chrBufIndex++;
02344             assert(chrBufIndex < 2*vChrBufSize);
02345             assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
02346             assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
02347             //FIXME replace parameters through context struct (some at least)
02348 
02349             if (c->needs_hcscale)
02350                 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
02351                           chrDstW, src1, src2, chrSrcW, chrXInc,
02352                           hChrFilter, hChrFilterPos, hChrFilterSize,
02353                           formatConvBuffer, pal);
02354             lastInChrBuf++;
02355             DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
02356                                chrBufIndex,    lastInChrBuf);
02357         }
02358         //wrap buf index around to stay inside the ring buffer
02359         if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
02360         if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
02361         if (!enough_lines)
02362             break; //we can't output a dstY line so let's try with the next slice
02363 
02364 #if HAVE_MMX
02365         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
02366 #endif
02367         if (dstY >= dstH-2) {
02368             // hmm looks like we can't use MMX here without overwriting this array's tail
02369             find_c_packed_planar_out_funcs(c, &yuv2yuv1, &yuv2yuvX,
02370                                            &yuv2packed1, &yuv2packed2,
02371                                            &yuv2packedX);
02372         }
02373 
02374         {
02375             const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
02376             const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02377             const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
02378             const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
02379 
02380             if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
02381                 const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
02382                 if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
02383                 if (c->yuv2yuv1 && vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
02384                     const int16_t *lumBuf = lumSrcPtr[0];
02385                     const int16_t *chrUBuf= chrUSrcPtr[0];
02386                     const int16_t *chrVBuf= chrVSrcPtr[0];
02387                     const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
02388                     yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest,
02389                                 uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
02390                 } else { //General YV12
02391                     yuv2yuvX(c,
02392                                 vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
02393                                 vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr,
02394                                 chrVSrcPtr, vChrFilterSize,
02395                                 alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither);
02396                 }
02397             } else {
02398                 assert(lumSrcPtr  + vLumFilterSize - 1 < lumPixBuf  + vLumBufSize*2);
02399                 assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2);
02400                 if (c->yuv2packed1 && vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
02401                     int chrAlpha= vChrFilter[2*dstY+1];
02402                     yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1),
02403                                    *chrVSrcPtr, *(chrVSrcPtr+1),
02404                                    alpPixBuf ? *alpSrcPtr : NULL,
02405                                    dest, dstW, chrAlpha, dstFormat, flags, dstY);
02406                 } else if (c->yuv2packed2 && vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
02407                     int lumAlpha= vLumFilter[2*dstY+1];
02408                     int chrAlpha= vChrFilter[2*dstY+1];
02409                     lumMmxFilter[2]=
02410                     lumMmxFilter[3]= vLumFilter[2*dstY   ]*0x10001;
02411                     chrMmxFilter[2]=
02412                     chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
02413                     yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1),
02414                                    *chrVSrcPtr, *(chrVSrcPtr+1),
02415                                    alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
02416                                    dest, dstW, lumAlpha, chrAlpha, dstY);
02417                 } else { //general RGB
02418                     yuv2packedX(c,
02419                                    vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
02420                                    vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
02421                                    alpSrcPtr, dest, dstW, dstY);
02422                 }
02423             }
02424         }
02425     }
02426 
02427     if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
02428         fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
02429 
02430 #if HAVE_MMX2
02431     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
02432         __asm__ volatile("sfence":::"memory");
02433 #endif
02434     emms_c();
02435 
02436     /* store changed local vars back in the context */
02437     c->dstY= dstY;
02438     c->lumBufIndex= lumBufIndex;
02439     c->chrBufIndex= chrBufIndex;
02440     c->lastInLumBuf= lastInLumBuf;
02441     c->lastInChrBuf= lastInChrBuf;
02442 
02443     return dstY - lastDstY;
02444 }
02445 
02446 static av_cold void sws_init_swScale_c(SwsContext *c)
02447 {
02448     enum PixelFormat srcFormat = c->srcFormat;
02449 
02450     find_c_packed_planar_out_funcs(c, &c->yuv2yuv1, &c->yuv2yuvX,
02451                                    &c->yuv2packed1, &c->yuv2packed2,
02452                                    &c->yuv2packedX);
02453 
02454     c->hScale       = hScale_c;
02455 
02456     if (c->flags & SWS_FAST_BILINEAR) {
02457         c->hyscale_fast = hyscale_fast_c;
02458         c->hcscale_fast = hcscale_fast_c;
02459     }
02460 
02461     c->chrToYV12 = NULL;
02462     switch(srcFormat) {
02463         case PIX_FMT_YUYV422  : c->chrToYV12 = yuy2ToUV_c; break;
02464         case PIX_FMT_UYVY422  : c->chrToYV12 = uyvyToUV_c; break;
02465         case PIX_FMT_NV12     : c->chrToYV12 = nv12ToUV_c; break;
02466         case PIX_FMT_NV21     : c->chrToYV12 = nv21ToUV_c; break;
02467         case PIX_FMT_RGB8     :
02468         case PIX_FMT_BGR8     :
02469         case PIX_FMT_PAL8     :
02470         case PIX_FMT_BGR4_BYTE:
02471         case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV_c; break;
02472         case PIX_FMT_GRAY16BE :
02473         case PIX_FMT_YUV444P9BE:
02474         case PIX_FMT_YUV420P9BE:
02475         case PIX_FMT_YUV444P10BE:
02476         case PIX_FMT_YUV422P10BE:
02477         case PIX_FMT_YUV420P10BE:
02478         case PIX_FMT_YUV420P16BE:
02479         case PIX_FMT_YUV422P16BE:
02480         case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
02481         case PIX_FMT_GRAY16LE :
02482         case PIX_FMT_YUV444P9LE:
02483         case PIX_FMT_YUV420P9LE:
02484         case PIX_FMT_YUV422P10LE:
02485         case PIX_FMT_YUV420P10LE:
02486         case PIX_FMT_YUV444P10LE:
02487         case PIX_FMT_YUV420P16LE:
02488         case PIX_FMT_YUV422P16LE:
02489         case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
02490     }
02491     if (c->chrSrcHSubSample) {
02492         switch(srcFormat) {
02493         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_half_c; break;
02494         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_half_c; break;
02495         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_half_c; break;
02496         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_half_c; break;
02497         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_half_c;   break;
02498         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_half_c;  break;
02499         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_half_c;   break;
02500         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_half_c; break;
02501         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_half_c; break;
02502         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_half_c; break;
02503         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_half_c; break;
02504         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_half_c;   break;
02505         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_half_c;  break;
02506         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_half_c;   break;
02507         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_half_c; break;
02508         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_half_c; break;
02509         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_half_c; break;
02510         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_half_c; break;
02511         }
02512     } else {
02513         switch(srcFormat) {
02514         case PIX_FMT_RGB48BE : c->chrToYV12 = rgb48BEToUV_c; break;
02515         case PIX_FMT_RGB48LE : c->chrToYV12 = rgb48LEToUV_c; break;
02516         case PIX_FMT_BGR48BE : c->chrToYV12 = bgr48BEToUV_c; break;
02517         case PIX_FMT_BGR48LE : c->chrToYV12 = bgr48LEToUV_c; break;
02518         case PIX_FMT_RGB32   : c->chrToYV12 = bgr32ToUV_c;   break;
02519         case PIX_FMT_RGB32_1 : c->chrToYV12 = bgr321ToUV_c;  break;
02520         case PIX_FMT_BGR24   : c->chrToYV12 = bgr24ToUV_c;   break;
02521         case PIX_FMT_BGR565LE: c->chrToYV12 = bgr16leToUV_c; break;
02522         case PIX_FMT_BGR565BE: c->chrToYV12 = bgr16beToUV_c; break;
02523         case PIX_FMT_BGR555LE: c->chrToYV12 = bgr15leToUV_c; break;
02524         case PIX_FMT_BGR555BE: c->chrToYV12 = bgr15beToUV_c; break;
02525         case PIX_FMT_BGR32   : c->chrToYV12 = rgb32ToUV_c;   break;
02526         case PIX_FMT_BGR32_1 : c->chrToYV12 = rgb321ToUV_c;  break;
02527         case PIX_FMT_RGB24   : c->chrToYV12 = rgb24ToUV_c;   break;
02528         case PIX_FMT_RGB565LE: c->chrToYV12 = rgb16leToUV_c; break;
02529         case PIX_FMT_RGB565BE: c->chrToYV12 = rgb16beToUV_c; break;
02530         case PIX_FMT_RGB555LE: c->chrToYV12 = rgb15leToUV_c; break;
02531         case PIX_FMT_RGB555BE: c->chrToYV12 = rgb15beToUV_c; break;
02532         }
02533     }
02534 
02535     c->lumToYV12 = NULL;
02536     c->alpToYV12 = NULL;
02537     switch (srcFormat) {
02538     case PIX_FMT_YUYV422  :
02539     case PIX_FMT_GRAY8A   :
02540                             c->lumToYV12 = yuy2ToY_c; break;
02541     case PIX_FMT_UYVY422  :
02542                             c->lumToYV12 = uyvyToY_c;    break;
02543     case PIX_FMT_BGR24    : c->lumToYV12 = bgr24ToY_c;   break;
02544     case PIX_FMT_BGR565LE : c->lumToYV12 = bgr16leToY_c; break;
02545     case PIX_FMT_BGR565BE : c->lumToYV12 = bgr16beToY_c; break;
02546     case PIX_FMT_BGR555LE : c->lumToYV12 = bgr15leToY_c; break;
02547     case PIX_FMT_BGR555BE : c->lumToYV12 = bgr15beToY_c; break;
02548     case PIX_FMT_RGB24    : c->lumToYV12 = rgb24ToY_c;   break;
02549     case PIX_FMT_RGB565LE : c->lumToYV12 = rgb16leToY_c; break;
02550     case PIX_FMT_RGB565BE : c->lumToYV12 = rgb16beToY_c; break;
02551     case PIX_FMT_RGB555LE : c->lumToYV12 = rgb15leToY_c; break;
02552     case PIX_FMT_RGB555BE : c->lumToYV12 = rgb15beToY_c; break;
02553     case PIX_FMT_RGB8     :
02554     case PIX_FMT_BGR8     :
02555     case PIX_FMT_PAL8     :
02556     case PIX_FMT_BGR4_BYTE:
02557     case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY_c; break;
02558     case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y_c; break;
02559     case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y_c; break;
02560     case PIX_FMT_RGB32  : c->lumToYV12 = bgr32ToY_c;  break;
02561     case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY_c; break;
02562     case PIX_FMT_BGR32  : c->lumToYV12 = rgb32ToY_c;  break;
02563     case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY_c; break;
02564     case PIX_FMT_RGB48BE: c->lumToYV12 = rgb48BEToY_c; break;
02565     case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48LEToY_c; break;
02566     case PIX_FMT_BGR48BE: c->lumToYV12 = bgr48BEToY_c; break;
02567     case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48LEToY_c; break;
02568     }
02569     if (c->alpPixBuf) {
02570         switch (srcFormat) {
02571         case PIX_FMT_BGRA:
02572         case PIX_FMT_RGBA:  c->alpToYV12 = rgbaToA_c; break;
02573         case PIX_FMT_ABGR:
02574         case PIX_FMT_ARGB:  c->alpToYV12 = abgrToA_c; break;
02575         case PIX_FMT_Y400A: c->alpToYV12 = uyvyToY_c; break;
02576         case PIX_FMT_PAL8 : c->alpToYV12 = palToA_c; break;
02577         }
02578     }
02579 
02580     if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
02581         c->hScale16= hScale16_c;
02582 
02583     if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
02584         if (c->srcRange) {
02585             c->lumConvertRange = lumRangeFromJpeg_c;
02586             c->chrConvertRange = chrRangeFromJpeg_c;
02587         } else {
02588             c->lumConvertRange = lumRangeToJpeg_c;
02589             c->chrConvertRange = chrRangeToJpeg_c;
02590         }
02591     }
02592 
02593     if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
02594           srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
02595         c->needs_hcscale = 1;
02596 }
02597 
02598 SwsFunc ff_getSwsFunc(SwsContext *c)
02599 {
02600     sws_init_swScale_c(c);
02601 
02602     if (HAVE_MMX)
02603         ff_sws_init_swScale_mmx(c);
02604     if (HAVE_ALTIVEC)
02605         ff_sws_init_swScale_altivec(c);
02606 
02607     return swScale;
02608 }