00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00030 #include "avcodec.h"
00031 #include "dsputil.h"
00032 #include "mpegvideo.h"
00033 #include "h263.h"
00034 #include "internal.h"
00035
00036 #include "svq1.h"
00037 #include "svq1enc_cb.h"
00038
00039 #undef NDEBUG
00040 #include <assert.h>
00041
00042
00043 typedef struct SVQ1Context {
00044 MpegEncContext m;
00045 AVCodecContext *avctx;
00046 DSPContext dsp;
00047 AVFrame picture;
00048 AVFrame current_picture;
00049 AVFrame last_picture;
00050 PutBitContext pb;
00051 GetBitContext gb;
00052
00053 PutBitContext reorder_pb[6];
00054
00055 int frame_width;
00056 int frame_height;
00057
00058
00059 int y_block_width;
00060 int y_block_height;
00061
00062
00063 int c_block_width;
00064 int c_block_height;
00065
00066 uint16_t *mb_type;
00067 uint32_t *dummy;
00068 int16_t (*motion_val8[3])[2];
00069 int16_t (*motion_val16[3])[2];
00070
00071 int64_t rd_total;
00072
00073 uint8_t *scratchbuf;
00074 } SVQ1Context;
00075
00076 static void svq1_write_header(SVQ1Context *s, int frame_type)
00077 {
00078 int i;
00079
00080
00081 put_bits(&s->pb, 22, 0x20);
00082
00083
00084 put_bits(&s->pb, 8, 0x00);
00085
00086
00087 put_bits(&s->pb, 2, frame_type - 1);
00088
00089 if (frame_type == AV_PICTURE_TYPE_I) {
00090
00091
00092
00093
00094
00095
00096 put_bits(&s->pb, 5, 2);
00097
00098 i= ff_match_2uint16(ff_svq1_frame_size_table, FF_ARRAY_ELEMS(ff_svq1_frame_size_table), s->frame_width, s->frame_height);
00099 put_bits(&s->pb, 3, i);
00100
00101 if (i == 7)
00102 {
00103 put_bits(&s->pb, 12, s->frame_width);
00104 put_bits(&s->pb, 12, s->frame_height);
00105 }
00106 }
00107
00108
00109 put_bits(&s->pb, 2, 0);
00110 }
00111
00112
/* Seed value of the per-level threshold table built in svq1_encode_plane(). */
#define QUALITY_THRESHOLD 100
/* Each lower level's threshold is the next higher one times this factor. */
#define THRESHOLD_MULTIPLIER 0.6

/* encode_block() uses "vector" as an ordinary identifier; on AltiVec builds
 * any macro of that name is removed first (presumably defined by the AltiVec
 * headers -- confirm). */
#if HAVE_ALTIVEC
#undef vector
#endif
00119
00120 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
00121 int count, y, x, i, j, split, best_mean, best_score, best_count;
00122 int best_vector[6];
00123 int block_sum[7]= {0, 0, 0, 0, 0, 0};
00124 int w= 2<<((level+2)>>1);
00125 int h= 2<<((level+1)>>1);
00126 int size=w*h;
00127 int16_t block[7][256];
00128 const int8_t *codebook_sum, *codebook;
00129 const uint16_t (*mean_vlc)[2];
00130 const uint8_t (*multistage_vlc)[2];
00131
00132 best_score=0;
00133
00134 if(intra){
00135 codebook_sum= svq1_intra_codebook_sum[level];
00136 codebook= ff_svq1_intra_codebooks[level];
00137 mean_vlc= ff_svq1_intra_mean_vlc;
00138 multistage_vlc= ff_svq1_intra_multistage_vlc[level];
00139 for(y=0; y<h; y++){
00140 for(x=0; x<w; x++){
00141 int v= src[x + y*stride];
00142 block[0][x + w*y]= v;
00143 best_score += v*v;
00144 block_sum[0] += v;
00145 }
00146 }
00147 }else{
00148 codebook_sum= svq1_inter_codebook_sum[level];
00149 codebook= ff_svq1_inter_codebooks[level];
00150 mean_vlc= ff_svq1_inter_mean_vlc + 256;
00151 multistage_vlc= ff_svq1_inter_multistage_vlc[level];
00152 for(y=0; y<h; y++){
00153 for(x=0; x<w; x++){
00154 int v= src[x + y*stride] - ref[x + y*stride];
00155 block[0][x + w*y]= v;
00156 best_score += v*v;
00157 block_sum[0] += v;
00158 }
00159 }
00160 }
00161
00162 best_count=0;
00163 best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
00164 best_mean= (block_sum[0] + (size>>1)) >> (level+3);
00165
00166 if(level<4){
00167 for(count=1; count<7; count++){
00168 int best_vector_score= INT_MAX;
00169 int best_vector_sum=-999, best_vector_mean=-999;
00170 const int stage= count-1;
00171 const int8_t *vector;
00172
00173 for(i=0; i<16; i++){
00174 int sum= codebook_sum[stage*16 + i];
00175 int sqr, diff, score;
00176
00177 vector = codebook + stage*size*16 + i*size;
00178 sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
00179 diff= block_sum[stage] - sum;
00180 score= sqr - ((diff*(int64_t)diff)>>(level+3));
00181 if(score < best_vector_score){
00182 int mean= (diff + (size>>1)) >> (level+3);
00183 assert(mean >-300 && mean<300);
00184 mean= av_clip(mean, intra?0:-256, 255);
00185 best_vector_score= score;
00186 best_vector[stage]= i;
00187 best_vector_sum= sum;
00188 best_vector_mean= mean;
00189 }
00190 }
00191 assert(best_vector_mean != -999);
00192 vector= codebook + stage*size*16 + best_vector[stage]*size;
00193 for(j=0; j<size; j++){
00194 block[stage+1][j] = block[stage][j] - vector[j];
00195 }
00196 block_sum[stage+1]= block_sum[stage] - best_vector_sum;
00197 best_vector_score +=
00198 lambda*(+ 1 + 4*count
00199 + multistage_vlc[1+count][1]
00200 + mean_vlc[best_vector_mean][1]);
00201
00202 if(best_vector_score < best_score){
00203 best_score= best_vector_score;
00204 best_count= count;
00205 best_mean= best_vector_mean;
00206 }
00207 }
00208 }
00209
00210 split=0;
00211 if(best_score > threshold && level){
00212 int score=0;
00213 int offset= (level&1) ? stride*h/2 : w/2;
00214 PutBitContext backup[6];
00215
00216 for(i=level-1; i>=0; i--){
00217 backup[i]= s->reorder_pb[i];
00218 }
00219 score += encode_block(s, src , ref , decoded , stride, level-1, threshold>>1, lambda, intra);
00220 score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
00221 score += lambda;
00222
00223 if(score < best_score){
00224 best_score= score;
00225 split=1;
00226 }else{
00227 for(i=level-1; i>=0; i--){
00228 s->reorder_pb[i]= backup[i];
00229 }
00230 }
00231 }
00232 if (level > 0)
00233 put_bits(&s->reorder_pb[level], 1, split);
00234
00235 if(!split){
00236 assert((best_mean >= 0 && best_mean<256) || !intra);
00237 assert(best_mean >= -256 && best_mean<256);
00238 assert(best_count >=0 && best_count<7);
00239 assert(level<4 || best_count==0);
00240
00241
00242 put_bits(&s->reorder_pb[level],
00243 multistage_vlc[1 + best_count][1],
00244 multistage_vlc[1 + best_count][0]);
00245 put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
00246 mean_vlc[best_mean][0]);
00247
00248 for (i = 0; i < best_count; i++){
00249 assert(best_vector[i]>=0 && best_vector[i]<16);
00250 put_bits(&s->reorder_pb[level], 4, best_vector[i]);
00251 }
00252
00253 for(y=0; y<h; y++){
00254 for(x=0; x<w; x++){
00255 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
00256 }
00257 }
00258 }
00259
00260 return best_score;
00261 }
00262
00263
00264 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
00265 int width, int height, int src_stride, int stride)
00266 {
00267 int x, y;
00268 int i;
00269 int block_width, block_height;
00270 int level;
00271 int threshold[6];
00272 uint8_t *src = s->scratchbuf + stride * 16;
00273 const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
00274
00275
00276 threshold[5] = QUALITY_THRESHOLD;
00277 for (level = 4; level >= 0; level--)
00278 threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
00279
00280 block_width = (width + 15) / 16;
00281 block_height = (height + 15) / 16;
00282
00283 if(s->picture.pict_type == AV_PICTURE_TYPE_P){
00284 s->m.avctx= s->avctx;
00285 s->m.current_picture_ptr= &s->m.current_picture;
00286 s->m.last_picture_ptr = &s->m.last_picture;
00287 s->m.last_picture.data[0]= ref_plane;
00288 s->m.linesize=
00289 s->m.last_picture.linesize[0]=
00290 s->m.new_picture.linesize[0]=
00291 s->m.current_picture.linesize[0]= stride;
00292 s->m.width= width;
00293 s->m.height= height;
00294 s->m.mb_width= block_width;
00295 s->m.mb_height= block_height;
00296 s->m.mb_stride= s->m.mb_width+1;
00297 s->m.b8_stride= 2*s->m.mb_width+1;
00298 s->m.f_code=1;
00299 s->m.pict_type= s->picture.pict_type;
00300 s->m.me_method= s->avctx->me_method;
00301 s->m.me.scene_change_score=0;
00302 s->m.flags= s->avctx->flags;
00303
00304
00305
00306 s->m.lambda= s->picture.quality;
00307 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
00308 s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
00309
00310 if(!s->motion_val8[plane]){
00311 s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
00312 s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
00313 }
00314
00315 s->m.mb_type= s->mb_type;
00316
00317
00318 s->m.current_picture.mb_mean= (uint8_t *)s->dummy;
00319 s->m.current_picture.mb_var= (uint16_t*)s->dummy;
00320 s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
00321 s->m.current_picture.mb_type= s->dummy;
00322
00323 s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
00324 s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
00325 s->m.dsp= s->dsp;
00326 ff_init_me(&s->m);
00327
00328 s->m.me.dia_size= s->avctx->dia_size;
00329 s->m.first_slice_line=1;
00330 for (y = 0; y < block_height; y++) {
00331 s->m.new_picture.data[0]= src - y*16*stride;
00332 s->m.mb_y= y;
00333
00334 for(i=0; i<16 && i + 16*y<height; i++){
00335 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
00336 for(x=width; x<16*block_width; x++)
00337 src[i*stride+x]= src[i*stride+x-1];
00338 }
00339 for(; i<16 && i + 16*y<16*block_height; i++)
00340 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
00341
00342 for (x = 0; x < block_width; x++) {
00343 s->m.mb_x= x;
00344 ff_init_block_index(&s->m);
00345 ff_update_block_index(&s->m);
00346
00347 ff_estimate_p_frame_motion(&s->m, x, y);
00348 }
00349 s->m.first_slice_line=0;
00350 }
00351
00352 ff_fix_long_p_mvs(&s->m);
00353 ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
00354 }
00355
00356 s->m.first_slice_line=1;
00357 for (y = 0; y < block_height; y++) {
00358 for(i=0; i<16 && i + 16*y<height; i++){
00359 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
00360 for(x=width; x<16*block_width; x++)
00361 src[i*stride+x]= src[i*stride+x-1];
00362 }
00363 for(; i<16 && i + 16*y<16*block_height; i++)
00364 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
00365
00366 s->m.mb_y= y;
00367 for (x = 0; x < block_width; x++) {
00368 uint8_t reorder_buffer[3][6][7*32];
00369 int count[3][6];
00370 int offset = y * 16 * stride + x * 16;
00371 uint8_t *decoded= decoded_plane + offset;
00372 uint8_t *ref= ref_plane + offset;
00373 int score[4]={0,0,0,0}, best;
00374 uint8_t *temp = s->scratchbuf;
00375
00376 if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){
00377 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
00378 return -1;
00379 }
00380
00381 s->m.mb_x= x;
00382 ff_init_block_index(&s->m);
00383 ff_update_block_index(&s->m);
00384
00385 if(s->picture.pict_type == AV_PICTURE_TYPE_I || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
00386 for(i=0; i<6; i++){
00387 init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
00388 }
00389 if(s->picture.pict_type == AV_PICTURE_TYPE_P){
00390 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
00391 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
00392 score[0]= vlc[1]*lambda;
00393 }
00394 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
00395 for(i=0; i<6; i++){
00396 count[0][i]= put_bits_count(&s->reorder_pb[i]);
00397 flush_put_bits(&s->reorder_pb[i]);
00398 }
00399 }else
00400 score[0]= INT_MAX;
00401
00402 best=0;
00403
00404 if(s->picture.pict_type == AV_PICTURE_TYPE_P){
00405 const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER];
00406 int mx, my, pred_x, pred_y, dxy;
00407 int16_t *motion_ptr;
00408
00409 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
00410 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
00411 for(i=0; i<6; i++)
00412 init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
00413
00414 put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
00415
00416 s->m.pb= s->reorder_pb[5];
00417 mx= motion_ptr[0];
00418 my= motion_ptr[1];
00419 assert(mx>=-32 && mx<=31);
00420 assert(my>=-32 && my<=31);
00421 assert(pred_x>=-32 && pred_x<=31);
00422 assert(pred_y>=-32 && pred_y<=31);
00423 ff_h263_encode_motion(&s->m, mx - pred_x, 1);
00424 ff_h263_encode_motion(&s->m, my - pred_y, 1);
00425 s->reorder_pb[5]= s->m.pb;
00426 score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
00427
00428 dxy= (mx&1) + 2*(my&1);
00429
00430 s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
00431
00432 score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
00433 best= score[1] <= score[0];
00434
00435 vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
00436 score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
00437 score[2]+= vlc[1]*lambda;
00438 if(score[2] < score[best] && mx==0 && my==0){
00439 best=2;
00440 s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
00441 for(i=0; i<6; i++){
00442 count[2][i]=0;
00443 }
00444 put_bits(&s->pb, vlc[1], vlc[0]);
00445 }
00446 }
00447
00448 if(best==1){
00449 for(i=0; i<6; i++){
00450 count[1][i]= put_bits_count(&s->reorder_pb[i]);
00451 flush_put_bits(&s->reorder_pb[i]);
00452 }
00453 }else{
00454 motion_ptr[0 ] = motion_ptr[1 ]=
00455 motion_ptr[2 ] = motion_ptr[3 ]=
00456 motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
00457 motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
00458 }
00459 }
00460
00461 s->rd_total += score[best];
00462
00463 for(i=5; i>=0; i--){
00464 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
00465 }
00466 if(best==0){
00467 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
00468 }
00469 }
00470 s->m.first_slice_line=0;
00471 }
00472 return 0;
00473 }
00474
00475 static av_cold int svq1_encode_init(AVCodecContext *avctx)
00476 {
00477 SVQ1Context * const s = avctx->priv_data;
00478
00479 dsputil_init(&s->dsp, avctx);
00480 avctx->coded_frame= (AVFrame*)&s->picture;
00481
00482 s->frame_width = avctx->width;
00483 s->frame_height = avctx->height;
00484
00485 s->y_block_width = (s->frame_width + 15) / 16;
00486 s->y_block_height = (s->frame_height + 15) / 16;
00487
00488 s->c_block_width = (s->frame_width / 4 + 15) / 16;
00489 s->c_block_height = (s->frame_height / 4 + 15) / 16;
00490
00491 s->avctx= avctx;
00492 s->m.avctx= avctx;
00493 s->m.me.temp =
00494 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
00495 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
00496 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
00497 s->mb_type = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
00498 s->dummy = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
00499 h263_encode_init(&s->m);
00500
00501 return 0;
00502 }
00503
00504 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
00505 int buf_size, void *data)
00506 {
00507 SVQ1Context * const s = avctx->priv_data;
00508 AVFrame *pict = data;
00509 AVFrame * const p= (AVFrame*)&s->picture;
00510 AVFrame temp;
00511 int i;
00512
00513 if(avctx->pix_fmt != PIX_FMT_YUV410P){
00514 av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
00515 return -1;
00516 }
00517
00518 if(!s->current_picture.data[0]){
00519 avctx->get_buffer(avctx, &s->current_picture);
00520 avctx->get_buffer(avctx, &s->last_picture);
00521 s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16 * 2);
00522 }
00523
00524 temp= s->current_picture;
00525 s->current_picture= s->last_picture;
00526 s->last_picture= temp;
00527
00528 init_put_bits(&s->pb, buf, buf_size);
00529
00530 *p = *pict;
00531 p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
00532 p->key_frame = p->pict_type == AV_PICTURE_TYPE_I;
00533
00534 svq1_write_header(s, p->pict_type);
00535 for(i=0; i<3; i++){
00536 if(svq1_encode_plane(s, i,
00537 s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
00538 s->frame_width / (i?4:1), s->frame_height / (i?4:1),
00539 s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
00540 return -1;
00541 }
00542
00543
00544 while(put_bits_count(&s->pb) & 31)
00545 put_bits(&s->pb, 1, 0);
00546
00547 flush_put_bits(&s->pb);
00548
00549 return put_bits_count(&s->pb) / 8;
00550 }
00551
00552 static av_cold int svq1_encode_end(AVCodecContext *avctx)
00553 {
00554 SVQ1Context * const s = avctx->priv_data;
00555 int i;
00556
00557 av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
00558
00559 av_freep(&s->m.me.scratchpad);
00560 av_freep(&s->m.me.map);
00561 av_freep(&s->m.me.score_map);
00562 av_freep(&s->mb_type);
00563 av_freep(&s->dummy);
00564 av_freep(&s->scratchbuf);
00565
00566 for(i=0; i<3; i++){
00567 av_freep(&s->motion_val8[i]);
00568 av_freep(&s->motion_val16[i]);
00569 }
00570
00571 return 0;
00572 }
00573
00574
00575 AVCodec ff_svq1_encoder = {
00576 "svq1",
00577 AVMEDIA_TYPE_VIDEO,
00578 CODEC_ID_SVQ1,
00579 sizeof(SVQ1Context),
00580 svq1_encode_init,
00581 svq1_encode_frame,
00582 svq1_encode_end,
00583 .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV410P, PIX_FMT_NONE},
00584 .long_name= NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 1 / Sorenson Video 1 / SVQ1"),
00585 };