在上一篇介绍了编码器的VCL编码操作,分析了函数x264_slice_write()。函数x264_slice_write()里有四个关键模块,分别是宏块分析模块、宏块编码模块、熵编码模块和滤波模块,再加上NAL打包输出部分,是我们这里要讲的内容。

1.编码模块

宏块分析模块:调用函数x264_macroblock_analyse()。分析分为两部分:帧内预测分析和帧间预测分析。帧内预测分析用于选择帧内预测模式;帧间预测分析进行运动估计,并选择帧间预测模式(宏块分割方式)。

x264_macroblock_analyse():

/**
 * Analyse the current macroblock and decide how it will be coded.
 *
 * The function first obtains the macroblock QP from rate control and
 * initialises the local analysis state.  Depending on the slice type it then:
 *   - I slice: runs intra analysis and picks I_16x16 / I_4x4 / I_8x8 / I_PCM
 *     by comparing the costs stored in `analysis`;
 *   - P slice: tries the skip shortcuts, otherwise analyses the 16x16, 8x8,
 *     sub-8x8, 16x8 and 8x16 inter partitions, refines the motion vectors,
 *     and compares the best inter cost against the intra costs;
 *   - B slice: tries B_SKIP / direct, otherwise analyses the 16x16, 8x8,
 *     16x8 and 8x16 candidates (list0, list1, bi-pred), refines them, and
 *     compares against the intra costs.
 * The decision is written to h->mb.i_type and h->mb.i_partition.  On the
 * non-early-return paths the caches are updated and the transform, trellis
 * and noise-reduction fields of h->mb are set at the end.
 *
 * Control flow note: the P-slice code jumps to the `intra_analysis` label
 * inside the I-slice branch when intra coding is forced, and to the
 * `skip_analysis` label when the mb_info "constant" shortcut applies.
 *
 * @param h  encoder context; the current macroblock state lives in h->mb.
 */
void x264_macroblock_analyse( x264_t *h )
{
    x264_mb_analysis_t analysis;
    int i_cost = COST_MAX;

    /* Get the QP of this macroblock from rate control. */
    h->mb.i_qp = x264_ratecontrol_mb_qp( h );
    /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
     * to lower the bit cost of the qp_delta.  Don't do this if QPRD is enabled. */
    if( h->param.rc.i_aq_mode && h->param.analyse.i_subpel_refine < 10 )
        h->mb.i_qp = abs(h->mb.i_qp - h->mb.i_last_qp) == 1 ? h->mb.i_last_qp : h->mb.i_qp;

    if( h->param.analyse.b_mb_info )
        h->fdec->effective_qp[h->mb.i_mb_xy] = h->mb.i_qp; /* Store the real analysis QP. */

    /* Initialise the analysis state for this QP. */
    x264_mb_analyse_init( h, &analysis, h->mb.i_qp );

    /* Overview (translated from the original author's notes; the helpers are
     * not visible here, so treat the details as a guide, not a contract):
     *  - I slice: intra prediction only; the cost of each luma mode is
     *    evaluated and the cheapest one is chosen.
     *  - P slice: both intra and inter modes are evaluated (a P slice may
     *    contain intra macroblocks; the same holds for B slices).  Each inter
     *    partition is motion-searched (best predictor -> integer pel ->
     *    half pel -> quarter pel) and the cheapest MV/partition is kept.
     *  - Finally the cheaper of the intra and inter candidates wins; when no
     *    good match is found the macroblock falls back to intra coding. */
    if( h->sh.i_type == SLICE_TYPE_I )
    {
        /* I slice: choose the cheapest intra mode. */
intra_analysis:
        if( analysis.i_mbrd )
            x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );
        /* Intra analysis; the results are stored in `analysis`. */
        x264_mb_analyse_intra( h, &analysis, COST_MAX );
        if( analysis.i_mbrd )
            x264_intra_rd( h, &analysis, COST_MAX );

        /* Start from I16x16 and switch to I4x4 / I8x8 when their cost is lower. */
        i_cost = analysis.i_satd_i16x16;
        h->mb.i_type = I_16x16;
        COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, h->mb.i_type, I_4x4 );
        COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, h->mb.i_type, I_8x8 );
        /* Author's note: PCM is only expected to win for very unusual content. */
        if( analysis.i_satd_pcm < i_cost )
            h->mb.i_type = I_PCM;

        else if( analysis.i_mbrd >= 2 )
            x264_intra_rd_refine( h, &analysis );
    }
    else if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* P slice */
        int b_skip = 0;

        h->mc.prefetch_ref( h->mb.pic.p_fref[0][0][h->mb.i_mb_x&3], h->mb.pic.i_stride[0], 0 );

        analysis.b_try_skip = 0;
        if( analysis.b_force_intra )
        {
            if( !h->param.analyse.b_psy )
            {
                x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
                goto intra_analysis;
            }
        }
        else
        {
            if( h->fdec->mb_info && (h->fdec->mb_info[h->mb.i_mb_xy]&X264_MBINFO_CONSTANT) )
            {
                if( !SLICE_MBAFF && (h->fdec->i_frame - h->fref[0][0]->i_frame) == 1 && !h->sh.b_weighted_pred &&
                    h->fref[0][0]->effective_qp[h->mb.i_mb_xy] <= h->mb.i_qp )
                {
                    h->mb.i_partition = D_16x16;
                    if( !M32(h->mb.cache.pskip_mv) )
                    {
                        b_skip = 1;
                        h->mb.i_type = P_SKIP;
                    }
                    else
                    {
                        h->mb.i_type = P_L0;
                        analysis.l0.me16x16.i_ref = 0;
                        M32( analysis.l0.me16x16.mv ) = 0;
                    }
                    goto skip_analysis;
                }
                else if( h->param.analyse.b_mb_info_update )
                    h->fdec->mb_info[h->mb.i_mb_xy] &= ~X264_MBINFO_CONSTANT;
            }

            int skip_invalid = h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1];
            /* If the current macroblock is off the frame, just skip it. */
            if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height && !skip_invalid )
                b_skip = 1;
            /* Fast P_SKIP detection */
            else if( h->param.analyse.b_fast_pskip )
            {
                if( skip_invalid )
                    // FIXME don't need to check this if the reference frame is done
                    {}
                else if( h->param.analyse.i_subpel_refine >= 3 )
                    analysis.b_try_skip = 1;
                else if( h->mb.i_mb_type_left[0] == P_SKIP ||
                         h->mb.i_mb_type_top == P_SKIP ||
                         h->mb.i_mb_type_topleft == P_SKIP ||
                         h->mb.i_mb_type_topright == P_SKIP )
                    b_skip = x264_macroblock_probe_pskip( h ); /* Check whether this MB can be P_SKIP. */
            }
        }

        h->mc.prefetch_ref( h->mb.pic.p_fref[0][0][h->mb.i_mb_x&3], h->mb.pic.i_stride[0], 1 );

        if( b_skip )
        {
            h->mb.i_type = P_SKIP;
            h->mb.i_partition = D_16x16;
            assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 );
skip_analysis:
            for( int i = 0; i < h->mb.pic.i_fref[0]; i++ )
                M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
        }
        else
        {
            const unsigned int flags = h->param.analyse.inter;
            int i_type;
            int i_partition;
            int i_satd_inter, i_satd_intra;

            x264_mb_analyse_load_costs( h, &analysis );

            /* P 16x16 inter analysis (whole macroblock as one partition). */
            x264_mb_analyse_inter_p16x16( h, &analysis );

            if( h->mb.i_type == P_SKIP )
            {
                for( int i = 1; i < h->mb.pic.i_fref[0]; i++ )
                    M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
                return;
            }

            if( flags & X264_ANALYSE_PSUB16x16 )
            {
                /* P 8x8 inter analysis (four 8x8 partitions). */
                if( h->param.analyse.b_mixed_references )
                    x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
                else
                    x264_mb_analyse_inter_p8x8( h, &analysis );
            }

            /* Select best inter mode */
            i_type = P_L0;
            i_partition = D_16x16;
            i_cost = analysis.l0.me16x16.cost;

            /* Switch to 8x8 when its cost is lower than 16x16 (or when early
             * termination is disabled); the data comes from list 0. */
            if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate ||
                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost) )
            {
                i_type = P_8x8;
                i_partition = D_8x8;
                i_cost = analysis.l0.i_cost8x8;

                /* Do sub 8x8 */
                if( flags & X264_ANALYSE_PSUB8x8 )
                {
                    for( int i = 0; i < 4; i++ )
                    {
                        /* Analyse the 4x4 split of 8x8 block i. */
                        x264_mb_analyse_inter_p4x4( h, &analysis, i );
                        int i_thresh8x4 = analysis.l0.me4x4[i][1].cost_mv + analysis.l0.me4x4[i][2].cost_mv;
                        /* Only if 4x4 looks competitive with 8x8 (or early
                         * termination is off) also analyse 8x4 and 4x8. */
                        if( !analysis.b_early_terminate || analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost + i_thresh8x4 )
                        {
                            int i_cost8x8 = analysis.l0.i_cost4x4[i];
                            h->mb.i_sub_partition[i] = D_L0_4x4;

                            /* 8x4: two 8-wide, 4-high sub-blocks. */
                            x264_mb_analyse_inter_p8x4( h, &analysis, i );
                            COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost8x4[i],
                                         h->mb.i_sub_partition[i], D_L0_8x4 );

                            /* 4x8: two 4-wide, 8-high sub-blocks. */
                            x264_mb_analyse_inter_p4x8( h, &analysis, i );
                            COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost4x8[i],
                                         h->mb.i_sub_partition[i], D_L0_4x8 );

                            i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
                        }
                        x264_mb_cache_mv_p8x8( h, &analysis, i );
                    }
                    analysis.l0.i_cost8x8 = i_cost;
                }
            }

            /* Now do 16x8/8x16 */
            int i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv;
            /* Only tried when 8x8 is competitive with 16x16 (within the
             * threshold) or early termination is disabled. */
            if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate ||
                analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8) )
            {
                int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost
                                      + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
                analysis.i_cost_est16x8[1] = analysis.i_satd8x8[0][2] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost;

                /* 16x8 partition analysis. */
                x264_mb_analyse_inter_p16x8( h, &analysis, i_cost );
                COPY3_IF_LT( i_cost, analysis.l0.i_cost16x8, i_type, P_L0, i_partition, D_16x8 );

                i_avg_mv_ref_cost = (analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[1].i_ref_cost
                                  + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1;
                analysis.i_cost_est8x16[1] = analysis.i_satd8x8[0][1] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost;

                /* 8x16 partition analysis. */
                x264_mb_analyse_inter_p8x16( h, &analysis, i_cost );
                COPY3_IF_LT( i_cost, analysis.l0.i_cost8x16, i_type, P_L0, i_partition, D_8x16 );
            }

            h->mb.i_partition = i_partition;

            /* refine qpel (sub-pixel refinement of the chosen partition) */
            //FIXME mb_type costs?
            if( analysis.i_mbrd || !h->mb.i_subpel_refine )
            {
                /* refine later */
            }
            else if( i_partition == D_16x16 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me16x16 );
                i_cost = analysis.l0.me16x16.cost;
            }
            else if( i_partition == D_16x8 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me16x8[0] );
                x264_me_refine_qpel( h, &analysis.l0.me16x8[1] );
                i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost;
            }
            else if( i_partition == D_8x16 )
            {
                x264_me_refine_qpel( h, &analysis.l0.me8x16[0] );
                x264_me_refine_qpel( h, &analysis.l0.me8x16[1] );
                i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost;
            }
            else if( i_partition == D_8x8 )
            {
                i_cost = 0;
                for( int i8x8 = 0; i8x8 < 4; i8x8++ )
                {
                    switch( h->mb.i_sub_partition[i8x8] )
                    {
                        case D_L0_8x8:
                            x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
                            i_cost += analysis.l0.me8x8[i8x8].cost;
                            break;
                        case D_L0_8x4:
                            x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
                            i_cost += analysis.l0.me8x4[i8x8][0].cost +
                                      analysis.l0.me8x4[i8x8][1].cost;
                            break;
                        case D_L0_4x8:
                            x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
                            i_cost += analysis.l0.me4x8[i8x8][0].cost +
                                      analysis.l0.me4x8[i8x8][1].cost;
                            break;

                        case D_L0_4x4:
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
                            x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
                            i_cost += analysis.l0.me4x4[i8x8][0].cost +
                                      analysis.l0.me4x4[i8x8][1].cost +
                                      analysis.l0.me4x4[i8x8][2].cost +
                                      analysis.l0.me4x4[i8x8][3].cost;
                            break;
                        default:
                            x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
                            break;
                    }
                }
            }

            /* A P slice may also contain intra macroblocks, so the intra
             * candidates are analysed as well, bounded by the inter cost. */
            if( h->mb.b_chroma_me )
            {
                if( CHROMA444 )
                {
                    x264_mb_analyse_intra( h, &analysis, i_cost );
                    x264_mb_analyse_intra_chroma( h, &analysis );
                }
                else
                {
                    x264_mb_analyse_intra_chroma( h, &analysis );
                    x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_chroma );
                }
                analysis.i_satd_i16x16 += analysis.i_satd_chroma;
                analysis.i_satd_i8x8   += analysis.i_satd_chroma;
                analysis.i_satd_i4x4   += analysis.i_satd_chroma;
            }
            else
                x264_mb_analyse_intra( h, &analysis, i_cost );

            i_satd_inter = i_cost;
            i_satd_intra = X264_MIN3( analysis.i_satd_i16x16,
                                      analysis.i_satd_i8x8,
                                      analysis.i_satd_i4x4 );

            if( analysis.i_mbrd )
            {
                x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) );
                i_type = P_L0;
                i_partition = D_16x16;
                i_cost = analysis.l0.i_rd16x16;
                COPY2_IF_LT( i_cost, analysis.l0.i_cost16x8, i_partition, D_16x8 );
                COPY2_IF_LT( i_cost, analysis.l0.i_cost8x16, i_partition, D_8x16 );
                COPY3_IF_LT( i_cost, analysis.l0.i_cost8x8, i_partition, D_8x8, i_type, P_8x8 );
                h->mb.i_type = i_type;
                h->mb.i_partition = i_partition;
                if( i_cost < COST_MAX )
                    x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
                x264_intra_rd( h, &analysis, i_satd_inter * 5/4 + 1 );
            }

            /* Take the cheapest of the inter choice and the intra candidates. */
            COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );
            COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );

            h->mb.i_type = i_type;

            if( analysis.b_force_intra && !IS_INTRA(i_type) )
            {
                /* Intra masking: copy fdec to fenc and re-encode the block as intra in order to make it appear as if
                 * it was an inter block. */
                x264_analyse_update_cache( h, &analysis );
                x264_macroblock_encode( h );
                for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
                    h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE, h->mb.pic.p_fdec[p], FDEC_STRIDE, 16 );
                if( !CHROMA444 )
                {
                    int height = 16 >> CHROMA_V_SHIFT;
                    h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, height );
                    h->mc.copy[PIXEL_8x8]  ( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, height );
                }
                x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) );
                goto intra_analysis;
            }

            if( analysis.i_mbrd >= 2 && h->mb.i_type != I_PCM )
            {
                if( IS_INTRA( h->mb.i_type ) )
                {
                    x264_intra_rd_refine( h, &analysis );
                }
                else if( i_partition == D_16x16 )
                {
                    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
                    analysis.l0.me16x16.cost = i_cost;
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
                }
                else if( i_partition == D_16x8 )
                {
                    h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
                    h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
                    x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref );
                    x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 8, 0 );
                }
                else if( i_partition == D_8x16 )
                {
                    h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] =
                    h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8;
                    x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref );
                    x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 4, 0 );
                }
                else if( i_partition == D_8x8 )
                {
                    x264_analyse_update_cache( h, &analysis );
                    for( int i8x8 = 0; i8x8 < 4; i8x8++ )
                    {
                        if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8*4, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                        }
                        else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 )
                        {
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );
                            x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );
                        }
                    }
                }
            }
        }
    }
    else if( h->sh.i_type == SLICE_TYPE_B )
    {
        /* B slice */
        int i_bskip_cost = COST_MAX;
        int b_skip = 0;

        if( analysis.i_mbrd )
            x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 );

        h->mb.i_type = B_SKIP;
        if( h->mb.b_direct_auto_write )
        {
            /* direct=auto heuristic: prefer whichever mode allows more Skip macroblocks */
            for( int i = 0; i < 2; i++ )
            {
                int b_changed = 1;
                h->sh.b_direct_spatial_mv_pred ^= 1;
                analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h, i && analysis.b_direct_available ? &b_changed : NULL );
                if( analysis.b_direct_available )
                {
                    if( b_changed )
                    {
                        x264_mb_mc( h );
                        b_skip = x264_macroblock_probe_bskip( h );
                    }
                    h->stat.frame.i_direct_score[ h->sh.b_direct_spatial_mv_pred ] += b_skip;
                }
                else
                    b_skip = 0;
            }
        }
        else
            analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h, NULL );

        analysis.b_try_skip = 0;
        if( analysis.b_direct_available )
        {
            if( !h->mb.b_direct_auto_write )
                x264_mb_mc( h );
            /* If the current macroblock is off the frame, just skip it. */
            if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height )
                b_skip = 1;
            else if( analysis.i_mbrd )
            {
                i_bskip_cost = ssd_mb( h );
                /* 6 = minimum cavlc cost of a non-skipped MB */
                b_skip = h->mb.b_skip_mc = i_bskip_cost <= ((6 * analysis.i_lambda2 + 128) >> 8);
            }
            else if( !h->mb.b_direct_auto_write )
            {
                /* Conditioning the probe on neighboring block types
                 * doesn't seem to help speed or quality. */
                analysis.b_try_skip = x264_macroblock_probe_bskip( h );
                if( h->param.analyse.i_subpel_refine < 3 )
                    b_skip = analysis.b_try_skip;
            }
            /* Set up MVs for future predictors */
            if( b_skip )
            {
                for( int i = 0; i < h->mb.pic.i_fref[0]; i++ )
                    M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
                for( int i = 0; i < h->mb.pic.i_fref[1]; i++ )
                    M32( h->mb.mvr[1][i][h->mb.i_mb_xy] ) = 0;
            }
        }

        if( !b_skip )
        {
            const unsigned int flags = h->param.analyse.inter;
            int i_type;
            int i_partition;
            int i_satd_inter;
            h->mb.b_skip_mc = 0;
            h->mb.i_type = B_DIRECT;

            x264_mb_analyse_load_costs( h, &analysis );

            /* select best inter mode */
            /* direct must be first */
            if( analysis.b_direct_available )
                x264_mb_analyse_inter_direct( h, &analysis );

            /* B 16x16 inter analysis (whole macroblock as one partition). */
            x264_mb_analyse_inter_b16x16( h, &analysis );

            if( h->mb.i_type == B_SKIP )
            {
                for( int i = 1; i < h->mb.pic.i_fref[0]; i++ )
                    M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
                for( int i = 1; i < h->mb.pic.i_fref[1]; i++ )
                    M32( h->mb.mvr[1][i][h->mb.i_mb_xy] ) = 0;
                return;
            }

            i_type = B_L0_L0;
            i_partition = D_16x16;
            i_cost = analysis.l0.me16x16.cost;
            COPY2_IF_LT( i_cost, analysis.l1.me16x16.cost, i_type, B_L1_L1 );
            COPY2_IF_LT( i_cost, analysis.i_cost16x16bi, i_type, B_BI_BI );
            COPY2_IF_LT( i_cost, analysis.i_cost16x16direct, i_type, B_DIRECT );

            if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 )
            {
                x264_mb_analyse_b_rd( h, &analysis, i_cost );
                if( i_bskip_cost < analysis.i_rd16x16direct &&
                    i_bskip_cost < analysis.i_rd16x16bi &&
                    i_bskip_cost < analysis.l0.i_rd16x16 &&
                    i_bskip_cost < analysis.l1.i_rd16x16 )
                {
                    h->mb.i_type = B_SKIP;
                    x264_analyse_update_cache( h, &analysis );
                    return;
                }
            }

            if( flags & X264_ANALYSE_BSUB16x16 )
            {
                /* B 8x8 inter analysis (four 8x8 partitions). */
                if( h->param.analyse.b_mixed_references )
                    x264_mb_analyse_inter_b8x8_mixed_ref( h, &analysis );
                else
                    x264_mb_analyse_inter_b8x8( h, &analysis );

                COPY3_IF_LT( i_cost, analysis.i_cost8x8bi, i_type, B_8x8, i_partition, D_8x8 );

                /* Try to estimate the cost of b16x8/b8x16 based on the satd scores of the b8x8 modes */
                int i_cost_est16x8bi_total = 0, i_cost_est8x16bi_total = 0;
                int i_mb_type, i_partition16x8[2], i_partition8x16[2];
                for( int i = 0; i < 2; i++ )
                {
                    int avg_l0_mv_ref_cost, avg_l1_mv_ref_cost;
                    int i_l0_satd, i_l1_satd, i_bi_satd, i_best_cost;
                    // 16x8
                    i_best_cost = COST_MAX;
                    i_l0_satd = analysis.i_satd8x8[0][i*2] + analysis.i_satd8x8[0][i*2+1];
                    i_l1_satd = analysis.i_satd8x8[1][i*2] + analysis.i_satd8x8[1][i*2+1];
                    i_bi_satd = analysis.i_satd8x8[2][i*2] + analysis.i_satd8x8[2][i*2+1];
                    avg_l0_mv_ref_cost = ( analysis.l0.me8x8[i*2].cost_mv + analysis.l0.me8x8[i*2].i_ref_cost
                                         + analysis.l0.me8x8[i*2+1].cost_mv + analysis.l0.me8x8[i*2+1].i_ref_cost + 1 ) >> 1;
                    avg_l1_mv_ref_cost = ( analysis.l1.me8x8[i*2].cost_mv + analysis.l1.me8x8[i*2].i_ref_cost
                                         + analysis.l1.me8x8[i*2+1].cost_mv + analysis.l1.me8x8[i*2+1].i_ref_cost + 1 ) >> 1;
                    COPY2_IF_LT( i_best_cost, i_l0_satd + avg_l0_mv_ref_cost, i_partition16x8[i], D_L0_8x8 );
                    COPY2_IF_LT( i_best_cost, i_l1_satd + avg_l1_mv_ref_cost, i_partition16x8[i], D_L1_8x8 );
                    COPY2_IF_LT( i_best_cost, i_bi_satd + avg_l0_mv_ref_cost + avg_l1_mv_ref_cost, i_partition16x8[i], D_BI_8x8 );
                    analysis.i_cost_est16x8[i] = i_best_cost;

                    // 8x16
                    i_best_cost = COST_MAX;
                    i_l0_satd = analysis.i_satd8x8[0][i] + analysis.i_satd8x8[0][i+2];
                    i_l1_satd = analysis.i_satd8x8[1][i] + analysis.i_satd8x8[1][i+2];
                    i_bi_satd = analysis.i_satd8x8[2][i] + analysis.i_satd8x8[2][i+2];
                    avg_l0_mv_ref_cost = ( analysis.l0.me8x8[i].cost_mv + analysis.l0.me8x8[i].i_ref_cost
                                         + analysis.l0.me8x8[i+2].cost_mv + analysis.l0.me8x8[i+2].i_ref_cost + 1 ) >> 1;
                    avg_l1_mv_ref_cost = ( analysis.l1.me8x8[i].cost_mv + analysis.l1.me8x8[i].i_ref_cost
                                         + analysis.l1.me8x8[i+2].cost_mv + analysis.l1.me8x8[i+2].i_ref_cost + 1 ) >> 1;
                    COPY2_IF_LT( i_best_cost, i_l0_satd + avg_l0_mv_ref_cost, i_partition8x16[i], D_L0_8x8 );
                    COPY2_IF_LT( i_best_cost, i_l1_satd + avg_l1_mv_ref_cost, i_partition8x16[i], D_L1_8x8 );
                    COPY2_IF_LT( i_best_cost, i_bi_satd + avg_l0_mv_ref_cost + avg_l1_mv_ref_cost, i_partition8x16[i], D_BI_8x8 );
                    analysis.i_cost_est8x16[i] = i_best_cost;
                }
                i_mb_type = B_L0_L0 + (i_partition16x8[0]>>2) * 3 + (i_partition16x8[1]>>2);
                analysis.i_cost_est16x8[1] += analysis.i_lambda * i_mb_b16x8_cost_table[i_mb_type];
                i_cost_est16x8bi_total = analysis.i_cost_est16x8[0] + analysis.i_cost_est16x8[1];
                i_mb_type = B_L0_L0 + (i_partition8x16[0]>>2) * 3 + (i_partition8x16[1]>>2);
                analysis.i_cost_est8x16[1] += analysis.i_lambda * i_mb_b16x8_cost_table[i_mb_type];
                i_cost_est8x16bi_total = analysis.i_cost_est8x16[0] + analysis.i_cost_est8x16[1];

                /* We can gain a little speed by checking the mode with the lowest estimated cost first */
                int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total;
                if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
                {
                    x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
                    COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
                }
                if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost )
                {
                    x264_mb_analyse_inter_b8x16( h, &analysis, i_cost );
                    COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 );
                }
                if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) )
                {
                    x264_mb_analyse_inter_b16x8( h, &analysis, i_cost );
                    COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
                }
            }

            if( analysis.i_mbrd || !h->mb.i_subpel_refine )
            {
                /* refine later */
            }
            /* refine qpel */
            else if( i_partition == D_16x16 )
            {
                analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
                analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
                if( i_type == B_L0_L0 )
                {
                    x264_me_refine_qpel( h, &analysis.l0.me16x16 );
                    i_cost = analysis.l0.me16x16.cost
                           + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0];
                }
                else if( i_type == B_L1_L1 )
                {
                    x264_me_refine_qpel( h, &analysis.l1.me16x16 );
                    i_cost = analysis.l1.me16x16.cost
                           + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1];
                }
                else if( i_type == B_BI_BI )
                {
                    x264_me_refine_qpel( h, &analysis.l0.bi16x16 );
                    x264_me_refine_qpel( h, &analysis.l1.bi16x16 );
                }
            }
            else if( i_partition == D_16x8 )
            {
                for( int i = 0; i < 2; i++ )
                {
                    if( analysis.i_mb_partition16x8[i] != D_L1_8x8 )
                        x264_me_refine_qpel( h, &analysis.l0.me16x8[i] );
                    if( analysis.i_mb_partition16x8[i] != D_L0_8x8 )
                        x264_me_refine_qpel( h, &analysis.l1.me16x8[i] );
                }
            }
            else if( i_partition == D_8x16 )
            {
                for( int i = 0; i < 2; i++ )
                {
                    if( analysis.i_mb_partition8x16[i] != D_L1_8x8 )
                        x264_me_refine_qpel( h, &analysis.l0.me8x16[i] );
                    if( analysis.i_mb_partition8x16[i] != D_L0_8x8 )
                        x264_me_refine_qpel( h, &analysis.l1.me8x16[i] );
                }
            }
            else if( i_partition == D_8x8 )
            {
                for( int i = 0; i < 4; i++ )
                {
                    x264_me_t *m;
                    int i_part_cost_old;
                    int i_type_cost;
                    int i_part_type = h->mb.i_sub_partition[i];
                    int b_bidir = (i_part_type == D_BI_8x8);

                    if( i_part_type == D_DIRECT_8x8 )
                        continue;
                    if( x264_mb_partition_listX_table[0][i_part_type] )
                    {
                        m = &analysis.l0.me8x8[i];
                        i_part_cost_old = m->cost;
                        i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
                        m->cost -= i_type_cost;
                        x264_me_refine_qpel( h, m );
                        if( !b_bidir )
                            analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
                    }
                    if( x264_mb_partition_listX_table[1][i_part_type] )
                    {
                        m = &analysis.l1.me8x8[i];
                        i_part_cost_old = m->cost;
                        i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L1_8x8];
                        m->cost -= i_type_cost;
                        x264_me_refine_qpel( h, m );
                        if( !b_bidir )
                            analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old;
                    }
                    /* TODO: update mvp? */
                }
            }

            i_satd_inter = i_cost;

            if( analysis.i_mbrd )
            {
                x264_mb_analyse_b_rd( h, &analysis, i_satd_inter );
                i_type = B_SKIP;
                i_cost = i_bskip_cost;
                i_partition = D_16x16;
                COPY2_IF_LT( i_cost, analysis.l0.i_rd16x16, i_type, B_L0_L0 );
                COPY2_IF_LT( i_cost, analysis.l1.i_rd16x16, i_type, B_L1_L1 );
                COPY2_IF_LT( i_cost, analysis.i_rd16x16bi, i_type, B_BI_BI );
                COPY2_IF_LT( i_cost, analysis.i_rd16x16direct, i_type, B_DIRECT );
                COPY3_IF_LT( i_cost, analysis.i_rd16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 );
                COPY3_IF_LT( i_cost, analysis.i_rd8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 );
                COPY3_IF_LT( i_cost, analysis.i_rd8x8bi, i_type, B_8x8, i_partition, D_8x8 );

                h->mb.i_type = i_type;
                h->mb.i_partition = i_partition;
            }

            if( h->mb.b_chroma_me )
            {
                if( CHROMA444 )
                {
                    x264_mb_analyse_intra( h, &analysis, i_satd_inter );
                    x264_mb_analyse_intra_chroma( h, &analysis );
                }
                else
                {
                    x264_mb_analyse_intra_chroma( h, &analysis );
                    x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_chroma );
                }
                analysis.i_satd_i16x16 += analysis.i_satd_chroma;
                analysis.i_satd_i8x8   += analysis.i_satd_chroma;
                analysis.i_satd_i4x4   += analysis.i_satd_chroma;
            }
            else
                x264_mb_analyse_intra( h, &analysis, i_satd_inter );

            if( analysis.i_mbrd )
            {
                x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost );
                x264_intra_rd( h, &analysis, i_satd_inter * 17/16 + 1 );
            }

            COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
            COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );
            COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );

            h->mb.i_type = i_type;
            h->mb.i_partition = i_partition;

            if( analysis.i_mbrd >= 2 && IS_INTRA( i_type ) && i_type != I_PCM )
                x264_intra_rd_refine( h, &analysis );
            if( h->mb.i_subpel_refine >= 5 )
                x264_refine_bidir( h, &analysis );

            if( analysis.i_mbrd >= 2 && i_type > B_DIRECT && i_type < B_SKIP )
            {
                int i_biweight;
                x264_analyse_update_cache( h, &analysis );

                if( i_partition == D_16x16 )
                {
                    if( i_type == B_L0_L0 )
                    {
                        analysis.l0.me16x16.cost = i_cost;
                        x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
                    }
                    else if( i_type == B_L1_L1 )
                    {
                        analysis.l1.me16x16.cost = i_cost;
                        x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 );
                    }
                    else if( i_type == B_BI_BI )
                    {
                        i_biweight = h->mb.bipred_weight[analysis.l0.bi16x16.i_ref][analysis.l1.bi16x16.i_ref];
                        x264_me_refine_bidir_rd( h, &analysis.l0.bi16x16, &analysis.l1.bi16x16, i_biweight, 0, analysis.i_lambda2 );
                    }
                }
                else if( i_partition == D_16x8 )
                {
                    for( int i = 0; i < 2; i++ )
                    {
                        h->mb.i_sub_partition[i*2] = h->mb.i_sub_partition[i*2+1] = analysis.i_mb_partition16x8[i];
                        if( analysis.i_mb_partition16x8[i] == D_L0_8x8 )
                            x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[i], analysis.i_lambda2, i*8, 0 );
                        else if( analysis.i_mb_partition16x8[i] == D_L1_8x8 )
                            x264_me_refine_qpel_rd( h, &analysis.l1.me16x8[i], analysis.i_lambda2, i*8, 1 );
                        else if( analysis.i_mb_partition16x8[i] == D_BI_8x8 )
                        {
                            i_biweight = h->mb.bipred_weight[analysis.l0.me16x8[i].i_ref][analysis.l1.me16x8[i].i_ref];
                            x264_me_refine_bidir_rd( h, &analysis.l0.me16x8[i], &analysis.l1.me16x8[i], i_biweight, i*2, analysis.i_lambda2 );
                        }
                    }
                }
                else if( i_partition == D_8x16 )
                {
                    for( int i = 0; i < 2; i++ )
                    {
                        h->mb.i_sub_partition[i] = h->mb.i_sub_partition[i+2] = analysis.i_mb_partition8x16[i];
                        if( analysis.i_mb_partition8x16[i] == D_L0_8x8 )
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[i], analysis.i_lambda2, i*4, 0 );
                        else if( analysis.i_mb_partition8x16[i] == D_L1_8x8 )
                            x264_me_refine_qpel_rd( h, &analysis.l1.me8x16[i], analysis.i_lambda2, i*4, 1 );
                        else if( analysis.i_mb_partition8x16[i] == D_BI_8x8 )
                        {
                            i_biweight = h->mb.bipred_weight[analysis.l0.me8x16[i].i_ref][analysis.l1.me8x16[i].i_ref];
                            x264_me_refine_bidir_rd( h, &analysis.l0.me8x16[i], &analysis.l1.me8x16[i], i_biweight, i, analysis.i_lambda2 );
                        }
                    }
                }
                else if( i_partition == D_8x8 )
                {
                    for( int i = 0; i < 4; i++ )
                    {
                        if( h->mb.i_sub_partition[i] == D_L0_8x8 )
                            x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i], analysis.i_lambda2, i*4, 0 );
                        else if( h->mb.i_sub_partition[i] == D_L1_8x8 )
                            x264_me_refine_qpel_rd( h, &analysis.l1.me8x8[i], analysis.i_lambda2, i*4, 1 );
                        else if( h->mb.i_sub_partition[i] == D_BI_8x8 )
                        {
                            i_biweight = h->mb.bipred_weight[analysis.l0.me8x8[i].i_ref][analysis.l1.me8x8[i].i_ref];
                            x264_me_refine_bidir_rd( h, &analysis.l0.me8x8[i], &analysis.l1.me8x8[i], i_biweight, i, analysis.i_lambda2 );
                        }
                    }
                }
            }
        }
    }

    x264_analyse_update_cache( h, &analysis );

    /* In rare cases we can end up qpel-RDing our way back to a larger partition size
     * without realizing it.  Check for this and account for it if necessary. */
    if( analysis.i_mbrd >= 2 )
    {
        /* Don't bother with bipred or 8x8-and-below, the odds are incredibly low. */
        static const uint8_t check_mv_lists[X264_MBTYPE_MAX] = {[P_L0]=1, [B_L0_L0]=1, [B_L1_L1]=2};
        int list = check_mv_lists[h->mb.i_type] - 1;
        if( list >= 0 && h->mb.i_partition != D_16x16 &&
            M32( &h->mb.cache.mv[list][x264_scan8[0]] ) == M32( &h->mb.cache.mv[list][x264_scan8[12]] ) &&
            h->mb.cache.ref[list][x264_scan8[0]] == h->mb.cache.ref[list][x264_scan8[12]] )
                h->mb.i_partition = D_16x16;
    }

    if( !analysis.i_mbrd )
        x264_mb_analyse_transform( h );

    if( analysis.i_mbrd == 3 && !IS_SKIP(h->mb.i_type) )
        x264_mb_analyse_qp_rd( h, &analysis );

    h->mb.b_trellis = h->param.analyse.i_trellis;
    h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));

    if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
        x264_psy_trellis_init( h, 0 );
    if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
        h->mb.i_skip_intra = 0;
}

大致流程:

(1)如果当前是I Slice,调用x264_mb_analyse_intra()进行Intra宏块的帧内预测模式分析;

(2)如果是P Slice:

(a)调用x264_macroblock_probe_pskip()分析是否为Skip宏块;如果是,则后面的步骤不再进行分析;

(b)调用x264_mb_analyse_inter_p16x16()分析P16x16帧间预测的代价;

(c)调用x264_mb_analyse_inter_p8x8()分析P8x8帧间预测的代价;

(d)如果P8x8代价值小于P16x16(或未启用提前终止b_early_terminate),并且允许8x8以下的分割(X264_ANALYSE_PSUB8x8),则依次对4个8x8的子宏块分割进行判断:

(i)调用x264_mb_analyse_inter_p4x4()分析P4x4帧间预测的代价;

(ii)如果P4x4的代价值小于P8x8的代价值加上阈值i_thresh8x4(或未启用提前终止),则调用x264_mb_analyse_inter_p8x4()和x264_mb_analyse_inter_p4x8()分析P8x4和P4x8帧间预测的代价;

(e)如果P8x8代价值小于P16x16的代价值加上阈值i_thresh16x8(或未启用提前终止),调用x264_mb_analyse_inter_p16x8()和x264_mb_analyse_inter_p8x16()分析P16x8和P8x16帧间预测的代价;

(f)此外调用x264_mb_analyse_intra(),检查当前宏块作为Intra宏块编码的代价是否小于作为P宏块编码的代价;

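(3)如果当前是B Slice,则进行与P Slice类似的处理:先判断是否为B_SKIP宏块,再依次分析Direct、B16x16(L0、L1、双向)、B8x8、B16x8和B8x16帧间预测的代价(没有8x8以下的子分割分析),最后同样调用x264_mb_analyse_intra()与Intra宏块的代价进行比较。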

宏块编码模块:调用函数x264_macroblock_encode_internal()。

x264_macroblock_encode_internal():

static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_count, int chroma )
{
int i_qp = h->mb.i_qp;
int b_decimate = h->mb.b_dct_decimate;
int b_force_no_skip = 0;
int nz;
h->mb.i_cbp_luma = 0;
for( int p = 0; p < plane_count; p++ )
h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = 0;
//PCM,不常见
if( h->mb.i_type == I_PCM )
{
/* if PCM is chosen, we need to store reconstructed frame data */
for( int p = 0; p < plane_count; p++ )
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc[p], FENC_STRIDE, 16 );
if( chroma )
{
int height = 16 >> CHROMA_V_SHIFT;
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, height );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, height );
}
return;
} if( !h->mb.b_allow_skip )
{
b_force_no_skip = 1;
if( IS_SKIP(h->mb.i_type) )
{
if( h->mb.i_type == P_SKIP )
h->mb.i_type = P_L0;
else if( h->mb.i_type == B_SKIP )
h->mb.i_type = B_DIRECT;
}
}
//根据不同的宏块类型,进行编码
if( h->mb.i_type == P_SKIP )
{
/* don't do pskip motion compensation if it was already done in macroblock_analyse */
if( !h->mb.b_skip_mc )
{
int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
h->mb.mv_min[0], h->mb.mv_max[0] );
int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
h->mb.mv_min[1], h->mb.mv_max[1] ); for( int p = 0; p < plane_count; p++ )
h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE,
&h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p],
mvx, mvy, 16, 16, &h->sh.weight[0][p] ); if( chroma )
{
int v_shift = CHROMA_V_SHIFT;
int height = 16 >> v_shift; /* Special case for mv0, which is (of course) very common in P-skip mode. */
if( mvx | mvy )
h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE,
h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
mvx, 2*mvy>>v_shift, 8, height );
else
h->mc.load_deinterleave_chroma_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4],
h->mb.pic.i_stride[1], height ); if( h->sh.weight[0][1].weightfn )
h->sh.weight[0][1].weightfn[8>>2]( h->mb.pic.p_fdec[1], FDEC_STRIDE,
h->mb.pic.p_fdec[1], FDEC_STRIDE,
&h->sh.weight[0][1], height );
if( h->sh.weight[0][2].weightfn )
h->sh.weight[0][2].weightfn[8>>2]( h->mb.pic.p_fdec[2], FDEC_STRIDE,
h->mb.pic.p_fdec[2], FDEC_STRIDE,
&h->sh.weight[0][2], height );
}
}
//编码skip类型宏块
x264_macroblock_encode_skip( h );
return;
}
if( h->mb.i_type == B_SKIP )
{
/* don't do bskip motion compensation if it was already done in macroblock_analyse */
if( !h->mb.b_skip_mc )
x264_mb_mc( h );
x264_macroblock_encode_skip( h );
return;
} if( h->mb.i_type == I_16x16 )
{
h->mb.b_transform_8x8 = 0;
//Intra16x16宏块编码-需要Hadamard变换
//分别编码Y,U,V
/*
* 16x16 宏块
*
* +--------+--------+
* | |
* | |
* | |
* + + +
* | |
* | |
* | |
* +--------+--------+
*
*/
for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
x264_mb_encode_i16x16( h, p, i_qp );
}
else if( h->mb.i_type == I_8x8 )
{
h->mb.b_transform_8x8 = 1;
/* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
if( h->mb.i_skip_intra )
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 );
M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i8x8_nnz_buf[0];
M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i8x8_nnz_buf[1];
M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i8x8_nnz_buf[2];
M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i8x8_nnz_buf[3];
h->mb.i_cbp_luma = h->mb.pic.i8x8_cbp;
/* In RD mode, restore the now-overwritten DCT data. */
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) );
}
for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
{
for( int i = (p == 0 && h->mb.i_skip_intra) ? 3 : 0 ; i < 4; i++ )
{
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
x264_mb_encode_i8x8( h, p, i, i_qp, i_mode, NULL, 1 );
}
}
}
//Intra4x4类型
else if( h->mb.i_type == I_4x4 )
{
/*
* 帧内预测:16x16 宏块被划分为16个4x4子块
*
* +----+----+----+----+
* | | | | |
* +----+----+----+----+
* | | | | |
* +----+----+----+----+
* | | | | |
* +----+----+----+----+
* | | | | |
* +----+----+----+----+
*
*/
h->mb.b_transform_8x8 = 0;
/* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */
if( h->mb.i_skip_intra )
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 );
M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i4x4_nnz_buf[0];
M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i4x4_nnz_buf[1];
M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i4x4_nnz_buf[2];
M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i4x4_nnz_buf[3];
h->mb.i_cbp_luma = h->mb.pic.i4x4_cbp;
/* In RD mode, restore the now-overwritten DCT data. */
if( h->mb.i_skip_intra == 2 )
h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) );
}
//分别编码Y,U,V
for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
{
//循环16次,编码16个Intra4x4宏块
for( int i = (p == 0 && h->mb.i_skip_intra) ? 15 : 0 ; i < 16; i++ )
{
pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[i]];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]; if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP )
/* emulate missing topright samples */
MPIXEL_X4( &p_dst[4-FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst[3-FDEC_STRIDE] );
//Intra4x4宏块编码
/*
* +----+
* | |
* +----+
*/
x264_mb_encode_i4x4( h, p, i, i_qp, i_mode, 1 );
}
}
}
//包含帧间预测
else /* Inter MB */
{
int i_decimate_mb = 0; /* Don't repeat motion compensation if it was already done in non-RD transform analysis */
if( !h->mb.b_skip_mc )
x264_mb_mc( h ); if( h->mb.b_lossless )//lossless情况没研究过
{
if( h->mb.b_transform_8x8 )
for( int p = 0; p < plane_count; p++ )
for( int i8x8 = 0; i8x8 < 4; i8x8++ )
{
int x = i8x8&1;
int y = i8x8>>1;
nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+i8x8], h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE,
h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE );
STORE_8x8_NNZ( p, i8x8, nz );
h->mb.i_cbp_luma |= nz << i8x8;
}
else
for( int p = 0; p < plane_count; p++ )
for( int i4x4 = 0; i4x4 < 16; i4x4++ )
{
nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+i4x4],
h->mb.pic.p_fenc[p]+block_idx_xy_fenc[i4x4],
h->mb.pic.p_fdec[p]+block_idx_xy_fdec[i4x4] );
h->mb.cache.non_zero_count[x264_scan8[p*16+i4x4]] = nz;
h->mb.i_cbp_luma |= nz << (i4x4>>2);
}
}
else if( h->mb.b_transform_8x8 )//DCT8x8情况暂时没研究过
{
ALIGNED_ARRAY_N( dctcoef, dct8x8,[4],[64] );
b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
{
CLEAR_16x16_NNZ( p );
h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] );
h->nr_count[1+!!p*2] += h->mb.b_noise_reduction * 4; int plane_cbp = 0;
for( int idx = 0; idx < 4; idx++ )
{
nz = x264_quant_8x8( h, dct8x8[idx], i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 0, p, idx ); if( nz )
{
h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8[idx] );
if( b_decimate )
{
int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[p*4+idx] );
i_decimate_mb += i_decimate_8x8;
if( i_decimate_8x8 >= 4 )
plane_cbp |= 1<<idx;
}
else
plane_cbp |= 1<<idx;
}
} if( i_decimate_mb >= 6 || !b_decimate )
{
h->mb.i_cbp_luma |= plane_cbp;
FOREACH_BIT( idx, 0, plane_cbp )
{
h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[p?CQM_8PC:CQM_8PY], i_qp );
h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[p][8*(idx&1) + 8*(idx>>1)*FDEC_STRIDE], dct8x8[idx] );
STORE_8x8_NNZ( p, idx, 1 );
}
}
}
}
else//最普通的情况
{
/*
* 帧间预测:16x16 宏块被划分为8x8
* 每个8x8再次被划分为4x4
*
* ++====+====++====+====++
* || | || | ||
* ++====+====++====+====++
* || | || | ||
* ++====+====++====+====++
* || | || | ||
* ++====+====++====+====++
* || | || | ||
* ++====+====+=====+====++
*
*/
ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] );
for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp )
{
CLEAR_16x16_NNZ( p );
//16x16DCT(实际上分解为16个4x4DCT)
//求编码帧p_fenc和重建帧p_fdec之间的残差,然后进行DCT变换
h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] ); if( h->mb.b_noise_reduction )
{
h->nr_count[0+!!p*2] += 16;
for( int idx = 0; idx < 16; idx++ )
h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 );
} int plane_cbp = 0;
//16x16的块分成4个8x8的块
for( int i8x8 = 0; i8x8 < 4; i8x8++ )
{
int i_decimate_8x8 = b_decimate ? 0 : 6;
int nnz8x8 = 0;
if( h->mb.b_trellis )
{
for( int i4x4 = 0; i4x4 < 4; i4x4++ )
{
int idx = i8x8*4+i4x4;
if( x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, !!p, p*16+idx ) )
{
h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] );
h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[p?CQM_4PC:CQM_4PY], i_qp );
if( i_decimate_8x8 < 6 )
i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] );
h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = 1;
nnz8x8 = 1;
}
}
}
else
{
//8x8的块分成4个4x4的块,每个4x4的块再分别进行量化
nnz8x8 = nz = h->quantf.quant_4x4x4( &dct4x4[i8x8*4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
if( nz )
{
FOREACH_BIT( idx, i8x8*4, nz )
{
//这几步用于建立重建帧
h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] );
//反量化
h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[p?CQM_4PC:CQM_4PY], i_qp );
if( i_decimate_8x8 < 6 )
i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] );
h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = 1;
}
}
}
if( nnz8x8 )
{
i_decimate_mb += i_decimate_8x8;
if( i_decimate_8x8 < 4 )
STORE_8x8_NNZ( p, i8x8, 0 );
else
plane_cbp |= 1<<i8x8;
}
} if( i_decimate_mb < 6 )
{
plane_cbp = 0;
CLEAR_16x16_NNZ( p );
}
else
{
h->mb.i_cbp_luma |= plane_cbp;
FOREACH_BIT( i8x8, 0, plane_cbp )
{
//用于建立重建帧
//残差进行DCT反变换之后,叠加到预测数据上
h->dctf.add8x8_idct( &h->mb.pic.p_fdec[p][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] );
}
}
}
}
} /* encode chroma */
if( chroma )
{
if( IS_INTRA( h->mb.i_type ) )
{
int i_mode = h->mb.i_chroma_pred_mode;
if( h->mb.b_lossless )
x264_predict_lossless_chroma( h, i_mode );
else
{
h->predict_chroma[i_mode]( h->mb.pic.p_fdec[1] );
h->predict_chroma[i_mode]( h->mb.pic.p_fdec[2] );
}
} /* encode the 8x8 blocks */
x264_mb_encode_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp );
}
else
h->mb.i_cbp_chroma = 0; /* store cbp */
int cbp = h->mb.i_cbp_chroma << 4 | h->mb.i_cbp_luma;
if( h->param.b_cabac )
cbp |= h->mb.cache.non_zero_count[x264_scan8[LUMA_DC ]] << 8
| h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] << 9
| h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] << 10;
h->mb.cbp[h->mb.i_mb_xy] = cbp; /* Check for P_SKIP
* XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
* (if multiple mv give same result)*/
if( !b_force_no_skip )
{
if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
!(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) &&
M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv )
&& h->mb.cache.ref[0][x264_scan8[0]] == 0 )
{
h->mb.i_type = P_SKIP;
} /* Check for B_SKIP */
if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) )
{
h->mb.i_type = B_SKIP;
}
}
}

(1)如果是Skip类型,调用x264_macroblock_encode_skip()编码宏块;

(2)如果Intra16x16类型,调用x264_mb_encode_i16x16()编码宏块;

(3)如果Intra4x4类型,循环16次调用x264_mb_encode_i4x4()编码宏块;

(4)如果Inter类型,则不再调用子函数,而是直接进行编码;

(5)如果对色度编码,调用x264_mb_encode_chroma()。

滤波模块:调用函数x264_fdec_filter_row()。

x264_fdec_filter_row():

/* Post-process the reconstructed rows that lie just above mb_y: deblocking,
 * border expansion, half-pel interpolation and, when requested, accumulation
 * of the per-frame SSD (for PSNR) and SSIM statistics.
 *
 * h:    encoder context
 * mb_y: the macroblock row to be encoded next, not the row filtered here
 * pass: only inspected when sliced threads are enabled:
 *       0 (or any other value) = during encode, 1 = post-encode pass,
 *       2 = final pass over the rows between slices */
static void x264_fdec_filter_row( x264_t *h, int mb_y, int pass )
{
    /* mb_y is the mb to be encoded next, not the mb to be filtered here */
    int b_hpel = h->fdec->b_kept_as_ref;
    int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
    int b_end = mb_y == h->i_threadslice_end;
    int b_measure_quality = 1;
    int min_y = mb_y - (1 << SLICE_MBAFF);
    int b_start = min_y == h->i_threadslice_start;
    /* Even in interlaced mode, deblocking never modifies more than 4 pixels
     * above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */
    int minpix_y = min_y*16 - 4 * !b_start;
    int maxpix_y = mb_y*16 - 4 * !b_end;
    b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv;
    if( h->param.b_sliced_threads )
    {
        switch( pass )
        {
            /* During encode: only do deblock if asked for */
            default:
            case 0:
                b_deblock &= h->param.b_full_recon;
                b_hpel = 0;
                break;
            /* During post-encode pass: do deblock if not done yet, do hpel for all
             * rows except those between slices. */
            case 1:
                b_deblock &= !h->param.b_full_recon;
                b_hpel &= !(b_start && min_y > 0);
                b_measure_quality = 0;
                break;
            /* Final pass: do the rows between slices in sequence. */
            case 2:
                b_deblock = 0;
                b_measure_quality = 0;
                break;
        }
    }

    if( mb_y & SLICE_MBAFF )
        return;
    if( min_y < h->i_threadslice_start )
        return;

    /* Deblocking filter, one macroblock row per call. */
    if( b_deblock )
        for( int y = min_y; y < mb_y; y += (1 << SLICE_MBAFF) )
            x264_frame_deblock_row( h, y );

    /* FIXME: Prediction requires different borders for interlaced/progressive mc,
     * but the actual image data is equivalent. For now, maintain this
     * consistency by copying deblocked pixels between planes. */
    if( PARAM_INTERLACED && (!h->param.b_sliced_threads || pass == 1) )
        for( int p = 0; p < h->fdec->i_plane; p++ )
            for( int i = minpix_y>>(CHROMA_V_SHIFT && p); i < maxpix_y>>(CHROMA_V_SHIFT && p); i++ )
                memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p],
                        h->fdec->plane[p] + i*h->fdec->i_stride[p],
                        h->mb.i_mb_width*16*sizeof(pixel) );

    if( h->fdec->b_kept_as_ref && (!h->param.b_sliced_threads || pass == 1) )
        x264_frame_expand_border( h, h->fdec, min_y );

    /* Half-pel interpolation of the reconstructed frame. */
    if( b_hpel )
    {
        int end = mb_y == h->mb.i_mb_height;
        /* Can't do hpel until the previous slice is done encoding. */
        if( h->param.analyse.i_subpel_refine )
        {
            x264_frame_filter( h, h->fdec, min_y, end );
            x264_frame_expand_border_filtered( h, h->fdec, min_y, end );
        }
    }

    if( SLICE_MBAFF && pass == 0 )
        for( int i = 0; i < 3; i++ )
        {
            XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] );
            XCHG( pixel *, h->intra_border_backup[1][i], h->intra_border_backup[4][i] );
        }

    if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref )
        x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << SLICE_MBAFF)) );

    /* Measure the quality of the reconstruction against the source frame. */
    if( b_measure_quality )
    {
        maxpix_y = X264_MIN( maxpix_y, h->param.i_height );
        if( h->param.analyse.b_psnr )
        {
            /* Only the SSD (sum of squared differences) is accumulated here;
             * it is converted to PSNR by x264_psnr() when the frame ends:
             *
             *   MSE  = SSD / (w*h)
             *   PSNR = 10*log10( MAX^2 / MSE )
             *
             * where MAX is the peak sample value, e.g. 2^8-1 = 255 for 8 bit. */
            for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ )
                h->stat.frame.i_ssd[p] += x264_pixel_ssd_wxh( &h->pixf,
                    h->fdec->plane[p] + minpix_y * h->fdec->i_stride[p], h->fdec->i_stride[p], /* reconstructed frame */
                    h->fenc->plane[p] + minpix_y * h->fenc->i_stride[p], h->fenc->i_stride[p], /* source frame */
                    h->param.i_width, maxpix_y-minpix_y );
            if( !CHROMA444 )
            {
                uint64_t ssd_u, ssd_v;
                int v_shift = CHROMA_V_SHIFT;
                x264_pixel_ssd_nv12( &h->pixf,
                    h->fdec->plane[1] + (minpix_y>>v_shift) * h->fdec->i_stride[1], h->fdec->i_stride[1],
                    h->fenc->plane[1] + (minpix_y>>v_shift) * h->fenc->i_stride[1], h->fenc->i_stride[1],
                    h->param.i_width>>1, (maxpix_y-minpix_y)>>v_shift, &ssd_u, &ssd_v );
                h->stat.frame.i_ssd[1] += ssd_u;
                h->stat.frame.i_ssd[2] += ssd_v;
            }
        }
        if( h->param.analyse.b_ssim )
        {
            int ssim_cnt;
            x264_emms();
            /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
             * and overlap by 4 */
            minpix_y += b_start ? 2 : -6;
            h->stat.frame.f_ssim +=
                x264_pixel_ssim_wxh( &h->pixf,
                    h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0], /* reconstructed frame */
                    h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0], /* source frame */
                    h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer, &ssim_cnt );
            h->stat.frame.i_ssim_cnt += ssim_cnt;
        }
    }
}

函数x264_fdec_filter_row()完成了三步工作:

环路滤波;半像素内插;视频质量SSIM和PSNR计算。

熵编码模块:熵编码模块包含两个函数:如果输出设置为CABAC编码,调用函数x264_macroblock_write_cabac();如果输出设置为CAVLC编码,调用函数x264_macroblock_write_cavlc()。

x264_macroblock_write_cavlc():

/* Write the current macroblock to the bitstream h->out.bs with CAVLC:
 * the optional MBAFF field flag, the macroblock header for the current
 * slice type, the coded block pattern, the 8x8 transform flag, the QP
 * delta and the luma/chroma residuals. I_PCM macroblocks are written as
 * raw samples instead.
 *
 * When RDO_SKIP_BS is non-zero the I_PCM path and the per-frame
 * i_mv_bits / i_tex_bits bookkeeping are compiled out. */
void x264_macroblock_write_cavlc( x264_t *h )
{
    bs_t *s = &h->out.bs;
    const int i_mb_type = h->mb.i_type;
    int plane_count = CHROMA444 ? 3 : 1;
    int chroma = !CHROMA444;

#if RDO_SKIP_BS
    s->i_bits_encoded = 0;
#else
    const int i_mb_pos_start = bs_pos( s );
    int i_mb_pos_tex;
#endif

    if( SLICE_MBAFF
        && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
    {
        bs_write1( s, MB_INTERLACED );
#if !RDO_SKIP_BS
        h->mb.field_decoding_flag = MB_INTERLACED;
#endif
    }

#if !RDO_SKIP_BS
    if( i_mb_type == I_PCM )
    {
        static const uint8_t i_offsets[3] = {5,23,0};
        uint8_t *p_start = s->p_start;
        bs_write_ue( s, i_offsets[h->sh.i_type] + 25 );
        i_mb_pos_tex = bs_pos( s );
        h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;

        bs_align_0( s );

        /* Raw samples: 256 per full-size plane, then the two chroma planes. */
        for( int p = 0; p < plane_count; p++ )
            for( int i = 0; i < 256; i++ )
                bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );
        if( chroma )
            for( int ch = 1; ch < 3; ch++ )
                for( int i = 0; i < 16>>CHROMA_V_SHIFT; i++ )
                    for( int j = 0; j < 8; j++ )
                        bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] );

        bs_init( s, s->p, s->p_end - s->p );
        s->p_start = p_start;

        h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
        return;
    }
#endif

    /* Macroblock header, chosen by slice type. */
    if( h->sh.i_type == SLICE_TYPE_P )
        x264_cavlc_mb_header_p( h, i_mb_type, chroma );
    else if( h->sh.i_type == SLICE_TYPE_B )
        x264_cavlc_mb_header_b( h, i_mb_type, chroma );
    else //if( h->sh.i_type == SLICE_TYPE_I )
        x264_cavlc_mb_header_i( h, i_mb_type, 0, chroma );

#if !RDO_SKIP_BS
    i_mb_pos_tex = bs_pos( s );
    h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
#endif

    /* Coded block pattern */
    if( i_mb_type != I_16x16 )
        bs_write_ue( s, cbp_to_golomb[chroma][IS_INTRA(i_mb_type)][(h->mb.i_cbp_chroma << 4)|h->mb.i_cbp_luma] );

    /* transform size 8x8 flag */
    if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
        bs_write1( s, h->mb.b_transform_8x8 );

    if( i_mb_type == I_16x16 )
    {
        x264_cavlc_qp_delta( h );

        /* DC Luma */
        for( int p = 0; p < plane_count; p++ )
        {
            x264_cavlc_block_residual( h, DCT_LUMA_DC, LUMA_DC+p, h->dct.luma16x16_dc[p] );

            /* AC Luma */
            if( h->mb.i_cbp_luma )
                for( int i = p*16; i < p*16+16; i++ )
                    x264_cavlc_block_residual( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
        }
    }
    else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
    {
        x264_cavlc_qp_delta( h );
        /* Luma residual data */
        x264_cavlc_macroblock_luma_residual( h, plane_count );
    }
    if( h->mb.i_cbp_chroma )
    {
        /* Chroma DC residual present */
        x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0] );
        x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1] );
        if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */
        {
            int step = 8 << CHROMA_V_SHIFT;
            for( int i = 16; i < 3*16; i += step )
                for( int j = i; j < i+4; j++ )
                    x264_cavlc_block_residual( h, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1 );
        }
    }

#if !RDO_SKIP_BS
    h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
#endif
}

2.NAL打包:

前面所说的压缩编码过程已经把所有的宏块循环完毕,实现了VCL编码。进行NAL打包是为了增强码流的健壮性,适应网络传输。VCL编码加上NAL头信息就组成完整的NAL单元,输出文件。

这部分的代码位于函数x264_encoder_encode()中,调用了函数x264_encoder_frame_end()。

x264_encoder_frame_end():在编码结束后做一些后续处理,比如说加上起始码,封装NALU。

/* Finish encoding one frame: encapsulate its NAL units, fill in the output
 * picture, update rate control, append filler data if requested and
 * accumulate/print statistics.
 *
 * h:              encoder context of the frame being finished
 * thread_current: context that receives the recycled fenc frame and the
 *                 synchronised statistics for use by the next frame
 * pp_nal:         receives the array of output NAL units
 * pi_nal:         receives the number of output NAL units
 * pic_out:        receives the properties of the reconstructed picture
 *
 * Returns the frame size in bytes, 0 when there are no NAL units to output
 * (pic_out->i_type is then X264_TYPE_AUTO), or -1 on error. */
static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
                                   x264_nal_t **pp_nal, int *pi_nal,
                                   x264_picture_t *pic_out )
{
    char psz_message[80];

    if( !h->param.b_sliced_threads && h->b_thread_active )
    {
        h->b_thread_active = 0;
        if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) )
            return -1;
    }
    if( !h->out.i_nal )
    {
        pic_out->i_type = X264_TYPE_AUTO;
        return 0;
    }

    x264_emms();

    /* generate buffering period sei and insert it into place */
    if( h->i_thread_frames > 1 && h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present )
    {
        x264_hrd_fullness( h );
        x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
        x264_sei_buffering_period_write( h, &h->out.bs );
        if( x264_nal_end( h ) )
            return -1;
        /* buffering period sei must follow AUD, SPS and PPS and precede all other SEIs */
        int idx = 0;
        while( h->out.nal[idx].i_type == NAL_AUD ||
               h->out.nal[idx].i_type == NAL_SPS ||
               h->out.nal[idx].i_type == NAL_PPS )
            idx++;
        x264_nal_t nal_tmp = h->out.nal[h->out.i_nal-1];
        memmove( &h->out.nal[idx+1], &h->out.nal[idx], (h->out.i_nal-idx-1)*sizeof(x264_nal_t) );
        h->out.nal[idx] = nal_tmp;
    }

    /* Encapsulate the NAL units of this frame, e.g. prepend the start code
     * 0x00000001 to each of them. */
    int frame_size = x264_encoder_encapsulate_nals( h, 0 );
    if( frame_size < 0 )
        return -1;

    /* Set output picture properties */
    /* pic_out is an x264_picture_t, the public libx264 structure;
     * fenc and fdec are x264_frame_t, the internal libx264 structure. */
    pic_out->i_type = h->fenc->i_type;

    pic_out->b_keyframe = h->fenc->b_keyframe;
    pic_out->i_pic_struct = h->fenc->i_pic_struct;

    pic_out->i_pts = h->fdec->i_pts;
    pic_out->i_dts = h->fdec->i_dts;

    if( pic_out->i_pts < pic_out->i_dts )
        x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" );

    pic_out->opaque = h->fenc->opaque;

    pic_out->img.i_csp = h->fdec->i_csp;
#if HIGH_BIT_DEPTH
    pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH;
#endif
    pic_out->img.i_plane = h->fdec->i_plane;
    /* Image data: point at the planes of the reconstructed frame. */
    for( int i = 0; i < pic_out->img.i_plane; i++ )
    {
        pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel);
        pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i];
    }

    /* Recycle the source frame fenc now that it has been encoded. */
    x264_frame_push_unused( thread_current, h->fenc );

    /* ---------------------- Update encoder state ------------------------- */

    /* update rc */
    int filler = 0;
    if( x264_ratecontrol_end( h, frame_size * 8, &filler ) < 0 )
        return -1;

    pic_out->hrd_timing = h->fenc->hrd_timing;
    pic_out->prop.f_crf_avg = h->fdec->f_crf_avg;

    /* Filler in AVC-Intra mode is written as zero bytes to the last slice
     * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */
    if( h->param.i_avcintra_class )
    {
        x264_t *h0 = h->thread[0];
        int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler );
        if( ret < 0 )
            return -1;
        memset( h->out.nal[0].p_payload + frame_size, 0, filler );
        h->out.nal[h->out.i_nal-1].i_payload += filler;
        h->out.nal[h->out.i_nal-1].i_padding = filler;
        frame_size += filler;
    }
    else
    {
        while( filler > 0 )
        {
            int f, overhead;
            overhead = (FILLER_OVERHEAD - h->param.b_annexb);
            if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size )
            {
                int next_size = filler - h->param.i_slice_max_size;
                int overflow = X264_MAX( overhead - next_size, 0 );
                f = h->param.i_slice_max_size - overhead - overflow;
            }
            else
                f = X264_MAX( 0, filler - overhead );

            if( x264_bitstream_check_buffer_filler( h, f ) )
                return -1;
            x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE );
            x264_filler_write( h, &h->out.bs, f );
            if( x264_nal_end( h ) )
                return -1;
            int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 );
            if( total_size < 0 )
                return -1;
            frame_size += total_size;
            filler -= total_size;
        }
    }

    /* End bitstream, set output */
    *pi_nal = h->out.i_nal;
    *pp_nal = h->out.nal;

    h->out.i_nal = 0;

    x264_noise_reduction_update( h );

    /* ---------------------- Compute/Print statistics --------------------- */
    x264_thread_sync_stat( h, h->thread[0] );

    /* Slice stat */
    /* h->stat holds the accumulated statistics, indexed by slice type:
     * frame count, frame size and average QP. */
    h->stat.i_frame_count[h->sh.i_type]++;
    h->stat.i_frame_size[h->sh.i_type] += frame_size;
    h->stat.f_frame_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq;
    /* Add this frame's macroblock counts, per macroblock type, to the totals. */
    for( int i = 0; i < X264_MBTYPE_MAX; i++ )
        h->stat.i_mb_count[h->sh.i_type][i] += h->stat.frame.i_mb_count[i];
    for( int i = 0; i < X264_PARTTYPE_MAX; i++ )
        h->stat.i_mb_partition[h->sh.i_type][i] += h->stat.frame.i_mb_partition[i];
    for( int i = 0; i < 2; i++ )
        h->stat.i_mb_count_8x8dct[i] += h->stat.frame.i_mb_count_8x8dct[i];
    for( int i = 0; i < 6; i++ )
        h->stat.i_mb_cbp[i] += h->stat.frame.i_mb_cbp[i];
    for( int i = 0; i < 4; i++ )
        for( int j = 0; j < 13; j++ )
            h->stat.i_mb_pred_mode[i][j] += h->stat.frame.i_mb_pred_mode[i][j];
    if( h->sh.i_type != SLICE_TYPE_I )
        for( int i_list = 0; i_list < 2; i_list++ )
            for( int i = 0; i < X264_REF_MAX*2; i++ )
                h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i];
    for( int i = 0; i < 3; i++ )
        h->stat.i_mb_field[i] += h->stat.frame.i_mb_field[i];
    if( h->sh.i_type == SLICE_TYPE_P && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE )
    {
        h->stat.i_wpred[0] += !!h->sh.weight[0][0].weightfn;
        h->stat.i_wpred[1] += !!h->sh.weight[0][1].weightfn || !!h->sh.weight[0][2].weightfn;
    }
    if( h->sh.i_type == SLICE_TYPE_B )
    {
        h->stat.i_direct_frames[ h->sh.b_direct_spatial_mv_pred ] ++;
        if( h->mb.b_direct_auto_write )
        {
            //FIXME somewhat arbitrary time constants
            if( h->stat.i_direct_score[0] + h->stat.i_direct_score[1] > h->mb.i_mb_count )
                for( int i = 0; i < 2; i++ )
                    h->stat.i_direct_score[i] = h->stat.i_direct_score[i] * 9/10;
            for( int i = 0; i < 2; i++ )
                h->stat.i_direct_score[i] += h->stat.frame.i_direct_score[i];
        }
    }
    else
        h->stat.i_consecutive_bframes[h->fenc->i_bframes]++;

    psz_message[0] = '\0';
    double dur = h->fenc->f_duration;
    h->stat.f_frame_duration[h->sh.i_type] += dur;

    /* PSNR requested */
    if( h->param.analyse.b_psnr )
    {
        /* SSD (sum of squared differences) per plane. */
        int64_t ssd[3] =
        {
            h->stat.frame.i_ssd[0],
            h->stat.frame.i_ssd[1],
            h->stat.frame.i_ssd[2],
        };
        int luma_size = h->param.i_width * h->param.i_height;
        int chroma_size = CHROMA_SIZE( luma_size );

        /* The SSD was already accumulated during the filtering stage;
         * x264_psnr() converts it to PSNR. */
        pic_out->prop.f_psnr[0] = x264_psnr( ssd[0], luma_size );
        pic_out->prop.f_psnr[1] = x264_psnr( ssd[1], chroma_size );
        pic_out->prop.f_psnr[2] = x264_psnr( ssd[2], chroma_size );
        /* PSNR over all three planes together. */
        pic_out->prop.f_psnr_avg = x264_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 );
        /* The global/mean statistics are duration-weighted running sums. */
        h->stat.f_ssd_global[h->sh.i_type] += dur * (ssd[0] + ssd[1] + ssd[2]);
        h->stat.f_psnr_average[h->sh.i_type] += dur * pic_out->prop.f_psnr_avg;
        h->stat.f_psnr_mean_y[h->sh.i_type] += dur * pic_out->prop.f_psnr[0];
        h->stat.f_psnr_mean_u[h->sh.i_type] += dur * pic_out->prop.f_psnr[1];
        h->stat.f_psnr_mean_v[h->sh.i_type] += dur * pic_out->prop.f_psnr[2];

        snprintf( psz_message, 80, " PSNR Y:%5.2f U:%5.2f V:%5.2f", pic_out->prop.f_psnr[0],
                                                                    pic_out->prop.f_psnr[1],
                                                                    pic_out->prop.f_psnr[2] );
    }

    /* SSIM requested */
    if( h->param.analyse.b_ssim )
    {
        /* The SSIM sum was already accumulated during the filtering stage. */
        pic_out->prop.f_ssim = h->stat.frame.f_ssim / h->stat.frame.i_ssim_cnt;
        /* Duration-weighted running sum for the mean. */
        h->stat.f_ssim_mean_y[h->sh.i_type] += pic_out->prop.f_ssim * dur;
        snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message),
                  " SSIM Y:%.5f", pic_out->prop.f_ssim );
    }
    psz_message[79] = '\0';

    /* Per-frame summary, logged at debug level. */
    x264_log( h, X264_LOG_DEBUG,
              "frame=%4d QP=%.2f NAL=%d Slice:%c Poc:%-3d I:%-4d P:%-4d SKIP:%-4d size=%d bytes%s\n",
              h->i_frame,
              h->fdec->f_qp_avg_aq,
              h->i_nal_ref_idc,
              h->sh.i_type == SLICE_TYPE_I ? 'I' : (h->sh.i_type == SLICE_TYPE_P ? 'P' : 'B' ),
              h->fdec->i_poc,
              h->stat.frame.i_mb_count_i,
              h->stat.frame.i_mb_count_p,
              h->stat.frame.i_mb_count_skip,
              frame_size,
              psz_message );

    // keep stats all in one place
    x264_thread_sync_stat( h->thread[0], h );
    // for the use of the next frame
    x264_thread_sync_stat( thread_current, h );

#ifdef DEBUG_MB_TYPE
{
    static const char mb_chars[] = { 'i', 'i', 'I', 'C', 'P', '8', 'S',
        'D', '<', 'X', 'B', 'X', '>', 'B', 'B', 'B', 'B', '8', 'S' };
    for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
    {
        if( h->mb.type[mb_xy] < X264_MBTYPE_MAX && h->mb.type[mb_xy] >= 0 )
            fprintf( stderr, "%c ", mb_chars[ h->mb.type[mb_xy] ] );
        else
            fprintf( stderr, "? " );

        if( (mb_xy+1) % h->mb.i_mb_width == 0 )
            fprintf( stderr, "\n" );
    }
}
#endif

    /* Remove duplicates, must be done near the end as breaks h->fref0 array
     * by freeing some of its pointers. */
    for( int i = 0; i < h->i_ref[0]; i++ )
        if( h->fref[0][i] && h->fref[0][i]->b_duplicate )
        {
            x264_frame_push_blank_unused( h, h->fref[0][i] );
            h->fref[0][i] = 0;
        }

    if( h->param.psz_dump_yuv )
        x264_frame_dump( h );
    x264_emms();

    return frame_size;
}

x264_encoder_frame_end()中封装NALU调用了函数x264_encoder_encapsulate_nals()。

x264_encoder_encapsulate_nals():

/* Encapsulate the NAL units h->out.nal[start .. i_nal-1] of one frame into
 * the NAL buffer of thread 0, placed right after the bytes already taken by
 * the NAL units before `start` (e.g. each one gets its start code
 * 0x00000001 prepended by x264_nal_encode()).
 *
 * When h->param.nalu_process is set nothing is encoded here and only the
 * summed payload size of those NAL units is returned.
 *
 * Returns the number of bytes written for the encapsulated NAL units,
 * or -1 if the output buffer check fails. */
static int x264_encoder_encapsulate_nals( x264_t *h, int start )
{
    x264_t *h0 = h->thread[0];

    /* Raw payload bytes of the NAL units that are to be encapsulated. */
    int payload_bytes = 0;
    for( int n = start; n < h->out.i_nal; n++ )
        payload_bytes += h->out.nal[n].i_payload;

    if( h->param.nalu_process )
        return payload_bytes;

    /* Bytes occupied by the NAL units that precede `start`. */
    int prefix_bytes = 0;
    for( int n = 0; n < start; n++ )
        prefix_bytes += h->out.nal[n].i_payload;

    /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */
    int necessary_size = prefix_bytes + payload_bytes * 3/2 + h->out.i_nal * 4 + 4 + 64;
    for( int n = start; n < h->out.i_nal; n++ )
        necessary_size += h->out.nal[n].i_padding;
    if( x264_check_encapsulated_buffer( h, h0, start, prefix_bytes, necessary_size ) )
        return -1;

    uint8_t *cursor = h0->nal_buffer + prefix_bytes;

    /* Encode the NAL units one after another. */
    for( int n = start; n < h->out.i_nal; n++ )
    {
        x264_nal_t *nal = &h->out.nal[n];
        int old_payload_len = nal->i_payload;

        nal->b_long_startcode = !n || nal->i_type == NAL_SPS || nal->i_type == NAL_PPS ||
                                h->param.i_avcintra_class;
        /* Writes start code / size field, NAL header and escaped payload. */
        x264_nal_encode( h, cursor, nal );
        cursor += nal->i_payload;

        if( !h->param.i_avcintra_class )
            continue;

        /* AVC-Intra: reduce the requested padding by the number of bytes the
         * encoded NAL grew beyond its raw payload plus NALU_OVERHEAD, then
         * zero-fill whatever padding is left. */
        nal->i_padding -= nal->i_payload - (old_payload_len + NALU_OVERHEAD);
        if( nal->i_padding > 0 )
        {
            memset( cursor, 0, nal->i_padding );
            cursor += nal->i_padding;
            nal->i_payload += nal->i_padding;
        }
        nal->i_padding = X264_MAX( nal->i_padding, 0 );
    }

    x264_emms();

    return cursor - (h0->nal_buffer + prefix_bytes);
}

其内部又调用了另一个函数x264_nal_encode()逐个给一帧数据中的各个NALU添加起始码以及NALU Header。

x264_nal_encode():

/* Serialise one NAL unit into dst: a start code (Annex B) or a 4-byte size
 * field (mp4-style), the one-byte NAL header and the escaped payload.
 * On return nal->p_payload points at dst and nal->i_payload is the total
 * number of bytes written, prefix included. */
void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
    uint8_t *const out_begin = dst;
    uint8_t *payload_begin = nal->p_payload;
    uint8_t *payload_end = payload_begin + nal->i_payload;
    uint8_t *out = dst;

    /* ---- prefix ---- */
    if( !h->param.b_annexb )
    {
        /* save room for size later */
        out += 4;
    }
    else
    {
        /* Annex B start code: 00 00 01, with one extra leading zero byte
         * when a long start code is requested. */
        if( nal->b_long_startcode )
            *out++ = 0x00;
        *out++ = 0x00;
        *out++ = 0x00;
        *out++ = 0x01;
    }

    /* ---- nal header ---- */
    *out++ = ( 0x00 << 7 ) | ( nal->i_ref_idc << 5 ) | nal->i_type;

    /* ---- escaped payload ---- */
    out = h->bsf.nal_escape( out, payload_begin, payload_end );
    int size = (out - out_begin) - 4;

    /* Write the size header for mp4/etc */
    if( !h->param.b_annexb )
    {
        /* Size doesn't include the size of the header we're writing now.
         * Stored most significant byte first in the reserved 4 bytes. */
        out_begin[0] = size>>24;
        out_begin[1] = size>>16;
        out_begin[2] = size>> 8;
        out_begin[3] = size>> 0;
    }

    /* Total length of the encoded NAL unit, prefix included. */
    nal->i_payload = size+4;
    nal->p_payload = out_begin;
    x264_emms();
}

添加过程:

(1)annexb模式:在每个NALU前面添加起始码:b_long_startcode为真时(例如一帧中的第一个NALU、SPS、PPS)添加4字节的0x00000001,否则添加3字节的0x000001;

(2)mp4模式:先计算NALU的长度(不包含前四个字节),再将长度信息写入NALU前面的四个字节;

添加过程分两种是因为H264码流格式有两种:

(1)annexb模式:在这个模式下,每个NALU包含起始码0x00000001,SPS、PPS存储在码流中,最常见的H264裸流就是这种;

(2)mp4模式:这种模式下,每个NALU不包含起始码,原本存放起始码的前4个字节改为存储NALU的长度,SPS、PPS单独放在容器的其他位置上,这种H264一般存储在容器中,比如说mp4中。

X264-编码模块和NAL打包输出的更多相关文章

  1. 实用程序包utils - 基于Rollup打包输出各模块文件(二)

    上一次,我们讲到了如何去搭建一个前端工具库的工程,那么今天我们来聊一聊如何去将其打包输出. 需求 事情是这个样子的.我有一个这样的需求,或者是我发现有这么一个需求.就是有时候吧,我也不想搞的那么复杂, ...

  2. X264编码实现

    H264 H264的官方测试源码,由德国hhi研究所负责开发.特点:实现了264所有的特性,由于是官方的测试源码,所以学术研究的算法都是在JM基础上实现并和JM进行比较.但其程序结构冗长,只考虑引入各 ...

  3. WebRTC VideoEngine超详细教程(三)——集成X264编码和ffmpeg解码

    转自:http://blog.csdn.net/nonmarking/article/details/47958395 本系列目前共三篇文章,后续还会更新 WebRTC VideoEngine超详细教 ...

  4. webpack对多个模块依赖进行打包

    [ webpack3.0.0刚刚出来  所以文章是跟着低版本 教程 操作熟悉  结果好多对不上喔] 七:webpack对多个模块依赖进行打包 通过一刚开始我们了解到 webpack支持commonJS ...

  5. (转)x264 编码流程

    转自:http://alphamailpost.blog.163.com/blog/static/20111808120128111160728/ http://www.usr.cc/thread-5 ...

  6. X264编码流程详解(转)

    http://blog.csdn.net/xingyu19871124/article/details/7671634 对H.264编码标准一直停留在理解原理的基础上,对于一个实际投入使用的编码器是如 ...

  7. python 1: 解决linux系统下python中的matplotlib模块内的pyplot输出图片不能显示中文的问题

    问题: 我在ubuntu14.04下用python中的matplotlib模块内的pyplot输出图片不能显示中文,怎么解决呢? 解决: 1.指定默认编码为UTF-8: 在python代码开头加入如下 ...

  8. MicroPython TPYBoard v102 无线红外遥控舵机(基于红外解/编码模块)

    转载请注明文章来源,更多教程可自助参考docs.tpyboard.com,QQ技术交流群:157816561,公众号:MicroPython玩家汇 红外解码/编码模块介绍 模块上搭载了红外接收头.红外 ...

  9. iOS开发之Socket通信实战--Request请求数据包编码模块

    实际上在iOS很多应用开发中,大部分用的网络通信都是http/https协议,除非有特殊的需求会用到Socket网络协议进行网络数 据传输,这时候在iOS客户端就需要很好的第三方CocoaAsyncS ...

随机推荐

  1. [20190510]rman备份的疑问8.txt

    [20190510]rman备份的疑问8.txt --//上午测试rman备份多个文件,探究input memory buffer 的问题.--//补充测试5个文件的情况.--//http://blo ...

  2. ORACLE ORA-1652的解决方法

    原创 Oracle 作者:wzq609 时间:2015-02-04 22:11:07 17183 0 前言:在检查数据库的alert日志,发现数据库报了ORA-1652: unable to exte ...

  3. Linux—文件上传和下载

    一.通过SecureCRT的rz.sz实现文件的上传和下载 1.检查是否安装sz  rz,命令如下 [root@localhost ~]# rpm -qa | grep rz # 上传 [root@l ...

  4. firewalld基础

    firewalld是一个基于网络区域的动态管理防火墙的守护进程.在iptables中需要理解表,链等概念,而在firewalld中需要从理解区域的概念开始 防火墙依照特定的规则允许或限制输出的数据通过 ...

  5. 软件工程基础团队第二次作业(团队项目-需求分析&系统设计)成绩汇总

    一.作业题目 团队第二次作业:需求分析&系统设计 二.具体要求 1.作业任务 任务一:组长组织项目组开展需求调研工作(可采取需求调查.问卷.分析已有软件.网上资料等方法).概要设计.详细设计. ...

  6. while语句 break和continue

    1.whlie 循环 基本条件 :while 条件: 代码块(循环体) else: 当上面的条件为假的,才会执行 执行顺序: 判断条件是否为真,如果为真,执行循环体,然后判断条件,...直到循环条件为 ...

  7. Pwnable-cmd1

    ssh cmd1@pwnable.kr -p2222 (pw:guest) 先看一下c的源码 #include <stdio.h> #include <string.h> in ...

  8. <Binary Search> 81 (高频)34 (很难hard, 高频)315 (hard)354

    81. Search in Rotated Sorted Array II 如果中间的数小于最右边的数,则右半段是有序的,若中间数大于最右边数,则左半段是有序的.而如果可以有重复值,就会出现来面两种情 ...

  9. 用scratch写了一个win98 界面模拟

    scratch模拟win98 玩耍,放在网上了https://kada.163.com/project/4216461-3515165.htm

  10. 使用system V实现读者写者问题

    #include <stdio.h> #include <sys/sem.h> #include <sys/ipc.h> #include <string.h ...