Index: encoder/me.c
===================================================================
--- encoder/me.c	(revision 680)
+++ encoder/me.c	(working copy)
@@ -61,6 +61,23 @@
     COPY3_IF_LT( bpred_cost, cost, bpred_mx, mx, bpred_my, my ); \
 }
 
+#define COST_MV_HPEL2( mx, my, cost ) /* store SAD + MV cost of candidate (mx,my) into cost; updates no best-so-far state */ \
+{ \
+    int stride = 16; \
+    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
+    cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+         + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
+}
+
+#define COST_MV_HPEL3( mx, my) /* like COST_MV_HPEL2, but folds the result into the caller's bestcost/bestx/besty */ \
+{ \
+    int stride = 16; \
+    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
+    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+             + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
+    COPY3_IF_LT( bestcost, cost, bestx, mx, besty, my ); \
+}
+
 #define COST_MV_X3_DIR( m0x, m0y, m1x, m1y, m2x, m2y, costs )\
 {\
     uint8_t *pix_base = p_fref + bmx + bmy*m->i_stride[0];\
@@ -166,8 +183,13 @@
     int mv_y_min = h->mb.mv_min_fpel[1];
     int mv_x_max = h->mb.mv_max_fpel[0];
     int mv_y_max = h->mb.mv_max_fpel[1];
+    int mv_x_min4 = mv_x_min*4; /* bounds scaled by 4, matching the x264_clip3() limits applied to mvc[]; multiply, not <<, since the minimum may be negative */
+    int mv_y_min4 = mv_y_min*4;
+    int mv_x_max4 = mv_x_max*4;
+    int mv_y_max4 = mv_y_max*4;
 
 #define CHECK_MVRANGE(mx,my) ( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max )
+#define CHECK_MVRANGE4(mx,my) ( mx >= mv_x_min4 && mx <= mv_x_max4 && my >= mv_y_min4 && my <= mv_y_max4 ) /* range test for x4-scaled coordinates */
 
     const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
     const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
@@ -177,18 +199,85 @@
     pmx = ( bmx + 2 ) >> 2;
     pmy = ( bmy + 2 ) >> 2;
     bcost = COST_MAX;
-    
+
     /* try extra predictors if provided */
     if( h->mb.i_subpel_refine >= 3 )
     {
         COST_MV_HPEL( bmx, bmy );
-        for( i = 0; i < i_mvc; i++ )
+        if(!h->param.analyse.i_me_prepass) /* prepass disabled: evaluate each clipped predictor once, as before */
         {
-            const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
-            const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
-            if( mx != bpred_mx || my != bpred_my )
-                COST_MV_HPEL( mx, my );
+            for( i = 0; i < i_mvc; i++ )
+            {
+                const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
+                const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
+                if( mx != bpred_mx || my != bpred_my )
+                    COST_MV_HPEL( mx, my );
+            }
         }
+        else /* prepass enabled: refine each promising predictor with a hexagon walk and an 8-neighbour check */
+        {
+            for( i = 0; i < i_mvc; i++ )
+            {
+                const int mx = x264_clip3( mvc[i][0], mv_x_min*4, mv_x_max*4 );
+                const int my = x264_clip3( mvc[i][1], mv_y_min*4, mv_y_max*4 );
+                if( mx != bpred_mx || my != bpred_my )
+                {
+                    int bestcost;
+                    int bestx = mx;
+                    int besty = my;
+                    COST_MV_HPEL2( mx, my, bestcost );
+                    COPY3_IF_LT( bpred_cost, bestcost, bpred_mx, bestx, bpred_my, besty );
+                    if(bestcost < 2*bpred_cost) /* refine only predictors costing less than twice the current best */
+                    {
+                        int n;
+                        int dir = -2; /* -2 means no hexagon point improved on the centre */
+                        COST_MV_HPEL2(bestx-2,besty,costs[0]); /* NOTE(review): hexagon candidates are not clipped to the MV range before indexing p_cost_mvx/p_cost_mvy -- confirm the tables and reference padding cover them */
+                        COST_MV_HPEL2(bestx-1,besty+2,costs[1]);
+                        COST_MV_HPEL2(bestx+1,besty+2,costs[2]);
+                        COST_MV_HPEL2(bestx+2,besty,costs[3]);
+                        COST_MV_HPEL2(bestx+1,besty-2,costs[4]);
+                        COST_MV_HPEL2(bestx-1,besty-2,costs[5]);
+                        COPY2_IF_LT( bestcost, costs[0], dir, 0 );
+                        COPY2_IF_LT( bestcost, costs[1], dir, 1 );
+                        COPY2_IF_LT( bestcost, costs[2], dir, 2 );
+                        COPY2_IF_LT( bestcost, costs[3], dir, 3 );
+                        COPY2_IF_LT( bestcost, costs[4], dir, 4 );
+                        COPY2_IF_LT( bestcost, costs[5], dir, 5 );
+                        if( dir != -2 )
+                        {
+                            static const int hex2[8][2] = {{-1,-2}, {-2,0}, {-1,2}, {1,2}, {2,0}, {1,-2}, {-1,-2}, {-2,0}}; /* six hexagon offsets, wrapped at both ends so hex2[dir+1] and hex2[odir+0..2] stay in bounds */
+                            bestx += hex2[dir+1][0];
+                            besty += hex2[dir+1][1];
+                            for( n = 1; n < i_me_range && CHECK_MVRANGE4(bestx, besty); n++ ) /* bestx/besty are x4-scaled, so they are tested against the x4-scaled bounds */
+                            {
+                                static const int mod6[8] = {5,0,1,2,3,4,5,0}; /* mod6[dir+1] is the direction one step before dir, modulo 6 */
+                                const int odir = mod6[dir+1];
+                                COST_MV_HPEL2(hex2[odir+0][0]+bestx,hex2[odir+0][1]+besty,costs[0]); /* only the three points ahead of the last move are new */
+                                COST_MV_HPEL2(hex2[odir+1][0]+bestx,hex2[odir+1][1]+besty,costs[1]);
+                                COST_MV_HPEL2(hex2[odir+2][0]+bestx,hex2[odir+2][1]+besty,costs[2]);
+                                dir = -2;
+                                COPY2_IF_LT( bestcost, costs[0], dir, odir-1 );
+                                COPY2_IF_LT( bestcost, costs[1], dir, odir );
+                                COPY2_IF_LT( bestcost, costs[2], dir, odir+1 );
+                                if( dir == -2 )
+                                    break;
+                                bestx += hex2[dir+1][0];
+                                besty += hex2[dir+1][1];
+                            }
+                        }
+                        COST_MV_HPEL3(bestx+1,besty-1); /* NOTE(review): COST_MV_HPEL3 updates bestx/besty in place, so later neighbours are relative to a moved centre -- confirm this drift is intended */
+                        COST_MV_HPEL3(bestx+1,besty);
+                        COST_MV_HPEL3(bestx+1,besty+1);
+                        COST_MV_HPEL3(bestx,besty-1);
+                        COST_MV_HPEL3(bestx,besty+1);
+                        COST_MV_HPEL3(bestx-1,besty-1);
+                        COST_MV_HPEL3(bestx-1,besty);
+                        COST_MV_HPEL3(bestx-1,besty+1);
+                        COPY3_IF_LT(bpred_cost,bestcost,bpred_mx,bestx,bpred_my,besty);
+                    }
+                }
+            }
+        }
         bmx = ( bpred_mx + 2 ) >> 2;
         bmy = ( bpred_my + 2 ) >> 2;
         COST_MV( bmx, bmy );
Index: x264.c
===================================================================
--- x264.c	(revision 680)
+++ x264.c	(working copy)
@@ -232,7 +232,8 @@
     H1( " --mvrange-thread Minimum buffer between threads [-1 (auto)]\n" );
     H0( " -m, --subme Subpixel motion estimation and partition\n"
         " decision quality: 1=fast, 7=best. [%d]\n", defaults->analyse.i_subpel_refine );
-    H0( " --b-rdo RD based mode decision for B-frames. Requires subme 6.\n" );
+    H0( " --me-prepass Run an ME prepass on predictors. Requires subme 3 or higher.\n");
+    H0( " --b-rdo RD based mode decision for B-frames. Requires subme 6 or higher.\n" );
     H0( " --mixed-refs Decide references on a per partition basis\n" );
     H1( " --no-chroma-me Ignore chroma in motion estimation\n" );
     H1( " --bime Jointly optimize both MVs in B-frames\n" );
@@ -398,6 +399,7 @@
             { "mvrange", required_argument, NULL, 0 },
             { "mvrange-thread", required_argument, NULL, 0 },
             { "subme", required_argument, NULL, 'm' },
+            { "me-prepass", no_argument, NULL, 0 },
             { "b-rdo", no_argument, NULL, 0 },
             { "mixed-refs", no_argument, NULL, 0 },
             { "no-chroma-me", no_argument, NULL, 0 },
Index: common/common.c
===================================================================
--- common/common.c	(revision 680)
+++ common/common.c	(working copy)
@@ -441,6 +441,8 @@
         p->analyse.i_mv_range_thread = atoi(value);
     OPT2("subme", "subq")
         p->analyse.i_subpel_refine = atoi(value);
+    OPT2("me-prepass", "meprepass")
+        p->analyse.i_me_prepass = atobool(value);
     OPT("bime")
         p->analyse.b_bidir_me = atobool(value);
     OPT("chroma-me")
@@ -879,6 +881,7 @@
     s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
     s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
     s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
+    s += sprintf( s, " me_prepass=%d", p->analyse.i_me_prepass ); /* underscore form, matching the neighbouring mixed_ref / me_range keys */
     s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );
     s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
     s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
Index: x264.h
===================================================================
--- x264.h	(revision 680)
+++ x264.h	(working copy)
@@ -220,6 +220,7 @@
         int i_mv_range; /* maximum length of a mv (in pixels). -1 = auto, based on level */
         int i_mv_range_thread; /* minimum space between threads. -1 = auto, based on number of threads. */
         int i_subpel_refine; /* subpixel motion estimation quality */
+        int i_me_prepass; /* run an ME prepass on predictors */
         int b_bidir_me; /* jointly optimize both MVs in B-frames */
         int b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
         int b_bframe_rdo; /* RD based mode decision for B-frames */