Index: encoder/encoder.c =================================================================== --- encoder/encoder.c (revision 650) +++ encoder/encoder.c (working copy) @@ -64,6 +64,7 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_out ); +static int x264_slices_write_thread( x264_t *h ); /**************************************************************************** * @@ -696,12 +697,20 @@ for( i = 0; i < h->param.i_threads; i++ ) { + x264_t *t = h->thread[i]; if( i > 0 ) - *h->thread[i] = *h; - h->thread[i]->fdec = x264_frame_pop_unused( h ); - h->thread[i]->out.p_bitstream = x264_malloc( h->out.i_bitstream ); - if( x264_macroblock_cache_init( h->thread[i] ) < 0 ) + *t = *h; + t->fdec = x264_frame_pop_unused( h ); + t->out.p_bitstream = x264_malloc( h->out.i_bitstream ); + if( x264_macroblock_cache_init( t ) < 0 ) return NULL; + + if( h->param.i_threads > 1 ) + { + pthread_cond_init( &t->thread_active_cv, NULL ); + pthread_mutex_init( &t->thread_active_mutex, NULL ); + pthread_create( &t->thread_handle, NULL, (void*(*)(void*))x264_slices_write_thread, t ); + } } if( x264_ratecontrol_new( h ) < 0 ) @@ -936,7 +945,8 @@ if( h->param.i_threads > 1 && h->fdec->b_kept_as_ref ) { - x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) ); + x264_cond_broadcast( &h->fdec->cv, &h->fdec->mutex, &h->fdec->i_lines_completed, + mb_y*16 + (b_end ? 
10000 : -(X264_THREAD_HEIGHT << h->sh.b_mbaff)) ); } } @@ -1236,6 +1246,30 @@ return 0; } +static int x264_slices_write_thread( x264_t *h ) +{ + for(;;) + { + int i_frame_size; + + x264_cond_wait( &h->thread_active_cv, &h->thread_active_mutex, &h->thread_active, 1, 0 ); + if( h->thread_exit ) + return 0; + + x264_stack_align( x264_slice_write, h ); + i_frame_size = h->out.nal[h->out.i_nal-1].i_payload; + x264_fdec_filter_row( h, h->sps->i_mb_height ); + + h->out.i_frame_size = i_frame_size; + + x264_cond_broadcast( &h->thread_active_cv, &h->thread_active_mutex, &h->thread_active, 0 ); + if( h->thread_exit ) + return 0; + } + + return 0; +} + /**************************************************************************** * x264_encoder_encode: * XXX: i_poc : is the poc of the current given picture @@ -1479,10 +1513,7 @@ /* Write frame */ if( h->param.i_threads > 1 ) - { - pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ); - h->b_thread_active = 1; - } + x264_cond_broadcast( &h->thread_active_cv, &h->thread_active_mutex, &h->thread_active, 1 ); else x264_slices_write( h ); @@ -1598,11 +1629,9 @@ int i; char psz_message[80]; - if( h->b_thread_active ) - { - pthread_join( h->thread_handle, NULL ); - h->b_thread_active = 0; - } + if( h->param.i_threads > 1 ) + x264_cond_wait( &h->thread_active_cv, &h->thread_active_mutex, &h->thread_active, 0, 0 ); + if( !h->out.i_nal ) { pic_out->i_type = X264_TYPE_AUTO; @@ -1771,11 +1800,18 @@ int64_t i_yuv_size = 3 * h->param.i_width * h->param.i_height / 2; int i; - for( i=0; i<h->param.i_threads; i++ ) + if( h->param.i_threads > 1 ) { - // don't strictly have to wait for the other threads, but it's simpler than cancelling them - if( h->thread[i]->b_thread_active ) - pthread_join( h->thread[i]->thread_handle, NULL ); + for( i=0; i<h->param.i_threads; i++ ) + { + // don't strictly have to wait for the other threads, but it's simpler than cancelling them + x264_t *t = h->thread[i]; + t->thread_exit = 1; + 
x264_cond_broadcast( &t->thread_active_cv, &t->thread_active_mutex, &t->thread_active, 1 ); + pthread_join( t->thread_handle, NULL ); + pthread_cond_destroy( &t->thread_active_cv ); + pthread_mutex_destroy( &t->thread_active_mutex ); + } } #ifdef DEBUG_BENCHMARK Index: encoder/ratecontrol.c =================================================================== --- encoder/ratecontrol.c (revision 650) +++ encoder/ratecontrol.c (working copy) @@ -466,6 +466,7 @@ x264_free( p ); } + rc->frame_size_planned = rc->buffer_rate; // init so that idle threads have no effect on the planning for( i=1; i<h->param.i_threads; i++ ) { h->thread[i]->rc = rc+i; @@ -1116,8 +1117,6 @@ { x264_t *t = h->thread[ (j+i)%h->param.i_threads ]; double bits = t->rc->frame_size_planned; - if( !t->b_thread_active ) - continue; rcc->buffer_fill += rcc->buffer_rate - bits; rcc->buffer_fill = x264_clip3( rcc->buffer_fill, 0, rcc->buffer_size ); } Index: encoder/analyse.c =================================================================== --- encoder/analyse.c (revision 650) +++ encoder/analyse.c (working copy) @@ -248,7 +248,7 @@ int i_ref = i ? h->i_ref1 : h->i_ref0; for( j=0; j<i_ref; j++ ) { - x264_frame_cond_wait( fref[j], thresh ); + x264_cond_wait( &fref[j]->cv, &fref[j]->mutex, &fref[j]->i_lines_completed, thresh, 1 ); thread_mvy_range = X264_MIN( thread_mvy_range, fref[j]->i_lines_completed - pix_y ); } } Index: common/cpu.c =================================================================== --- common/cpu.c (revision 650) +++ common/cpu.c (working copy) @@ -240,3 +240,28 @@ return 1; #endif } + +#ifdef HAVE_PTHREAD +void x264_cond_broadcast( pthread_cond_t *cv, pthread_mutex_t *mutex, int *var, int val ) +{ + pthread_mutex_lock( mutex ); + *var = val; + pthread_cond_broadcast( cv ); + pthread_mutex_unlock( mutex ); +} + +void x264_cond_wait( pthread_cond_t *cv, pthread_mutex_t *mutex, int *var, int val, int cmp ) +{ + pthread_mutex_lock( mutex ); + while( cmp ? 
(*var < val*cmp) : (*var != val) ) + pthread_cond_wait( cv, mutex ); + pthread_mutex_unlock( mutex ); +} + +#else +void x264_cond_broadcast( pthread_cond_t *cv, pthread_mutex_t *mutex, int *var, int val ) +{} +void x264_cond_wait( pthread_cond_t *cv, pthread_mutex_t *mutex, int *var, int val, int cmp ) +{} +#endif + Index: common/cpu.h =================================================================== --- common/cpu.h (revision 650) +++ common/cpu.h (working copy) @@ -44,4 +44,7 @@ #define x264_stack_align(func,arg) func(arg) #endif +void x264_cond_broadcast( pthread_cond_t *cv, pthread_mutex_t *mutex, int *var, int val ); +void x264_cond_wait( pthread_cond_t *cv, pthread_mutex_t *mutex, int *var, int val, int cmp ); + #endif Index: common/common.h =================================================================== --- common/common.h (revision 650) +++ common/common.h (working copy) @@ -144,6 +144,7 @@ #include "cabac.h" #include "csp.h" #include "quant.h" +#include "cpu.h" /**************************************************************************** * Generals functions @@ -305,7 +306,10 @@ x264_t *thread[X264_THREAD_MAX]; pthread_t thread_handle; - int b_thread_active; + pthread_mutex_t thread_active_mutex; + pthread_cond_t thread_active_cv; + int thread_active; + int thread_exit; int i_thread_phase; /* which thread to use for the next frame */ /* bitstream output */