@ -41,11 +41,14 @@ public:
CUdevice cuDevice ;
CUcontext cuContext ;
CUmodule cuModule ;
CUstream cuStream ;
CUevent tileDone ;
map < device_ptr , bool > tex_interp_map ;
int cuDevId ;
int cuDevArchitecture ;
bool first_error ;
bool use_texture_storage ;
unsigned int target_update_frequency ;
struct PixelMem {
GLuint cuPBO ;
@ -177,6 +180,8 @@ public:
first_error = true ;
background = background_ ;
use_texture_storage = true ;
/* we try an update / sync every 1000 ms */
target_update_frequency = 1000 ;
cuDevId = info . num ;
cuDevice = 0 ;
@ -207,6 +212,9 @@ public:
if ( cuda_error_ ( result , " cuCtxCreate " ) )
return ;
cuda_assert ( cuStreamCreate ( & cuStream , 0 ) )
cuda_assert ( cuEventCreate ( & tileDone , 0x1 ) )
int major , minor ;
cuDeviceComputeCapability ( & major , & minor , cuDevId ) ;
cuDevArchitecture = major * 100 + minor * 10 ;
@ -223,6 +231,8 @@ public:
{
task_pool . stop ( ) ;
cuda_assert ( cuEventDestroy ( tileDone ) )
cuda_assert ( cuStreamDestroy ( cuStream ) )
cuda_assert ( cuCtxDestroy ( cuContext ) )
}
@ -645,9 +655,7 @@ public:
cuda_assert ( cuFuncSetCacheConfig ( cuPathTrace , CU_FUNC_CACHE_PREFER_L1 ) )
cuda_assert ( cuFuncSetBlockShape ( cuPathTrace , xthreads , ythreads , 1 ) )
cuda_assert ( cuLaunchGrid ( cuPathTrace , xblocks , yblocks ) )
cuda_assert ( cuCtxSynchronize ( ) )
cuda_assert ( cuLaunchGridAsync ( cuPathTrace , xblocks , yblocks , cuStream ) )
cuda_pop_context ( ) ;
}
@ -964,11 +972,16 @@ public:
bool branched = task - > integrator_branched ;
/* keep rendering tiles until done */
while ( task - > acquire_tile ( this , tile ) ) {
int start_sample = tile . start_sample ;
int end_sample = tile . start_sample + tile . num_samples ;
boost : : posix_time : : ptime start_time ( boost : : posix_time : : microsec_clock : : local_time ( ) ) ;
boost : : posix_time : : ptime last_time = start_time ;
int sync_sample = 10 ;
for ( int sample = start_sample ; sample < end_sample ; sample + + ) {
if ( task - > get_cancel ( ) ) {
if ( task - > need_finish_queue = = false )
@ -978,8 +991,28 @@ public:
path_trace ( tile , sample , branched ) ;
tile . sample = sample + 1 ;
task - > update_progress ( tile ) ;
if ( sample = = sync_sample ) {
cuda_push_context ( ) ;
cuda_assert ( cuEventRecord ( tileDone , cuStream ) )
cuda_assert ( cuEventSynchronize ( tileDone ) )
/* Do some time keeping to find out if we need to sync less */
boost : : posix_time : : ptime current_time ( boost : : posix_time : : microsec_clock : : local_time ( ) ) ;
boost : : posix_time : : time_duration sample_duration = current_time - last_time ;
long msec = sample_duration . total_milliseconds ( ) ;
float scaling_factor = ( float ) target_update_frequency / ( float ) msec ;
/* sync at earliest next sample and probably later */
sync_sample = ( sample + 1 ) + sync_sample * ceil ( scaling_factor ) ;
sync_sample = min ( end_sample - 1 , sync_sample ) ; // make sure we sync the last sample always
last_time = current_time ;
cuda_pop_context ( ) ;
}
}
task - > release_tile ( tile ) ;