@ -8,7 +8,7 @@
# include "util / debug.h"
# include "util / log.h"
# include "kernel / device / oneapi / kernel .h"
# include "kernel / device / oneapi / globals .h"
CCL_NAMESPACE_BEGIN
@ -19,26 +19,19 @@ static void queue_error_cb(const char *message, void *user_ptr)
}
}
OneapiDevice : : OneapiDevice ( const DeviceInfo & info ,
OneAPIDLLInterface & oneapi_dll_object ,
Stats & stats ,
Profiler & profiler )
OneapiDevice : : OneapiDevice ( const DeviceInfo & info , Stats & stats , Profiler & profiler )
: Device ( info , stats , profiler ) ,
device_queue_ ( nullptr ) ,
texture_info_ ( this , " texture_info " , MEM_GLOBAL ) ,
kg_memory_ ( nullptr ) ,
kg_memory_device_ ( nullptr ) ,
kg_memory_size_ ( 0 ) ,
oneapi_dll_ ( oneapi_dll_object )
kg_memory_size_ ( 0 )
{
need_texture_info_ = false ;
oneapi_ dll_. oneapi_ set_error_cb( queue_error_cb , & oneapi_error_string_ ) ;
oneapi_ set_error_cb( queue_error_cb , & oneapi_error_string_ ) ;
/* OneAPI calls should be initialized on this moment. */
assert ( oneapi_dll_ . oneapi_create_queue ! = nullptr ) ;
bool is_finished_ok = oneapi_dll_ . oneapi_create_queue ( device_queue_ , info . num ) ;
bool is_finished_ok = create_queue ( device_queue_ , info . num ) ;
if ( is_finished_ok = = false ) {
set_error ( " oneAPI queue initialization error: got runtime exception \" " +
oneapi_error_string_ + " \" " ) ;
@ -50,7 +43,7 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info,
}
size_t globals_segment_size ;
is_finished_ok = oneapi_dll_. oneapi_ kernel_globals_size( device_queue_ , globals_segment_size ) ;
is_finished_ok = kernel_globals_size( device_queue_ , globals_segment_size ) ;
if ( is_finished_ok = = false ) {
set_error ( " oneAPI constant memory initialization got runtime exception \" " +
oneapi_error_string_ + " \" " ) ;
@ -59,27 +52,27 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info,
VLOG_DEBUG < < " Successfully created global/constant memory segment (kernel globals object) " ;
}
kg_memory_ = oneapi_dll_. oneapi_ usm_aligned_alloc_host( device_queue_ , globals_segment_size , 16 ) ;
oneapi_dll_. oneapi_ usm_memset( device_queue_ , kg_memory_ , 0 , globals_segment_size ) ;
kg_memory_ = usm_aligned_alloc_host( device_queue_ , globals_segment_size , 16 ) ;
usm_memset( device_queue_ , kg_memory_ , 0 , globals_segment_size ) ;
kg_memory_device_ = oneapi_dll_. oneapi_ usm_alloc_device( device_queue_ , globals_segment_size ) ;
kg_memory_device_ = usm_alloc_device( device_queue_ , globals_segment_size ) ;
kg_memory_size_ = globals_segment_size ;
max_memory_on_device_ = oneapi_dll_. oneapi_ get_memcapacity( device_queue_ ) ;
max_memory_on_device_ = get_memcapacity( ) ;
}
/* Destructor: release constant-memory staging buffers, cached constant-memory
 * wrappers, and finally the SYCL queue itself.
 *
 * NOTE(review): the original span contained both the pre-patch
 * `oneapi_dll_.oneapi_*` calls and their post-patch replacements (an
 * unresolved diff artifact); resolved here to the direct member calls. */
OneapiDevice::~OneapiDevice()
{
  texture_info_.free();
  /* Free the host-side and device-side kernel-globals segments. */
  usm_free(device_queue_, kg_memory_);
  usm_free(device_queue_, kg_memory_device_);

  /* Const-memory wrappers are heap-allocated device_memory objects owned by this map. */
  for (ConstMemMap::iterator mt = const_mem_map_.begin(); mt != const_mem_map_.end(); mt++)
    delete mt->second;

  /* Queue may be null if construction failed before queue creation succeeded. */
  if (device_queue_)
    free_queue(device_queue_);
}
bool OneapiDevice : : check_peer_access ( Device * /*peer_device*/ )
@ -99,7 +92,7 @@ bool OneapiDevice::load_kernels(const uint requested_features)
* with specialization constants , but it hasn ' t been implemented yet . */
( void ) requested_features ;
bool is_finished_ok = oneapi_ dll_. oneapi_ run_test_kernel( device_queue_ ) ;
bool is_finished_ok = oneapi_ run_test_kernel( device_queue_ ) ;
if ( is_finished_ok = = false ) {
set_error ( " oneAPI kernel load: got runtime exception \" " + oneapi_error_string_ + " \" " ) ;
}
@ -138,7 +131,7 @@ void OneapiDevice::generic_alloc(device_memory &mem)
* type has been used for oneAPI device in order to better fit in Cycles architecture . */
void * device_pointer = nullptr ;
if ( mem . memory_size ( ) + stats . mem_used < max_memory_on_device_ )
device_pointer = oneapi_dll_. oneapi_ usm_alloc_device( device_queue_ , memory_size ) ;
device_pointer = usm_alloc_device( device_queue_ , memory_size ) ;
if ( device_pointer = = nullptr ) {
set_error ( " oneAPI kernel - device memory allocation error for " +
string_human_readable_size ( mem . memory_size ( ) ) +
@ -163,8 +156,7 @@ void OneapiDevice::generic_copy_to(device_memory &mem)
/* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
assert ( mem . host_pointer ) ;
assert ( device_queue_ ) ;
oneapi_dll_ . oneapi_usm_memcpy (
device_queue_ , ( void * ) mem . device_pointer , ( void * ) mem . host_pointer , memory_size ) ;
usm_memcpy ( device_queue_ , ( void * ) mem . device_pointer , ( void * ) mem . host_pointer , memory_size ) ;
}
/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
@ -178,11 +170,6 @@ string OneapiDevice::oneapi_error_message()
return string ( oneapi_error_string_ ) ;
}
/* Accessor for the dynamically-loaded oneAPI kernel interface.
 * Returns the interface struct by value (it is a table of function pointers). */
OneAPIDLLInterface OneapiDevice::oneapi_dll_object()
{
  return oneapi_dll_;
}
void * OneapiDevice : : kernel_globals_device_pointer ( )
{
return kg_memory_device_ ;
@ -198,7 +185,7 @@ void OneapiDevice::generic_free(device_memory &mem)
mem . device_size = 0 ;
assert ( device_queue_ ) ;
oneapi_dll_. oneapi_ usm_free( device_queue_ , ( void * ) mem . device_pointer ) ;
usm_free( device_queue_ , ( void * ) mem . device_pointer ) ;
mem . device_pointer = 0 ;
}
@ -266,8 +253,7 @@ void OneapiDevice::mem_copy_from(device_memory &mem, size_t y, size_t w, size_t
if ( mem . device_pointer ) {
char * shifted_host = reinterpret_cast < char * > ( mem . host_pointer ) + offset ;
char * shifted_device = reinterpret_cast < char * > ( mem . device_pointer ) + offset ;
bool is_finished_ok = oneapi_dll_ . oneapi_usm_memcpy (
device_queue_ , shifted_host , shifted_device , size ) ;
bool is_finished_ok = usm_memcpy ( device_queue_ , shifted_host , shifted_device , size ) ;
if ( is_finished_ok = = false ) {
set_error ( " oneAPI memory operation error: got runtime exception \" " +
oneapi_error_string_ + " \" " ) ;
@ -292,7 +278,7 @@ void OneapiDevice::mem_zero(device_memory &mem)
}
assert ( device_queue_ ) ;
bool is_finished_ok = oneapi_dll_. oneapi_ usm_memset(
bool is_finished_ok = usm_memset(
device_queue_ , ( void * ) mem . device_pointer , 0 , mem . memory_size ( ) ) ;
if ( is_finished_ok = = false ) {
set_error ( " oneAPI memory operation error: got runtime exception \" " + oneapi_error_string_ +
@ -349,10 +335,9 @@ void OneapiDevice::const_copy_to(const char *name, void *host, size_t size)
memcpy ( data - > data ( ) , host , size ) ;
data - > copy_to_device ( ) ;
oneapi_dll_ . oneapi_set_global_memory (
device_queue_ , kg_memory_ , name , ( void * ) data - > device_pointer ) ;
set_global_memory ( device_queue_ , kg_memory_ , name , ( void * ) data - > device_pointer ) ;
oneapi_dll_. oneapi_ usm_memcpy( device_queue_ , kg_memory_device_ , kg_memory_ , kg_memory_size_ ) ;
usm_memcpy( device_queue_ , kg_memory_device_ , kg_memory_ , kg_memory_size_ ) ;
}
void OneapiDevice : : global_alloc ( device_memory & mem )
@ -367,10 +352,9 @@ void OneapiDevice::global_alloc(device_memory &mem)
generic_alloc ( mem ) ;
generic_copy_to ( mem ) ;
oneapi_dll_ . oneapi_set_global_memory (
device_queue_ , kg_memory_ , mem . name , ( void * ) mem . device_pointer ) ;
set_global_memory ( device_queue_ , kg_memory_ , mem . name , ( void * ) mem . device_pointer ) ;
oneapi_dll_. oneapi_ usm_memcpy( device_queue_ , kg_memory_device_ , kg_memory_ , kg_memory_size_ ) ;
usm_memcpy( device_queue_ , kg_memory_device_ , kg_memory_ , kg_memory_size_ ) ;
}
void OneapiDevice : : global_free ( device_memory & mem )
@ -410,18 +394,6 @@ unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
return make_unique < OneapiDeviceQueue > ( this ) ;
}
int OneapiDevice : : get_num_multiprocessors ( )
{
assert ( device_queue_ ) ;
return oneapi_dll_ . oneapi_get_num_multiprocessors ( device_queue_ ) ;
}
int OneapiDevice : : get_max_num_threads_per_multiprocessor ( )
{
assert ( device_queue_ ) ;
return oneapi_dll_ . oneapi_get_max_num_threads_per_multiprocessor ( device_queue_ ) ;
}
bool OneapiDevice : : should_use_graphics_interop ( )
{
/* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
@ -432,13 +404,460 @@ bool OneapiDevice::should_use_graphics_interop()
void * OneapiDevice : : usm_aligned_alloc_host ( size_t memory_size , size_t alignment )
{
assert ( device_queue_ ) ;
return oneapi_dll_ . oneapi_ usm_aligned_alloc_host( device_queue_ , memory_size , alignment ) ;
return usm_aligned_alloc_host( device_queue_ , memory_size , alignment ) ;
}
void OneapiDevice : : usm_free ( void * usm_ptr )
{
assert ( device_queue_ ) ;
return oneapi_dll_ . oneapi_usm_free ( device_queue_ , usm_ptr ) ;
return usm_free ( device_queue_ , usm_ptr ) ;
}
void OneapiDevice : : check_usm ( SyclQueue * queue_ , const void * usm_ptr , bool allow_host = false )
{
# ifdef _DEBUG
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
sycl : : info : : device_type device_type =
queue - > get_device ( ) . get_info < sycl : : info : : device : : device_type > ( ) ;
sycl : : usm : : alloc usm_type = get_pointer_type ( usm_ptr , queue - > get_context ( ) ) ;
( void ) usm_type ;
assert ( usm_type = = sycl : : usm : : alloc : : device | |
( ( device_type = = sycl : : info : : device_type : : host | |
device_type = = sycl : : info : : device_type : : cpu | | allow_host ) & &
usm_type = = sycl : : usm : : alloc : : host ) ) ;
# endif
}
bool OneapiDevice : : create_queue ( SyclQueue * & external_queue , int device_index )
{
bool finished_correct = true ;
try {
std : : vector < sycl : : device > devices = OneapiDevice : : available_devices ( ) ;
if ( device_index < 0 | | device_index > = devices . size ( ) ) {
return false ;
}
sycl : : queue * created_queue = new sycl : : queue ( devices [ device_index ] ,
sycl : : property : : queue : : in_order ( ) ) ;
external_queue = reinterpret_cast < SyclQueue * > ( created_queue ) ;
}
catch ( sycl : : exception const & e ) {
finished_correct = false ;
oneapi_error_string_ = e . what ( ) ;
}
return finished_correct ;
}
void OneapiDevice : : free_queue ( SyclQueue * queue_ )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
delete queue ;
}
void * OneapiDevice : : usm_aligned_alloc_host ( SyclQueue * queue_ , size_t memory_size , size_t alignment )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
return sycl : : aligned_alloc_host ( alignment , memory_size , * queue ) ;
}
void * OneapiDevice : : usm_alloc_device ( SyclQueue * queue_ , size_t memory_size )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
return sycl : : malloc_device ( memory_size , * queue ) ;
}
void OneapiDevice : : usm_free ( SyclQueue * queue_ , void * usm_ptr )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
OneapiDevice : : check_usm ( queue_ , usm_ptr , true ) ;
sycl : : free ( usm_ptr , * queue ) ;
}
bool OneapiDevice : : usm_memcpy ( SyclQueue * queue_ , void * dest , void * src , size_t num_bytes )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
OneapiDevice : : check_usm ( queue_ , dest , true ) ;
OneapiDevice : : check_usm ( queue_ , src , true ) ;
sycl : : event mem_event = queue - > memcpy ( dest , src , num_bytes ) ;
# ifdef WITH_CYCLES_DEBUG
try {
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages . Due to impact on occupancy , it makes sense to enable it only during Cycles debug .
*/
mem_event . wait_and_throw ( ) ;
return true ;
}
catch ( sycl : : exception const & e ) {
oneapi_error_string_ = e . what ( ) ;
return false ;
}
# else
sycl : : usm : : alloc dest_type = get_pointer_type ( dest , queue - > get_context ( ) ) ;
sycl : : usm : : alloc src_type = get_pointer_type ( src , queue - > get_context ( ) ) ;
bool from_device_to_host = dest_type = = sycl : : usm : : alloc : : host & &
src_type = = sycl : : usm : : alloc : : device ;
bool host_or_device_memop_with_offset = dest_type = = sycl : : usm : : alloc : : unknown | |
src_type = = sycl : : usm : : alloc : : unknown ;
/* NOTE(@sirgienko) Host-side blocking wait on this operation is mandatory, otherwise the host
* may not wait until the end of the transfer before using the memory .
*/
if ( from_device_to_host | | host_or_device_memop_with_offset )
mem_event . wait ( ) ;
return true ;
# endif
}
bool OneapiDevice : : usm_memset ( SyclQueue * queue_ ,
void * usm_ptr ,
unsigned char value ,
size_t num_bytes )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
OneapiDevice : : check_usm ( queue_ , usm_ptr , true ) ;
sycl : : event mem_event = queue - > memset ( usm_ptr , value , num_bytes ) ;
# ifdef WITH_CYCLES_DEBUG
try {
/* NOTE(@nsirgien) Waiting on memory operation may give more precise error
* messages . Due to impact on occupancy , it makes sense to enable it only during Cycles debug .
*/
mem_event . wait_and_throw ( ) ;
return true ;
}
catch ( sycl : : exception const & e ) {
oneapi_error_string_ = e . what ( ) ;
return false ;
}
# else
( void ) mem_event ;
return true ;
# endif
}
bool OneapiDevice : : queue_synchronize ( SyclQueue * queue_ )
{
assert ( queue_ ) ;
sycl : : queue * queue = reinterpret_cast < sycl : : queue * > ( queue_ ) ;
try {
queue - > wait_and_throw ( ) ;
return true ;
}
catch ( sycl : : exception const & e ) {
oneapi_error_string_ = e . what ( ) ;
return false ;
}
}
bool OneapiDevice : : kernel_globals_size ( SyclQueue * queue_ , size_t & kernel_global_size )
{
kernel_global_size = sizeof ( KernelGlobalsGPU ) ;
return true ;
}
void OneapiDevice : : set_global_memory ( SyclQueue * queue_ ,
void * kernel_globals ,
const char * memory_name ,
void * memory_device_pointer )
{
assert ( queue_ ) ;
assert ( kernel_globals ) ;
assert ( memory_name ) ;
assert ( memory_device_pointer ) ;
KernelGlobalsGPU * globals = ( KernelGlobalsGPU * ) kernel_globals ;
OneapiDevice : : check_usm ( queue_ , memory_device_pointer ) ;
OneapiDevice : : check_usm ( queue_ , kernel_globals , true ) ;
std : : string matched_name ( memory_name ) ;
/* This macro will change global ptr of KernelGlobals via name matching. */
# define KERNEL_DATA_ARRAY(type, name) \
else if ( # name = = matched_name ) \
{ \
globals - > __ # # name = ( type * ) memory_device_pointer ; \
return ; \
}
if ( false ) {
}
else if ( " integrator_state " = = matched_name ) {
globals - > integrator_state = ( IntegratorStateGPU * ) memory_device_pointer ;
return ;
}
KERNEL_DATA_ARRAY ( KernelData , data )
# include "kernel / data_arrays.h"
else
{
std : : cerr < < " Can't found global/constant memory with name \" " < < matched_name < < " \" ! "
< < std : : endl ;
assert ( false ) ;
}
# undef KERNEL_DATA_ARRAY
}
bool OneapiDevice : : enqueue_kernel ( KernelContext * kernel_context ,
int kernel ,
size_t global_size ,
void * * args )
{
return oneapi_enqueue_kernel ( kernel_context , kernel , global_size , args ) ;
}
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
 * since Windows driver 101.3268. */
/* The same min compute-runtime version is currently required across Windows and Linux.
 * For Windows driver 101.3430, compute-runtime version is 23904. */
static const int lowest_supported_driver_version_win = 1013430;
static const int lowest_supported_driver_version_neo = 23904;
int OneapiDevice : : parse_driver_build_version ( const sycl : : device & device )
{
const std : : string & driver_version = device . get_info < sycl : : info : : device : : driver_version > ( ) ;
int driver_build_version = 0 ;
size_t second_dot_position = driver_version . find ( ' . ' , driver_version . find ( ' . ' ) + 1 ) ;
if ( second_dot_position = = std : : string : : npos ) {
std : : cerr < < " Unable to parse unknown Intel GPU driver version \" " < < driver_version
< < " \" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0), "
< < " xx.xx.xxx.xxxx (Windows) for device \" "
< < device . get_info < sycl : : info : : device : : name > ( ) < < " \" . " < < std : : endl ;
}
else {
try {
size_t third_dot_position = driver_version . find ( ' . ' , second_dot_position + 1 ) ;
if ( third_dot_position ! = std : : string : : npos ) {
const std : : string & third_number_substr = driver_version . substr (
second_dot_position + 1 , third_dot_position - second_dot_position - 1 ) ;
const std : : string & forth_number_substr = driver_version . substr ( third_dot_position + 1 ) ;
if ( third_number_substr . length ( ) = = 3 & & forth_number_substr . length ( ) = = 4 )
driver_build_version = std : : stoi ( third_number_substr ) * 10000 +
std : : stoi ( forth_number_substr ) ;
}
else {
const std : : string & third_number_substr = driver_version . substr ( second_dot_position + 1 ) ;
driver_build_version = std : : stoi ( third_number_substr ) ;
}
}
catch ( std : : invalid_argument & ) {
std : : cerr < < " Unable to parse unknown Intel GPU driver version \" " < < driver_version
< < " \" does not match xx.xx.xxxxx (Linux), x.x.xxxx (L0), "
< < " xx.xx.xxx.xxxx (Windows) for device \" "
< < device . get_info < sycl : : info : : device : : name > ( ) < < " \" . " < < std : : endl ;
}
}
return driver_build_version ;
}
std : : vector < sycl : : device > OneapiDevice : : available_devices ( )
{
bool allow_all_devices = false ;
if ( getenv ( " CYCLES_ONEAPI_ALL_DEVICES " ) ! = nullptr )
allow_all_devices = true ;
/* Host device is useful only for debugging at the moment
* so we hide this device with default build settings . */
# ifdef WITH_ONEAPI_SYCL_HOST_ENABLED
bool allow_host = true ;
# else
bool allow_host = false ;
# endif
const std : : vector < sycl : : platform > & oneapi_platforms = sycl : : platform : : get_platforms ( ) ;
std : : vector < sycl : : device > available_devices ;
for ( const sycl : : platform & platform : oneapi_platforms ) {
/* ignore OpenCL platforms to avoid using the same devices through both Level-Zero and OpenCL.
*/
if ( platform . get_backend ( ) = = sycl : : backend : : opencl ) {
continue ;
}
const std : : vector < sycl : : device > & oneapi_devices =
( allow_all_devices | | allow_host ) ? platform . get_devices ( sycl : : info : : device_type : : all ) :
platform . get_devices ( sycl : : info : : device_type : : gpu ) ;
for ( const sycl : : device & device : oneapi_devices ) {
if ( allow_all_devices ) {
/* still filter out host device if build doesn't support it. */
if ( allow_host | | ! device . is_host ( ) ) {
available_devices . push_back ( device ) ;
}
}
else {
bool filter_out = false ;
/* For now we support all Intel(R) Arc(TM) devices and likely any future GPU,
* assuming they have either more than 96 Execution Units or not 7 threads per EU .
* Official support can be broaden to older and smaller GPUs once ready . */
if ( device . is_gpu ( ) & & platform . get_backend ( ) = = sycl : : backend : : ext_oneapi_level_zero ) {
/* Filtered-out defaults in-case these values aren't available through too old L0
* runtime . */
int number_of_eus = 96 ;
int threads_per_eu = 7 ;
if ( device . has ( sycl : : aspect : : ext_intel_gpu_eu_count ) ) {
number_of_eus = device . get_info < sycl : : info : : device : : ext_intel_gpu_eu_count > ( ) ;
}
if ( device . has ( sycl : : aspect : : ext_intel_gpu_hw_threads_per_eu ) ) {
threads_per_eu =
device . get_info < sycl : : info : : device : : ext_intel_gpu_hw_threads_per_eu > ( ) ;
}
/* This filters out all Level-Zero supported GPUs from older generation than Arc. */
if ( number_of_eus < = 96 & & threads_per_eu = = 7 ) {
filter_out = true ;
}
/* if not already filtered out, check driver version. */
if ( ! filter_out ) {
int driver_build_version = parse_driver_build_version ( device ) ;
if ( ( driver_build_version > 100000 & &
driver_build_version < lowest_supported_driver_version_win ) | |
driver_build_version < lowest_supported_driver_version_neo ) {
filter_out = true ;
}
}
}
else if ( ! allow_host & & device . is_host ( ) ) {
filter_out = true ;
}
else if ( ! allow_all_devices ) {
filter_out = true ;
}
if ( ! filter_out ) {
available_devices . push_back ( device ) ;
}
}
}
}
return available_devices ;
}
char * OneapiDevice : : device_capabilities ( )
{
std : : stringstream capabilities ;
const std : : vector < sycl : : device > & oneapi_devices = available_devices ( ) ;
for ( const sycl : : device & device : oneapi_devices ) {
const std : : string & name = device . get_info < sycl : : info : : device : : name > ( ) ;
capabilities < < std : : string ( " \t " ) < < name < < " \n " ;
# define WRITE_ATTR(attribute_name, attribute_variable) \
capabilities < < " \t \t sycl::info::device:: " # attribute_name " \t \t \t " < < attribute_variable \
< < " \n " ;
# define GET_NUM_ATTR(attribute) \
{ \
size_t attribute = ( size_t ) device . get_info < sycl : : info : : device : : attribute > ( ) ; \
capabilities < < " \t \t sycl::info::device:: " # attribute " \t \t \t " < < attribute < < " \n " ; \
}
GET_NUM_ATTR ( vendor_id )
GET_NUM_ATTR ( max_compute_units )
GET_NUM_ATTR ( max_work_item_dimensions )
sycl : : id < 3 > max_work_item_sizes =
device . get_info < sycl : : info : : device : : max_work_item_sizes < 3 > > ( ) ;
WRITE_ATTR ( " max_work_item_sizes_dim0 " , ( ( size_t ) max_work_item_sizes . get ( 0 ) ) )
WRITE_ATTR ( " max_work_item_sizes_dim1 " , ( ( size_t ) max_work_item_sizes . get ( 1 ) ) )
WRITE_ATTR ( " max_work_item_sizes_dim2 " , ( ( size_t ) max_work_item_sizes . get ( 2 ) ) )
GET_NUM_ATTR ( max_work_group_size )
GET_NUM_ATTR ( max_num_sub_groups )
GET_NUM_ATTR ( sub_group_independent_forward_progress )
GET_NUM_ATTR ( preferred_vector_width_char )
GET_NUM_ATTR ( preferred_vector_width_short )
GET_NUM_ATTR ( preferred_vector_width_int )
GET_NUM_ATTR ( preferred_vector_width_long )
GET_NUM_ATTR ( preferred_vector_width_float )
GET_NUM_ATTR ( preferred_vector_width_double )
GET_NUM_ATTR ( preferred_vector_width_half )
GET_NUM_ATTR ( native_vector_width_char )
GET_NUM_ATTR ( native_vector_width_short )
GET_NUM_ATTR ( native_vector_width_int )
GET_NUM_ATTR ( native_vector_width_long )
GET_NUM_ATTR ( native_vector_width_float )
GET_NUM_ATTR ( native_vector_width_double )
GET_NUM_ATTR ( native_vector_width_half )
size_t max_clock_frequency =
( size_t ) ( device . is_host ( ) ? ( size_t ) 0 :
device . get_info < sycl : : info : : device : : max_clock_frequency > ( ) ) ;
WRITE_ATTR ( " max_clock_frequency " , max_clock_frequency )
GET_NUM_ATTR ( address_bits )
GET_NUM_ATTR ( max_mem_alloc_size )
/* NOTE(@nsirgien): Implementation doesn't use image support as bindless images aren't
* supported so we always return false , even if device supports HW texture usage acceleration .
*/
bool image_support = false ;
WRITE_ATTR ( " image_support " , ( size_t ) image_support )
GET_NUM_ATTR ( max_parameter_size )
GET_NUM_ATTR ( mem_base_addr_align )
GET_NUM_ATTR ( global_mem_size )
GET_NUM_ATTR ( local_mem_size )
GET_NUM_ATTR ( error_correction_support )
GET_NUM_ATTR ( profiling_timer_resolution )
GET_NUM_ATTR ( is_available )
# undef GET_NUM_ATTR
# undef WRITE_ATTR
capabilities < < " \n " ;
}
return : : strdup ( capabilities . str ( ) . c_str ( ) ) ;
}
void OneapiDevice : : iterate_devices ( OneAPIDeviceIteratorCallback cb , void * user_ptr )
{
int num = 0 ;
std : : vector < sycl : : device > devices = OneapiDevice : : available_devices ( ) ;
for ( sycl : : device & device : devices ) {
const std : : string & platform_name =
device . get_platform ( ) . get_info < sycl : : info : : platform : : name > ( ) ;
std : : string name = device . get_info < sycl : : info : : device : : name > ( ) ;
std : : string id = " ONEAPI_ " + platform_name + " _ " + name ;
if ( device . has ( sycl : : aspect : : ext_intel_pci_address ) ) {
id . append ( " _ " + device . get_info < sycl : : info : : device : : ext_intel_pci_address > ( ) ) ;
}
( cb ) ( id . c_str ( ) , name . c_str ( ) , num , user_ptr ) ;
num + + ;
}
}
size_t OneapiDevice : : get_memcapacity ( )
{
return reinterpret_cast < sycl : : queue * > ( device_queue_ )
- > get_device ( )
. get_info < sycl : : info : : device : : global_mem_size > ( ) ;
}
int OneapiDevice : : get_num_multiprocessors ( )
{
const sycl : : device & device = reinterpret_cast < sycl : : queue * > ( device_queue_ ) - > get_device ( ) ;
if ( device . has ( sycl : : aspect : : ext_intel_gpu_eu_count ) ) {
return device . get_info < sycl : : info : : device : : ext_intel_gpu_eu_count > ( ) ;
}
else
return 0 ;
}
int OneapiDevice : : get_max_num_threads_per_multiprocessor ( )
{
const sycl : : device & device = reinterpret_cast < sycl : : queue * > ( device_queue_ ) - > get_device ( ) ;
if ( device . has ( sycl : : aspect : : ext_intel_gpu_eu_simd_width ) & &
device . has ( sycl : : aspect : : ext_intel_gpu_hw_threads_per_eu ) ) {
return device . get_info < sycl : : info : : device : : ext_intel_gpu_eu_simd_width > ( ) *
device . get_info < sycl : : info : : device : : ext_intel_gpu_hw_threads_per_eu > ( ) ;
}
else
return 0 ;
}
CCL_NAMESPACE_END