CMake: improve selection/filtering of archs of Intel GPU binaries #112669

Merged
Xavier Hallade merged 3 commits from xavierh/blender:cmake_ocloc_updates into main 2023-10-11 14:59:46 +02:00
2 changed files with 27 additions and 4 deletions

View File

@ -684,8 +684,10 @@ This option is only for debugging purposes."
)
# https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
# acm-g10 is the target for the first Intel Arc Alchemist GPUs.
set(CYCLES_ONEAPI_SPIR64_GEN_DEVICES "acm-g10" CACHE STRING "\
# The target architecture levels can be retrieved from `ocloc` output when running
xavierh marked this conversation as resolved Outdated

Where do these magic numbers come from? I was expecting to find them on the page linked in the comment right above them, but that doesn't seem to be the case

Where do these magic numbers come from? I was expecting to find them on the page linked in the comment right above them, but that doesn't seem to be the case

The linked page is the most user-friendly explanation of how to pick targets, using ocloc compile --help.
It doesn't say anything about the magic numbers (architecture versions); I'll ask for it to be improved.

I retrieved the architecture version number for Alchemist using ocloc ids acm-g10. The Meteor Lake one is still a bit more convoluted to get: ocloc prints it when compiling for a given Meteor Lake device ID, i.e. ocloc compile -device 0x7d40 -file test.c. It is also visible in https://github.com/intel/compute-runtime/blob/5569dac6d0c80ec49b8dac118135ec9ec008cd4b/cmake/setup_platform_flags.cmake#L288

That's maybe a lot of info, which options do you prefer:
a. no change here, I get the webpage improved
b. all the above explanation in the commit message
c. all the above explanation in a comment
d. something else?

The linked page is the most user-friendly explanation of how to pick targets, using `ocloc compile --help`. It doesn't say anything about the magic numbers (architecture versions); I'll ask for it to be improved. I retrieved the architecture version number for Alchemist using `ocloc ids acm-g10`. The Meteor Lake one is still a bit more convoluted to get: ocloc prints it when compiling for a given Meteor Lake device ID, i.e. `ocloc compile -device 0x7d40 -file test.c`. It is also visible in https://github.com/intel/compute-runtime/blob/5569dac6d0c80ec49b8dac118135ec9ec008cd4b/cmake/setup_platform_flags.cmake#L288 That's maybe a lot of info; which option do you prefer: a. no change here, I get the webpage improved; b. all of the above explanation in the commit message; c. all of the above explanation in a comment; d. something else?

I went with option c. in the latest update.

I went with option c. in the latest update.
# `ocloc compile -device {device_id} test.c` for a given GPU's PCI device ID.
# 12.55.8 is for Arc Alchemist GPUs. 12.70.0 for Meteor Lake iGPUs.
# Cache entry holding the list of Intel GPU architecture versions for which
# oneAPI binaries get built (a regular CMake semicolon-separated list).
set(
  CYCLES_ONEAPI_INTEL_BINARIES_ARCH "12.55.8;12.70.0"
  CACHE STRING "oneAPI Intel GPU architectures to build binaries for"
)
set(CYCLES_ONEAPI_SYCL_TARGETS spir64 spir64_gen CACHE STRING "\
@ -693,7 +695,7 @@ oneAPI targets to build AOT binaries for"
)
# Flag the oneAPI tuning cache entries as advanced, in a single call.
mark_as_advanced(
  WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION
  CYCLES_ONEAPI_SPIR64_GEN_DEVICES
  CYCLES_ONEAPI_INTEL_BINARIES_ARCH
  CYCLES_ONEAPI_SYCL_TARGETS
)
endif()

View File

@ -884,7 +884,28 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
endif()
# Enable zebin, a graphics binary format with improved compatibility.
# Note: string(PREPEND) inserts at the front of the variable, so every option
# prepended after this one ends up *before* `--format zebin` in the final string.
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ")
# Put the `-device` option, with the value of CYCLES_ONEAPI_SPIR64_GEN_DEVICES,
# at the front of the spir64_gen options.
string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${CYCLES_ONEAPI_SPIR64_GEN_DEVICES} ")
# Add the list of Intel devices to build binaries for.
#
# Every entry of CYCLES_ONEAPI_INTEL_BINARIES_ARCH is probed with `ocloc ids`.
# Entries for which the command does not finish with exit code 0 are removed
# from the list, the remaining ones are joined with "," and passed to the
# spir64_gen options through `-device`.

# The location of ocloc only depends on the platform: resolve it once,
# outside of the loop.
if(WIN32)
  set(_ocloc_executable "${OCLOC_INSTALL_DIR}/ocloc.exe")
else()
  set(_ocloc_executable "${OCLOC_INSTALL_DIR}/bin/ocloc")
endif()

# The list is expanded when foreach() starts, so removing items from it
# inside the body does not disturb the iteration.
foreach(device ${CYCLES_ONEAPI_INTEL_BINARIES_ARCH})
  # Run ocloc ids to test if the device is supported.
  execute_process(
    COMMAND "${_ocloc_executable}" ids "${device}"
    RESULT_VARIABLE oclocids_ret
    OUTPUT_QUIET
    ERROR_QUIET
  )
  # A non-numeric result (e.g. the executable could not be started) also
  # fails the EQUAL test and therefore drops the entry.
  if(NOT oclocids_ret EQUAL 0)
    list(REMOVE_ITEM CYCLES_ONEAPI_INTEL_BINARIES_ARCH "${device}")
    message(STATUS "binaries for ${device} not supported by Intel Graphics Compiler/ocloc, skipped.")
  endif()
endforeach()
unset(_ocloc_executable)

list(JOIN CYCLES_ONEAPI_INTEL_BINARIES_ARCH "," gen_devices_string)
if(gen_devices_string STREQUAL "")
  # Every requested architecture got filtered out: report it instead of
  # silently emitting a `-device` option without any value.
  message(WARNING
    "None of the architectures requested in CYCLES_ONEAPI_INTEL_BINARIES_ARCH "
    "passed the `ocloc ids` check, no `-device` option is added for spir64_gen."
  )
else()
  string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${gen_devices_string} ")
endif()
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)