0
0
forked from blender/blender
Files
Nikita Sirgienko 6827400305 Build: Cycles: Backport a DPC++ fix for caching of GPU binaries
Since the addition of Meteor Lake binaries, prebuilt GPU binaries
are now stored as fatbinaries. When running on a platform for which
prebuilt binaries are lacking or considered incompatible, the DPC++
SYCL runtime caching logic failed storing the (re)compiled
compatible version. This patch to DPC++ SYCL runtime fixes it.

Pull Request: blender/blender#117844
2024-02-05 13:55:40 +01:00

280 lines
11 KiB
Diff

diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt
--- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt 2022-04-29 13:47:11 -0600
+++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt 2022-05-21 15:25:06 -0600
@@ -11,6 +11,11 @@
)
endif()
+# Blender code below is determined to use FetchContent_Declare
+# temporarily allow it (but feed it our downloaded tarball
+# in the OpenCL_HEADERS variable
+set(FETCHCONTENT_FULLY_DISCONNECTED OFF)
+
# Repo URLs
set(OCL_HEADERS_REPO
@@ -77,5 +82,6 @@
FetchContent_MakeAvailable(ocl-icd)
add_library(OpenCL-ICD ALIAS OpenCL)
+set(FETCHCONTENT_FULLY_DISCONNECTED ON)
add_subdirectory(opencl-aot)
diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake
--- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-02-08 09:17:24 -0700
+++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-05-24 11:35:51 -0600
@@ -36,7 +36,9 @@
add_custom_target(libsycldevice-obj)
add_custom_target(libsycldevice-spv)
-add_custom_target(libsycldevice DEPENDS
+# Blender: add ALL here otherwise this target will not build
+# and cause an error due to missing files during the install phase.
+add_custom_target(libsycldevice ALL DEPENDS
libsycldevice-obj
libsycldevice-spv)
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 17eeaafae194..09e6d2217aaa 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
}
assert(BinImages.size() > 0 && "Expected to find at least one device image");
+ // Ignore images with incompatible state. Image is considered compatible
+ // with a target state if an image is already in the target state or can
+ // be brought to target state by compiling/linking/building.
+ //
+ // Example: an image in "executable" state is not compatible with
+ // "input" target state - there is no operation to convert the image it
+ // to "input" state. An image in "input" state is compatible with
+ // "executable" target state because it can be built to get into
+ // "executable" state.
+ for (auto It = BinImages.begin(); It != BinImages.end();) {
+ if (getBinImageState(*It) > TargetState)
+ It = BinImages.erase(It);
+ else
+ ++It;
+ }
+
std::vector<device_image_plain> SYCLDeviceImages;
- for (RTDeviceBinaryImage *BinImage : BinImages) {
- const bundle_state ImgState = getBinImageState(BinImage);
-
- // Ignore images with incompatible state. Image is considered compatible
- // with a target state if an image is already in the target state or can
- // be brought to target state by compiling/linking/building.
- //
- // Example: an image in "executable" state is not compatible with
- // "input" target state - there is no operation to convert the image it
- // to "input" state. An image in "input" state is compatible with
- // "executable" target state because it can be built to get into
- // "executable" state.
- if (ImgState > TargetState)
- continue;
- for (const sycl::device &Dev : Devs) {
+ // If a non-input state is requested, we can filter out some compatible
+ // images and return only those with the highest compatible state for each
+ // device-kernel pair. This map tracks how many kernel-device pairs need each
+ // image, so that any unneeded ones are skipped.
+ // TODO this has no effect if the requested state is input, consider having
+ // a separate branch for that case to avoid unnecessary tracking work.
+ struct DeviceBinaryImageInfo {
+ std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
+ bundle_state State = bundle_state::input;
+ int RequirementCounter = 0;
+ };
+ std::unordered_map<RTDeviceBinaryImage *, DeviceBinaryImageInfo> ImageInfoMap;
+
+ for (const sycl::device &Dev : Devs) {
+ // Track the highest image state for each requested kernel.
+ using StateImagesPairT =
+ std::pair<bundle_state, std::vector<RTDeviceBinaryImage *>>;
+ using KernelImageMapT =
+ std::map<kernel_id, StateImagesPairT, LessByNameComp>;
+ KernelImageMapT KernelImageMap;
+ if (!KernelIDs.empty())
+ for (const kernel_id &KernelID : KernelIDs)
+ KernelImageMap.insert({KernelID, {}});
+
+ for (RTDeviceBinaryImage *BinImage : BinImages) {
if (!compatibleWithDevice(BinImage, Dev) ||
!doesDevSupportImgAspects(Dev, *BinImage))
continue;
- std::shared_ptr<std::vector<sycl::kernel_id>> KernelIDs;
- // Collect kernel names for the image
- {
- std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
- KernelIDs = m_BinImg2KernelIDs[BinImage];
- // If the image does not contain any non-service kernels we can skip it.
- if (!KernelIDs || KernelIDs->empty())
- continue;
+ auto InsertRes = ImageInfoMap.insert({BinImage, {}});
+ DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second;
+ if (InsertRes.second) {
+ ImgInfo.State = getBinImageState(BinImage);
+ // Collect kernel names for the image
+ {
+ std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
+ ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
+ }
}
+ const bundle_state ImgState = ImgInfo.State;
+ const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
+ ImgInfo.KernelIDs;
+ int &ImgRequirementCounter = ImgInfo.RequirementCounter;
- DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
- BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr);
+ // If the image does not contain any non-service kernels we can skip it.
+ if (!ImageKernelIDs || ImageKernelIDs->empty())
+ continue;
- SYCLDeviceImages.push_back(
- createSyclObjFromImpl<device_image_plain>(Impl));
- break;
+ // Update tracked information.
+ for (kernel_id &KernelID : *ImageKernelIDs) {
+ StateImagesPairT *StateImagesPair;
+ // If only specific kernels are requested, ignore the rest.
+ if (!KernelIDs.empty()) {
+ auto It = KernelImageMap.find(KernelID);
+ if (It == KernelImageMap.end())
+ continue;
+ StateImagesPair = &It->second;
+ } else
+ StateImagesPair = &KernelImageMap[KernelID];
+
+ auto &[KernelImagesState, KernelImages] = *StateImagesPair;
+
+ if (KernelImages.empty()) {
+ KernelImagesState = ImgState;
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState < ImgState) {
+ for (RTDeviceBinaryImage *Img : KernelImages) {
+ auto It = ImageInfoMap.find(Img);
+ assert(It != ImageInfoMap.end());
+ assert(It->second.RequirementCounter > 0);
+ --(It->second.RequirementCounter);
+ }
+ KernelImages.clear();
+ KernelImages.push_back(BinImage);
+ KernelImagesState = ImgState;
+ ++ImgRequirementCounter;
+ } else if (KernelImagesState == ImgState) {
+ KernelImages.push_back(BinImage);
+ ++ImgRequirementCounter;
+ }
+ }
}
}
+ for (const auto &ImgInfoPair : ImageInfoMap) {
+ if (ImgInfoPair.second.RequirementCounter == 0)
+ continue;
+
+ DeviceImageImplPtr Impl = std::make_shared<detail::device_image_impl>(
+ ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State,
+ ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr);
+
+ SYCLDeviceImages.push_back(createSyclObjFromImpl<device_image_plain>(Impl));
+ }
+
return SYCLDeviceImages;
}
diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp
index fb228cd85979..94e8438ee964 100644
--- a/sycl/source/detail/pi.cpp
+++ b/sycl/source/detail/pi.cpp
@@ -635,45 +635,47 @@ static uint16_t getELFHeaderType(const unsigned char *ImgData, size_t ImgSize) {
RT::PiDeviceBinaryType getBinaryImageFormat(const unsigned char *ImgData,
size_t ImgSize) {
// Top-level magic numbers for the recognized binary image formats.
- struct {
- RT::PiDeviceBinaryType Fmt;
- const uint32_t Magic;
- } Fmts[] = {{PI_DEVICE_BINARY_TYPE_SPIRV, 0x07230203},
- {PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE, 0xDEC04342},
- // 'I', 'N', 'T', 'C' ; Intel native
- {PI_DEVICE_BINARY_TYPE_NATIVE, 0x43544E49}};
-
- if (ImgSize >= sizeof(Fmts[0].Magic)) {
- detail::remove_const_t<decltype(Fmts[0].Magic)> Hdr = 0;
- std::copy(ImgData, ImgData + sizeof(Hdr), reinterpret_cast<char *>(&Hdr));
-
- // Check headers for direct formats.
- for (const auto &Fmt : Fmts) {
- if (Hdr == Fmt.Magic)
- return Fmt.Fmt;
- }
-
- // ELF e_type for recognized binary image formats.
- struct {
- RT::PiDeviceBinaryType Fmt;
- const uint16_t Magic;
- } ELFFmts[] = {{PI_DEVICE_BINARY_TYPE_NATIVE, 0xFF04}, // OpenCL executable
- {PI_DEVICE_BINARY_TYPE_NATIVE, 0xFF12}}; // ZEBIN executable
-
- // ELF files need to be parsed separately. The header type ends after 18
- // bytes.
- if (Hdr == 0x464c457F && ImgSize >= 18) {
- uint16_t HdrType = getELFHeaderType(ImgData, ImgSize);
- for (const auto &ELFFmt : ELFFmts) {
- if (HdrType == ELFFmt.Magic)
- return ELFFmt.Fmt;
- }
- // Newer ZEBIN format does not have a special header type, but can instead
- // be identified by having a required .ze_info section.
- if (checkELFSectionPresent(".ze_info", ImgData, ImgSize))
- return PI_DEVICE_BINARY_TYPE_NATIVE;
- }
+ auto MatchMagicNumber = [&](auto Number) {
+ return ImgSize >= sizeof(Number) &&
+ std::memcmp(ImgData, &Number, sizeof(Number)) == 0;
+ };
+
+ if (MatchMagicNumber(uint32_t{0x07230203}))
+ return PI_DEVICE_BINARY_TYPE_SPIRV;
+
+ if (MatchMagicNumber(uint32_t{0xDEC04342}))
+ return PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE;
+
+ if (MatchMagicNumber(uint32_t{0x43544E49}))
+ // 'I', 'N', 'T', 'C' ; Intel native
+ return PI_DEVICE_BINARY_TYPE_LLVMIR_BITCODE;
+
+ // Check for ELF format, size requirements include data we'll read in case of
+ // succesful match.
+ if (ImgSize >= 18 && MatchMagicNumber(uint32_t{0x464c457F})) {
+ uint16_t ELFHdrType = getELFHeaderType(ImgData, ImgSize);
+ if (ELFHdrType == 0xFF04)
+ // OpenCL executable.
+ return PI_DEVICE_BINARY_TYPE_NATIVE;
+
+ if (ELFHdrType == 0xFF12)
+ // ZEBIN executable.
+ return PI_DEVICE_BINARY_TYPE_NATIVE;
+
+ // Newer ZEBIN format does not have a special header type, but can instead
+ // be identified by having a required .ze_info section.
+ if (checkELFSectionPresent(".ze_info", ImgData, ImgSize))
+ return PI_DEVICE_BINARY_TYPE_NATIVE;
}
+
+ // "ar" format is used to pack binaries for multiple devices, e.g. via
+ //
+ // -Xsycl-target-backend=spir64_gen "-device acm-g10,acm-g11"
+ //
+ // option.
+ if (MatchMagicNumber(std::array{'!', '<', 'a', 'r', 'c', 'h', '>', '\n'}))
+ return PI_DEVICE_BINARY_TYPE_NATIVE;
+
return PI_DEVICE_BINARY_TYPE_NONE;
}