diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt --- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt 2022-04-29 13:47:11 -0600 +++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt 2022-05-21 15:25:06 -0600 @@ -11,6 +11,11 @@ ) endif() +# Blender code below is determined to use FetchContent_Declare +# temporarily allow it (but feed it our downloaded tarball +# in the OpenCL_HEADERS variable +set(FETCHCONTENT_FULLY_DISCONNECTED OFF) + # Repo URLs set(OCL_HEADERS_REPO @@ -77,5 +82,6 @@ FetchContent_MakeAvailable(ocl-icd) add_library(OpenCL-ICD ALIAS OpenCL) +set(FETCHCONTENT_FULLY_DISCONNECTED ON) add_subdirectory(opencl-aot) diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake --- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-02-08 09:17:24 -0700 +++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-05-24 11:35:51 -0600 @@ -36,7 +36,9 @@ add_custom_target(libsycldevice-obj) add_custom_target(libsycldevice-spv) -add_custom_target(libsycldevice DEPENDS +# Blender: add ALL here otherwise this target will not build +# and cause an error due to missing files during the install phase. +add_custom_target(libsycldevice ALL DEPENDS libsycldevice-obj libsycldevice-spv) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 17eeaafae194..09e6d2217aaa 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1647,46 +1647,120 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( } assert(BinImages.size() > 0 && "Expected to find at least one device image"); + // Ignore images with incompatible state. Image is considered compatible + // with a target state if an image is already in the target state or can + // be brought to target state by compiling/linking/building. + // + // Example: an image in "executable" state is not compatible with + // "input" target state - there is no operation to convert the image it + // to "input" state. An image in "input" state is compatible with + // "executable" target state because it can be built to get into + // "executable" state. + for (auto It = BinImages.begin(); It != BinImages.end();) { + if (getBinImageState(*It) > TargetState) + It = BinImages.erase(It); + else + ++It; + } + std::vector SYCLDeviceImages; - for (RTDeviceBinaryImage *BinImage : BinImages) { - const bundle_state ImgState = getBinImageState(BinImage); - - // Ignore images with incompatible state. Image is considered compatible - // with a target state if an image is already in the target state or can - // be brought to target state by compiling/linking/building. - // - // Example: an image in "executable" state is not compatible with - // "input" target state - there is no operation to convert the image it - // to "input" state. An image in "input" state is compatible with - // "executable" target state because it can be built to get into - // "executable" state. - if (ImgState > TargetState) - continue; - for (const sycl::device &Dev : Devs) { + // If a non-input state is requested, we can filter out some compatible + // images and return only those with the highest compatible state for each + // device-kernel pair. This map tracks how many kernel-device pairs need each + // image, so that any unneeded ones are skipped. + // TODO this has no effect if the requested state is input, consider having + // a separate branch for that case to avoid unnecessary tracking work. + struct DeviceBinaryImageInfo { + std::shared_ptr> KernelIDs; + bundle_state State = bundle_state::input; + int RequirementCounter = 0; + }; + std::unordered_map ImageInfoMap; + + for (const sycl::device &Dev : Devs) { + // Track the highest image state for each requested kernel. + using StateImagesPairT = + std::pair>; + using KernelImageMapT = + std::map; + KernelImageMapT KernelImageMap; + if (!KernelIDs.empty()) + for (const kernel_id &KernelID : KernelIDs) + KernelImageMap.insert({KernelID, {}}); + + for (RTDeviceBinaryImage *BinImage : BinImages) { if (!compatibleWithDevice(BinImage, Dev) || !doesDevSupportImgAspects(Dev, *BinImage)) continue; - std::shared_ptr> KernelIDs; - // Collect kernel names for the image - { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - KernelIDs = m_BinImg2KernelIDs[BinImage]; - // If the image does not contain any non-service kernels we can skip it. - if (!KernelIDs || KernelIDs->empty()) - continue; + auto InsertRes = ImageInfoMap.insert({BinImage, {}}); + DeviceBinaryImageInfo &ImgInfo = InsertRes.first->second; + if (InsertRes.second) { + ImgInfo.State = getBinImageState(BinImage); + // Collect kernel names for the image + { + std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage]; + } } + const bundle_state ImgState = ImgInfo.State; + const std::shared_ptr> &ImageKernelIDs = + ImgInfo.KernelIDs; + int &ImgRequirementCounter = ImgInfo.RequirementCounter; - DeviceImageImplPtr Impl = std::make_shared( - BinImage, Ctx, Devs, ImgState, KernelIDs, /*PIProgram=*/nullptr); + // If the image does not contain any non-service kernels we can skip it. + if (!ImageKernelIDs || ImageKernelIDs->empty()) + continue; - SYCLDeviceImages.push_back( - createSyclObjFromImpl(Impl)); - break; + // Update tracked information. + for (kernel_id &KernelID : *ImageKernelIDs) { + StateImagesPairT *StateImagesPair; + // If only specific kernels are requested, ignore the rest. + if (!KernelIDs.empty()) { + auto It = KernelImageMap.find(KernelID); + if (It == KernelImageMap.end()) + continue; + StateImagesPair = &It->second; + } else + StateImagesPair = &KernelImageMap[KernelID]; + + auto &[KernelImagesState, KernelImages] = *StateImagesPair; + + if (KernelImages.empty()) { + KernelImagesState = ImgState; + KernelImages.push_back(BinImage); + ++ImgRequirementCounter; + } else if (KernelImagesState < ImgState) { + for (RTDeviceBinaryImage *Img : KernelImages) { + auto It = ImageInfoMap.find(Img); + assert(It != ImageInfoMap.end()); + assert(It->second.RequirementCounter > 0); + --(It->second.RequirementCounter); + } + KernelImages.clear(); + KernelImages.push_back(BinImage); + KernelImagesState = ImgState; + ++ImgRequirementCounter; + } else if (KernelImagesState == ImgState) { + KernelImages.push_back(BinImage); + ++ImgRequirementCounter; + } + } } } + for (const auto &ImgInfoPair : ImageInfoMap) { + if (ImgInfoPair.second.RequirementCounter == 0) + continue; + + DeviceImageImplPtr Impl = std::make_shared( + ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State, + ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr); + + SYCLDeviceImages.push_back(createSyclObjFromImpl(Impl)); + } + return SYCLDeviceImages; }