Nodes: new interactive operator to slide nodes #121981
@ -1,5 +1,5 @@
|
||||
name: Bug Report
|
||||
about: File a bug report
|
||||
about: Use Help > Report a Bug from the top of Blender to automatically fill out part of this form.
|
||||
labels:
|
||||
- "Type/Report"
|
||||
- "Status/Needs Triage"
|
||||
@ -11,7 +11,7 @@ body:
|
||||
### Instructions
|
||||
First time reporting? See [tips](https://developer.blender.org/docs/handbook/bug_reports/making_good_bug_reports/).
|
||||
|
||||
* Use **Help > Report a Bug** in Blender to fill system information and exact Blender version.
|
||||
* Use **Help > Report a Bug** from the top of Blender to fill system information and exact Blender version.
|
||||
* Test [daily builds](https://builder.blender.org/) to verify if the issue is already fixed.
|
||||
* Test [previous versions](https://download.blender.org/release/) to find an older working version.
|
||||
* For feature requests, feedback, questions or build issues, see [communication channels](https://developer.blender.org/docs/handbook/communication/user_feedback/).
|
||||
|
21
.github/stale.yml
vendored
21
.github/stale.yml
vendored
@ -1,21 +0,0 @@
|
||||
# Configuration for probot-stale - https://github.com/probot/stale
|
||||
# This file is used on Blender's GitHub mirror to automatically close any pull request
|
||||
# and invite contributors to join the official development platform on blender.org
|
||||
|
||||
# Number of days of inactivity before an Issue or Pull Request becomes stale
|
||||
daysUntilStale: 1
|
||||
|
||||
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
|
||||
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
|
||||
daysUntilClose: 1
|
||||
|
||||
# Label to use when marking as stale
|
||||
staleLabel: stale
|
||||
|
||||
# Comment to post when closing a stale Issue or Pull Request.
|
||||
closeComment: >
|
||||
This issue has been automatically closed, because this repository is only
|
||||
used as a mirror. Blender development happens on projects.blender.org.
|
||||
|
||||
To get started contributing code, please read:
|
||||
https://developer.blender.org/docs/handbook/contributing/
|
32
.github/workflows/stale.yml
vendored
Normal file
32
.github/workflows/stale.yml
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
# GitHub Actions workflow for automatically closing pull requests
|
||||
# This workflow is specific to Blender's GitHub mirror and directs contributors to the official development platform on blender.org
|
||||
|
||||
name: Close GitHub Pull Requests
|
||||
|
||||
# Trigger this workflow every 12 hours.
|
||||
on:
|
||||
schedule:
|
||||
- cron: '* */12 * * *'
|
||||
|
||||
jobs:
|
||||
close_prs:
|
||||
name: Close Pull Requests
|
||||
runs-on: ubuntu-latest
|
||||
# Only run this job in the read-only mirror repository.
|
||||
if: github.repository == 'blender/blender' && contains(github.server_url, 'github.com')
|
||||
# Permissions granted to the GitHub Actions bot.
|
||||
permissions:
|
||||
pull-requests: write
|
||||
steps:
|
||||
- uses: actions/stale@v9
|
||||
with:
|
||||
# Number of days before a pull request is marked as stale.
|
||||
days-before-pr-stale: 0
|
||||
# Number of days before a pull request is closed.
|
||||
days-before-pr-close: 0
|
||||
# Message posted when closing a pull request.
|
||||
stale-pr-message: |
|
||||
This pull request has been automatically closed because this repository is a read-only mirror. Blender development happens on [projects.blender.org](https://projects.blender.org).
|
||||
|
||||
To contribute code, please read:
|
||||
https://developer.blender.org/docs/handbook/contributing/
|
@ -18,12 +18,16 @@ macro(fftw_build FFTW_POSTFIX)
|
||||
|
||||
CMAKE_ARGS
|
||||
-DCMAKE_INSTALL_PREFIX=${LIBDIR}/fftw3
|
||||
-DENABLE_THREADS=ON
|
||||
-DWITH_COMBINED_THREADS=OFF
|
||||
-DBUILD_SHARED_LIBS=OFF
|
||||
-DBUILD_TESTS=OFF
|
||||
${FFTW_EXTRA_ARGS}
|
||||
|
||||
INSTALL_DIR ${LIBDIR}/fftw3
|
||||
)
|
||||
else()
|
||||
set(FFTW_EXTRA_ARGS --enable-static)
|
||||
set(FFTW_EXTRA_ARGS --enable-static --enable-threads)
|
||||
set(FFTW_INSTALL install)
|
||||
ExternalProject_Add(external_fftw3_${FFTW_POSTFIX}
|
||||
URL file://${PACKAGE_DIR}/${FFTW_FILE}
|
||||
@ -57,12 +61,12 @@ if(MSVC)
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/fftw3/lib/fftw3.lib
|
||||
${HARVEST_TARGET}/fftw3/lib/fftw3.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/fftw3/bin/fftw3.dll
|
||||
${HARVEST_TARGET}/fftw3/lib/fftw3.dll
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/fftw3/include/fftw3.h
|
||||
${HARVEST_TARGET}/fftw3/include/fftw3.h
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/fftw3/lib/fftw3_threads.lib
|
||||
${HARVEST_TARGET}/fftw3/lib/fftw3_threads.lib
|
||||
DEPENDEES install
|
||||
)
|
||||
ExternalProject_Add_Step(external_fftw3_float after_install
|
||||
@ -70,8 +74,8 @@ if(MSVC)
|
||||
${LIBDIR}/fftw3/lib/fftw3f.lib
|
||||
${HARVEST_TARGET}/fftw3/lib/fftw3f.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/fftw3/bin/fftw3f.dll
|
||||
${HARVEST_TARGET}/fftw3/lib/fftw3f.dll
|
||||
${LIBDIR}/fftw3/lib/fftw3f_threads.lib
|
||||
${HARVEST_TARGET}/fftw3/lib/fftw3f_threads.lib
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
|
@ -73,3 +73,13 @@ index ae14ced..a49e131 100644
|
||||
else
|
||||
return HIPArch::Unknown;
|
||||
}
|
||||
--- a/devices/cpu/cpu_engine.h
|
||||
+++ b/devices/cpu/cpu_engine.h
|
||||
@@ -7,5 +7,7 @@
|
||||
#include "cpu_device.h"
|
||||
#include <queue>
|
||||
#include <condition_variable>
|
||||
+// BLENDER: needed for building on Linux.
|
||||
+#include <thread>
|
||||
|
||||
OIDN_NAMESPACE_BEGIN
|
||||
|
@ -440,12 +440,16 @@ endif()
|
||||
|
||||
if(WITH_FFTW3)
|
||||
set(FFTW3 ${LIBDIR}/fftw3)
|
||||
if(EXISTS ${FFTW3}/lib/libfftw3-3.lib) # 3.6 libraries
|
||||
set(FFTW3_LIBRARIES ${FFTW3}/lib/libfftw3-3.lib ${FFTW3}/lib/libfftw3f.lib)
|
||||
elseif(EXISTS ${FFTW3}/lib/libfftw.lib)
|
||||
set(FFTW3_LIBRARIES ${FFTW3}/lib/libfftw.lib) # 3.5 Libraries
|
||||
else()
|
||||
set(FFTW3_LIBRARIES ${FFTW3}/lib/fftw3.lib ${FFTW3}/lib/fftw3f.lib) # msys2+MSVC Libraries
|
||||
set(FFTW3_LIBRARIES
|
||||
${FFTW3}/lib/fftw3.lib
|
||||
${FFTW3}/lib/fftw3f.lib
|
||||
)
|
||||
if(EXISTS ${FFTW3}/lib/fftw3_threads.lib)
|
||||
list(APPEND FFTW3_LIBRARIES
|
||||
${FFTW3}/lib/fftw3_threads.lib
|
||||
${FFTW3}/lib/fftw3f_threads.lib
|
||||
)
|
||||
set(WITH_FFTW3_THREADS_SUPPORT ON)
|
||||
endif()
|
||||
set(FFTW3_INCLUDE_DIRS ${FFTW3}/include)
|
||||
set(FFTW3_LIBPATH ${FFTW3}/lib)
|
||||
|
@ -183,8 +183,12 @@ def main() -> None:
|
||||
# Set platform tag following conventions.
|
||||
if sys.platform == "darwin":
|
||||
target = cmake_cache_var_or_exit(filepath_cmake_cache, "CMAKE_OSX_DEPLOYMENT_TARGET").split(".")
|
||||
# Minor version is expected to be always zero starting with macOS 11.
|
||||
# https://github.com/pypa/packaging/issues/435
|
||||
target_major = int(target[0])
|
||||
target_minor = 0 # int(target[1])
|
||||
machine = cmake_cache_var_or_exit(filepath_cmake_cache, "CMAKE_OSX_ARCHITECTURES")
|
||||
platform_tag = "macosx_%d_%d_%s" % (int(target[0]), int(target[1]), machine)
|
||||
platform_tag = "macosx_%d_%d_%s" % (target_major, target_minor, machine)
|
||||
elif sys.platform == "win32":
|
||||
platform_tag = "win_%s" % (platform.machine().lower())
|
||||
elif sys.platform == "linux":
|
||||
@ -198,6 +202,10 @@ def main() -> None:
|
||||
sys.stderr.write("Unsupported platform: %s, abort!\n" % (sys.platform))
|
||||
sys.exit(1)
|
||||
|
||||
# Manually specify, otherwise it uses the version of the executable used to run
|
||||
# this script which may not match the Blender python version.
|
||||
python_tag = "py%d%d" % (python_version_number[0], python_version_number[1])
|
||||
|
||||
os.chdir(install_dir)
|
||||
|
||||
# Include all files recursively.
|
||||
@ -223,7 +231,7 @@ def main() -> None:
|
||||
packages=["bpy"],
|
||||
package_data={"": package_files("bpy")},
|
||||
distclass=BinaryDistribution,
|
||||
options={"bdist_wheel": {"plat_name": platform_tag}},
|
||||
options={"bdist_wheel": {"plat_name": platform_tag, "python_tag": python_tag}},
|
||||
|
||||
description="Blender as a Python module",
|
||||
long_description=long_description,
|
||||
|
@ -409,7 +409,7 @@ void FFMPEGReader::seek(int position)
|
||||
{
|
||||
double pts_time_base = av_q2d(m_formatCtx->streams[m_stream]->time_base);
|
||||
|
||||
uint64_t st_time = m_formatCtx->streams[m_stream]->start_time;
|
||||
int64_t st_time = m_formatCtx->streams[m_stream]->start_time;
|
||||
uint64_t seek_pos = (uint64_t)(position / (pts_time_base * m_specs.rate));
|
||||
|
||||
if(st_time != AV_NOPTS_VALUE)
|
||||
|
4
extern/draco/src/encoder.cpp
vendored
4
extern/draco/src/encoder.cpp
vendored
@ -222,7 +222,7 @@ draco::DataType getDataType(size_t componentType)
|
||||
}
|
||||
|
||||
API(uint32_t)
|
||||
encoderSetAttribute(Encoder *encoder, char *attributeName, size_t componentType, char *dataType, void *data)
|
||||
encoderSetAttribute(Encoder *encoder, char *attributeName, size_t componentType, char *dataType, void *data, bool normalized)
|
||||
{
|
||||
auto buffer = std::make_unique<draco::DataBuffer>();
|
||||
uint32_t count = encoder->mesh.num_points();
|
||||
@ -232,7 +232,7 @@ encoderSetAttribute(Encoder *encoder, char *attributeName, size_t componentType,
|
||||
|
||||
draco::GeometryAttribute::Type semantics = getAttributeSemantics(attributeName);
|
||||
draco::GeometryAttribute attribute;
|
||||
attribute.Init(semantics, &*buffer, componentCount, getDataType(componentType), false, stride, 0);
|
||||
attribute.Init(semantics, &*buffer, componentCount, getDataType(componentType), normalized, stride, 0);
|
||||
|
||||
auto id = static_cast<uint32_t>(encoder->mesh.AddAttribute(attribute, true, count));
|
||||
auto dataBytes = reinterpret_cast<uint8_t *>(data);
|
||||
|
2
extern/draco/src/encoder.h
vendored
2
extern/draco/src/encoder.h
vendored
@ -53,7 +53,7 @@ API(void)
|
||||
encoderSetIndices(Encoder *encoder, size_t indexComponentType, uint32_t indexCount, void *indices);
|
||||
|
||||
API(uint32_t)
|
||||
encoderSetAttribute(Encoder *encoder, char *attributeName, size_t componentType, char *dataType, void *data);
|
||||
encoderSetAttribute(Encoder *encoder, char *attributeName, size_t componentType, char *dataType, void *data, bool normalized);
|
||||
|
||||
API(uint32_t)
|
||||
encoderGetEncodedVertexCount(Encoder *encoder);
|
||||
|
@ -1683,7 +1683,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
elif device_type == 'ONEAPI':
|
||||
import sys
|
||||
if sys.platform.startswith("win"):
|
||||
driver_version = "XX.X.101.5186"
|
||||
driver_version = "XX.X.101.5518"
|
||||
col.label(text=rpt_("Requires Intel GPU with Xe-HPG architecture"), icon='BLANK1', translate=False)
|
||||
col.label(text=rpt_("and Windows driver version %s or newer") % driver_version,
|
||||
icon='BLANK1', translate=False)
|
||||
|
@ -2437,7 +2437,7 @@ class CYCLES_VIEW3D_PT_shading_lighting(Panel):
|
||||
sub.template_icon_view(shading, "studio_light", scale_popup=3)
|
||||
|
||||
col = split.column()
|
||||
col.operator("preferences.studiolight_show", emboss=False, text="", icon='PREFERENCES')
|
||||
col.operator("screen.userpref_show", emboss=False, text="", icon='PREFERENCES').section = 'LIGHTS'
|
||||
|
||||
split = layout.split(factor=0.9)
|
||||
col = split.column()
|
||||
|
@ -178,6 +178,51 @@ void BVHEmbree::build(Progress &progress,
|
||||
rtcCommitScene(scene);
|
||||
}
|
||||
|
||||
string BVHEmbree::get_last_error_message()
|
||||
{
|
||||
const RTCError error_code = rtcGetDeviceError(rtc_device);
|
||||
switch (error_code) {
|
||||
case RTC_ERROR_NONE:
|
||||
return "no error";
|
||||
case RTC_ERROR_UNKNOWN:
|
||||
return "unknown error";
|
||||
case RTC_ERROR_INVALID_ARGUMENT:
|
||||
return "invalid argument error";
|
||||
case RTC_ERROR_INVALID_OPERATION:
|
||||
return "invalid operation error";
|
||||
case RTC_ERROR_OUT_OF_MEMORY:
|
||||
return "out of memory error";
|
||||
case RTC_ERROR_UNSUPPORTED_CPU:
|
||||
return "unsupported cpu error";
|
||||
case RTC_ERROR_CANCELLED:
|
||||
return "cancelled";
|
||||
default:
|
||||
/* We should never end here unless enum for RTC errors would change. */
|
||||
return "unknown error";
|
||||
}
|
||||
}
|
||||
|
||||
# if defined(WITH_EMBREE_GPU) && RTC_VERSION >= 40302
|
||||
bool BVHEmbree::offload_scenes_to_gpu(const vector<RTCScene> &scenes)
|
||||
{
|
||||
/* Having BVH on GPU is more performance-critical than texture data.
|
||||
* In order to ensure good performance even when running out of GPU
|
||||
* memory, we force BVH to migrate to GPU before allocating other textures
|
||||
* that may not fit. */
|
||||
for (const RTCScene &embree_scene : scenes) {
|
||||
RTCSceneFlags scene_flags = rtcGetSceneFlags(embree_scene);
|
||||
scene_flags = scene_flags | RTC_SCENE_FLAG_PREFETCH_USM_SHARED_ON_GPU;
|
||||
rtcSetSceneFlags(embree_scene, scene_flags);
|
||||
rtcCommitScene(embree_scene);
|
||||
/* In case of any errors from Embree, we should stop
|
||||
* the execution and propagate the error. */
|
||||
if (rtcGetDeviceError(rtc_device) != RTC_ERROR_NONE)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
void BVHEmbree::add_object(Object *ob, int i)
|
||||
{
|
||||
Geometry *geom = ob->get_geometry();
|
||||
|
@ -18,6 +18,7 @@
|
||||
# include "bvh/bvh.h"
|
||||
# include "bvh/params.h"
|
||||
|
||||
# include "util/string.h"
|
||||
# include "util/thread.h"
|
||||
# include "util/types.h"
|
||||
# include "util/vector.h"
|
||||
@ -36,6 +37,12 @@ class BVHEmbree : public BVH {
|
||||
const bool isSyclEmbreeDevice = false);
|
||||
void refit(Progress &progress);
|
||||
|
||||
# if defined(WITH_EMBREE_GPU) && RTC_VERSION >= 40302
|
||||
bool offload_scenes_to_gpu(const vector<RTCScene> &scenes);
|
||||
# endif
|
||||
|
||||
string get_last_error_message();
|
||||
|
||||
RTCScene scene;
|
||||
|
||||
protected:
|
||||
|
@ -257,6 +257,7 @@ class device_memory {
|
||||
friend class OptiXDevice;
|
||||
friend class HIPDevice;
|
||||
friend class MetalDevice;
|
||||
friend class OneapiDevice;
|
||||
|
||||
/* Only create through subclasses. */
|
||||
device_memory(Device *device, const char *name, MemoryType type);
|
||||
|
@ -50,11 +50,9 @@ bool device_oneapi_init()
|
||||
_putenv_s("ONEAPI_DEVICE_SELECTOR", "!opencl:*");
|
||||
}
|
||||
}
|
||||
if (getenv("SYCL_ENABLE_PCI") == nullptr) {
|
||||
_putenv_s("SYCL_ENABLE_PCI", "1");
|
||||
}
|
||||
if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE") == nullptr) {
|
||||
_putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0");
|
||||
/* SYSMAN is needed for free_memory queries. */
|
||||
if (getenv("ZES_ENABLE_SYSMAN") == nullptr) {
|
||||
_putenv_s("ZES_ENABLE_SYSMAN", "1");
|
||||
}
|
||||
if (getenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE") == nullptr) {
|
||||
_putenv_s("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE", "0");
|
||||
@ -68,8 +66,9 @@ bool device_oneapi_init()
|
||||
else {
|
||||
setenv("ONEAPI_DEVICE_SELECTOR", "!opencl:*", false);
|
||||
}
|
||||
setenv("SYCL_ENABLE_PCI", "1", false);
|
||||
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_IN_ORDER_QUEUE", "0", false);
|
||||
/* SYSMAN is needed for free_memory queries. However, it leads to runtime driver issues on Linux
|
||||
* when using it with JEMALLOC, so we set it to 0 by default until it's fixed. */
|
||||
setenv("ZES_ENABLE_SYSMAN", "0", false);
|
||||
setenv("SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE", "0", false);
|
||||
# endif
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
# include "device/oneapi/device_impl.h"
|
||||
|
||||
# include "util/debug.h"
|
||||
# include "util/foreach.h"
|
||||
# include "util/log.h"
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
@ -47,18 +48,20 @@ static void queue_error_cb(const char *message, void *user_ptr)
|
||||
}
|
||||
|
||||
OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler)
|
||||
: Device(info, stats, profiler),
|
||||
: GPUDevice(info, stats, profiler),
|
||||
device_queue_(nullptr),
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
embree_device(nullptr),
|
||||
embree_scene(nullptr),
|
||||
# endif
|
||||
texture_info_(this, "texture_info", MEM_GLOBAL),
|
||||
kg_memory_(nullptr),
|
||||
kg_memory_device_(nullptr),
|
||||
kg_memory_size_(0)
|
||||
{
|
||||
need_texture_info_ = false;
|
||||
/* Verify that base class types can be used with specific backend types */
|
||||
static_assert(sizeof(texMemObject) == sizeof(void *));
|
||||
static_assert(sizeof(arrayMemObject) == sizeof(void *));
|
||||
|
||||
use_hardware_raytracing = info.use_hardware_raytracing;
|
||||
|
||||
oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_);
|
||||
@ -110,6 +113,18 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi
|
||||
kg_memory_size_ = globals_segment_size;
|
||||
|
||||
max_memory_on_device_ = get_memcapacity();
|
||||
init_host_memory();
|
||||
move_texture_to_host = false;
|
||||
can_map_host = true;
|
||||
|
||||
const char *headroom_str = getenv("CYCLES_ONEAPI_MEMORY_HEADROOM");
|
||||
if (headroom_str != nullptr) {
|
||||
const long long override_headroom = (float)atoll(headroom_str);
|
||||
device_working_headroom = override_headroom;
|
||||
device_texture_headroom = override_headroom;
|
||||
}
|
||||
VLOG_DEBUG << "oneAPI memory headroom size: "
|
||||
<< string_human_readable_size(device_working_headroom);
|
||||
}
|
||||
|
||||
OneapiDevice::~OneapiDevice()
|
||||
@ -119,7 +134,7 @@ OneapiDevice::~OneapiDevice()
|
||||
rtcReleaseDevice(embree_device);
|
||||
# endif
|
||||
|
||||
texture_info_.free();
|
||||
texture_info.free();
|
||||
usm_free(device_queue_, kg_memory_);
|
||||
usm_free(device_queue_, kg_memory_device_);
|
||||
|
||||
@ -166,8 +181,22 @@ void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
else {
|
||||
bvh_embree->build(progress, &stats, embree_device, true);
|
||||
}
|
||||
|
||||
# if RTC_VERSION >= 40302
|
||||
thread_scoped_lock lock(scene_data_mutex);
|
||||
all_embree_scenes.push_back(bvh_embree->scene);
|
||||
# endif
|
||||
|
||||
if (bvh->params.top_level) {
|
||||
embree_scene = bvh_embree->scene;
|
||||
# if RTC_VERSION >= 40302
|
||||
if (bvh_embree->offload_scenes_to_gpu(all_embree_scenes) == false) {
|
||||
set_error(
|
||||
string_printf("BVH failed to to migrate to the GPU due to Embree library error (%s)",
|
||||
bvh_embree->get_last_error_message()));
|
||||
}
|
||||
all_embree_scenes.clear();
|
||||
# endif
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -176,6 +205,26 @@ void OneapiDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
|
||||
}
|
||||
# endif
|
||||
|
||||
size_t OneapiDevice::get_free_mem() const
|
||||
{
|
||||
/* Accurate: Use device info, which is practically useful only on dGPU.
|
||||
* This is because for non-discrete GPUs, all GPU memory allocations would
|
||||
* be in the RAM, thus having the same performance for device and host pointers,
|
||||
* so there is no need to be very accurate about what would end where. */
|
||||
const sycl::device &device = reinterpret_cast<sycl::queue *>(device_queue_)->get_device();
|
||||
const bool is_integrated_gpu = device.get_info<sycl::info::device::host_unified_memory>();
|
||||
if (device.has(sycl::aspect::ext_intel_free_memory) && is_integrated_gpu == false) {
|
||||
return device.get_info<sycl::ext::intel::info::device::free_memory>();
|
||||
}
|
||||
/* Estimate: Capacity - in use. */
|
||||
else if (device_mem_in_use < max_memory_on_device_) {
|
||||
return max_memory_on_device_ - device_mem_in_use;
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
{
|
||||
assert(device_queue_);
|
||||
@ -208,63 +257,101 @@ bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
VLOG_INFO << "Kernels loading (compilation) has been done for \"" << info.description << "\"";
|
||||
}
|
||||
|
||||
if (is_finished_ok) {
|
||||
reserve_private_memory(requested_features);
|
||||
is_finished_ok = !have_error();
|
||||
}
|
||||
|
||||
return is_finished_ok;
|
||||
}
|
||||
|
||||
void OneapiDevice::load_texture_info()
|
||||
void OneapiDevice::reserve_private_memory(const uint kernel_features)
|
||||
{
|
||||
if (need_texture_info_) {
|
||||
need_texture_info_ = false;
|
||||
texture_info_.copy_to_device();
|
||||
size_t free_before = get_free_mem();
|
||||
|
||||
/* Use the biggest kernel for estimation. */
|
||||
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
|
||||
(kernel_features & KERNEL_FEATURE_MNEE) ?
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
|
||||
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
|
||||
|
||||
{
|
||||
unique_ptr<DeviceQueue> queue = gpu_queue_create();
|
||||
|
||||
device_ptr d_path_index = 0;
|
||||
device_ptr d_render_buffer = 0;
|
||||
int d_work_size = 0;
|
||||
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
|
||||
|
||||
queue->init_execution();
|
||||
/* Launch of the kernel seems to be sufficient to reserve all
|
||||
* needed memory regardless of the execution global size.
|
||||
* So, the smallest possible size is used here. */
|
||||
queue->enqueue(test_kernel, 1, args);
|
||||
queue->synchronize();
|
||||
}
|
||||
|
||||
size_t free_after = get_free_mem();
|
||||
|
||||
VLOG_INFO << "For kernel execution were reserved "
|
||||
<< string_human_readable_number(free_before - free_after) << " bytes. ("
|
||||
<< string_human_readable_size(free_before - free_after) << ")";
|
||||
}
|
||||
|
||||
void OneapiDevice::generic_alloc(device_memory &mem)
|
||||
void OneapiDevice::get_device_memory_info(size_t &total, size_t &free)
|
||||
{
|
||||
size_t memory_size = mem.memory_size();
|
||||
|
||||
/* TODO(@nsirgien): In future, if scene doesn't fit into device memory, then
|
||||
* we can use USM host memory.
|
||||
* Because of the expected performance impact, implementation of this has had a low priority
|
||||
* and is not implemented yet. */
|
||||
|
||||
assert(device_queue_);
|
||||
/* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
|
||||
* and shared. For new project it maybe more beneficial to use USM shared memory, because it
|
||||
* provides automatic migration mechanism in order to allow to use the same pointer on host and
|
||||
* on device, without need to worry about explicit memory transfer operations. But for
|
||||
* Blender/Cycles this type of memory is not very suitable in current application architecture,
|
||||
* because Cycles already uses two different pointer for host activity and device activity, and
|
||||
* also has to perform all needed memory transfer operations. So, USM device memory
|
||||
* type has been used for oneAPI device in order to better fit in Cycles architecture. */
|
||||
void *device_pointer = nullptr;
|
||||
if (mem.memory_size() + stats.mem_used < max_memory_on_device_)
|
||||
device_pointer = usm_alloc_device(device_queue_, memory_size);
|
||||
if (device_pointer == nullptr) {
|
||||
set_error("oneAPI kernel - device memory allocation error for " +
|
||||
string_human_readable_size(mem.memory_size()) +
|
||||
", possibly caused by lack of available memory space on the device: " +
|
||||
string_human_readable_size(stats.mem_used) + " of " +
|
||||
string_human_readable_size(max_memory_on_device_) + " is already allocated");
|
||||
}
|
||||
|
||||
mem.device_pointer = reinterpret_cast<ccl::device_ptr>(device_pointer);
|
||||
mem.device_size = memory_size;
|
||||
|
||||
stats.mem_alloc(memory_size);
|
||||
free = get_free_mem();
|
||||
total = max_memory_on_device_;
|
||||
}
|
||||
|
||||
void OneapiDevice::generic_copy_to(device_memory &mem)
|
||||
bool OneapiDevice::alloc_device(void *&device_pointer, size_t size)
|
||||
{
|
||||
if (!mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
size_t memory_size = mem.memory_size();
|
||||
bool allocation_success = false;
|
||||
device_pointer = usm_alloc_device(device_queue_, size);
|
||||
if (device_pointer != nullptr) {
|
||||
allocation_success = true;
|
||||
/* Due to lazy memory initialization in GPU runtime we will force memory to
|
||||
* appear in device memory via execution of a kernel using this memory. */
|
||||
if (!oneapi_zero_memory_on_device(device_queue_, device_pointer, size)) {
|
||||
set_error("oneAPI memory operation error: got runtime exception \"" + oneapi_error_string_ +
|
||||
"\"");
|
||||
usm_free(device_queue_, device_pointer);
|
||||
|
||||
/* Copy operation from host shouldn't be requested if there is no memory allocated on host. */
|
||||
assert(mem.host_pointer);
|
||||
assert(device_queue_);
|
||||
usm_memcpy(device_queue_, (void *)mem.device_pointer, (void *)mem.host_pointer, memory_size);
|
||||
device_pointer = nullptr;
|
||||
allocation_success = false;
|
||||
}
|
||||
}
|
||||
|
||||
return allocation_success;
|
||||
}
|
||||
|
||||
void OneapiDevice::free_device(void *device_pointer)
|
||||
{
|
||||
usm_free(device_queue_, device_pointer);
|
||||
}
|
||||
|
||||
bool OneapiDevice::alloc_host(void *&shared_pointer, size_t size)
|
||||
{
|
||||
shared_pointer = usm_aligned_alloc_host(device_queue_, size, 64);
|
||||
return shared_pointer != nullptr;
|
||||
}
|
||||
|
||||
void OneapiDevice::free_host(void *shared_pointer)
|
||||
{
|
||||
usm_free(device_queue_, shared_pointer);
|
||||
}
|
||||
|
||||
void OneapiDevice::transform_host_pointer(void *&device_pointer, void *&shared_pointer)
|
||||
{
|
||||
/* Device and host pointer are in the same address space
|
||||
* as we're using Unified Shared Memory. */
|
||||
device_pointer = shared_pointer;
|
||||
}
|
||||
|
||||
void OneapiDevice::copy_host_to_device(void *device_pointer, void *host_pointer, size_t size)
|
||||
{
|
||||
usm_memcpy(device_queue_, device_pointer, host_pointer, size);
|
||||
}
|
||||
|
||||
/* TODO: Make sycl::queue part of OneapiQueue and avoid using pointers to sycl::queue. */
|
||||
@ -288,20 +375,6 @@ void *OneapiDevice::kernel_globals_device_pointer()
|
||||
return kg_memory_device_;
|
||||
}
|
||||
|
||||
void OneapiDevice::generic_free(device_memory &mem)
|
||||
{
|
||||
if (!mem.device_pointer) {
|
||||
return;
|
||||
}
|
||||
|
||||
stats.mem_free(mem.device_size);
|
||||
mem.device_size = 0;
|
||||
|
||||
assert(device_queue_);
|
||||
usm_free(device_queue_, (void *)mem.device_pointer);
|
||||
mem.device_pointer = 0;
|
||||
}
|
||||
|
||||
void OneapiDevice::mem_alloc(device_memory &mem)
|
||||
{
|
||||
if (mem.type == MEM_TEXTURE) {
|
||||
@ -344,7 +417,7 @@ void OneapiDevice::mem_copy_to(device_memory &mem)
|
||||
}
|
||||
else {
|
||||
if (!mem.device_pointer)
|
||||
mem_alloc(mem);
|
||||
generic_alloc(mem);
|
||||
|
||||
generic_copy_to(mem);
|
||||
}
|
||||
@ -515,14 +588,14 @@ void OneapiDevice::tex_alloc(device_texture &mem)
|
||||
|
||||
/* Resize if needed. Also, in case of resize - allocate in advance for future allocations. */
|
||||
const uint slot = mem.slot;
|
||||
if (slot >= texture_info_.size()) {
|
||||
texture_info_.resize(slot + 128);
|
||||
if (slot >= texture_info.size()) {
|
||||
texture_info.resize(slot + 128);
|
||||
}
|
||||
|
||||
texture_info_[slot] = mem.info;
|
||||
need_texture_info_ = true;
|
||||
texture_info[slot] = mem.info;
|
||||
need_texture_info = true;
|
||||
|
||||
texture_info_[slot].data = (uint64_t)mem.device_pointer;
|
||||
texture_info[slot].data = (uint64_t)mem.device_pointer;
|
||||
}
|
||||
|
||||
void OneapiDevice::tex_free(device_texture &mem)
|
||||
@ -628,6 +701,16 @@ void *OneapiDevice::usm_alloc_device(SyclQueue *queue_, size_t memory_size)
|
||||
{
|
||||
assert(queue_);
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
/* NOTE(@nsirgien): There are three types of Unified Shared Memory (USM) in oneAPI: host, device
|
||||
* and shared. For new project it could more beneficial to use USM shared memory, because it
|
||||
* provides automatic migration mechanism in order to allow to use the same pointer on host and
|
||||
* on device, without need to worry about explicit memory transfer operations, although usage of
|
||||
* USM shared imply some documented limitations on the memory usage in regards of parallel access
|
||||
* from different threads. But for Blender/Cycles this type of memory is not very suitable in
|
||||
* current application architecture, because Cycles is multi-thread application and already uses
|
||||
* two different pointer for host activity and device activity, and also has to perform all
|
||||
* needed memory transfer operations. So, USM device memory type has been used for oneAPI device
|
||||
* in order to better fit in Cycles architecture. */
|
||||
# ifndef WITH_ONEAPI_SYCL_HOST_TASK
|
||||
return sycl::malloc_device(memory_size, *queue);
|
||||
# else
|
||||
@ -646,9 +729,26 @@ void OneapiDevice::usm_free(SyclQueue *queue_, void *usm_ptr)
|
||||
bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t num_bytes)
|
||||
{
|
||||
assert(queue_);
|
||||
/* sycl::queue::memcpy may crash if the queue is in an invalid state due to previous
|
||||
* runtime errors. It's better to avoid running memory operations in that case.
|
||||
* The render will be canceled and the queue will be destroyed anyway. */
|
||||
if (have_error())
|
||||
return false;
|
||||
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
OneapiDevice::check_usm(queue_, dest, true);
|
||||
OneapiDevice::check_usm(queue_, src, true);
|
||||
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
|
||||
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
|
||||
/* Unknown here means, that this is not an USM allocation, which implies that this is
|
||||
* some generic C++ allocation, so we could use C++ memcpy directly with USM host. */
|
||||
if ((dest_type == sycl::usm::alloc::host || dest_type == sycl::usm::alloc::unknown) &&
|
||||
(src_type == sycl::usm::alloc::host || src_type == sycl::usm::alloc::unknown))
|
||||
{
|
||||
memcpy(dest, src, num_bytes);
|
||||
return true;
|
||||
}
|
||||
|
||||
try {
|
||||
sycl::event mem_event = queue->memcpy(dest, src, num_bytes);
|
||||
# ifdef WITH_CYCLES_DEBUG
|
||||
@ -658,8 +758,6 @@ bool OneapiDevice::usm_memcpy(SyclQueue *queue_, void *dest, void *src, size_t n
|
||||
mem_event.wait_and_throw();
|
||||
return true;
|
||||
# else
|
||||
sycl::usm::alloc dest_type = get_pointer_type(dest, queue->get_context());
|
||||
sycl::usm::alloc src_type = get_pointer_type(src, queue->get_context());
|
||||
bool from_device_to_host = dest_type == sycl::usm::alloc::host &&
|
||||
src_type == sycl::usm::alloc::device;
|
||||
bool host_or_device_memop_with_offset = dest_type == sycl::usm::alloc::unknown ||
|
||||
@ -684,6 +782,12 @@ bool OneapiDevice::usm_memset(SyclQueue *queue_,
|
||||
size_t num_bytes)
|
||||
{
|
||||
assert(queue_);
|
||||
/* sycl::queue::memset may crash if the queue is in an invalid state due to previous
|
||||
* runtime errors. It's better to avoid running memory operations in that case.
|
||||
* The render will be canceled and the queue will be destroyed anyway. */
|
||||
if (have_error())
|
||||
return false;
|
||||
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
OneapiDevice::check_usm(queue_, usm_ptr, true);
|
||||
try {
|
||||
@ -735,7 +839,7 @@ void OneapiDevice::set_global_memory(SyclQueue *queue_,
|
||||
assert(memory_name);
|
||||
assert(memory_device_pointer);
|
||||
KernelGlobalsGPU *globals = (KernelGlobalsGPU *)kernel_globals;
|
||||
OneapiDevice::check_usm(queue_, memory_device_pointer);
|
||||
OneapiDevice::check_usm(queue_, memory_device_pointer, true);
|
||||
OneapiDevice::check_usm(queue_, kernel_globals, true);
|
||||
|
||||
std::string matched_name(memory_name);
|
||||
@ -874,11 +978,11 @@ void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue,
|
||||
|
||||
/* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows
|
||||
* since Windows driver 101.3268. */
|
||||
static const int lowest_supported_driver_version_win = 1015186;
|
||||
static const int lowest_supported_driver_version_win = 1015518;
|
||||
# ifdef _WIN32
|
||||
/* For Windows driver 101.5186, compute-runtime version is 28044.
|
||||
/* For Windows driver 101.5518, compute-runtime version is 28044.
|
||||
* This information is returned by `ocloc query OCL_DRIVER_VERSION`.*/
|
||||
static const int lowest_supported_driver_version_neo = 28044;
|
||||
static const int lowest_supported_driver_version_neo = 29283;
|
||||
# else
|
||||
static const int lowest_supported_driver_version_neo = 27642;
|
||||
# endif
|
||||
|
@ -21,17 +21,19 @@ typedef void (*OneAPIDeviceIteratorCallback)(const char *id,
|
||||
bool oidn_support,
|
||||
void *user_ptr);
|
||||
|
||||
class OneapiDevice : public Device {
|
||||
class OneapiDevice : public GPUDevice {
|
||||
private:
|
||||
SyclQueue *device_queue_;
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
RTCDevice embree_device;
|
||||
RTCScene embree_scene;
|
||||
# if RTC_VERSION >= 40302
|
||||
thread_mutex scene_data_mutex;
|
||||
vector<RTCScene> all_embree_scenes;
|
||||
# endif
|
||||
# endif
|
||||
using ConstMemMap = map<string, device_vector<uchar> *>;
|
||||
ConstMemMap const_mem_map_;
|
||||
device_vector<TextureInfo> texture_info_;
|
||||
bool need_texture_info_;
|
||||
void *kg_memory_;
|
||||
void *kg_memory_device_;
|
||||
size_t kg_memory_size_ = (size_t)0;
|
||||
@ -41,6 +43,8 @@ class OneapiDevice : public Device {
|
||||
unsigned int kernel_features = 0;
|
||||
int scene_max_shaders_ = 0;
|
||||
|
||||
size_t get_free_mem() const;
|
||||
|
||||
public:
|
||||
virtual BVHLayoutMask get_bvh_layout_mask(uint kernel_features) const override;
|
||||
|
||||
@ -54,13 +58,15 @@ class OneapiDevice : public Device {
|
||||
|
||||
bool load_kernels(const uint kernel_features) override;
|
||||
|
||||
void load_texture_info();
|
||||
void reserve_private_memory(const uint kernel_features);
|
||||
|
||||
void generic_alloc(device_memory &mem);
|
||||
|
||||
void generic_copy_to(device_memory &mem);
|
||||
|
||||
void generic_free(device_memory &mem);
|
||||
virtual void get_device_memory_info(size_t &total, size_t &free) override;
|
||||
virtual bool alloc_device(void *&device_pointer, size_t size) override;
|
||||
virtual void free_device(void *device_pointer) override;
|
||||
virtual bool alloc_host(void *&shared_pointer, size_t size) override;
|
||||
virtual void free_host(void *shared_pointer) override;
|
||||
virtual void transform_host_pointer(void *&device_pointer, void *&shared_pointer) override;
|
||||
virtual void copy_host_to_device(void *device_pointer, void *host_pointer, size_t size) override;
|
||||
|
||||
string oneapi_error_message();
|
||||
|
||||
|
@ -142,11 +142,13 @@ ccl_device_inline float longitudinal_scattering(
|
||||
if (v <= 0.1f) {
|
||||
float i0 = log_bessel_I0(cos_arg);
|
||||
float val = expf(i0 - sin_arg - inv_v + 0.6931f + logf(0.5f * inv_v));
|
||||
kernel_assert(isfinite_safe(val));
|
||||
return val;
|
||||
}
|
||||
else {
|
||||
float i0 = bessel_I0(cos_arg);
|
||||
float val = (expf(-sin_arg) * i0) / (sinhf(inv_v) * 2.0f * v);
|
||||
kernel_assert(isfinite_safe(val));
|
||||
return val;
|
||||
}
|
||||
}
|
||||
@ -185,7 +187,7 @@ ccl_device int bsdf_hair_chiang_setup(ccl_private ShaderData *sd, ccl_private Ch
|
||||
kernel_assert(isfinite_safe(bsdf->h));
|
||||
|
||||
bsdf->N = Y;
|
||||
|
||||
bsdf->alpha = -bsdf->alpha;
|
||||
return SD_BSDF | SD_BSDF_HAS_EVAL | SD_BSDF_HAS_TRANSMISSION;
|
||||
}
|
||||
|
||||
@ -224,9 +226,9 @@ ccl_device_inline void hair_attenuation(
|
||||
Ap_energy[3] *= fac;
|
||||
}
|
||||
|
||||
/* Given the tilt angle, generate the rotated theta_i for the different bounces. */
|
||||
ccl_device_inline void hair_alpha_angles(float sin_theta_i,
|
||||
float cos_theta_i,
|
||||
/* Update sin_theta_o and cos_theta_o to account for scale tilt for each bounce. */
|
||||
ccl_device_inline void hair_alpha_angles(float sin_theta_o,
|
||||
float cos_theta_o,
|
||||
float alpha,
|
||||
ccl_private float *angles)
|
||||
{
|
||||
@ -237,12 +239,12 @@ ccl_device_inline void hair_alpha_angles(float sin_theta_i,
|
||||
float sin_4alpha = 2.0f * sin_2alpha * cos_2alpha;
|
||||
float cos_4alpha = sqr(cos_2alpha) - sqr(sin_2alpha);
|
||||
|
||||
angles[0] = sin_theta_i * cos_2alpha + cos_theta_i * sin_2alpha;
|
||||
angles[1] = fabsf(cos_theta_i * cos_2alpha - sin_theta_i * sin_2alpha);
|
||||
angles[2] = sin_theta_i * cos_1alpha - cos_theta_i * sin_1alpha;
|
||||
angles[3] = fabsf(cos_theta_i * cos_1alpha + sin_theta_i * sin_1alpha);
|
||||
angles[4] = sin_theta_i * cos_4alpha - cos_theta_i * sin_4alpha;
|
||||
angles[5] = fabsf(cos_theta_i * cos_4alpha + sin_theta_i * sin_4alpha);
|
||||
angles[0] = sin_theta_o * cos_2alpha - cos_theta_o * sin_2alpha;
|
||||
angles[1] = fabsf(cos_theta_o * cos_2alpha + sin_theta_o * sin_2alpha);
|
||||
angles[2] = sin_theta_o * cos_1alpha + cos_theta_o * sin_1alpha;
|
||||
angles[3] = fabsf(cos_theta_o * cos_1alpha - sin_theta_o * sin_1alpha);
|
||||
angles[4] = sin_theta_o * cos_4alpha + cos_theta_o * sin_4alpha;
|
||||
angles[5] = fabsf(cos_theta_o * cos_4alpha - sin_theta_o * sin_4alpha);
|
||||
}
|
||||
|
||||
/* Evaluation function for our shader. */
|
||||
@ -293,17 +295,17 @@ ccl_device Spectrum bsdf_hair_chiang_eval(KernelGlobals kg,
|
||||
const float phi = phi_i - phi_o;
|
||||
|
||||
float angles[6];
|
||||
hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
|
||||
hair_alpha_angles(sin_theta_o, cos_theta_o, bsdf->alpha, angles);
|
||||
|
||||
Spectrum F = zero_spectrum();
|
||||
float F_energy = 0.0f;
|
||||
|
||||
/* Primary specular (R), Transmission (TT) and Secondary Specular (TRT). */
|
||||
for (int i = 0; i < 3; i++) {
|
||||
const float Mp = longitudinal_scattering(angles[2 * i],
|
||||
const float Mp = longitudinal_scattering(sin_theta_i,
|
||||
cos_theta_i,
|
||||
angles[2 * i],
|
||||
angles[2 * i + 1],
|
||||
sin_theta_o,
|
||||
cos_theta_o,
|
||||
(i == 0) ? bsdf->m0_roughness :
|
||||
(i == 1) ? 0.25f * bsdf->v :
|
||||
4.0f * bsdf->v);
|
||||
@ -347,6 +349,7 @@ ccl_device int bsdf_hair_chiang_sample(KernelGlobals kg,
|
||||
kernel_assert(fabsf(dot(X, Y)) < 1e-3f);
|
||||
const float3 Z = safe_normalize(cross(X, Y));
|
||||
|
||||
/* `wo` in PBRT. */
|
||||
const float3 local_O = make_float3(dot(sd->wi, X), dot(sd->wi, Y), dot(sd->wi, Z));
|
||||
|
||||
const float sin_theta_o = local_O.x;
|
||||
@ -387,19 +390,20 @@ ccl_device int bsdf_hair_chiang_sample(KernelGlobals kg,
|
||||
v *= 4.0f;
|
||||
}
|
||||
|
||||
float angles[6];
|
||||
hair_alpha_angles(sin_theta_o, cos_theta_o, bsdf->alpha, angles);
|
||||
float sin_theta_o_tilted = sin_theta_o;
|
||||
float cos_theta_o_tilted = cos_theta_o;
|
||||
if (p < 3) {
|
||||
sin_theta_o_tilted = angles[2 * p];
|
||||
cos_theta_o_tilted = angles[2 * p + 1];
|
||||
}
|
||||
rand.z = max(rand.z, 1e-5f);
|
||||
const float fac = 1.0f + v * logf(rand.z + (1.0f - rand.z) * expf(-2.0f / v));
|
||||
float sin_theta_i = -fac * sin_theta_o +
|
||||
cos_from_sin(fac) * cosf(M_2PI_F * rand.y) * cos_theta_o;
|
||||
float sin_theta_i = -fac * sin_theta_o_tilted +
|
||||
sin_from_cos(fac) * cosf(M_2PI_F * rand.y) * cos_theta_o_tilted;
|
||||
float cos_theta_i = cos_from_sin(sin_theta_i);
|
||||
|
||||
float angles[6];
|
||||
if (p < 3) {
|
||||
hair_alpha_angles(sin_theta_i, cos_theta_i, -bsdf->alpha, angles);
|
||||
sin_theta_i = angles[2 * p];
|
||||
cos_theta_i = angles[2 * p + 1];
|
||||
}
|
||||
|
||||
float phi;
|
||||
if (p < 3) {
|
||||
phi = delta_phi(p, gamma_o, gamma_t) + sample_trimmed_logistic(rand.x, bsdf->s);
|
||||
@ -409,17 +413,15 @@ ccl_device int bsdf_hair_chiang_sample(KernelGlobals kg,
|
||||
}
|
||||
const float phi_i = phi_o + phi;
|
||||
|
||||
hair_alpha_angles(sin_theta_i, cos_theta_i, bsdf->alpha, angles);
|
||||
|
||||
Spectrum F = zero_spectrum();
|
||||
float F_energy = 0.0f;
|
||||
|
||||
/* Primary specular (R), Transmission (TT) and Secondary Specular (TRT). */
|
||||
for (int i = 0; i < 3; i++) {
|
||||
const float Mp = longitudinal_scattering(angles[2 * i],
|
||||
const float Mp = longitudinal_scattering(sin_theta_i,
|
||||
cos_theta_i,
|
||||
angles[2 * i],
|
||||
angles[2 * i + 1],
|
||||
sin_theta_o,
|
||||
cos_theta_o,
|
||||
(i == 0) ? bsdf->m0_roughness :
|
||||
(i == 1) ? 0.25f * bsdf->v :
|
||||
4.0f * bsdf->v);
|
||||
|
@ -133,6 +133,26 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
|
||||
return is_computation_correct;
|
||||
}
|
||||
|
||||
bool oneapi_zero_memory_on_device(SyclQueue *queue_, void *device_pointer, size_t num_bytes)
|
||||
{
|
||||
assert(queue_);
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
try {
|
||||
queue->submit([&](sycl::handler &cgh) {
|
||||
cgh.parallel_for(num_bytes,
|
||||
[=](sycl::id<1> idx) { ((char *)device_pointer)[idx.get(0)] = (char)0; });
|
||||
});
|
||||
queue->wait_and_throw();
|
||||
return true;
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
if (s_error_cb) {
|
||||
s_error_cb(e.what(), s_error_user_ptr);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
|
||||
const uint kernel_features)
|
||||
{
|
||||
|
@ -44,6 +44,9 @@ extern "C" {
|
||||
# endif
|
||||
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_run_test_kernel(SyclQueue *queue_);
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_zero_memory_on_device(SyclQueue *queue_,
|
||||
void *device_pointer,
|
||||
size_t num_bytes);
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT void oneapi_set_error_cb(OneAPIErrorCallback cb, void *user_ptr);
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT size_t oneapi_suggested_gpu_kernel_size(const DeviceKernel kernel);
|
||||
CYCLES_KERNEL_ONEAPI_EXPORT bool oneapi_enqueue_kernel(KernelContext *context,
|
||||
|
@ -39,6 +39,11 @@ ccl_device int shadow_linking_pick_mesh_intersection(KernelGlobals kg,
|
||||
|
||||
const uint visibility = path_state_ray_visibility(state);
|
||||
|
||||
int transparent_bounce = INTEGRATOR_STATE(state, path, transparent_bounce);
|
||||
int volume_bounce = INTEGRATOR_STATE(state, path, volume_bounce);
|
||||
|
||||
/* TODO: Replace the look with sequential calls to the kernel, similar to the transparent shadow
|
||||
* intersection kernel. */
|
||||
for (int i = 0; i < SHADOW_LINK_MAX_INTERSECTION_COUNT; i++) {
|
||||
Intersection current_isect ccl_optional_struct_init;
|
||||
current_isect.object = OBJECT_NONE;
|
||||
@ -68,12 +73,33 @@ ccl_device int shadow_linking_pick_mesh_intersection(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
|
||||
const uint blocker_set = kernel_data_fetch(objects, current_isect.object).blocker_shadow_set;
|
||||
if (blocker_set == 0) {
|
||||
/* Contribution from the lights past the default blocker is accumulated using the main path.
|
||||
*/
|
||||
ray->tmax = current_isect.t;
|
||||
break;
|
||||
/* Contribution from the lights past the default opaque blocker is accumulated
|
||||
* using the main path. */
|
||||
if (!(shader_flags & (SD_HAS_ONLY_VOLUME | SD_HAS_TRANSPARENT_SHADOW))) {
|
||||
const uint blocker_set = kernel_data_fetch(objects, current_isect.object).blocker_shadow_set;
|
||||
if (blocker_set == 0) {
|
||||
ray->tmax = current_isect.t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Lights past the maximum allowed transparency bounce do not contribute any light, so
|
||||
* consider them as fully blocked and only consider lights prior to this intersection. */
|
||||
if (shader_flags & SD_HAS_TRANSPARENT_SHADOW) {
|
||||
++transparent_bounce;
|
||||
if (transparent_bounce >= kernel_data.integrator.transparent_max_bounce) {
|
||||
ray->tmax = current_isect.t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
kernel_assert(shader_flags & SD_HAS_ONLY_VOLUME);
|
||||
++volume_bounce;
|
||||
if (volume_bounce >= kernel_data.integrator.max_volume_bounce) {
|
||||
ray->tmax = current_isect.t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Move the ray forward. */
|
||||
|
@ -177,27 +177,27 @@ ccl_device_forceinline bool triangle_light_sample(KernelGlobals kg,
|
||||
const float cos_a = dot(B, C);
|
||||
const float cos_b = dot(A, C);
|
||||
const float cos_c = dot(A, B);
|
||||
const float sin_b_sin_c_2 = (1.0f - sqr(cos_b)) * (1.0f - sqr(cos_c));
|
||||
|
||||
const float mixed_product = fabsf(dot(A, cross(B, C)));
|
||||
|
||||
/* The area of the spherical triangle is equal to the subtended solid angle. */
|
||||
const float solid_angle = 2.0f * fast_atan2f(mixed_product, (1.0f + cos_a + cos_b + cos_c));
|
||||
|
||||
/* Compute the angle at A. */
|
||||
const float cos_alpha = dot(safe_normalize(cross(A, B)), safe_normalize(cross(A, C))) |