1
1

Compare commits

..

14 Commits

1473 changed files with 53727 additions and 108513 deletions

View File

@@ -403,7 +403,7 @@ option(WITH_CYCLES_CUDA_BINARIES "Build Cycles CUDA binaries" OFF)
option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF)
option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF)
mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL)
set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX)" )
set(CYCLES_TEST_DEVICES CPU CACHE STRING "Run regression tests on the specified device types (CPU CUDA OPTIX OPENCL)" )
set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for")
mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH)
unset(PLATFORM_DEFAULT)
@@ -418,10 +418,12 @@ mark_as_advanced(WITH_CYCLES_DEBUG_NAN)
mark_as_advanced(WITH_CYCLES_NATIVE_ONLY)
option(WITH_CYCLES_DEVICE_CUDA "Enable Cycles CUDA compute support" ON)
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles OptiX support" ON)
option(WITH_CYCLES_DEVICE_HIP "Enable Cycles HIP support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
option(WITH_CYCLES_DEVICE_OPTIX "Enable Cycles OptiX support" OFF)
option(WITH_CYCLES_DEVICE_OPENCL "Enable Cycles OpenCL compute support" ON)
option(WITH_CYCLES_NETWORK "Enable Cycles compute over network support (EXPERIMENTAL and unfinished)" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_CUDA)
mark_as_advanced(WITH_CYCLES_DEVICE_OPENCL)
mark_as_advanced(WITH_CYCLES_NETWORK)
option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime" ON)
mark_as_advanced(WITH_CUDA_DYNLOAD)
@@ -823,11 +825,6 @@ if(NOT WITH_CUDA_DYNLOAD)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP)
# Currently HIP must be dynamically loaded, this may change in future toolkits
set(WITH_HIP_DYNLOAD ON)
endif()
#-----------------------------------------------------------------------------
# Check check if submodules are cloned
@@ -1857,9 +1854,6 @@ elseif(WITH_CYCLES_STANDALONE)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(extern/cuew)
endif()
if(WITH_HIP_DYNLOAD)
add_subdirectory(extern/hipew)
endif()
if(NOT WITH_SYSTEM_GLEW)
add_subdirectory(extern/glew)
endif()

View File

@@ -70,18 +70,16 @@
}
--- a/libavcodec/rl.c
+++ b/libavcodec/rl.c
@@ -71,17 +71,19 @@
@@ -71,7 +71,7 @@ av_cold void ff_rl_init(RLTable *rl,
av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
{
int i, q;
- VLC_TYPE table[1500][2] = {{0}};
+ VLC_TYPE (*table)[2] = av_calloc(sizeof(VLC_TYPE), 1500 * 2);
VLC vlc = { .table = table, .table_allocated = static_size };
- av_assert0(static_size <= FF_ARRAY_ELEMS(table));
+ av_assert0(static_size < 1500);
av_assert0(static_size <= FF_ARRAY_ELEMS(table));
init_vlc(&vlc, 9, rl->n + 1, &rl->table_vlc[0][1], 4, 2, &rl->table_vlc[0][0], 4, 2, INIT_VLC_USE_NEW_STATIC);
for (q = 0; q < 32; q++) {
@@ -80,8 +80,10 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
int qmul = q * 2;
int qadd = (q - 1) | 1;
@@ -93,7 +91,7 @@
if (q == 0) {
qmul = 1;
@@ -113,4 +115,5 @@
@@ -113,4 +115,5 @@ av_cold void ff_rl_init_vlc(RLTable *rl, unsigned static_size)
rl->rl_vlc[q][i].run = run;
}
}

View File

@@ -33,23 +33,11 @@ FIND_PATH(OPTIX_INCLUDE_DIR
include
)
IF(EXISTS "${OPTIX_INCLUDE_DIR}/optix.h")
FILE(STRINGS "${OPTIX_INCLUDE_DIR}/optix.h" _optix_version REGEX "^#define OPTIX_VERSION[ \t].*$")
STRING(REGEX MATCHALL "[0-9]+" _optix_version ${_optix_version})
MATH(EXPR _optix_version_major "${_optix_version} / 10000")
MATH(EXPR _optix_version_minor "(${_optix_version} % 10000) / 100")
MATH(EXPR _optix_version_patch "${_optix_version} % 100")
SET(OPTIX_VERSION "${_optix_version_major}.${_optix_version_minor}.${_optix_version_patch}")
ENDIF()
# handle the QUIETLY and REQUIRED arguments and set OPTIX_FOUND to TRUE if
# all listed variables are TRUE
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(OptiX
REQUIRED_VARS OPTIX_INCLUDE_DIR
VERSION_VAR OPTIX_VERSION)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(OptiX DEFAULT_MSG
OPTIX_INCLUDE_DIR)
IF(OPTIX_FOUND)
SET(OPTIX_INCLUDE_DIRS ${OPTIX_INCLUDE_DIR})
@@ -57,7 +45,6 @@ ENDIF()
MARK_AS_ADVANCED(
OPTIX_INCLUDE_DIR
OPTIX_VERSION
)
UNSET(_optix_SEARCH_DIRS)

View File

@@ -24,7 +24,6 @@ import project_source_info
import subprocess
import sys
import os
import tempfile
from typing import (
Any,
@@ -36,6 +35,7 @@ USE_QUIET = (os.environ.get("QUIET", None) is not None)
CHECKER_IGNORE_PREFIX = [
"extern",
"intern/moto",
]
CHECKER_BIN = "cppcheck"
@@ -47,19 +47,13 @@ CHECKER_ARGS = [
"--max-configs=1", # speeds up execution
# "--check-config", # when includes are missing
"--enable=all", # if you want sixty hundred pedantic suggestions
# Quiet output, otherwise all defines/includes are printed (overly verbose).
# Only enable this for troubleshooting (if defines are not set as expected for example).
"--quiet",
# NOTE: `--cppcheck-build-dir=<dir>` is added later as a temporary directory.
]
if USE_QUIET:
CHECKER_ARGS.append("--quiet")
def cppcheck() -> None:
def main() -> None:
source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
source_defines = project_source_info.build_defines_as_args()
@@ -84,10 +78,7 @@ def cppcheck() -> None:
percent_str = "[" + ("%.2f]" % percent).rjust(7) + " %:"
sys.stdout.flush()
sys.stdout.write("%s %s\n" % (
percent_str,
os.path.relpath(c, project_source_info.SOURCE_DIR)
))
sys.stdout.write("%s " % percent_str)
return subprocess.Popen(cmd)
@@ -99,11 +90,5 @@ def cppcheck() -> None:
print("Finished!")
def main() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
CHECKER_ARGS.append("--cppcheck-build-dir=" + temp_dir)
cppcheck()
if __name__ == "__main__":
main()

View File

@@ -243,9 +243,7 @@ def build_defines_as_args() -> List[str]:
# use this module.
def queue_processes(
process_funcs: Sequence[Tuple[Callable[..., subprocess.Popen[Any]], Tuple[Any, ...]]],
*,
job_total: int =-1,
sleep: float = 0.1,
) -> None:
""" Takes a list of function arg pairs, each function must return a process
"""
@@ -273,20 +271,14 @@ def queue_processes(
if len(processes) <= job_total:
break
time.sleep(sleep)
else:
time.sleep(0.1)
sys.stdout.flush()
sys.stderr.flush()
processes.append(func(*args))
# Don't return until all jobs have finished.
while 1:
processes[:] = [p for p in processes if p.poll() is None]
if not processes:
break
time.sleep(sleep)
def main() -> None:
if not os.path.exists(join(CMAKE_DIR, "CMakeCache.txt")):

View File

@@ -55,7 +55,7 @@ buildbot:
cuda11:
version: '11.4.1'
optix:
version: '7.3.0'
version: '7.1.0'
cmake:
default:
version: any

View File

@@ -200,20 +200,16 @@ def submodules_update(args, release_version, branch):
if msg:
skip_msg += submodule_path + " skipped: " + msg + "\n"
else:
# Find a matching branch that exists.
call([args.git_command, "fetch", "origin"])
if make_utils.git_branch_exists(args.git_command, submodule_branch):
pass
elif make_utils.git_branch_exists(args.git_command, submodule_branch_fallback):
submodule_branch = submodule_branch_fallback
else:
submodule_branch = None
# Switch to branch and pull.
if submodule_branch:
if make_utils.git_branch(args.git_command) != submodule_branch:
call([args.git_command, "checkout", submodule_branch])
call([args.git_command, "pull", "--rebase", "origin", submodule_branch])
# We are using `exit_on_error=False` here because sub-modules are allowed to not have requested branch,
# in which case falling back to default back-up branch is fine.
if make_utils.git_branch(args.git_command) != submodule_branch:
call([args.git_command, "fetch", "origin"])
call([args.git_command, "checkout", submodule_branch], exit_on_error=False)
call([args.git_command, "pull", "--rebase", "origin", submodule_branch], exit_on_error=False)
# If we cannot find the specified branch for this submodule, fallback to default one (aka master).
if make_utils.git_branch(args.git_command) != submodule_branch:
call([args.git_command, "checkout", submodule_branch_fallback])
call([args.git_command, "pull", "--rebase", "origin", submodule_branch_fallback])
finally:
os.chdir(cwd)

View File

@@ -8,19 +8,14 @@ import subprocess
import sys
def call(cmd, exit_on_error=True, silent=False):
if not silent:
print(" ".join(cmd))
def call(cmd, exit_on_error=True):
print(" ".join(cmd))
# Flush to ensure correct order output on Windows.
sys.stdout.flush()
sys.stderr.flush()
if silent:
retcode = subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
else:
retcode = subprocess.call(cmd)
retcode = subprocess.call(cmd)
if exit_on_error and retcode != 0:
sys.exit(retcode)
return retcode
@@ -43,11 +38,6 @@ def check_output(cmd, exit_on_error=True):
return output.strip()
def git_branch_exists(git_command, branch):
return call([git_command, "rev-parse", "--verify", branch], exit_on_error=False, silent=True) == 0 or \
call([git_command, "rev-parse", "--verify", "remotes/origin/" + branch], exit_on_error=False, silent=True) == 0
def git_branch(git_command):
# Get current branch name.
try:

View File

@@ -1,40 +0,0 @@
"""
This method enables conversions between Local and Pose space for bones in
the middle of updating the armature without having to update dependencies
after each change, by manually carrying updated matrices in a recursive walk.
"""
def set_pose_matrices(obj, matrix_map):
"Assign pose space matrices of all bones at once, ignoring constraints."
def rec(pbone, parent_matrix):
matrix = matrix_map[pbone.name]
## Instead of:
# pbone.matrix = matrix
# bpy.context.view_layer.update()
# Compute and assign local matrix, using the new parent matrix
if pbone.parent:
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
matrix,
pbone.bone.matrix_local,
parent_matrix=parent_matrix,
parent_matrix_local=pbone.parent.bone.matrix_local,
invert=True
)
else:
pbone.matrix_basis = pbone.bone.convert_local_to_pose(
matrix,
pbone.bone.matrix_local,
invert=True
)
# Recursively process children, passing the new matrix through
for child in pbone.children:
rec(child, matrix)
# Scan all bone trees from their roots
for pbone in obj.pose.bones:
if not pbone.parent:
rec(pbone, None)

View File

@@ -1101,7 +1101,6 @@ context_type_map = {
"scene": ("Scene", False),
"sculpt_object": ("Object", False),
"selectable_objects": ("Object", True),
"selected_asset_files": ("FileSelectEntry", True),
"selected_bones": ("EditBone", True),
"selected_editable_bones": ("EditBone", True),
"selected_editable_fcurves": ("FCurve", True),

View File

@@ -67,12 +67,9 @@ endif()
if(WITH_CYCLES OR WITH_COMPOSITOR OR WITH_OPENSUBDIV)
add_subdirectory(clew)
if((WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX) AND WITH_CUDA_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
add_subdirectory(cuew)
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
add_subdirectory(hipew)
endif()
endif()
if(WITH_GHOST_X11 AND WITH_GHOST_XDND)

View File

@@ -129,7 +129,6 @@ set(SRC
src/util/Barrier.cpp
src/util/Buffer.cpp
src/util/BufferReader.cpp
src/util/RingBuffer.cpp
src/util/StreamBuffer.cpp
src/util/ThreadPool.cpp
)
@@ -246,7 +245,6 @@ set(PUBLIC_HDR
include/util/BufferReader.h
include/util/ILockable.h
include/util/Math3D.h
include/util/RingBuffer.h
include/util/StreamBuffer.h
include/util/ThreadPool.h
)

View File

@@ -102,30 +102,26 @@ AUD_API int AUD_Sound_getFileStreams(AUD_Sound* sound, AUD_StreamInfo **stream_i
if(file)
{
try
{
auto streams = file->queryStreams();
size_t size = sizeof(AUD_StreamInfo) * streams.size();
if(!size)
{
*stream_infos = nullptr;
return 0;
}
*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
std::memcpy(*stream_infos, streams.data(), size);
return streams.size();
}
catch(Exception&)
auto streams = file->queryStreams();
size_t size = sizeof(AUD_StreamInfo) * streams.size();
if(!size)
{
*stream_infos = nullptr;
return 0;
}
*stream_infos = reinterpret_cast<AUD_StreamInfo*>(std::malloc(size));
std::memcpy(*stream_infos, streams.data(), size);
return streams.size();
}
else
{
*stream_infos = nullptr;
return 0;
}
*stream_infos = nullptr;
return 0;
}
AUD_API sample_t* AUD_Sound_data(AUD_Sound* sound, int* length, AUD_Specs* specs)

View File

@@ -1,97 +0,0 @@
/*******************************************************************************
* Copyright 2009-2021 Jörg Müller
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#pragma once
/**
* @file RingBuffer.h
* @ingroup util
* The RingBuffer class.
*/
#include "Audaspace.h"
#include "Buffer.h"
#include <cstddef>
AUD_NAMESPACE_BEGIN
/**
* This class is a simple ring buffer in RAM which is 32 Byte aligned and provides
* functionality for concurrent reading and writting without locks.
*/
class AUD_API RingBuffer
{
private:
/// The buffer storing the actual data.
Buffer m_buffer;
/// The reading pointer.
volatile size_t m_read;
/// The writing pointer.
volatile size_t m_write;
// delete copy constructor and operator=
RingBuffer(const RingBuffer&) = delete;
RingBuffer& operator=(const RingBuffer&) = delete;
public:
/**
* Creates a new ring buffer.
* \param size The size of the buffer in bytes.
*/
RingBuffer(int size = 0);
/**
* Returns the pointer to the ring buffer in memory.
*/
sample_t* getBuffer() const;
/**
* Returns the size of the ring buffer in bytes.
*/
int getSize() const;
size_t getReadSize() const;
size_t getWriteSize() const;
size_t read(data_t* target, size_t size);
size_t write(data_t* source, size_t size);
/**
* Resets the ring buffer to a state where nothing has been written or read.
*/
void reset();
/**
* Resizes the ring buffer.
* \param size The new size of the ring buffer, measured in bytes.
*/
void resize(int size);
/**
* Makes sure the ring buffer has a minimum size.
* If size is >= current size, nothing will happen.
* Otherwise the ring buffer is resized with keep as parameter.
* \param size The new minimum size of the ring buffer, measured in bytes.
*/
void assureSize(int size);
};
AUD_NAMESPACE_END

View File

@@ -23,121 +23,95 @@
AUD_NAMESPACE_BEGIN
PulseAudioDevice::PulseAudioSynchronizer::PulseAudioSynchronizer(PulseAudioDevice *device) :
m_device(device)
{
}
double PulseAudioDevice::PulseAudioSynchronizer::getPosition(std::shared_ptr<IHandle> handle)
{
pa_usec_t latency;
int negative;
AUD_pa_stream_get_latency(m_device->m_stream, &latency, &negative);
double delay = m_device->m_ring_buffer.getReadSize() / (AUD_SAMPLE_SIZE(m_device->m_specs) * m_device->m_specs.rate) + latency * 1.0e-6;
return handle->getPosition() - delay;
}
void PulseAudioDevice::updateRingBuffer()
{
unsigned int samplesize = AUD_SAMPLE_SIZE(m_specs);
std::unique_lock<std::mutex> lock(m_mixingLock);
Buffer buffer;
while(m_valid)
{
size_t size = m_ring_buffer.getWriteSize();
size_t sample_count = size / samplesize;
if(sample_count > 0)
{
size = sample_count * samplesize;
buffer.assureSize(size);
mix(reinterpret_cast<data_t*>(buffer.getBuffer()), sample_count);
m_ring_buffer.write(reinterpret_cast<data_t*>(buffer.getBuffer()), size);
}
m_mixingCondition.wait(lock);
}
}
void PulseAudioDevice::PulseAudio_state_callback(pa_context *context, void *data)
{
PulseAudioDevice* device = (PulseAudioDevice*)data;
device->m_state = AUD_pa_context_get_state(context);
std::lock_guard<ILockable> lock(*device);
AUD_pa_threaded_mainloop_signal(device->m_mainloop, 0);
device->m_state = AUD_pa_context_get_state(context);
}
void PulseAudioDevice::PulseAudio_request(pa_stream *stream, size_t total_bytes, void *data)
{
PulseAudioDevice* device = (PulseAudioDevice*)data;
data_t* buffer;
size_t sample_size = AUD_DEVICE_SAMPLE_SIZE(device->m_specs);
void* buffer;
while(total_bytes > 0)
{
size_t num_bytes = total_bytes;
AUD_pa_stream_begin_write(stream, reinterpret_cast<void**>(&buffer), &num_bytes);
AUD_pa_stream_begin_write(stream, &buffer, &num_bytes);
size_t readsamples = device->m_ring_buffer.getReadSize();
device->mix((data_t*)buffer, num_bytes / AUD_DEVICE_SAMPLE_SIZE(device->m_specs));
readsamples = std::min(readsamples, size_t(num_bytes)) / sample_size;
device->m_ring_buffer.read(buffer, readsamples * sample_size);
if(readsamples * sample_size < num_bytes)
std::memset(buffer + readsamples * sample_size, 0, num_bytes - readsamples * sample_size);
if(device->m_mixingLock.try_lock())
{
device->m_mixingCondition.notify_all();
device->m_mixingLock.unlock();
}
AUD_pa_stream_write(stream, reinterpret_cast<void*>(buffer), num_bytes, nullptr, 0, PA_SEEK_RELATIVE);
AUD_pa_stream_write(stream, buffer, num_bytes, nullptr, 0, PA_SEEK_RELATIVE);
total_bytes -= num_bytes;
}
}
void PulseAudioDevice::playing(bool playing)
void PulseAudioDevice::PulseAudio_underflow(pa_stream *stream, void *data)
{
m_playback = playing;
PulseAudioDevice* device = (PulseAudioDevice*)data;
AUD_pa_threaded_mainloop_lock(m_mainloop);
AUD_pa_stream_cork(m_stream, playing ? 0 : 1, nullptr, nullptr);
AUD_pa_threaded_mainloop_unlock(m_mainloop);
DeviceSpecs specs = device->getSpecs();
if(++device->m_underflows > 4 && device->m_buffersize < AUD_DEVICE_SAMPLE_SIZE(specs) * specs.rate * 2)
{
device->m_buffersize <<= 1;
device->m_underflows = 0;
pa_buffer_attr buffer_attr;
buffer_attr.fragsize = -1U;
buffer_attr.maxlength = -1U;
buffer_attr.minreq = -1U;
buffer_attr.prebuf = -1U;
buffer_attr.tlength = device->m_buffersize;
AUD_pa_stream_set_buffer_attr(stream, &buffer_attr, nullptr, nullptr);
}
}
void PulseAudioDevice::runMixingThread()
{
for(;;)
{
{
std::lock_guard<ILockable> lock(*this);
if(shouldStop())
{
AUD_pa_stream_cork(m_stream, 1, nullptr, nullptr);
AUD_pa_stream_flush(m_stream, nullptr, nullptr);
doStop();
return;
}
}
if(AUD_pa_stream_is_corked(m_stream))
AUD_pa_stream_cork(m_stream, 0, nullptr, nullptr);
// similar to AUD_pa_mainloop_iterate(m_mainloop, false, nullptr); except with a longer timeout
AUD_pa_mainloop_prepare(m_mainloop, 1 << 14);
AUD_pa_mainloop_poll(m_mainloop);
AUD_pa_mainloop_dispatch(m_mainloop);
}
}
PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buffersize) :
m_synchronizer(this),
m_playback(false),
m_state(PA_CONTEXT_UNCONNECTED),
m_valid(true),
m_underflows(0)
{
m_mainloop = AUD_pa_threaded_mainloop_new();
m_mainloop = AUD_pa_mainloop_new();
AUD_pa_threaded_mainloop_lock(m_mainloop);
m_context = AUD_pa_context_new(AUD_pa_threaded_mainloop_get_api(m_mainloop), name.c_str());
m_context = AUD_pa_context_new(AUD_pa_mainloop_get_api(m_mainloop), name.c_str());
if(!m_context)
{
AUD_pa_threaded_mainloop_unlock(m_mainloop);
AUD_pa_threaded_mainloop_free(m_mainloop);
AUD_pa_mainloop_free(m_mainloop);
AUD_THROW(DeviceException, "Could not connect to PulseAudio.");
}
@@ -146,26 +120,21 @@ PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buff
AUD_pa_context_connect(m_context, nullptr, PA_CONTEXT_NOFLAGS, nullptr);
AUD_pa_threaded_mainloop_start(m_mainloop);
while(m_state != PA_CONTEXT_READY)
{
switch(m_state)
{
case PA_CONTEXT_FAILED:
case PA_CONTEXT_TERMINATED:
AUD_pa_threaded_mainloop_unlock(m_mainloop);
AUD_pa_threaded_mainloop_stop(m_mainloop);
AUD_pa_context_disconnect(m_context);
AUD_pa_context_unref(m_context);
AUD_pa_threaded_mainloop_free(m_mainloop);
AUD_pa_mainloop_free(m_mainloop);
AUD_THROW(DeviceException, "Could not connect to PulseAudio.");
break;
default:
AUD_pa_threaded_mainloop_wait(m_mainloop);
AUD_pa_mainloop_iterate(m_mainloop, true, nullptr);
break;
}
}
@@ -213,18 +182,16 @@ PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buff
if(!m_stream)
{
AUD_pa_threaded_mainloop_unlock(m_mainloop);
AUD_pa_threaded_mainloop_stop(m_mainloop);
AUD_pa_context_disconnect(m_context);
AUD_pa_context_unref(m_context);
AUD_pa_threaded_mainloop_free(m_mainloop);
AUD_pa_mainloop_free(m_mainloop);
AUD_THROW(DeviceException, "Could not create PulseAudio stream.");
}
AUD_pa_stream_set_write_callback(m_stream, PulseAudio_request, this);
AUD_pa_stream_set_underflow_callback(m_stream, PulseAudio_underflow, this);
buffersize *= AUD_DEVICE_SAMPLE_SIZE(m_specs);
m_buffersize = buffersize;
@@ -237,53 +204,31 @@ PulseAudioDevice::PulseAudioDevice(std::string name, DeviceSpecs specs, int buff
buffer_attr.prebuf = -1U;
buffer_attr.tlength = buffersize;
m_ring_buffer.resize(buffersize);
if(AUD_pa_stream_connect_playback(m_stream, nullptr, &buffer_attr, static_cast<pa_stream_flags_t>(PA_STREAM_INTERPOLATE_TIMING | PA_STREAM_ADJUST_LATENCY | PA_STREAM_AUTO_TIMING_UPDATE), nullptr, nullptr) < 0)
if(AUD_pa_stream_connect_playback(m_stream, nullptr, &buffer_attr, static_cast<pa_stream_flags_t>(PA_STREAM_START_CORKED | PA_STREAM_INTERPOLATE_TIMING | PA_STREAM_ADJUST_LATENCY | PA_STREAM_AUTO_TIMING_UPDATE), nullptr, nullptr) < 0)
{
AUD_pa_threaded_mainloop_unlock(m_mainloop);
AUD_pa_threaded_mainloop_stop(m_mainloop);
AUD_pa_context_disconnect(m_context);
AUD_pa_context_unref(m_context);
AUD_pa_threaded_mainloop_free(m_mainloop);
AUD_pa_mainloop_free(m_mainloop);
AUD_THROW(DeviceException, "Could not connect PulseAudio stream.");
}
AUD_pa_threaded_mainloop_unlock(m_mainloop);
create();
m_mixingThread = std::thread(&PulseAudioDevice::updateRingBuffer, this);
}
PulseAudioDevice::~PulseAudioDevice()
{
m_valid = false;
m_mixingLock.lock();
m_mixingCondition.notify_all();
m_mixingLock.unlock();
m_mixingThread.join();
AUD_pa_threaded_mainloop_stop(m_mainloop);
stopMixingThread();
AUD_pa_context_disconnect(m_context);
AUD_pa_context_unref(m_context);
AUD_pa_threaded_mainloop_free(m_mainloop);
AUD_pa_mainloop_free(m_mainloop);
destroy();
}
ISynchronizer *PulseAudioDevice::getSynchronizer()
{
return &m_synchronizer;
}
class PulseAudioDeviceFactory : public IDeviceFactory
{
private:

View File

@@ -26,11 +26,7 @@
* The PulseAudioDevice class.
*/
#include "devices/SoftwareDevice.h"
#include "util/RingBuffer.h"
#include <condition_variable>
#include <thread>
#include "devices/ThreadedDevice.h"
#include <pulse/pulseaudio.h>
@@ -39,65 +35,17 @@ AUD_NAMESPACE_BEGIN
/**
* This device plays back through PulseAudio, the simple direct media layer.
*/
class AUD_PLUGIN_API PulseAudioDevice : public SoftwareDevice
class AUD_PLUGIN_API PulseAudioDevice : public ThreadedDevice
{
private:
class PulseAudioSynchronizer : public DefaultSynchronizer
{
PulseAudioDevice* m_device;
public:
PulseAudioSynchronizer(PulseAudioDevice* device);
virtual double getPosition(std::shared_ptr<IHandle> handle);
};
/// Synchronizer.
PulseAudioSynchronizer m_synchronizer;
/**
* Whether there is currently playback.
*/
volatile bool m_playback;
pa_threaded_mainloop* m_mainloop;
pa_mainloop* m_mainloop;
pa_context* m_context;
pa_stream* m_stream;
pa_context_state_t m_state;
/**
* The mixing ring buffer.
*/
RingBuffer m_ring_buffer;
/**
* Whether the device is valid.
*/
bool m_valid;
int m_buffersize;
uint32_t m_underflows;
/**
* The mixing thread.
*/
std::thread m_mixingThread;
/**
* Mutex for mixing.
*/
std::mutex m_mixingLock;
/**
* Condition for mixing.
*/
std::condition_variable m_mixingCondition;
/**
* Updates the ring buffer.
*/
AUD_LOCAL void updateRingBuffer();
/**
* Reports the state of the PulseAudio server connection.
* \param context The PulseAudio context.
@@ -113,13 +61,23 @@ private:
*/
AUD_LOCAL static void PulseAudio_request(pa_stream* stream, size_t total_bytes, void* data);
/**
* Reports an underflow from the PulseAudio server.
* Automatically adjusts the latency if this happens too often.
* @param stream The PulseAudio stream.
* \param data The PulseAudio device.
*/
AUD_LOCAL static void PulseAudio_underflow(pa_stream* stream, void* data);
/**
* Streaming thread main function.
*/
AUD_LOCAL void runMixingThread();
// delete copy constructor and operator=
PulseAudioDevice(const PulseAudioDevice&) = delete;
PulseAudioDevice& operator=(const PulseAudioDevice&) = delete;
protected:
virtual void playing(bool playing);
public:
/**
* Opens the PulseAudio audio device for playback.
@@ -135,8 +93,6 @@ public:
*/
virtual ~PulseAudioDevice();
virtual ISynchronizer* getSynchronizer();
/**
* Registers this plugin.
*/

View File

@@ -25,7 +25,6 @@ PULSEAUDIO_SYMBOL(pa_stream_begin_write);
PULSEAUDIO_SYMBOL(pa_stream_connect_playback);
PULSEAUDIO_SYMBOL(pa_stream_cork);
PULSEAUDIO_SYMBOL(pa_stream_flush);
PULSEAUDIO_SYMBOL(pa_stream_get_latency);
PULSEAUDIO_SYMBOL(pa_stream_is_corked);
PULSEAUDIO_SYMBOL(pa_stream_new);
PULSEAUDIO_SYMBOL(pa_stream_set_buffer_attr);
@@ -40,13 +39,3 @@ PULSEAUDIO_SYMBOL(pa_mainloop_iterate);
PULSEAUDIO_SYMBOL(pa_mainloop_prepare);
PULSEAUDIO_SYMBOL(pa_mainloop_poll);
PULSEAUDIO_SYMBOL(pa_mainloop_dispatch);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_free);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_get_api);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_lock);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_new);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_signal);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_start);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_stop);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_unlock);
PULSEAUDIO_SYMBOL(pa_threaded_mainloop_wait);

View File

@@ -1,137 +0,0 @@
/*******************************************************************************
* Copyright 2009-2021 Jörg Müller
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
#include "util/RingBuffer.h"
#include <algorithm>
#include <cstring>
#include <cstdlib>
#define ALIGNMENT 32
#define ALIGN(a) (a + ALIGNMENT - ((long long)a & (ALIGNMENT-1)))
AUD_NAMESPACE_BEGIN
RingBuffer::RingBuffer(int size) :
m_buffer(size),
m_read(0),
m_write(0)
{
}
sample_t* RingBuffer::getBuffer() const
{
return m_buffer.getBuffer();
}
int RingBuffer::getSize() const
{
return m_buffer.getSize();
}
size_t RingBuffer::getReadSize() const
{
size_t read = m_read;
size_t write = m_write;
if(read > write)
return write + getSize() - read;
else
return write - read;
}
size_t RingBuffer::getWriteSize() const
{
size_t read = m_read;
size_t write = m_write;
if(read > write)
return read - write - 1;
else
return read + getSize() - write - 1;
}
size_t RingBuffer::read(data_t* target, size_t size)
{
size = std::min(size, getReadSize());
data_t* buffer = reinterpret_cast<data_t*>(m_buffer.getBuffer());
if(m_read + size > m_buffer.getSize())
{
size_t read_first = m_buffer.getSize() - m_read;
size_t read_second = size - read_first;
std::memcpy(target, buffer + m_read, read_first);
std::memcpy(target + read_first, buffer, read_second);
m_read = read_second;
}
else
{
std::memcpy(target, buffer + m_read, size);
m_read += size;
}
return size;
}
size_t RingBuffer::write(data_t* source, size_t size)
{
size = std::min(size, getWriteSize());
data_t* buffer = reinterpret_cast<data_t*>(m_buffer.getBuffer());
if(m_write + size > m_buffer.getSize())
{
size_t write_first = m_buffer.getSize() - m_write;
size_t write_second = size - write_first;
std::memcpy(buffer + m_write, source, write_first);
std::memcpy(buffer, source + write_first, write_second);
m_write = write_second;
}
else
{
std::memcpy(buffer + m_write, source, size);
m_write += size;
}
return size;
}
void RingBuffer::reset()
{
m_read = 0;
m_write = 0;
}
void RingBuffer::resize(int size)
{
m_buffer.resize(size);
reset();
}
void RingBuffer::assureSize(int size)
{
m_buffer.assureSize(size);
reset();
}
AUD_NAMESPACE_END

View File

@@ -645,8 +645,7 @@ typedef enum CUdevice_P2PAttribute_enum {
CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01,
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02,
CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03,
CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x04,
CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04,
CU_DEVICE_P2P_ATTRIBUTE_ARRAY_ACCESS_ACCESS_SUPPORTED = 0x04,
} CUdevice_P2PAttribute;
typedef void (CUDA_CB *CUstreamCallback)(CUstream hStream, CUresult status, void* userData);

View File

@@ -1,39 +0,0 @@
# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# The Original Code is Copyright (C) 2021, Blender Foundation
# All rights reserved.
# ***** END GPL LICENSE BLOCK *****
set(INC
.
include
)
set(INC_SYS
)
set(SRC
src/hipew.c
include/hipew.h
)
set(LIB
)
blender_add_lib(extern_hipew "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")

File diff suppressed because it is too large Load Diff

View File

@@ -1,533 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/
#ifdef _MSC_VER
# if _MSC_VER < 1900
# define snprintf _snprintf
# endif
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
#include <hipew.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#ifdef _WIN32
# define WIN32_LEAN_AND_MEAN
# define VC_EXTRALEAN
# include <windows.h>
/* Utility macros. */
typedef HMODULE DynamicLibrary;
# define dynamic_library_open(path) LoadLibraryA(path)
# define dynamic_library_close(lib) FreeLibrary(lib)
# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)
#else
# include <dlfcn.h>
typedef void* DynamicLibrary;
# define dynamic_library_open(path) dlopen(path, RTLD_NOW)
# define dynamic_library_close(lib) dlclose(lib)
# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)
#endif
#define _LIBRARY_FIND_CHECKED(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name); \
assert(name);
#define _LIBRARY_FIND(lib, name) \
name = (t##name *)dynamic_library_find(lib, #name);
#define HIP_LIBRARY_FIND_CHECKED(name) \
_LIBRARY_FIND_CHECKED(hip_lib, name)
#define HIP_LIBRARY_FIND(name) _LIBRARY_FIND(hip_lib, name)
static DynamicLibrary hip_lib;
/* Function definitions. */
thipGetErrorName *hipGetErrorName;
thipInit *hipInit;
thipDriverGetVersion *hipDriverGetVersion;
thipGetDevice *hipGetDevice;
thipGetDeviceCount *hipGetDeviceCount;
thipDeviceGetName *hipDeviceGetName;
thipDeviceGetAttribute *hipDeviceGetAttribute;
thipDeviceComputeCapability *hipDeviceComputeCapability;
thipDevicePrimaryCtxRetain *hipDevicePrimaryCtxRetain;
thipDevicePrimaryCtxRelease *hipDevicePrimaryCtxRelease;
thipDevicePrimaryCtxSetFlags *hipDevicePrimaryCtxSetFlags;
thipDevicePrimaryCtxGetState *hipDevicePrimaryCtxGetState;
thipDevicePrimaryCtxReset *hipDevicePrimaryCtxReset;
thipCtxCreate *hipCtxCreate;
thipCtxDestroy *hipCtxDestroy;
thipCtxPushCurrent *hipCtxPushCurrent;
thipCtxPopCurrent *hipCtxPopCurrent;
thipCtxSetCurrent *hipCtxSetCurrent;
thipCtxGetCurrent *hipCtxGetCurrent;
thipCtxGetDevice *hipCtxGetDevice;
thipCtxGetFlags *hipCtxGetFlags;
thipCtxSynchronize *hipCtxSynchronize;
thipDeviceSynchronize *hipDeviceSynchronize;
thipCtxGetCacheConfig *hipCtxGetCacheConfig;
thipCtxSetCacheConfig *hipCtxSetCacheConfig;
thipCtxGetSharedMemConfig *hipCtxGetSharedMemConfig;
thipCtxSetSharedMemConfig *hipCtxSetSharedMemConfig;
thipCtxGetApiVersion *hipCtxGetApiVersion;
thipModuleLoad *hipModuleLoad;
thipModuleLoadData *hipModuleLoadData;
thipModuleLoadDataEx *hipModuleLoadDataEx;
thipModuleUnload *hipModuleUnload;
thipModuleGetFunction *hipModuleGetFunction;
thipModuleGetGlobal *hipModuleGetGlobal;
thipModuleGetTexRef *hipModuleGetTexRef;
thipMemGetInfo *hipMemGetInfo;
thipMalloc *hipMalloc;
thipMemAllocPitch *hipMemAllocPitch;
thipFree *hipFree;
thipMemGetAddressRange *hipMemGetAddressRange;
thipHostMalloc *hipHostMalloc;
thipHostFree *hipHostFree;
thipHostGetDevicePointer *hipHostGetDevicePointer;
thipHostGetFlags *hipHostGetFlags;
thipMallocManaged *hipMallocManaged;
thipDeviceGetByPCIBusId *hipDeviceGetByPCIBusId;
thipDeviceGetPCIBusId *hipDeviceGetPCIBusId;
thipMemcpyPeer *hipMemcpyPeer;
thipMemcpyHtoD *hipMemcpyHtoD;
thipMemcpyDtoH *hipMemcpyDtoH;
thipMemcpyDtoD *hipMemcpyDtoD;
thipDrvMemcpy2DUnaligned *hipDrvMemcpy2DUnaligned;
thipMemcpyParam2D *hipMemcpyParam2D;
thipDrvMemcpy3D *hipDrvMemcpy3D;
thipMemcpyHtoDAsync *hipMemcpyHtoDAsync;
thipMemcpyDtoHAsync *hipMemcpyDtoHAsync;
thipMemcpyParam2DAsync *hipMemcpyParam2DAsync;
thipDrvMemcpy3DAsync *hipDrvMemcpy3DAsync;
thipMemsetD8 *hipMemsetD8;
thipMemsetD16 *hipMemsetD16;
thipMemsetD32 *hipMemsetD32;
thipMemsetD8Async *hipMemsetD8Async;
thipMemsetD16Async *hipMemsetD16Async;
thipMemsetD32Async *hipMemsetD32Async;
thipArrayCreate *hipArrayCreate;
thipArrayDestroy *hipArrayDestroy;
thipArray3DCreate *hipArray3DCreate;
thipStreamCreateWithFlags *hipStreamCreateWithFlags;
thipStreamCreateWithPriority *hipStreamCreateWithPriority;
thipStreamGetPriority *hipStreamGetPriority;
thipStreamGetFlags *hipStreamGetFlags;
thipStreamWaitEvent *hipStreamWaitEvent;
thipStreamAddCallback *hipStreamAddCallback;
thipStreamQuery *hipStreamQuery;
thipStreamSynchronize *hipStreamSynchronize;
thipStreamDestroy *hipStreamDestroy;
thipEventCreateWithFlags *hipEventCreateWithFlags;
thipEventRecord *hipEventRecord;
thipEventQuery *hipEventQuery;
thipEventSynchronize *hipEventSynchronize;
thipEventDestroy *hipEventDestroy;
thipEventElapsedTime *hipEventElapsedTime;
thipFuncGetAttribute *hipFuncGetAttribute;
thipFuncSetCacheConfig *hipFuncSetCacheConfig;
thipModuleLaunchKernel *hipModuleLaunchKernel;
thipDrvOccupancyMaxActiveBlocksPerMultiprocessor *hipDrvOccupancyMaxActiveBlocksPerMultiprocessor;
thipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags *hipDrvOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
thipModuleOccupancyMaxPotentialBlockSize *hipModuleOccupancyMaxPotentialBlockSize;
thipTexRefSetArray *hipTexRefSetArray;
thipTexRefSetAddress *hipTexRefSetAddress;
thipTexRefSetAddress2D *hipTexRefSetAddress2D;
thipTexRefSetFormat *hipTexRefSetFormat;
thipTexRefSetAddressMode *hipTexRefSetAddressMode;
thipTexRefSetFilterMode *hipTexRefSetFilterMode;
thipTexRefSetFlags *hipTexRefSetFlags;
thipTexRefGetAddress *hipTexRefGetAddress;
thipTexRefGetArray *hipTexRefGetArray;
thipTexRefGetAddressMode *hipTexRefGetAddressMode;
thipTexObjectCreate *hipTexObjectCreate;
thipTexObjectDestroy *hipTexObjectDestroy;
thipDeviceCanAccessPeer *hipDeviceCanAccessPeer;
thipCtxEnablePeerAccess *hipCtxEnablePeerAccess;
thipCtxDisablePeerAccess *hipCtxDisablePeerAccess;
thipDeviceGetP2PAttribute *hipDeviceGetP2PAttribute;
thipGraphicsUnregisterResource *hipGraphicsUnregisterResource;
thipGraphicsMapResources *hipGraphicsMapResources;
thipGraphicsUnmapResources *hipGraphicsUnmapResources;
thipGraphicsResourceGetMappedPointer *hipGraphicsResourceGetMappedPointer;
thipGraphicsGLRegisterBuffer *hipGraphicsGLRegisterBuffer;
thipGLGetDevices *hipGLGetDevices;
static DynamicLibrary dynamic_library_open_find(const char **paths) {
int i = 0;
while (paths[i] != NULL) {
DynamicLibrary lib = dynamic_library_open(paths[i]);
if (lib != NULL) {
return lib;
}
++i;
}
return NULL;
}
/* Implementation function. */
static void hipewHipExit(void) {
if (hip_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(hip_lib);
hip_lib = NULL;
}
}
static int hipewHipInit(void) {
/* Library paths. */
#ifdef _WIN32
/* Expected in c:/windows/system or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *hip_paths[] = {"", NULL};
#else
const char *hip_paths[] = {"/opt/rocm/hip/lib/libamdhip64.so", NULL};
#endif
static int initialized = 0;
static int result = 0;
int error, driver_version;
if (initialized) {
return result;
}
initialized = 1;
error = atexit(hipewHipExit);
if (error) {
result = HIPEW_ERROR_ATEXIT_FAILED;
return result;
}
/* Load library. */
hip_lib = dynamic_library_open_find(hip_paths);
if (hip_lib == NULL) {
result = HIPEW_ERROR_OPEN_FAILED;
return result;
}
/* Fetch all function pointers. */
HIP_LIBRARY_FIND_CHECKED(hipGetErrorName);
HIP_LIBRARY_FIND_CHECKED(hipInit);
HIP_LIBRARY_FIND_CHECKED(hipDriverGetVersion);
HIP_LIBRARY_FIND_CHECKED(hipGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipGetDeviceCount);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetName);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipDeviceComputeCapability);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRetain);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxRelease);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxSetFlags);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxGetState);
HIP_LIBRARY_FIND_CHECKED(hipDevicePrimaryCtxReset);
HIP_LIBRARY_FIND_CHECKED(hipCtxCreate);
HIP_LIBRARY_FIND_CHECKED(hipCtxDestroy);
HIP_LIBRARY_FIND_CHECKED(hipCtxPushCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxPopCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCurrent);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetDevice);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipCtxSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipDeviceSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetSharedMemConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxSetSharedMemConfig);
HIP_LIBRARY_FIND_CHECKED(hipCtxGetApiVersion);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoad);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadData);
HIP_LIBRARY_FIND_CHECKED(hipModuleLoadDataEx);
HIP_LIBRARY_FIND_CHECKED(hipModuleUnload);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetFunction);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetGlobal);
HIP_LIBRARY_FIND_CHECKED(hipModuleGetTexRef);
HIP_LIBRARY_FIND_CHECKED(hipMemGetInfo);
HIP_LIBRARY_FIND_CHECKED(hipMalloc);
HIP_LIBRARY_FIND_CHECKED(hipMemAllocPitch);
HIP_LIBRARY_FIND_CHECKED(hipFree);
HIP_LIBRARY_FIND_CHECKED(hipMemGetAddressRange);
HIP_LIBRARY_FIND_CHECKED(hipHostMalloc);
HIP_LIBRARY_FIND_CHECKED(hipHostFree);
HIP_LIBRARY_FIND_CHECKED(hipHostGetDevicePointer);
HIP_LIBRARY_FIND_CHECKED(hipHostGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipMallocManaged);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetByPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetPCIBusId);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyPeer);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoH);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoD);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2D);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3D);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyHtoDAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyDtoHAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy2DUnaligned);
HIP_LIBRARY_FIND_CHECKED(hipMemcpyParam2DAsync);
HIP_LIBRARY_FIND_CHECKED(hipDrvMemcpy3DAsync);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD8Async);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD16Async);
HIP_LIBRARY_FIND_CHECKED(hipMemsetD32Async);
HIP_LIBRARY_FIND_CHECKED(hipArrayCreate);
HIP_LIBRARY_FIND_CHECKED(hipArrayDestroy);
HIP_LIBRARY_FIND_CHECKED(hipArray3DCreate);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamCreateWithPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetPriority);
HIP_LIBRARY_FIND_CHECKED(hipStreamGetFlags);
HIP_LIBRARY_FIND_CHECKED(hipStreamWaitEvent);
HIP_LIBRARY_FIND_CHECKED(hipStreamAddCallback);
HIP_LIBRARY_FIND_CHECKED(hipStreamQuery);
HIP_LIBRARY_FIND_CHECKED(hipStreamSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipStreamDestroy);
HIP_LIBRARY_FIND_CHECKED(hipEventCreateWithFlags);
HIP_LIBRARY_FIND_CHECKED(hipEventRecord);
HIP_LIBRARY_FIND_CHECKED(hipEventQuery);
HIP_LIBRARY_FIND_CHECKED(hipEventSynchronize);
HIP_LIBRARY_FIND_CHECKED(hipEventDestroy);
HIP_LIBRARY_FIND_CHECKED(hipEventElapsedTime);
HIP_LIBRARY_FIND_CHECKED(hipFuncGetAttribute);
HIP_LIBRARY_FIND_CHECKED(hipFuncSetCacheConfig);
HIP_LIBRARY_FIND_CHECKED(hipModuleLaunchKernel);
HIP_LIBRARY_FIND_CHECKED(hipModuleOccupancyMaxPotentialBlockSize);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetArray);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddress2D);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFormat);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetAddressMode);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFilterMode);
HIP_LIBRARY_FIND_CHECKED(hipTexRefSetFlags);
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddress);
HIP_LIBRARY_FIND_CHECKED(hipTexRefGetAddressMode);
HIP_LIBRARY_FIND_CHECKED(hipTexObjectCreate);
HIP_LIBRARY_FIND_CHECKED(hipTexObjectDestroy);
HIP_LIBRARY_FIND_CHECKED(hipDeviceCanAccessPeer);
HIP_LIBRARY_FIND_CHECKED(hipCtxEnablePeerAccess);
HIP_LIBRARY_FIND_CHECKED(hipCtxDisablePeerAccess);
HIP_LIBRARY_FIND_CHECKED(hipDeviceGetP2PAttribute);
#ifdef _WIN32
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnregisterResource);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsMapResources);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsUnmapResources);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsResourceGetMappedPointer);
HIP_LIBRARY_FIND_CHECKED(hipGraphicsGLRegisterBuffer);
HIP_LIBRARY_FIND_CHECKED(hipGLGetDevices);
#endif
result = HIPEW_SUCCESS;
return result;
}
int hipewInit(hipuint32_t flags) {
int result = HIPEW_SUCCESS;
if (flags & HIPEW_INIT_HIP) {
result = hipewHipInit();
if (result != HIPEW_SUCCESS) {
return result;
}
}
return result;
}
const char *hipewErrorString(hipError_t result) {
switch (result) {
case hipSuccess: return "No errors";
case hipErrorInvalidValue: return "Invalid value";
case hipErrorOutOfMemory: return "Out of memory";
case hipErrorNotInitialized: return "Driver not initialized";
case hipErrorDeinitialized: return "Driver deinitialized";
case hipErrorProfilerDisabled: return "Profiler disabled";
case hipErrorProfilerNotInitialized: return "Profiler not initialized";
case hipErrorProfilerAlreadyStarted: return "Profiler already started";
case hipErrorProfilerAlreadyStopped: return "Profiler already stopped";
case hipErrorNoDevice: return "No HIP-capable device available";
case hipErrorInvalidDevice: return "Invalid device";
case hipErrorInvalidImage: return "Invalid kernel image";
case hipErrorInvalidContext: return "Invalid context";
case hipErrorContextAlreadyCurrent: return "Context already current";
case hipErrorMapFailed: return "Map failed";
case hipErrorUnmapFailed: return "Unmap failed";
case hipErrorArrayIsMapped: return "Array is mapped";
case hipErrorAlreadyMapped: return "Already mapped";
case hipErrorNoBinaryForGpu: return "No binary for GPU";
case hipErrorAlreadyAcquired: return "Already acquired";
case hipErrorNotMapped: return "Not mapped";
case hipErrorNotMappedAsArray: return "Mapped resource not available for access as an array";
case hipErrorNotMappedAsPointer: return "Mapped resource not available for access as a pointer";
case hipErrorECCNotCorrectable: return "Uncorrectable ECC error detected";
case hipErrorUnsupportedLimit: return "hipLimit_t not supported by device";
case hipErrorContextAlreadyInUse: return "Context already in use";
case hipErrorPeerAccessUnsupported: return "Peer access unsupported";
case hipErrorInvalidKernelFile: return "Invalid ptx";
case hipErrorInvalidGraphicsContext: return "Invalid graphics context";
case hipErrorInvalidSource: return "Invalid source";
case hipErrorFileNotFound: return "File not found";
case hipErrorSharedObjectSymbolNotFound: return "Link to a shared object failed to resolve";
case hipErrorSharedObjectInitFailed: return "Shared object initialization failed";
case hipErrorOperatingSystem: return "Operating system";
case hipErrorInvalidHandle: return "Invalid handle";
case hipErrorNotFound: return "Not found";
case hipErrorNotReady: return "HIP not ready";
case hipErrorIllegalAddress: return "Illegal address";
case hipErrorLaunchOutOfResources: return "Launch exceeded resources";
case hipErrorLaunchTimeOut: return "Launch exceeded timeout";
case hipErrorPeerAccessAlreadyEnabled: return "Peer access already enabled";
case hipErrorPeerAccessNotEnabled: return "Peer access not enabled";
case hipErrorSetOnActiveProcess: return "Primary context active";
case hipErrorAssert: return "Assert";
case hipErrorHostMemoryAlreadyRegistered: return "Host memory already registered";
case hipErrorHostMemoryNotRegistered: return "Host memory not registered";
case hipErrorLaunchFailure: return "Launch failed";
case hipErrorCooperativeLaunchTooLarge: return "Cooperative launch too large";
case hipErrorNotSupported: return "Not supported";
case hipErrorUnknown: return "Unknown error";
default: return "Unknown HIP error value";
}
}
static void path_join(const char *path1,
const char *path2,
int maxlen,
char *result) {
#if defined(WIN32) || defined(_WIN32)
const char separator = '\\';
#else
const char separator = '/';
#endif
int n = snprintf(result, maxlen, "%s%c%s", path1, separator, path2);
if (n != -1 && n < maxlen) {
result[n] = '\0';
}
else {
result[maxlen - 1] = '\0';
}
}
static int path_exists(const char *path) {
struct stat st;
if (stat(path, &st)) {
return 0;
}
return 1;
}
const char *hipewCompilerPath(void) {
#ifdef _WIN32
const char *hipPath = getenv("HIP_ROCCLR_HOME");
const char *windowsCommand = "perl ";
const char *executable = "bin/hipcc";
static char hipcc[65536];
static char finalCommand[65536];
if(hipPath) {
path_join(hipPath, executable, sizeof(hipcc), hipcc);
if(path_exists(hipcc)) {
snprintf(finalCommand, sizeof(hipcc), "%s %s", windowsCommand, hipcc);
return finalCommand;
} else {
printf("Could not find hipcc. Make sure HIP_ROCCLR_HOME points to the directory holding /bin/hipcc");
}
}
#else
const char *hipPath = "opt/rocm/hip/bin";
const char *executable = "hipcc";
static char hipcc[65536];
if(hipPath) {
path_join(hipPath, executable, sizeof(hipcc), hipcc);
if(path_exists(hipcc)){
return hipcc;
}
}
#endif
{
#ifdef _WIN32
FILE *handle = popen("where hipcc", "r");
#else
FILE *handle = popen("which hipcc", "r");
#endif
if (handle) {
char buffer[4096] = {0};
int len = fread(buffer, 1, sizeof(buffer) - 1, handle);
buffer[len] = '\0';
pclose(handle);
if (buffer[0]) {
return "hipcc";
}
}
}
return NULL;
}
int hipewCompilerVersion(void) {
const char *path = hipewCompilerPath();
const char *marker = "Hip compilation tools, release ";
FILE *pipe;
int major, minor;
char *versionstr;
char buf[128];
char output[65536] = "\0";
char command[65536] = "\0";
if (path == NULL) {
return 0;
}
/* get --version output */
strcat(command, "\"");
strncat(command, path, sizeof(command) - 1);
strncat(command, "\" --version", sizeof(command) - strlen(path) - 1);
pipe = popen(command, "r");
if (!pipe) {
fprintf(stderr, "HIP: failed to run compiler to retrieve version");
return 0;
}
while (!feof(pipe)) {
if (fgets(buf, sizeof(buf), pipe) != NULL) {
strncat(output, buf, sizeof(output) - strlen(output) - 1);
}
}
pclose(pipe);
return 40;
}

View File

@@ -1,5 +0,0 @@
Project: JSON
URL: https://github.com/nlohmann/json/
License: MIT License
Upstream version: 3.10.2
Local modifications: None

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +0,0 @@
Project: TinyGLTF
URL: https://github.com/syoyo/tinygltf
License: MIT
Upstream version: 2.5.0, 19a41d20ec0
Local modifications:
* Silence "enum value not handled in switch" warnings due to JSON dependency.

File diff suppressed because it is too large Load Diff

View File

@@ -247,7 +247,7 @@ if(WITH_CYCLES_OSL)
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
find_package(OptiX 7.3.0)
find_package(OptiX)
if(OPTIX_FOUND)
add_definitions(-DWITH_OPTIX)
@@ -286,18 +286,11 @@ if(WITH_OPENSUBDIV)
)
endif()
if(WITH_OPENIMAGEDENOISE)
add_definitions(-DWITH_OPENIMAGEDENOISE)
add_definitions(-DOIDN_STATIC_LIB)
include_directories(
SYSTEM
${OPENIMAGEDENOISE_INCLUDE_DIRS}
)
endif()
if(WITH_CYCLES_STANDALONE)
set(WITH_CYCLES_DEVICE_OPENCL TRUE)
set(WITH_CYCLES_DEVICE_CUDA TRUE)
set(WITH_CYCLES_DEVICE_HIP TRUE)
# Experimental and unfinished.
set(WITH_CYCLES_NETWORK FALSE)
endif()
# TODO(sergey): Consider removing it, only causes confusion in interface.
set(WITH_CYCLES_DEVICE_MULTI TRUE)
@@ -393,12 +386,18 @@ if(WITH_CYCLES_BLENDER)
add_subdirectory(blender)
endif()
add_subdirectory(app)
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
if(WITH_CYCLES_STANDALONE OR WITH_CYCLES_NETWORK OR WITH_CYCLES_CUBIN_COMPILER)
add_subdirectory(app)
endif()
add_subdirectory(bvh)
add_subdirectory(device)
add_subdirectory(doc)
add_subdirectory(graph)
add_subdirectory(integrator)
add_subdirectory(kernel)
add_subdirectory(render)
add_subdirectory(subd)

View File

@@ -64,8 +64,6 @@ if(WITH_CYCLES_STANDALONE)
cycles_standalone.cpp
cycles_xml.cpp
cycles_xml.h
oiio_output_driver.cpp
oiio_output_driver.h
)
add_executable(cycles ${SRC} ${INC} ${INC_SYS})
unset(SRC)
@@ -75,7 +73,7 @@ if(WITH_CYCLES_STANDALONE)
if(APPLE)
if(WITH_OPENCOLORIO)
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit -framework Carbon")
set_property(TARGET cycles APPEND_STRING PROPERTY LINK_FLAGS " -framework IOKit")
endif()
if(WITH_OPENIMAGEDENOISE AND "${CMAKE_OSX_ARCHITECTURES}" STREQUAL "arm64")
# OpenImageDenoise uses BNNS from the Accelerate framework.
@@ -92,6 +90,24 @@ if(WITH_CYCLES_STANDALONE)
endif()
endif()
#####################################################################
# Cycles network server executable
#####################################################################
if(WITH_CYCLES_NETWORK)
set(SRC
cycles_server.cpp
)
add_executable(cycles_server ${SRC})
target_link_libraries(cycles_server ${LIBRARIES})
cycles_target_link_libraries(cycles_server)
if(UNIX AND NOT APPLE)
set_target_properties(cycles_server PROPERTIES INSTALL_RPATH $ORIGIN/lib)
endif()
unset(SRC)
endif()
#####################################################################
# Cycles cubin compiler executable
#####################################################################

View File

@@ -36,9 +36,6 @@
#include "util/util_unique_ptr.h"
#include "util/util_version.h"
#include "app/cycles_xml.h"
#include "app/oiio_output_driver.h"
#ifdef WITH_CYCLES_STANDALONE_GUI
# include "util/util_view.h"
#endif
@@ -56,8 +53,7 @@ struct Options {
SessionParams session_params;
bool quiet;
bool show_help, interactive, pause;
string output_filepath;
string output_pass;
string output_path;
} options;
static void session_print(const string &str)
@@ -93,6 +89,30 @@ static void session_print_status()
session_print(status);
}
static bool write_render(const uchar *pixels, int w, int h, int channels)
{
string msg = string_printf("Writing image %s", options.output_path.c_str());
session_print(msg);
unique_ptr<ImageOutput> out = unique_ptr<ImageOutput>(ImageOutput::create(options.output_path));
if (!out) {
return false;
}
ImageSpec spec(w, h, channels, TypeDesc::UINT8);
if (!out->open(options.output_path, spec)) {
return false;
}
/* conversion for different top/bottom convention */
out->write_image(
TypeDesc::UINT8, pixels + (h - 1) * w * channels, AutoStride, -w * channels, AutoStride);
out->close();
return true;
}
static BufferParams &session_buffer_params()
{
static BufferParams buffer_params;
@@ -106,7 +126,7 @@ static BufferParams &session_buffer_params()
static void scene_init()
{
options.scene = options.session->scene;
options.scene = new Scene(options.scene_params, options.session->device);
/* Read XML */
xml_read_file(options.scene, options.filepath.c_str());
@@ -127,13 +147,8 @@ static void scene_init()
static void session_init()
{
options.output_pass = "combined";
options.session = new Session(options.session_params, options.scene_params);
if (!options.output_filepath.empty()) {
options.session->set_output_driver(make_unique<OIIOOutputDriver>(
options.output_filepath, options.output_pass, session_print));
}
options.session_params.write_render_cb = write_render;
options.session = new Session(options.session_params);
if (options.session_params.background && !options.quiet)
options.session->progress.set_update_callback(function_bind(&session_print_status));
@@ -144,13 +159,9 @@ static void session_init()
/* load scene */
scene_init();
options.session->scene = options.scene;
/* add pass for output. */
Pass *pass = options.scene->create_node<Pass>();
pass->set_name(ustring(options.output_pass.c_str()));
pass->set_type(PASS_COMBINED);
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
options.session->start();
}
@@ -212,7 +223,9 @@ static void display_info(Progress &progress)
static void display()
{
options.session->draw();
static DeviceDrawParams draw_params = DeviceDrawParams();
options.session->draw(session_buffer_params(), draw_params);
display_info(options.session->progress);
}
@@ -242,7 +255,7 @@ static void motion(int x, int y, int button)
options.session->scene->camera->need_flags_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
@@ -259,7 +272,7 @@ static void resize(int width, int height)
options.session->scene->camera->need_flags_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
@@ -271,7 +284,7 @@ static void keyboard(unsigned char key)
/* Reset */
else if (key == 'r')
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
/* Cancel */
else if (key == 27) // escape
@@ -308,7 +321,7 @@ static void keyboard(unsigned char key)
options.session->scene->camera->need_flags_update = true;
options.session->scene->camera->need_device_update = true;
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
/* Set Max Bounces */
@@ -334,7 +347,7 @@ static void keyboard(unsigned char key)
options.session->scene->integrator->set_max_bounce(bounce);
options.session->reset(options.session_params, session_buffer_params());
options.session->reset(session_buffer_params(), options.session_params.samples);
}
}
#endif
@@ -349,13 +362,11 @@ static int files_parse(int argc, const char *argv[])
static void options_parse(int argc, const char **argv)
{
options.width = 1024;
options.height = 512;
options.width = 0;
options.height = 0;
options.filepath = "";
options.session = NULL;
options.quiet = false;
options.session_params.use_auto_tile = false;
options.session_params.tile_size = 0;
/* device names */
string device_names = "";
@@ -401,7 +412,7 @@ static void options_parse(int argc, const char **argv)
&options.session_params.samples,
"Number of samples to render",
"--output %s",
&options.output_filepath,
&options.output_path,
"File path to write output image",
"--threads %d",
&options.session_params.threads,
@@ -412,9 +423,12 @@ static void options_parse(int argc, const char **argv)
"--height %d",
&options.height,
"Window height in pixel",
"--tile-size %d",
&options.session_params.tile_size,
"Tile size in pixels",
"--tile-width %d",
&options.session_params.tile_size.x,
"Tile width in pixels",
"--tile-height %d",
&options.session_params.tile_size.y,
"Tile height in pixels",
"--list-devices",
&list,
"List information about all available devices",
@@ -476,9 +490,8 @@ static void options_parse(int argc, const char **argv)
options.session_params.background = true;
#endif
if (options.session_params.tile_size > 0) {
options.session_params.use_auto_tile = true;
}
/* Use progressive rendering */
options.session_params.progressive = true;
/* find matching device */
DeviceType device_type = Device::type_from_string(devicename.c_str());
@@ -514,6 +527,9 @@ static void options_parse(int argc, const char **argv)
fprintf(stderr, "No file path specified\n");
exit(EXIT_FAILURE);
}
/* For smoother Viewport */
options.session_params.start_resolution = 64;
}
CCL_NAMESPACE_END

View File

@@ -333,7 +333,6 @@ static void xml_read_shader_graph(XMLReadState &state, Shader *shader, xml_node
}
snode = (ShaderNode *)node_type->create(node_type);
snode->set_owner(graph);
}
xml_read_node(graph_reader, snode, node);
@@ -704,7 +703,7 @@ void xml_read_file(Scene *scene, const char *filepath)
xml_read_include(state, path_filename(filepath));
scene->params.bvh_type = BVH_TYPE_STATIC;
scene->params.bvh_type = SceneParams::BVH_STATIC;
}
CCL_NAMESPACE_END

View File

@@ -1,71 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "app/oiio_output_driver.h"
CCL_NAMESPACE_BEGIN
OIIOOutputDriver::OIIOOutputDriver(const string_view filepath,
const string_view pass,
LogFunction log)
: filepath_(filepath), pass_(pass), log_(log)
{
}
OIIOOutputDriver::~OIIOOutputDriver()
{
}
void OIIOOutputDriver::write_render_tile(const Tile &tile)
{
/* Only write the full buffer, no intermediate tiles. */
if (!(tile.size == tile.full_size)) {
return;
}
log_(string_printf("Writing image %s", filepath_.c_str()));
unique_ptr<ImageOutput> image_output(ImageOutput::create(filepath_));
if (image_output == nullptr) {
log_("Failed to create image file");
return;
}
const int width = tile.size.x;
const int height = tile.size.y;
ImageSpec spec(width, height, 4, TypeDesc::FLOAT);
if (!image_output->open(filepath_, spec)) {
log_("Failed to create image file");
return;
}
vector<float> pixels(width * height * 4);
if (!tile.get_pass_pixels(pass_, 4, pixels.data())) {
log_("Failed to read render pass pixels");
return;
}
/* Manipulate offset and stride to convert from bottom-up to top-down convention. */
image_output->write_image(TypeDesc::FLOAT,
pixels.data() + (height - 1) * width * 4,
AutoStride,
-width * 4 * sizeof(float),
AutoStride);
image_output->close();
}
CCL_NAMESPACE_END

View File

@@ -1,42 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "render/output_driver.h"
#include "util/util_function.h"
#include "util/util_image.h"
#include "util/util_string.h"
#include "util/util_unique_ptr.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class OIIOOutputDriver : public OutputDriver {
public:
typedef function<void(const string &)> LogFunction;
OIIOOutputDriver(const string_view filepath, const string_view pass, LogFunction log);
virtual ~OIIOOutputDriver();
void write_render_tile(const Tile &tile) override;
protected:
string filepath_;
string pass_;
LogFunction log_;
};
CCL_NAMESPACE_END

View File

@@ -31,14 +31,12 @@ set(INC_SYS
set(SRC
blender_camera.cpp
blender_device.cpp
blender_display_driver.cpp
blender_image.cpp
blender_geometry.cpp
blender_light.cpp
blender_mesh.cpp
blender_object.cpp
blender_object_cull.cpp
blender_output_driver.cpp
blender_particles.cpp
blender_curves.cpp
blender_logging.cpp
@@ -52,11 +50,9 @@ set(SRC
CCL_api.h
blender_device.h
blender_display_driver.h
blender_id_map.h
blender_image.h
blender_object_cull.h
blender_output_driver.h
blender_sync.h
blender_session.h
blender_texture.h
@@ -97,9 +93,14 @@ set(ADDON_FILES
add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
if(WITH_CYCLES_DEVICE_OPENCL)
add_definitions(-DWITH_OPENCL)
endif()
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
if(WITH_MOD_FLUID)
add_definitions(-DWITH_FLUID)
endif()

View File

@@ -58,6 +58,7 @@ class CyclesRender(bpy.types.RenderEngine):
bl_use_eevee_viewport = True
bl_use_preview = True
bl_use_exclude_layers = True
bl_use_save_buffers = True
bl_use_spherical_stereo = True
bl_use_custom_freestyle = True
bl_use_alembic_procedural = True
@@ -84,12 +85,6 @@ class CyclesRender(bpy.types.RenderEngine):
def render(self, depsgraph):
engine.render(self, depsgraph)
def render_frame_finish(self):
engine.render_frame_finish(self)
def draw(self, context, depsgraph):
engine.draw(self, depsgraph, context.space_data)
def bake(self, depsgraph, obj, pass_type, pass_filter, width, height):
engine.bake(self, depsgraph, obj, pass_type, pass_filter, width, height)
@@ -103,7 +98,7 @@ class CyclesRender(bpy.types.RenderEngine):
engine.sync(self, depsgraph, context.blend_data)
def view_draw(self, context, depsgraph):
engine.view_draw(self, depsgraph, context.region, context.space_data, context.region_data)
engine.draw(self, depsgraph, context.region, context.space_data, context.region_data)
def update_script_node(self, node):
if engine.with_osl():

View File

@@ -18,17 +18,62 @@
from __future__ import annotations
def _is_using_buggy_driver():
import gpu
# We need to be conservative here because in multi-GPU systems display card
# might be quite old, but others one might be just good.
#
# So We shouldn't disable possible good dedicated cards just because display
# card seems weak. And instead we only blacklist configurations which are
# proven to cause problems.
if gpu.platform.vendor_get() == "ATI Technologies Inc.":
import re
version = gpu.platform.version_get()
if version.endswith("Compatibility Profile Context"):
# Old HD 4xxx and 5xxx series drivers did not have driver version
# in the version string, but those cards do not quite work and
# causing crashes.
return True
regex = re.compile(".*Compatibility Profile Context ([0-9]+(\\.[0-9]+)+)$")
if not regex.match(version):
# Skip cards like FireGL
return False
version = regex.sub("\\1", version).split('.')
return int(version[0]) == 8
return False
def _workaround_buggy_drivers():
if _is_using_buggy_driver():
import _cycles
if hasattr(_cycles, "opencl_disable"):
print("Cycles: OpenGL driver known to be buggy, disabling OpenCL platform.")
_cycles.opencl_disable()
def _configure_argument_parser():
import argparse
# No help because it conflicts with general Python scripts argument parsing
parser = argparse.ArgumentParser(description="Cycles Addon argument parser",
add_help=False)
parser.add_argument("--cycles-resumable-num-chunks",
help="Number of chunks to split sample range into",
default=None)
parser.add_argument("--cycles-resumable-current-chunk",
help="Current chunk of samples range to render",
default=None)
parser.add_argument("--cycles-resumable-start-chunk",
help="Start chunk to render",
default=None)
parser.add_argument("--cycles-resumable-end-chunk",
help="End chunk to render",
default=None)
parser.add_argument("--cycles-print-stats",
help="Print rendering statistics to stderr",
action='store_true')
parser.add_argument("--cycles-device",
help="Set the device to use for Cycles, overriding user preferences and the scene setting."
"Valid options are 'CPU', 'CUDA', 'OPTIX', or 'HIP'"
"Valid options are 'CPU', 'CUDA', 'OPTIX' or 'OPENCL'."
"Additionally, you can append '+CPU' to any GPU type for hybrid rendering.",
default=None)
return parser
@@ -44,6 +89,21 @@ def _parse_command_line():
parser = _configure_argument_parser()
args, _ = parser.parse_known_args(argv[argv.index("--") + 1:])
if args.cycles_resumable_num_chunks is not None:
if args.cycles_resumable_current_chunk is not None:
import _cycles
_cycles.set_resumable_chunk(
int(args.cycles_resumable_num_chunks),
int(args.cycles_resumable_current_chunk),
)
elif args.cycles_resumable_start_chunk is not None and \
args.cycles_resumable_end_chunk:
import _cycles
_cycles.set_resumable_chunk_range(
int(args.cycles_resumable_num_chunks),
int(args.cycles_resumable_start_chunk),
int(args.cycles_resumable_end_chunk),
)
if args.cycles_print_stats:
import _cycles
_cycles.enable_print_stats()
@@ -58,11 +118,23 @@ def init():
import _cycles
import os.path
# Workaround possibly buggy legacy drivers which crashes on the OpenCL
# device enumeration.
#
# This checks are not really correct because they might still fail
# in the case of multiple GPUs. However, currently buggy drivers
# are really old and likely to be used in single GPU systems only
# anyway.
#
# Can't do it in the background mode, so we hope OpenCL is no enabled
# in the user preferences.
if not bpy.app.background:
_workaround_buggy_drivers()
path = os.path.dirname(__file__)
user_path = os.path.dirname(os.path.abspath(bpy.utils.user_resource('CONFIG', path='')))
temp_path = bpy.app.tempdir
_cycles.init(path, user_path, temp_path, bpy.app.background)
_cycles.init(path, user_path, bpy.app.background)
_parse_command_line()
@@ -105,25 +177,6 @@ def render(engine, depsgraph):
_cycles.render(engine.session, depsgraph.as_pointer())
def render_frame_finish(engine):
if not engine.session:
return
import _cycles
_cycles.render_frame_finish(engine.session)
def draw(engine, depsgraph, space_image):
if not engine.session:
return
depsgraph_ptr = depsgraph.as_pointer()
space_image_ptr = space_image.as_pointer()
screen_ptr = space_image.id_data.as_pointer()
import _cycles
_cycles.draw(engine.session, depsgraph_ptr, screen_ptr, space_image_ptr)
def bake(engine, depsgraph, obj, pass_type, pass_filter, width, height):
import _cycles
session = getattr(engine, "session", None)
@@ -151,14 +204,14 @@ def sync(engine, depsgraph, data):
_cycles.sync(engine.session, depsgraph.as_pointer())
def view_draw(engine, depsgraph, region, v3d, rv3d):
def draw(engine, depsgraph, region, v3d, rv3d):
import _cycles
depsgraph = depsgraph.as_pointer()
v3d = v3d.as_pointer()
rv3d = rv3d.as_pointer()
# draw render image
_cycles.view_draw(engine.session, depsgraph, v3d, rv3d)
_cycles.draw(engine.session, depsgraph, v3d, rv3d)
def available_devices():
@@ -171,6 +224,11 @@ def with_osl():
return _cycles.with_osl
def with_network():
import _cycles
return _cycles.with_network
def system_info():
import _cycles
return _cycles.system_info()
@@ -185,7 +243,6 @@ def list_render_passes(scene, srl):
# Data passes.
if srl.use_pass_z: yield ("Depth", "Z", 'VALUE')
if srl.use_pass_mist: yield ("Mist", "Z", 'VALUE')
if srl.use_pass_position: yield ("Position", "XYZ", 'VECTOR')
if srl.use_pass_normal: yield ("Normal", "XYZ", 'VECTOR')
if srl.use_pass_vector: yield ("Vector", "XYZW", 'VECTOR')
if srl.use_pass_uv: yield ("UV", "UVA", 'VECTOR')
@@ -208,9 +265,9 @@ def list_render_passes(scene, srl):
if srl.use_pass_environment: yield ("Env", "RGB", 'COLOR')
if srl.use_pass_shadow: yield ("Shadow", "RGB", 'COLOR')
if srl.use_pass_ambient_occlusion: yield ("AO", "RGB", 'COLOR')
if crl.use_pass_shadow_catcher: yield ("Shadow Catcher", "RGB", 'COLOR')
# Debug passes.
if crl.pass_debug_render_time: yield ("Debug Render Time", "X", 'VALUE')
if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
# Cryptomatte passes.
@@ -226,20 +283,30 @@ def list_render_passes(scene, srl):
yield ("CryptoAsset" + '{:02d}'.format(i), "RGBA", 'COLOR')
# Denoising passes.
if scene.cycles.use_denoising and crl.use_denoising:
if (scene.cycles.use_denoising and crl.use_denoising) or crl.denoising_store_passes:
yield ("Noisy Image", "RGBA", 'COLOR')
if crl.use_pass_shadow_catcher:
yield ("Noisy Shadow Catcher", "RGBA", 'COLOR')
if crl.denoising_store_passes:
yield ("Denoising Normal", "XYZ", 'VECTOR')
yield ("Denoising Albedo", "RGB", 'COLOR')
if crl.denoising_store_passes:
yield ("Denoising Normal", "XYZ", 'VECTOR')
yield ("Denoising Albedo", "RGB", 'COLOR')
yield ("Denoising Depth", "Z", 'VALUE')
if scene.cycles.denoiser == 'NLM':
yield ("Denoising Shadowing", "X", 'VALUE')
yield ("Denoising Variance", "RGB", 'COLOR')
yield ("Denoising Intensity", "X", 'VALUE')
clean_options = ("denoising_diffuse_direct", "denoising_diffuse_indirect",
"denoising_glossy_direct", "denoising_glossy_indirect",
"denoising_transmission_direct", "denoising_transmission_indirect")
if any(getattr(crl, option) for option in clean_options):
yield ("Denoising Clean", "RGB", 'COLOR')
# Custom AOV passes.
for aov in srl.aovs:
if aov.type == 'VALUE':
yield (aov.name, "X", 'VALUE')
else:
yield (aov.name, "RGB", 'COLOR')
yield (aov.name, "RGBA", 'COLOR')
def register_passes(engine, scene, view_layer):

View File

@@ -60,48 +60,32 @@ class AddPresetSampling(AddPresetBase, Operator):
]
preset_values = [
"cycles.use_adaptive_sampling",
"cycles.samples",
"cycles.adaptive_threshold",
"cycles.adaptive_min_samples",
"cycles.time_limit",
"cycles.use_denoising",
"cycles.denoiser",
"cycles.denoising_input_passes",
"cycles.denoising_prefilter",
"cycles.preview_samples",
"cycles.aa_samples",
"cycles.preview_aa_samples",
"cycles.diffuse_samples",
"cycles.glossy_samples",
"cycles.transmission_samples",
"cycles.ao_samples",
"cycles.mesh_light_samples",
"cycles.subsurface_samples",
"cycles.volume_samples",
"cycles.use_square_samples",
"cycles.progressive",
"cycles.seed",
"cycles.sample_clamp_direct",
"cycles.sample_clamp_indirect",
"cycles.sample_all_lights_direct",
"cycles.sample_all_lights_indirect",
]
preset_subdir = "cycles/sampling"
class AddPresetViewportSampling(AddPresetBase, Operator):
'''Add a Viewport Sampling Preset'''
bl_idname = "render.cycles_viewport_sampling_preset_add"
bl_label = "Add Viewport Sampling Preset"
preset_menu = "CYCLES_PT_viewport_sampling_presets"
preset_defines = [
"cycles = bpy.context.scene.cycles"
]
preset_values = [
"cycles.use_preview_adaptive_sampling",
"cycles.preview_samples",
"cycles.preview_adaptive_threshold",
"cycles.preview_adaptive_min_samples",
"cycles.use_preview_denoising",
"cycles.preview_denoiser",
"cycles.preview_denoising_input_passes",
"cycles.preview_denoising_prefilter",
"cycles.preview_denoising_start_sample",
]
preset_subdir = "cycles/viewport_sampling"
classes = (
AddPresetIntegrator,
AddPresetSampling,
AddPresetViewportSampling,
)

View File

@@ -39,6 +39,11 @@ enum_devices = (
('GPU', "GPU Compute", "Use GPU compute device for rendering, configured in the system tab in the user preferences"),
)
from _cycles import with_network
if with_network:
enum_devices += (('NETWORK', "Networked Device", "Use networked device for rendering"),)
del with_network
enum_feature_set = (
('SUPPORTED', "Supported", "Only use finished and supported features"),
('EXPERIMENTAL', "Experimental", "Use experimental and incomplete features that might be broken or change in the future", 'ERROR', 1),
@@ -79,6 +84,15 @@ enum_curve_shape = (
('THICK', "3D Curves", "Render hair as 3D curve, for accurate results when viewing hair close up"),
)
enum_tile_order = (
('CENTER', "Center", "Render from center to the edges"),
('RIGHT_TO_LEFT', "Right to Left", "Render from right to left"),
('LEFT_TO_RIGHT', "Left to Right", "Render from left to right"),
('TOP_TO_BOTTOM', "Top to Bottom", "Render from top to bottom"),
('BOTTOM_TO_TOP', "Bottom to Top", "Render from bottom to top"),
('HILBERT_SPIRAL', "Hilbert Spiral", "Render in a Hilbert Spiral"),
)
enum_use_layer_samples = (
('USE', "Use", "Per render layer number of samples override scene samples"),
('BOUNDED', "Bounded", "Bound per render layer number of samples by global samples"),
@@ -87,9 +101,15 @@ enum_use_layer_samples = (
enum_sampling_pattern = (
('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
)
enum_integrator = (
('BRANCHED_PATH', "Branched Path Tracing", "Path tracing integrator that branches on the first bounce, giving more control over the number of light and material samples"),
('PATH', "Path Tracing", "Pure path tracing integrator"),
)
enum_volume_sampling = (
('DISTANCE', "Distance", "Use distance sampling, best for dense volumes with lights far away"),
('EQUIANGULAR', "Equiangular", "Use equiangular sampling, best for volumes with low density with light inside or near the volume"),
@@ -111,7 +131,7 @@ enum_device_type = (
('CPU', "CPU", "CPU", 0),
('CUDA', "CUDA", "CUDA", 1),
('OPTIX', "OptiX", "OptiX", 3),
("HIP", "HIP", "HIP", 4)
('OPENCL', "OpenCL", "OpenCL", 2)
)
enum_texture_limit = (
@@ -125,45 +145,38 @@ enum_texture_limit = (
('8192', "8192", "Limit texture size to 8192 pixels", 7),
)
# NOTE: Identifiers are expected to be an upper case version of identifiers from `Pass::get_type_enum()`
enum_view3d_shading_render_pass = (
('', "General", ""),
('COMBINED', "Combined", "Show the Combined Render pass"),
('EMISSION', "Emission", "Show the Emission render pass"),
('BACKGROUND', "Background", "Show the Background render pass"),
('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass"),
('SHADOW', "Shadow", "Show the Shadow render pass"),
('SHADOW_CATCHER', "Shadow Catcher", "Show the Shadow Catcher render pass"),
('COMBINED', "Combined", "Show the Combined Render pass", 1),
('EMISSION', "Emission", "Show the Emission render pass", 33),
('BACKGROUND', "Background", "Show the Background render pass", 34),
('AO', "Ambient Occlusion", "Show the Ambient Occlusion render pass", 35),
('', "Light", ""),
('DIFFUSE_DIRECT', "Diffuse Direct", "Show the Diffuse Direct render pass"),
('DIFFUSE_INDIRECT', "Diffuse Indirect", "Show the Diffuse Indirect render pass"),
('DIFFUSE_COLOR', "Diffuse Color", "Show the Diffuse Color render pass"),
('DIFFUSE_DIRECT', "Diffuse Direct", "Show the Diffuse Direct render pass", 38),
('DIFFUSE_INDIRECT', "Diffuse Indirect", "Show the Diffuse Indirect render pass", 39),
('DIFFUSE_COLOR', "Diffuse Color", "Show the Diffuse Color render pass", 40),
('GLOSSY_DIRECT', "Glossy Direct", "Show the Glossy Direct render pass"),
('GLOSSY_INDIRECT', "Glossy Indirect", "Show the Glossy Indirect render pass"),
('GLOSSY_COLOR', "Glossy Color", "Show the Glossy Color render pass"),
('GLOSSY_DIRECT', "Glossy Direct", "Show the Glossy Direct render pass", 41),
('GLOSSY_INDIRECT', "Glossy Indirect", "Show the Glossy Indirect render pass", 42),
('GLOSSY_COLOR', "Glossy Color", "Show the Glossy Color render pass", 43),
('', "", ""),
('TRANSMISSION_DIRECT', "Transmission Direct", "Show the Transmission Direct render pass"),
('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass"),
('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass"),
('TRANSMISSION_DIRECT', "Transmission Direct", "Show the Transmission Direct render pass", 44),
('TRANSMISSION_INDIRECT', "Transmission Indirect", "Show the Transmission Indirect render pass", 45),
('TRANSMISSION_COLOR', "Transmission Color", "Show the Transmission Color render pass", 46),
('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass"),
('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass"),
('VOLUME_DIRECT', "Volume Direct", "Show the Volume Direct render pass", 50),
('VOLUME_INDIRECT', "Volume Indirect", "Show the Volume Indirect render pass", 51),
('', "Data", ""),
('POSITION', "Position", "Show the Position render pass"),
('NORMAL', "Normal", "Show the Normal render pass"),
('UV', "UV", "Show the UV render pass"),
('MIST', "Mist", "Show the Mist render pass"),
('DENOISING_ALBEDO', "Denoising Albedo", "Albedo pass used by denoiser"),
('DENOISING_NORMAL', "Denoising Normal", "Normal pass used by denoiser"),
('SAMPLE_COUNT', "Sample Count", "Per-pixel number of samples"),
('NORMAL', "Normal", "Show the Normal render pass", 3),
('UV', "UV", "Show the UV render pass", 4),
('MIST', "Mist", "Show the Mist render pass", 32),
)
@@ -195,23 +208,18 @@ def enum_preview_denoiser(self, context):
def enum_denoiser(self, context):
items = []
items = [('NLM', "NLM", "Cycles native non-local means denoiser, running on any compute device", 1)]
items += enum_optix_denoiser(self, context)
items += enum_openimagedenoise_denoiser(self, context)
return items
enum_denoising_input_passes = (
('RGB', "None", "Don't use utility passes for denoising", 1),
('RGB_ALBEDO', "Albedo", "Use albedo pass for denoising", 2),
('RGB_ALBEDO_NORMAL', "Albedo and Normal", "Use albedo and normal passes for denoising", 3),
('RGB', "Color", "Use only color as input", 1),
('RGB_ALBEDO', "Color + Albedo", "Use color and albedo data as input", 2),
('RGB_ALBEDO_NORMAL', "Color + Albedo + Normal", "Use color, albedo and normal data as input", 3),
)
enum_denoising_prefilter = (
('NONE', "None", "No prefiltering, use when guiding passes are noise-free", 1),
('FAST', "Fast", "Denoise color and guiding passes together. Improves quality when guiding passes are noisy using least amount of extra processing time", 2),
('ACCURATE', "Accurate", "Prefilter noisy guiding passes before denoising color. Improves quality when guiding passes are noisy using extra processing time", 3),
)
def update_render_passes(self, context):
scene = context.scene
@@ -244,6 +252,13 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
description="Use Open Shading Language (CPU rendering only)",
)
progressive: EnumProperty(
name="Integrator",
description="Method to sample lights and materials",
items=enum_integrator,
default='PATH',
)
preview_pause: BoolProperty(
name="Pause Preview",
description="Pause all viewport preview renders",
@@ -253,88 +268,110 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
use_denoising: BoolProperty(
name="Use Denoising",
description="Denoise the rendered image",
default=True,
default=False,
update=update_render_passes,
)
denoiser: EnumProperty(
name="Denoiser",
description="Denoise the image with the selected denoiser. "
"For denoising the image after rendering",
items=enum_denoiser,
default=4, # Use integer to avoid error in builds without OpenImageDenoise.
update=update_render_passes,
)
denoising_prefilter: EnumProperty(
name="Denoising Prefilter",
description="Prefilter noisy guiding (albedo and normal) passes to improve denoising quality when using OpenImageDenoiser",
items=enum_denoising_prefilter,
default='ACCURATE',
)
denoising_input_passes: EnumProperty(
name="Denoising Input Passes",
description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
items=enum_denoising_input_passes,
default='RGB_ALBEDO_NORMAL',
)
use_preview_denoising: BoolProperty(
name="Use Viewport Denoising",
description="Denoise the image in the 3D viewport",
default=False,
)
denoiser: EnumProperty(
name="Denoiser",
description="Denoise the image with the selected denoiser. "
"For denoising the image after rendering, denoising data render passes "
"also adapt to the selected denoiser",
items=enum_denoiser,
default=1,
update=update_render_passes,
)
preview_denoiser: EnumProperty(
name="Viewport Denoiser",
description="Denoise the image after each preview update with the selected denoiser",
items=enum_preview_denoiser,
default=0,
)
preview_denoising_prefilter: EnumProperty(
name="Viewport Denoising Prefilter",
description="Prefilter noisy guiding (albedo and normal) passes to improve denoising quality when using OpenImageDenoiser",
items=enum_denoising_prefilter,
default='FAST',
)
preview_denoising_input_passes: EnumProperty(
name="Viewport Denoising Input Passes",
description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
items=enum_denoising_input_passes,
default='RGB_ALBEDO',
)
preview_denoising_start_sample: IntProperty(
name="Start Denoising",
description="Sample to start denoising the preview at",
min=0, max=(1 << 24),
default=1,
use_square_samples: BoolProperty(
name="Square Samples",
description="Square sampling values for easier artist control",
default=False,
)
samples: IntProperty(
name="Samples",
description="Number of samples to render for each pixel",
min=1, max=(1 << 24),
default=4096,
default=128,
)
preview_samples: IntProperty(
name="Viewport Samples",
description="Number of samples to render in the viewport, unlimited if 0",
min=0, max=(1 << 24),
default=1024,
default=32,
)
aa_samples: IntProperty(
name="AA Samples",
description="Number of antialiasing samples to render for each pixel",
min=1, max=2097151,
default=128,
)
preview_aa_samples: IntProperty(
name="AA Samples",
description="Number of antialiasing samples to render in the viewport, unlimited if 0",
min=0, max=2097151,
default=32,
)
time_limit: FloatProperty(
name="Time Limit",
description="Limit the render time (excluding synchronization time)."
"Zero disables the limit",
min=0.0,
default=0.0,
step=100.0,
unit='TIME_ABSOLUTE',
diffuse_samples: IntProperty(
name="Diffuse Samples",
description="Number of diffuse bounce samples to render for each AA sample",
min=1, max=1024,
default=1,
)
glossy_samples: IntProperty(
name="Glossy Samples",
description="Number of glossy bounce samples to render for each AA sample",
min=1, max=1024,
default=1,
)
transmission_samples: IntProperty(
name="Transmission Samples",
description="Number of transmission bounce samples to render for each AA sample",
min=1, max=1024,
default=1,
)
ao_samples: IntProperty(
name="Ambient Occlusion Samples",
description="Number of ambient occlusion samples to render for each AA sample",
min=1, max=1024,
default=1,
)
mesh_light_samples: IntProperty(
name="Mesh Light Samples",
description="Number of mesh emission light samples to render for each AA sample",
min=1, max=1024,
default=1,
)
subsurface_samples: IntProperty(
name="Subsurface Samples",
description="Number of subsurface scattering samples to render for each AA sample",
min=1, max=1024,
default=1,
)
volume_samples: IntProperty(
name="Volume Samples",
description="Number of volume scattering samples to render for each AA sample",
min=1, max=1024,
default=1,
)
sampling_pattern: EnumProperty(
name="Sampling Pattern",
description="Random sampling pattern used by the integrator",
items=enum_sampling_pattern,
default='PROGRESSIVE_MUTI_JITTER',
default='SOBOL',
)
use_layer_samples: EnumProperty(
@@ -344,6 +381,17 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default='USE',
)
sample_all_lights_direct: BoolProperty(
name="Sample All Direct Lights",
description="Sample all lights (for direct samples), rather than randomly picking one",
default=True,
)
sample_all_lights_indirect: BoolProperty(
name="Sample All Indirect Lights",
description="Sample all lights (for indirect samples), rather than randomly picking one",
default=True,
)
light_sampling_threshold: FloatProperty(
name="Light Sampling Threshold",
description="Probabilistically terminate light samples when the light contribution is below this threshold (more noise but faster rendering). "
@@ -355,39 +403,19 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
use_adaptive_sampling: BoolProperty(
name="Use Adaptive Sampling",
description="Automatically reduce the number of samples per pixel based on estimated noise level",
default=True,
default=False,
)
adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples",
min=0.0, max=1.0,
soft_min=0.001,
default=0.01,
default=0.0,
precision=4,
)
adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold",
min=0, max=4096,
default=0,
)
use_preview_adaptive_sampling: BoolProperty(
name="Use Adaptive Sampling",
description="Automatically reduce the number of samples per pixel based on estimated noise level, for viewport renders",
default=True,
)
preview_adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Noise level step to stop sampling at, lower values reduce noise at the cost of render time. Zero for automatic setting based on number of AA samples, for viewport renders",
min=0.0, max=1.0,
soft_min=0.001,
default=0.1,
precision=4,
)
preview_adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on noise threshold, for viewport renders",
description="Minimum AA samples for adaptive sampling, to discover noisy features before stopping sampling. Zero for automatic setting based on number of AA samples",
min=0, max=4096,
default=0,
)
@@ -604,6 +632,53 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=10.0,
)
debug_tile_size: IntProperty(
name="Tile Size",
description="",
min=1, max=4096,
default=1024,
)
preview_start_resolution: IntProperty(
name="Start Resolution",
description="Resolution to start rendering preview at, "
"progressively increasing it to the full viewport size",
min=8, max=16384,
default=64,
subtype='PIXEL'
)
preview_denoising_start_sample: IntProperty(
name="Start Denoising",
description="Sample to start denoising the preview at",
min=0, max=(1 << 24),
default=1,
)
preview_denoising_input_passes: EnumProperty(
name="Viewport Input Passes",
description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
items=enum_denoising_input_passes,
default='RGB_ALBEDO',
)
debug_reset_timeout: FloatProperty(
name="Reset timeout",
description="",
min=0.01, max=10.0,
default=0.1,
)
debug_cancel_timeout: FloatProperty(
name="Cancel timeout",
description="",
min=0.01, max=10.0,
default=0.1,
)
debug_text_timeout: FloatProperty(
name="Text timeout",
description="",
min=0.01, max=10.0,
default=1.0,
)
debug_bvh_type: EnumProperty(
name="Viewport BVH Type",
description="Choose between faster updates, or faster render",
@@ -626,24 +701,38 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=0,
min=0, max=16,
)
tile_order: EnumProperty(
name="Tile Order",
description="Tile order for rendering",
items=enum_tile_order,
default='HILBERT_SPIRAL',
options=set(), # Not animatable!
)
use_progressive_refine: BoolProperty(
name="Progressive Refine",
description="Instead of rendering each tile until it is finished, "
"refine the whole image progressively "
"(this renders somewhat slower, "
"but time can be saved by manually stopping the render when the noise is low enough)",
default=False,
)
bake_type: EnumProperty(
name="Bake Type",
default='COMBINED',
description="Type of pass to bake",
items=(
('COMBINED', "Combined", "", 0),
('AO', "Ambient Occlusion", "", 1),
('SHADOW', "Shadow", "", 2),
('POSITION', "Position", "", 11),
('NORMAL', "Normal", "", 3),
('UV', "UV", "", 4),
('ROUGHNESS', "Roughness", "", 5),
('EMIT', "Emit", "", 6),
('ENVIRONMENT', "Environment", "", 7),
('DIFFUSE', "Diffuse", "", 8),
('GLOSSY', "Glossy", "", 9),
('TRANSMISSION', "Transmission", "", 10),
('COMBINED', "Combined", ""),
('AO', "Ambient Occlusion", ""),
('SHADOW', "Shadow", ""),
('NORMAL', "Normal", ""),
('UV', "UV", ""),
('ROUGHNESS', "Roughness", ""),
('EMIT', "Emit", ""),
('ENVIRONMENT', "Environment", ""),
('DIFFUSE', "Diffuse", ""),
('GLOSSY', "Glossy", ""),
('TRANSMISSION', "Transmission", ""),
),
)
@@ -738,18 +827,6 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
min=0, max=1024,
)
use_auto_tile: BoolProperty(
name="Auto Tiles",
description="Automatically render high resolution images in tiles to reduce memory usage, using the specified tile size. Tiles are cached to disk while rendering to save memory",
default=True,
)
tile_size: IntProperty(
name="Tile Size",
default=2048,
description="",
min=8, max=16384,
)
# Various fine-tuning debug flags
def _devices_update_callback(self, context):
@@ -767,13 +844,45 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
items=enum_bvh_layouts,
default='EMBREE',
)
debug_use_cpu_split_kernel: BoolProperty(name="Split Kernel", default=False)
debug_use_cuda_adaptive_compile: BoolProperty(name="Adaptive Compile", default=False)
debug_use_cuda_split_kernel: BoolProperty(name="Split Kernel", default=False)
debug_use_optix_debug: BoolProperty(
name="OptiX Module Debug",
description="Load OptiX module in debug mode: lower logging verbosity level, enable validations, and lower optimization level",
default=False
debug_optix_cuda_streams: IntProperty(name="CUDA Streams", default=1, min=1)
debug_optix_curves_api: BoolProperty(name="Native OptiX Curve Primitive", default=False)
debug_opencl_kernel_type: EnumProperty(
name="OpenCL Kernel Type",
default='DEFAULT',
items=(
('DEFAULT', "Default", ""),
('MEGA', "Mega", ""),
('SPLIT', "Split", ""),
),
update=CyclesRenderSettings._devices_update_callback
)
debug_opencl_device_type: EnumProperty(
name="OpenCL Device Type",
default='ALL',
items=(
('NONE', "None", ""),
('ALL', "All", ""),
('DEFAULT', "Default", ""),
('CPU', "CPU", ""),
('GPU', "GPU", ""),
('ACCELERATOR', "Accelerator", ""),
),
update=CyclesRenderSettings._devices_update_callback
)
debug_use_opencl_debug: BoolProperty(name="Debug OpenCL", default=False)
debug_opencl_mem_limit: IntProperty(
name="Memory limit",
default=0,
description="Artificial limit on OpenCL memory usage in MB (0 to disable limit)"
)
@classmethod
@@ -922,6 +1031,12 @@ class CyclesLightSettings(bpy.types.PropertyGroup):
description="Light casts shadows",
default=True,
)
samples: IntProperty(
name="Samples",
description="Number of light samples to render for each AA sample",
min=1, max=10000,
default=1,
)
max_bounces: IntProperty(
name="Max Bounces",
description="Maximum number of bounces the light will contribute to the render",
@@ -969,6 +1084,12 @@ class CyclesWorldSettings(bpy.types.PropertyGroup):
min=4, max=8192,
default=1024,
)
samples: IntProperty(
name="Samples",
description="Number of light samples to render for each AA sample",
min=1, max=10000,
default=1,
)
max_bounces: IntProperty(
name="Max Bounces",
description="Maximum number of bounces the background light will contribute to the render",
@@ -1197,6 +1318,12 @@ class CyclesCurveRenderSettings(bpy.types.PropertyGroup):
class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
pass_debug_render_time: BoolProperty(
name="Debug Render Time",
description="Render time in milliseconds per sample and pixel",
default=False,
update=update_render_passes,
)
pass_debug_sample_count: BoolProperty(
name="Debug Sample Count",
description="Number of samples/camera rays per pixel",
@@ -1216,25 +1343,91 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
update=update_render_passes,
)
use_pass_shadow_catcher: BoolProperty(
name="Shadow Catcher",
description="Pass containing shadows and light which is to be multiplied into backdrop",
default=False,
update=update_render_passes,
)
use_denoising: BoolProperty(
name="Use Denoising",
description="Denoise the rendered image",
default=True,
update=update_render_passes,
)
denoising_diffuse_direct: BoolProperty(
name="Diffuse Direct",
description="Denoise the direct diffuse lighting",
default=True,
)
denoising_diffuse_indirect: BoolProperty(
name="Diffuse Indirect",
description="Denoise the indirect diffuse lighting",
default=True,
)
denoising_glossy_direct: BoolProperty(
name="Glossy Direct",
description="Denoise the direct glossy lighting",
default=True,
)
denoising_glossy_indirect: BoolProperty(
name="Glossy Indirect",
description="Denoise the indirect glossy lighting",
default=True,
)
denoising_transmission_direct: BoolProperty(
name="Transmission Direct",
description="Denoise the direct transmission lighting",
default=True,
)
denoising_transmission_indirect: BoolProperty(
name="Transmission Indirect",
description="Denoise the indirect transmission lighting",
default=True,
)
denoising_strength: FloatProperty(
name="Denoising Strength",
description="Controls neighbor pixel weighting for the denoising filter (lower values preserve more detail, but aren't as smooth)",
min=0.0, max=1.0,
default=0.5,
)
denoising_feature_strength: FloatProperty(
name="Denoising Feature Strength",
description="Controls removal of noisy image feature passes (lower values preserve more detail, but aren't as smooth)",
min=0.0, max=1.0,
default=0.5,
)
denoising_radius: IntProperty(
name="Denoising Radius",
description="Size of the image area that's used to denoise a pixel (higher values are smoother, but might lose detail and are slower)",
min=1, max=25,
default=8,
subtype="PIXEL",
)
denoising_relative_pca: BoolProperty(
name="Relative Filter",
description="When removing pixels that don't carry information, use a relative threshold instead of an absolute one (can help to reduce artifacts, but might cause detail loss around edges)",
default=False,
)
denoising_store_passes: BoolProperty(
name="Store Denoising Passes",
description="Store the denoising feature passes and the noisy image. The passes adapt to the denoiser selected for rendering",
default=False,
update=update_render_passes,
)
denoising_neighbor_frames: IntProperty(
name="Neighbor Frames",
description="Number of neighboring frames to use for denoising animations (more frames produce smoother results at the cost of performance)",
min=0, max=7,
default=0,
)
denoising_optix_input_passes: EnumProperty(
name="Input Passes",
description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
items=enum_denoising_input_passes,
default='RGB_ALBEDO',
)
denoising_openimagedenoise_input_passes: EnumProperty(
name="Input Passes",
description="Passes used by the denoiser to distinguish noise from shader and geometry detail",
items=enum_denoising_input_passes,
default='RGB_ALBEDO_NORMAL',
)
@classmethod
def register(cls):
@@ -1261,16 +1454,14 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def get_device_types(self, context):
import _cycles
has_cuda, has_optix, has_hip = _cycles.get_device_types()
has_cuda, has_optix, has_opencl = _cycles.get_device_types()
list = [('NONE', "None", "Don't use compute device", 0)]
if has_cuda:
list.append(('CUDA', "CUDA", "Use CUDA for GPU acceleration", 1))
if has_optix:
list.append(('OPTIX', "OptiX", "Use OptiX for GPU acceleration", 3))
if has_hip:
list.append(('HIP', "HIP", "Use HIP for GPU acceleration", 4))
if has_opencl:
list.append(('OPENCL', "OpenCL", "Use OpenCL for GPU acceleration", 2))
return list
compute_device_type: EnumProperty(
@@ -1295,7 +1486,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
def update_device_entries(self, device_list):
for device in device_list:
if not device[1] in {'CUDA', 'OPTIX', 'CPU', 'HIP'}:
if not device[1] in {'CUDA', 'OPTIX', 'OPENCL', 'CPU'}:
continue
# Try to find existing Device entry
entry = self.find_existing_device_entry(device)
@@ -1329,23 +1520,22 @@ class CyclesPreferences(bpy.types.AddonPreferences):
elif entry.type == 'CPU':
cpu_devices.append(entry)
# Extend all GPU devices with CPU.
if compute_device_type != 'CPU' and compute_device_type != 'HIP':
if compute_device_type in {'CUDA', 'OPTIX', 'OPENCL'}:
devices.extend(cpu_devices)
return devices
# Refresh device list. This does not happen automatically on Blender
# startup due to unstable OpenCL implementations that can cause crashes.
def refresh_devices(self):
# For backwards compatibility, only returns CUDA and OpenCL but still
# refreshes all devices.
def get_devices(self, compute_device_type=''):
import _cycles
# Ensure `self.devices` is not re-allocated when the second call to
# get_devices_for_type is made, freeing items from the first list.
for device_type in ('CUDA', 'OPTIX', 'HIP'):
for device_type in ('CUDA', 'OPTIX', 'OPENCL'):
self.update_device_entries(_cycles.available_devices(device_type))
# Deprecated: use refresh_devices instead.
def get_devices(self, compute_device_type=''):
self.refresh_devices()
return None
cuda_devices = self.get_devices_for_type('CUDA')
opencl_devices = self.get_devices_for_type('OPENCL')
return cuda_devices, opencl_devices
def get_num_gpu_devices(self):
import _cycles
@@ -1411,10 +1601,6 @@ class CyclesView3DShadingSettings(bpy.types.PropertyGroup):
items=enum_view3d_shading_render_pass,
default='COMBINED',
)
show_active_pixels: BoolProperty(
name="Show Active Pixels",
description="When using adaptive sampling highlight pixels which are being sampled",
)
def register():

View File

@@ -34,12 +34,6 @@ class CYCLES_PT_sampling_presets(PresetPanel, Panel):
preset_add_operator = "render.cycles_sampling_preset_add"
COMPAT_ENGINES = {'CYCLES'}
class CYCLES_PT_viewport_sampling_presets(PresetPanel, Panel):
bl_label = "Viewport Sampling Presets"
preset_subdir = "cycles/viewport_sampling"
preset_operator = "script.execute_preset"
preset_add_operator = "render.cycles_viewport_sampling_preset_add"
COMPAT_ENGINES = {'CYCLES'}
class CYCLES_PT_integrator_presets(PresetPanel, Panel):
bl_label = "Integrator Presets"
@@ -60,15 +54,6 @@ class CyclesButtonsPanel:
return context.engine in cls.COMPAT_ENGINES
class CyclesDebugButtonsPanel(CyclesButtonsPanel):
@classmethod
def poll(cls, context):
prefs = bpy.context.preferences
return (CyclesButtonsPanel.poll(context)
and prefs.experimental.use_cycles_debug
and prefs.view.show_developer_ui)
# Adapt properties editor panel to display in node editor. We have to
# copy the class rather than inherit due to the way bpy registration works.
def node_panel(cls):
@@ -93,23 +78,30 @@ def use_cpu(context):
return (get_device_type(context) == 'NONE' or cscene.device == 'CPU')
def use_opencl(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'OPENCL' and cscene.device == 'GPU')
def use_cuda(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'CUDA' and cscene.device == 'GPU')
def use_hip(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'HIP' and cscene.device == 'GPU')
def use_optix(context):
cscene = context.scene.cycles
return (get_device_type(context) == 'OPTIX' and cscene.device == 'GPU')
def use_branched_path(context):
cscene = context.scene.cycles
return (cscene.progressive == 'BRANCHED_PATH' and not use_optix(context))
def use_sample_all_lights(context):
cscene = context.scene.cycles
@@ -123,93 +115,55 @@ def show_device_active(context):
return context.preferences.addons[__package__].preferences.has_active_device()
def get_effective_preview_denoiser(context):
scene = context.scene
cscene = scene.cycles
def draw_samples_info(layout, context):
cscene = context.scene.cycles
integrator = cscene.progressive
if cscene.preview_denoiser != "AUTO":
return cscene.preview_denoiser
# Calculate sample values
if integrator == 'PATH':
aa = cscene.samples
if cscene.use_square_samples:
aa = aa * aa
else:
aa = cscene.aa_samples
d = cscene.diffuse_samples
g = cscene.glossy_samples
t = cscene.transmission_samples
ao = cscene.ao_samples
ml = cscene.mesh_light_samples
sss = cscene.subsurface_samples
vol = cscene.volume_samples
if context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX'):
return 'OPTIX'
return 'OIDN'
if cscene.use_square_samples:
aa = aa * aa
d = d * d
g = g * g
t = t * t
ao = ao * ao
ml = ml * ml
sss = sss * sss
vol = vol * vol
# Draw interface
# Do not draw for progressive, when Square Samples are disabled
if use_branched_path(context) or (cscene.use_square_samples and integrator == 'PATH'):
col = layout.column(align=True)
col.scale_y = 0.6
col.label(text="Total Samples:")
col.separator()
if integrator == 'PATH':
col.label(text="%s AA" % aa)
else:
col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
(aa, d * aa, g * aa, t * aa))
col.separator()
col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
(ao * aa, ml * aa, sss * aa, vol * aa))
class CYCLES_RENDER_PT_sampling(CyclesButtonsPanel, Panel):
bl_label = "Sampling"
def draw(self, context):
pass
class CYCLES_RENDER_PT_sampling_viewport(CyclesButtonsPanel, Panel):
bl_label = "Viewport"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
def draw_header_preset(self, context):
CYCLES_PT_viewport_sampling_presets.draw_panel_header(self.layout)
def draw(self, context):
layout = self.layout
scene = context.scene
cscene = scene.cycles
layout.use_property_split = True
layout.use_property_decorate = False
heading = layout.column(align=True, heading="Noise Threshold")
row = heading.row(align=True)
row.prop(cscene, "use_preview_adaptive_sampling", text="")
sub = row.row()
sub.active = cscene.use_preview_adaptive_sampling
sub.prop(cscene, "preview_adaptive_threshold", text="")
if cscene.use_preview_adaptive_sampling:
col = layout.column(align=True)
col.prop(cscene, "preview_samples", text=" Max Samples")
col.prop(cscene, "preview_adaptive_min_samples", text="Min Samples")
else:
layout.prop(cscene, "preview_samples", text="Samples")
class CYCLES_RENDER_PT_sampling_viewport_denoise(CyclesButtonsPanel, Panel):
bl_label = "Denoise"
bl_parent_id = 'CYCLES_RENDER_PT_sampling_viewport'
bl_options = {'DEFAULT_CLOSED'}
def draw_header(self, context):
scene = context.scene
cscene = scene.cycles
self.layout.prop(context.scene.cycles, "use_preview_denoising", text="")
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
cscene = scene.cycles
col = layout.column()
col.active = cscene.use_preview_denoising
col.prop(cscene, "preview_denoiser", text="Denoiser")
col.prop(cscene, "preview_denoising_input_passes", text="Passes")
effective_preview_denoiser = get_effective_preview_denoiser(context)
if effective_preview_denoiser == 'OPENIMAGEDENOISE':
col.prop(cscene, "preview_denoising_prefilter", text="Prefilter")
col.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
class CYCLES_RENDER_PT_sampling_render(CyclesButtonsPanel, Panel):
bl_label = "Render"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
def draw_header_preset(self, context):
CYCLES_PT_sampling_presets.draw_panel_header(self.layout)
@@ -222,32 +176,29 @@ class CYCLES_RENDER_PT_sampling_render(CyclesButtonsPanel, Panel):
layout.use_property_split = True
layout.use_property_decorate = False
heading = layout.column(align=True, heading="Noise Threshold")
row = heading.row(align=True)
row.prop(cscene, "use_adaptive_sampling", text="")
sub = row.row()
sub.active = cscene.use_adaptive_sampling
sub.prop(cscene, "adaptive_threshold", text="")
if not use_optix(context):
layout.prop(cscene, "progressive")
col = layout.column(align=True)
if cscene.use_adaptive_sampling:
col.prop(cscene, "samples", text=" Max Samples")
col.prop(cscene, "adaptive_min_samples", text="Min Samples")
if not use_branched_path(context):
col = layout.column(align=True)
col.prop(cscene, "samples", text="Render")
col.prop(cscene, "preview_samples", text="Viewport")
else:
col.prop(cscene, "samples", text="Samples")
col.prop(cscene, "time_limit")
col = layout.column(align=True)
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")
if not use_branched_path(context):
draw_samples_info(layout, context)
class CYCLES_RENDER_PT_sampling_render_denoise(CyclesButtonsPanel, Panel):
bl_label = "Denoise"
bl_parent_id = 'CYCLES_RENDER_PT_sampling_render'
bl_options = {'DEFAULT_CLOSED'}
class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
def draw_header(self, context):
scene = context.scene
cscene = scene.cycles
self.layout.prop(context.scene.cycles, "use_denoising", text="")
@classmethod
def poll(cls, context):
return use_branched_path(context)
def draw(self, context):
layout = self.layout
@@ -257,12 +208,88 @@ class CYCLES_RENDER_PT_sampling_render_denoise(CyclesButtonsPanel, Panel):
scene = context.scene
cscene = scene.cycles
col = layout.column()
col.active = cscene.use_denoising
col.prop(cscene, "denoiser", text="Denoiser")
col.prop(cscene, "denoising_input_passes", text="Passes")
if cscene.denoiser == 'OPENIMAGEDENOISE':
col.prop(cscene, "denoising_prefilter", text="Prefilter")
col = layout.column(align=True)
col.prop(cscene, "diffuse_samples", text="Diffuse")
col.prop(cscene, "glossy_samples", text="Glossy")
col.prop(cscene, "transmission_samples", text="Transmission")
col.prop(cscene, "ao_samples", text="AO")
sub = col.row(align=True)
sub.active = use_sample_all_lights(context)
sub.prop(cscene, "mesh_light_samples", text="Mesh Light")
col.prop(cscene, "subsurface_samples", text="Subsurface")
col.prop(cscene, "volume_samples", text="Volume")
draw_samples_info(layout, context)
class CYCLES_RENDER_PT_sampling_adaptive(CyclesButtonsPanel, Panel):
bl_label = "Adaptive Sampling"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
bl_options = {'DEFAULT_CLOSED'}
def draw_header(self, context):
layout = self.layout
scene = context.scene
cscene = scene.cycles
layout.prop(cscene, "use_adaptive_sampling", text="")
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
cscene = scene.cycles
layout.active = cscene.use_adaptive_sampling
col = layout.column(align=True)
col.prop(cscene, "adaptive_threshold", text="Noise Threshold")
col.prop(cscene, "adaptive_min_samples", text="Min Samples")
class CYCLES_RENDER_PT_sampling_denoising(CyclesButtonsPanel, Panel):
bl_label = "Denoising"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
bl_options = {'DEFAULT_CLOSED'}
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
cscene = scene.cycles
heading = layout.column(align=True, heading="Render")
row = heading.row(align=True)
row.prop(cscene, "use_denoising", text="")
sub = row.row()
sub.active = cscene.use_denoising
for view_layer in scene.view_layers:
if view_layer.cycles.denoising_store_passes:
sub.active = True
sub.prop(cscene, "denoiser", text="")
layout.separator()
heading = layout.column(align=False, heading="Viewport")
row = heading.row(align=True)
row.prop(cscene, "use_preview_denoising", text="")
sub = row.row()
sub.active = cscene.use_preview_denoising
sub.prop(cscene, "preview_denoiser", text="")
sub = heading.row(align=True)
sub.active = cscene.use_preview_denoising
sub.prop(cscene, "preview_denoising_start_sample", text="Start Sample")
sub = heading.row(align=True)
sub.active = cscene.use_preview_denoising
sub.prop(cscene, "preview_denoising_input_passes", text="Input Passes")
class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
@@ -286,6 +313,8 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col.active = not(cscene.use_adaptive_sampling)
col.prop(cscene, "sampling_pattern", text="Pattern")
layout.prop(cscene, "use_square_samples")
layout.separator()
col = layout.column(align=True)
@@ -293,6 +322,11 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
col.prop(cscene, "min_transparent_bounces")
col.prop(cscene, "light_sampling_threshold", text="Light Threshold")
if cscene.progressive != 'PATH' and use_branched_path(context):
col = layout.column(align=True)
col.prop(cscene, "sample_all_lights_direct")
col.prop(cscene, "sample_all_lights_indirect")
for view_layer in scene.view_layers:
if view_layer.samples > 0:
layout.separator()
@@ -300,6 +334,62 @@ class CYCLES_RENDER_PT_sampling_advanced(CyclesButtonsPanel, Panel):
break
class CYCLES_RENDER_PT_sampling_total(CyclesButtonsPanel, Panel):
bl_label = "Total Samples"
bl_parent_id = "CYCLES_RENDER_PT_sampling"
@classmethod
def poll(cls, context):
scene = context.scene
cscene = scene.cycles
if cscene.use_square_samples:
return True
return cscene.progressive != 'PATH' and use_branched_path(context)
def draw(self, context):
layout = self.layout
cscene = context.scene.cycles
integrator = cscene.progressive
# Calculate sample values
if integrator == 'PATH':
aa = cscene.samples
if cscene.use_square_samples:
aa = aa * aa
else:
aa = cscene.aa_samples
d = cscene.diffuse_samples
g = cscene.glossy_samples
t = cscene.transmission_samples
ao = cscene.ao_samples
ml = cscene.mesh_light_samples
sss = cscene.subsurface_samples
vol = cscene.volume_samples
if cscene.use_square_samples:
aa = aa * aa
d = d * d
g = g * g
t = t * t
ao = ao * ao
ml = ml * ml
sss = sss * sss
vol = vol * vol
col = layout.column(align=True)
col.scale_y = 0.6
if integrator == 'PATH':
col.label(text="%s AA" % aa)
else:
col.label(text="%s AA, %s Diffuse, %s Glossy, %s Transmission" %
(aa, d * aa, g * aa, t * aa))
col.separator()
col.label(text="%s AO, %s Mesh Light, %s Subsurface, %s Volume" %
(ao * aa, ml * aa, sss * aa, vol * aa))
class CYCLES_RENDER_PT_subdivision(CyclesButtonsPanel, Panel):
bl_label = "Subdivision"
bl_options = {'DEFAULT_CLOSED'}
@@ -458,8 +548,6 @@ class CYCLES_RENDER_PT_light_paths_fast_gi(CyclesButtonsPanel, Panel):
layout.use_property_split = True
layout.use_property_decorate = False
layout.active = cscene.use_fast_gi
col = layout.column(align=True)
col.prop(cscene, "ao_bounces", text="Viewport Bounces")
col.prop(cscene, "ao_bounces_render", text="Render Bounces")
@@ -618,8 +706,8 @@ class CYCLES_RENDER_PT_performance_threads(CyclesButtonsPanel, Panel):
sub.prop(rd, "threads")
class CYCLES_RENDER_PT_performance_memory(CyclesButtonsPanel, Panel):
bl_label = "Memory"
class CYCLES_RENDER_PT_performance_tiles(CyclesButtonsPanel, Panel):
bl_label = "Tiles"
bl_parent_id = "CYCLES_RENDER_PT_performance"
def draw(self, context):
@@ -628,13 +716,19 @@ class CYCLES_RENDER_PT_performance_memory(CyclesButtonsPanel, Panel):
layout.use_property_decorate = False
scene = context.scene
rd = scene.render
cscene = scene.cycles
col = layout.column()
col.prop(cscene, "use_auto_tile")
sub = col.column(align=True)
sub.prop(rd, "tile_x", text="Tiles X")
sub.prop(rd, "tile_y", text="Y")
col.prop(cscene, "tile_order", text="Order")
sub = col.column()
sub.active = cscene.use_auto_tile
sub.prop(cscene, "tile_size")
sub.active = not rd.use_save_buffers and not cscene.use_adaptive_sampling
sub.prop(cscene, "use_progressive_refine")
class CYCLES_RENDER_PT_performance_acceleration_structure(CyclesButtonsPanel, Panel):
@@ -684,6 +778,7 @@ class CYCLES_RENDER_PT_performance_final_render(CyclesButtonsPanel, Panel):
col = layout.column()
col.prop(rd, "use_save_buffers")
col.prop(rd, "use_persistent_data", text="Persistent Data")
@@ -702,6 +797,7 @@ class CYCLES_RENDER_PT_performance_viewport(CyclesButtonsPanel, Panel):
col = layout.column()
col.prop(rd, "preview_pixel_size", text="Pixel Size")
col.prop(cscene, "preview_start_resolution", text="Start Pixels")
class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
@@ -722,6 +818,7 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
col = layout.column(heading="Include")
col.prop(view_layer, "use_sky", text="Environment")
col.prop(view_layer, "use_ao", text="Ambient Occlusion")
col.prop(view_layer, "use_solid", text="Surfaces")
col.prop(view_layer, "use_strand", text="Hair")
col.prop(view_layer, "use_volumes", text="Volumes")
@@ -730,9 +827,6 @@ class CYCLES_RENDER_PT_filter(CyclesButtonsPanel, Panel):
sub = col.row()
sub.prop(view_layer, "use_motion_blur", text="Motion Blur")
sub.active = rd.use_motion_blur
sub = col.row()
sub.prop(view_layer.cycles, 'use_denoising', text='Denoising')
sub.active = scene.cycles.use_denoising
class CYCLES_RENDER_PT_override(CyclesButtonsPanel, Panel):
@@ -778,7 +872,6 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
col.prop(view_layer, "use_pass_combined")
col.prop(view_layer, "use_pass_z")
col.prop(view_layer, "use_pass_mist")
col.prop(view_layer, "use_pass_position")
col.prop(view_layer, "use_pass_normal")
sub = col.column()
sub.active = not rd.use_motion_blur
@@ -792,6 +885,7 @@ class CYCLES_RENDER_PT_passes_data(CyclesButtonsPanel, Panel):
col.prop(view_layer, "use_pass_material_index")
col = layout.column(heading="Debug", align=True)
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")
layout.prop(view_layer, "pass_alpha_threshold")
@@ -834,7 +928,6 @@ class CYCLES_RENDER_PT_passes_light(CyclesButtonsPanel, Panel):
col.prop(view_layer, "use_pass_environment")
col.prop(view_layer, "use_pass_shadow")
col.prop(view_layer, "use_pass_ambient_occlusion", text="Ambient Occlusion")
col.prop(cycles_view_layer, "use_pass_shadow_catcher")
class CYCLES_RENDER_PT_passes_crypto(CyclesButtonsPanel, ViewLayerCryptomattePanel, Panel):
@@ -849,6 +942,70 @@ class CYCLES_RENDER_PT_passes_aov(CyclesButtonsPanel, ViewLayerAOVPanel):
bl_parent_id = "CYCLES_RENDER_PT_passes"
class CYCLES_RENDER_PT_denoising(CyclesButtonsPanel, Panel):
bl_label = "Denoising"
bl_context = "view_layer"
bl_options = {'DEFAULT_CLOSED'}
@classmethod
def poll(cls, context):
cscene = context.scene.cycles
return CyclesButtonsPanel.poll(context) and cscene.use_denoising
def draw_header(self, context):
scene = context.scene
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
layout = self.layout
layout.prop(cycles_view_layer, "use_denoising", text="")
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
scene = context.scene
view_layer = context.view_layer
cycles_view_layer = view_layer.cycles
denoiser = scene.cycles.denoiser
layout.active = denoiser != 'NONE' and cycles_view_layer.use_denoising
col = layout.column()
if denoiser == 'OPTIX':
col.prop(cycles_view_layer, "denoising_optix_input_passes")
return
elif denoiser == 'OPENIMAGEDENOISE':
col.prop(cycles_view_layer, "denoising_openimagedenoise_input_passes")
return
col.prop(cycles_view_layer, "denoising_radius", text="Radius")
col = layout.column()
col.prop(cycles_view_layer, "denoising_strength", slider=True, text="Strength")
col.prop(cycles_view_layer, "denoising_feature_strength", slider=True, text="Feature Strength")
col.prop(cycles_view_layer, "denoising_relative_pca")
layout.separator()
col = layout.column()
col.active = cycles_view_layer.use_denoising or cycles_view_layer.denoising_store_passes
row = col.row(heading="Diffuse", align=True)
row.prop(cycles_view_layer, "denoising_diffuse_direct", text="Direct", toggle=True)
row.prop(cycles_view_layer, "denoising_diffuse_indirect", text="Indirect", toggle=True)
row = col.row(heading="Glossy", align=True)
row.prop(cycles_view_layer, "denoising_glossy_direct", text="Direct", toggle=True)
row.prop(cycles_view_layer, "denoising_glossy_indirect", text="Indirect", toggle=True)
row = col.row(heading="Transmission", align=True)
row.prop(cycles_view_layer, "denoising_transmission_direct", text="Direct", toggle=True)
row.prop(cycles_view_layer, "denoising_transmission_indirect", text="Indirect", toggle=True)
class CYCLES_PT_post_processing(CyclesButtonsPanel, Panel):
bl_label = "Post Processing"
bl_options = {'DEFAULT_CLOSED'}
@@ -1260,6 +1417,10 @@ class CYCLES_LIGHT_PT_light(CyclesButtonsPanel, Panel):
if not (light.type == 'AREA' and clamp.is_portal):
sub = col.column()
if use_branched_path(context):
subsub = sub.row(align=True)
subsub.active = use_sample_all_lights(context)
subsub.prop(clamp, "samples")
sub.prop(clamp, "max_bounces")
sub = col.column(align=True)
@@ -1365,6 +1526,34 @@ class CYCLES_WORLD_PT_volume(CyclesButtonsPanel, Panel):
panel_node_draw(layout, world, 'OUTPUT_WORLD', 'Volume')
class CYCLES_WORLD_PT_ambient_occlusion(CyclesButtonsPanel, Panel):
bl_label = "Ambient Occlusion"
bl_context = "world"
bl_options = {'DEFAULT_CLOSED'}
@classmethod
def poll(cls, context):
return context.world and CyclesButtonsPanel.poll(context)
def draw_header(self, context):
light = context.world.light_settings
self.layout.prop(light, "use_ambient_occlusion", text="")
def draw(self, context):
layout = self.layout
layout.use_property_split = True
layout.use_property_decorate = False
light = context.world.light_settings
scene = context.scene
col = layout.column()
sub = col.column()
sub.active = light.use_ambient_occlusion or scene.render.use_simplify
sub.prop(light, "ao_factor", text="Factor")
col.prop(light, "distance", text="Distance")
class CYCLES_WORLD_PT_mist(CyclesButtonsPanel, Panel):
bl_label = "Mist Pass"
bl_context = "world"
@@ -1461,6 +1650,10 @@ class CYCLES_WORLD_PT_settings_surface(CyclesButtonsPanel, Panel):
subsub = sub.row(align=True)
subsub.active = cworld.sampling_method == 'MANUAL'
subsub.prop(cworld, "sample_map_resolution")
if use_branched_path(context):
subsub = sub.column(align=True)
subsub.active = use_sample_all_lights(context)
subsub.prop(cworld, "samples")
sub.prop(cworld, "max_bounces")
@@ -1484,7 +1677,8 @@ class CYCLES_WORLD_PT_settings_volume(CyclesButtonsPanel, Panel):
col = layout.column()
sub = col.column()
col.prop(cworld, "volume_sampling", text="Sampling")
sub.active = use_cpu(context)
sub.prop(cworld, "volume_sampling", text="Sampling")
col.prop(cworld, "volume_interpolation", text="Interpolation")
col.prop(cworld, "homogeneous_volume", text="Homogeneous")
sub = col.column()
@@ -1623,7 +1817,8 @@ class CYCLES_MATERIAL_PT_settings_volume(CyclesButtonsPanel, Panel):
col = layout.column()
sub = col.column()
col.prop(cmat, "volume_sampling", text="Sampling")
sub.active = use_cpu(context)
sub.prop(cmat, "volume_sampling", text="Sampling")
col.prop(cmat, "volume_interpolation", text="Interpolation")
col.prop(cmat, "homogeneous_volume", text="Homogeneous")
sub = col.column()
@@ -1650,6 +1845,9 @@ class CYCLES_RENDER_PT_bake(CyclesButtonsPanel, Panel):
cbk = scene.render.bake
rd = scene.render
if use_optix(context):
layout.label(text="Baking is performed using CUDA instead of OptiX", icon='INFO')
if rd.use_bake_multires:
layout.operator("object.bake_image", icon='RENDER_STILL')
layout.prop(rd, "use_bake_multires")
@@ -1707,6 +1905,7 @@ class CYCLES_RENDER_PT_bake_influence(CyclesButtonsPanel, Panel):
col.prop(cbk, "use_pass_diffuse")
col.prop(cbk, "use_pass_glossy")
col.prop(cbk, "use_pass_transmission")
col.prop(cbk, "use_pass_ambient_occlusion")
col.prop(cbk, "use_pass_emit")
elif cscene.bake_type in {'DIFFUSE', 'GLOSSY', 'TRANSMISSION'}:
@@ -1790,12 +1989,19 @@ class CYCLES_RENDER_PT_bake_output(CyclesButtonsPanel, Panel):
layout.prop(cbk, "use_clear", text="Clear Image")
class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
class CYCLES_RENDER_PT_debug(CyclesButtonsPanel, Panel):
bl_label = "Debug"
bl_context = "render"
bl_options = {'DEFAULT_CLOSED'}
COMPAT_ENGINES = {'CYCLES'}
@classmethod
def poll(cls, context):
prefs = bpy.context.preferences
return (CyclesButtonsPanel.poll(context)
and prefs.experimental.use_cycles_debug
and prefs.view.show_developer_ui)
def draw(self, context):
layout = self.layout
@@ -1812,18 +2018,29 @@ class CYCLES_RENDER_PT_debug(CyclesDebugButtonsPanel, Panel):
row.prop(cscene, "debug_use_cpu_avx", toggle=True)
row.prop(cscene, "debug_use_cpu_avx2", toggle=True)
col.prop(cscene, "debug_bvh_layout")
col.prop(cscene, "debug_use_cpu_split_kernel")
col.separator()
col = layout.column()
col.label(text="CUDA Flags:")
col.prop(cscene, "debug_use_cuda_adaptive_compile")
col.prop(cscene, "debug_use_cuda_split_kernel")
col.separator()
col = layout.column()
col.label(text="OptiX Flags:")
col.prop(cscene, "debug_use_optix_debug")
col.prop(cscene, "debug_optix_cuda_streams")
col.prop(cscene, "debug_optix_curves_api")
col.separator()
col = layout.column()
col.label(text="OpenCL Flags:")
col.prop(cscene, "debug_opencl_device_type", text="Device")
col.prop(cscene, "debug_use_opencl_debug", text="Debug")
col.prop(cscene, "debug_opencl_mem_limit")
col.separator()
@@ -1924,22 +2141,20 @@ class CYCLES_RENDER_PT_simplify_culling(CyclesButtonsPanel, Panel):
sub.prop(cscene, "distance_cull_margin", text="")
class CyclesShadingButtonsPanel(CyclesButtonsPanel):
class CYCLES_VIEW3D_PT_shading_render_pass(Panel):
bl_space_type = 'VIEW_3D'
bl_region_type = 'HEADER'
bl_label = "Render Pass"
bl_parent_id = 'VIEW3D_PT_shading'
COMPAT_ENGINES = {'CYCLES'}
@classmethod
def poll(cls, context):
return (
CyclesButtonsPanel.poll(context) and
context.engine in cls.COMPAT_ENGINES and
context.space_data.shading.type == 'RENDERED'
)
class CYCLES_VIEW3D_PT_shading_render_pass(CyclesShadingButtonsPanel, Panel):
bl_label = "Render Pass"
def draw(self, context):
shading = context.space_data.shading
@@ -1947,26 +2162,6 @@ class CYCLES_VIEW3D_PT_shading_render_pass(CyclesShadingButtonsPanel, Panel):
layout.prop(shading.cycles, "render_pass", text="")
class CYCLES_VIEW3D_PT_shading_debug(CyclesDebugButtonsPanel,
CyclesShadingButtonsPanel,
Panel):
bl_label = "Debug"
@classmethod
def poll(cls, context):
return (
CyclesDebugButtonsPanel.poll(context) and
CyclesShadingButtonsPanel.poll(context)
)
def draw(self, context):
shading = context.space_data.shading
layout = self.layout
layout.active = context.scene.cycles.use_preview_adaptive_sampling
layout.prop(shading.cycles, "show_active_pixels")
class CYCLES_VIEW3D_PT_shading_lighting(Panel):
bl_space_type = 'VIEW_3D'
bl_region_type = 'HEADER'
@@ -2080,13 +2275,11 @@ def get_panels():
classes = (
CYCLES_PT_sampling_presets,
CYCLES_PT_viewport_sampling_presets,
CYCLES_PT_integrator_presets,
CYCLES_RENDER_PT_sampling,
CYCLES_RENDER_PT_sampling_viewport,
CYCLES_RENDER_PT_sampling_viewport_denoise,
CYCLES_RENDER_PT_sampling_render,
CYCLES_RENDER_PT_sampling_render_denoise,
CYCLES_RENDER_PT_sampling_sub_samples,
CYCLES_RENDER_PT_sampling_adaptive,
CYCLES_RENDER_PT_sampling_denoising,
CYCLES_RENDER_PT_sampling_advanced,
CYCLES_RENDER_PT_light_paths,
CYCLES_RENDER_PT_light_paths_max_bounces,
@@ -2103,7 +2296,6 @@ classes = (
CYCLES_VIEW3D_PT_simplify_greasepencil,
CYCLES_VIEW3D_PT_shading_lighting,
CYCLES_VIEW3D_PT_shading_render_pass,
CYCLES_VIEW3D_PT_shading_debug,
CYCLES_RENDER_PT_motion_blur,
CYCLES_RENDER_PT_motion_blur_curve,
CYCLES_RENDER_PT_film,
@@ -2111,7 +2303,7 @@ classes = (
CYCLES_RENDER_PT_film_transparency,
CYCLES_RENDER_PT_performance,
CYCLES_RENDER_PT_performance_threads,
CYCLES_RENDER_PT_performance_memory,
CYCLES_RENDER_PT_performance_tiles,
CYCLES_RENDER_PT_performance_acceleration_structure,
CYCLES_RENDER_PT_performance_final_render,
CYCLES_RENDER_PT_performance_viewport,
@@ -2122,6 +2314,7 @@ classes = (
CYCLES_RENDER_PT_passes_aov,
CYCLES_RENDER_PT_filter,
CYCLES_RENDER_PT_override,
CYCLES_RENDER_PT_denoising,
CYCLES_PT_post_processing,
CYCLES_CAMERA_PT_dof,
CYCLES_CAMERA_PT_dof_aperture,
@@ -2140,6 +2333,7 @@ classes = (
CYCLES_WORLD_PT_preview,
CYCLES_WORLD_PT_surface,
CYCLES_WORLD_PT_volume,
CYCLES_WORLD_PT_ambient_occlusion,
CYCLES_WORLD_PT_mist,
CYCLES_WORLD_PT_ray_visibility,
CYCLES_WORLD_PT_settings,

View File

@@ -109,7 +109,7 @@ def do_versions(self):
library_versions.setdefault(library.version, []).append(library)
# Do versioning per library, since they might have different versions.
max_need_versioning = (3, 0, 25)
max_need_versioning = (2, 93, 7)
for version, libraries in library_versions.items():
if version > max_need_versioning:
continue
@@ -166,6 +166,10 @@ def do_versions(self):
if not cscene.is_property_set("filter_type"):
cscene.pixel_filter_type = 'GAUSSIAN'
# Tile Order
if not cscene.is_property_set("tile_order"):
cscene.tile_order = 'CENTER'
if version <= (2, 76, 10):
cscene = scene.cycles
if cscene.is_property_set("filter_type"):
@@ -182,6 +186,10 @@ def do_versions(self):
if version <= (2, 79, 0):
cscene = scene.cycles
# Default changes
if not cscene.is_property_set("aa_samples"):
cscene.aa_samples = 4
if not cscene.is_property_set("preview_aa_samples"):
cscene.preview_aa_samples = 4
if not cscene.is_property_set("blur_glossy"):
cscene.blur_glossy = 0.0
if not cscene.is_property_set("sample_clamp_indirect"):
@@ -195,6 +203,7 @@ def do_versions(self):
view_layer.use_pass_cryptomatte_material = cview_layer.get("use_pass_crypto_material", False)
view_layer.use_pass_cryptomatte_asset = cview_layer.get("use_pass_crypto_asset", False)
view_layer.pass_cryptomatte_depth = cview_layer.get("pass_crypto_depth", 6)
view_layer.use_pass_cryptomatte_accurate = cview_layer.get("pass_crypto_accurate", True)
if version <= (2, 93, 7):
if scene.render.engine == 'CYCLES':
@@ -220,35 +229,6 @@ def do_versions(self):
cscene.ao_bounces = 1
cscene.ao_bounces_render = 1
if version <= (3, 0, 25):
cscene = scene.cycles
# Default changes.
if not cscene.is_property_set("samples"):
cscene.samples = 128
if not cscene.is_property_set("preview_samples"):
cscene.preview_samples = 32
if not cscene.is_property_set("use_adaptive_sampling"):
cscene.use_adaptive_sampling = False
cscene.use_preview_adaptive_sampling = False
if not cscene.is_property_set("use_denoising"):
cscene.use_denoising = False
if not cscene.is_property_set("use_preview_denoising"):
cscene.use_preview_denoising = False
if not cscene.is_property_set("sampling_pattern"):
cscene.sampling_pattern = 'PROGRESSIVE_MUTI_JITTER'
# Removal of square samples.
cscene = scene.cycles
use_square_samples = cscene.get("use_square_samples", False)
if use_square_samples:
cscene.samples *= cscene.samples
cscene.preview_samples *= cscene.preview_samples
for layer in scene.view_layers:
layer.samples *= layer.samples
cscene["use_square_samples"] = False
# Lamps
for light in bpy.data.lights:
if light.library not in libraries:
@@ -269,6 +249,10 @@ def do_versions(self):
if version <= (2, 76, 9):
cworld = world.cycles
# World MIS Samples
if not cworld.is_property_set("samples"):
cworld.samples = 4
# World MIS Resolution
if not cworld.is_property_set("sample_map_resolution"):
cworld.sample_map_resolution = 256

View File

@@ -894,8 +894,12 @@ void BlenderSync::sync_view(BL::SpaceView3D &b_v3d,
}
}
BufferParams BlenderSync::get_buffer_params(
BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height)
BufferParams BlenderSync::get_buffer_params(BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
int height,
const bool use_denoiser)
{
BufferParams params;
bool use_border = false;
@@ -927,6 +931,11 @@ BufferParams BlenderSync::get_buffer_params(
params.height = height;
}
PassType display_pass = update_viewport_display_passes(b_v3d, params.passes);
/* Can only denoise the combined image pass */
params.denoising_data_pass = display_pass == PASS_COMBINED && use_denoiser;
return params;
}

View File

@@ -283,13 +283,10 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
return;
Attribute *attr_intercept = NULL;
Attribute *attr_length = NULL;
Attribute *attr_random = NULL;
if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT))
attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH))
attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM))
attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
@@ -339,10 +336,6 @@ static void ExportCurveSegments(Scene *scene, Hair *hair, ParticleCurveData *CDa
num_curve_keys++;
}
if (attr_length != NULL) {
attr_length->add(CData->curve_length[curve]);
}
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(num_curves, 0));
}
@@ -664,15 +657,11 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
/* Add requested attributes. */
Attribute *attr_intercept = NULL;
Attribute *attr_length = NULL;
Attribute *attr_random = NULL;
if (hair->need_attribute(scene, ATTR_STD_CURVE_INTERCEPT)) {
attr_intercept = hair->attributes.add(ATTR_STD_CURVE_INTERCEPT);
}
if (hair->need_attribute(scene, ATTR_STD_CURVE_LENGTH)) {
attr_length = hair->attributes.add(ATTR_STD_CURVE_LENGTH);
}
if (hair->need_attribute(scene, ATTR_STD_CURVE_RANDOM)) {
attr_random = hair->attributes.add(ATTR_STD_CURVE_RANDOM);
}
@@ -725,10 +714,6 @@ static void export_hair_curves(Scene *scene, Hair *hair, BL::Hair b_hair)
}
}
if (attr_length) {
attr_length->add(length);
}
/* Random number per curve. */
if (attr_random != NULL) {
attr_random->add(hash_uint2_to_float(b_curve.index(), 0));

View File

@@ -25,8 +25,8 @@ CCL_NAMESPACE_BEGIN
enum ComputeDevice {
COMPUTE_DEVICE_CPU = 0,
COMPUTE_DEVICE_CUDA = 1,
COMPUTE_DEVICE_OPENCL = 2,
COMPUTE_DEVICE_OPTIX = 3,
COMPUTE_DEVICE_HIP = 4,
COMPUTE_DEVICE_NUM
};
@@ -68,6 +68,13 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
device = Device::get_multi_device(devices, threads, background);
}
}
else if (get_enum(cscene, "device") == 2) {
/* Find network device. */
vector<DeviceInfo> devices = Device::available_devices(DEVICE_MASK_NETWORK);
if (!devices.empty()) {
device = devices.front();
}
}
else if (get_enum(cscene, "device") == 1) {
/* Test if we are using GPU devices. */
ComputeDevice compute_device = (ComputeDevice)get_enum(
@@ -82,8 +89,8 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
else if (compute_device == COMPUTE_DEVICE_OPTIX) {
mask |= DEVICE_MASK_OPTIX;
}
else if (compute_device == COMPUTE_DEVICE_HIP) {
mask |= DEVICE_MASK_HIP;
else if (compute_device == COMPUTE_DEVICE_OPENCL) {
mask |= DEVICE_MASK_OPENCL;
}
vector<DeviceInfo> devices = Device::available_devices(mask);

View File

@@ -1,754 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blender/blender_display_driver.h"
#include "device/device.h"
#include "util/util_logging.h"
#include "util/util_opengl.h"
extern "C" {
struct RenderEngine;
bool RE_engine_has_render_context(struct RenderEngine *engine);
void RE_engine_render_context_enable(struct RenderEngine *engine);
void RE_engine_render_context_disable(struct RenderEngine *engine);
bool DRW_opengl_context_release();
void DRW_opengl_context_activate(bool drw_state);
void *WM_opengl_context_create();
void WM_opengl_context_activate(void *gl_context);
void WM_opengl_context_dispose(void *gl_context);
void WM_opengl_context_release(void *context);
}
CCL_NAMESPACE_BEGIN
/* --------------------------------------------------------------------
* BlenderDisplayShader.
*/
unique_ptr<BlenderDisplayShader> BlenderDisplayShader::create(BL::RenderEngine &b_engine,
BL::Scene &b_scene)
{
if (b_engine.support_display_space_shader(b_scene)) {
return make_unique<BlenderDisplaySpaceShader>(b_engine, b_scene);
}
return make_unique<BlenderFallbackDisplayShader>();
}
int BlenderDisplayShader::get_position_attrib_location()
{
if (position_attribute_location_ == -1) {
const uint shader_program = get_shader_program();
position_attribute_location_ = glGetAttribLocation(shader_program, position_attribute_name);
}
return position_attribute_location_;
}
int BlenderDisplayShader::get_tex_coord_attrib_location()
{
if (tex_coord_attribute_location_ == -1) {
const uint shader_program = get_shader_program();
tex_coord_attribute_location_ = glGetAttribLocation(shader_program, tex_coord_attribute_name);
}
return tex_coord_attribute_location_;
}
/* --------------------------------------------------------------------
* BlenderFallbackDisplayShader.
*/
/* TODO move shaders to standalone .glsl file. */
static const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
" texCoord_interp = texCoord;\n"
"}\n\0";
static const char *FALLBACK_FRAGMENT_SHADER =
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
static void shader_print_errors(const char *task, const char *log, const char *code)
{
LOG(ERROR) << "Shader: " << task << " error:";
LOG(ERROR) << "===== shader string ====";
stringstream stream(code);
string partial;
int line = 1;
while (getline(stream, partial, '\n')) {
if (line < 10) {
LOG(ERROR) << " " << line << " " << partial;
}
else {
LOG(ERROR) << line << " " << partial;
}
line++;
}
LOG(ERROR) << log;
}
static int compile_fallback_shader(void)
{
const struct Shader {
const char *source;
const GLenum type;
} shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
const GLuint program = glCreateProgram();
for (int i = 0; i < 2; i++) {
const GLuint shader = glCreateShader(shaders[i].type);
string source_str = shaders[i].source;
const char *c_str = source_str.c_str();
glShaderSource(shader, 1, &c_str, NULL);
glCompileShader(shader);
GLint compile_status;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_status);
if (!compile_status) {
GLchar log[5000];
GLsizei length = 0;
glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str);
return 0;
}
glAttachShader(program, shader);
}
/* Link output. */
glBindFragDataLocation(program, 0, "fragColor");
/* Link and error check. */
glLinkProgram(program);
/* TODO(sergey): Find a way to nicely de-duplicate the error checking. */
GLint link_status;
glGetProgramiv(program, GL_LINK_STATUS, &link_status);
if (!link_status) {
GLchar log[5000];
GLsizei length = 0;
/* TODO(sergey): Is it really program passed to glGetShaderInfoLog? */
glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
return 0;
}
return program;
}
void BlenderFallbackDisplayShader::bind(int width, int height)
{
create_shader_if_needed();
if (!shader_program_) {
return;
}
glUseProgram(shader_program_);
glUniform1i(image_texture_location_, 0);
glUniform2f(fullscreen_location_, width, height);
}
void BlenderFallbackDisplayShader::unbind()
{
}
uint BlenderFallbackDisplayShader::get_shader_program()
{
return shader_program_;
}
void BlenderFallbackDisplayShader::create_shader_if_needed()
{
if (shader_program_ || shader_compile_attempted_) {
return;
}
shader_compile_attempted_ = true;
shader_program_ = compile_fallback_shader();
if (!shader_program_) {
return;
}
glUseProgram(shader_program_);
image_texture_location_ = glGetUniformLocation(shader_program_, "image_texture");
if (image_texture_location_ < 0) {
LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
destroy_shader();
return;
}
fullscreen_location_ = glGetUniformLocation(shader_program_, "fullscreen");
if (fullscreen_location_ < 0) {
LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
destroy_shader();
return;
}
}
void BlenderFallbackDisplayShader::destroy_shader()
{
glDeleteProgram(shader_program_);
shader_program_ = 0;
}
/* --------------------------------------------------------------------
* BlenderDisplaySpaceShader.
*/
BlenderDisplaySpaceShader::BlenderDisplaySpaceShader(BL::RenderEngine &b_engine,
BL::Scene &b_scene)
: b_engine_(b_engine), b_scene_(b_scene)
{
DCHECK(b_engine_.support_display_space_shader(b_scene_));
}
void BlenderDisplaySpaceShader::bind(int /*width*/, int /*height*/)
{
b_engine_.bind_display_space_shader(b_scene_);
}
void BlenderDisplaySpaceShader::unbind()
{
b_engine_.unbind_display_space_shader();
}
uint BlenderDisplaySpaceShader::get_shader_program()
{
if (!shader_program_) {
glGetIntegerv(GL_CURRENT_PROGRAM, reinterpret_cast<int *>(&shader_program_));
}
if (!shader_program_) {
LOG(ERROR) << "Error retrieving shader program for display space shader.";
}
return shader_program_;
}
/* --------------------------------------------------------------------
* BlenderDisplayDriver.
*/
BlenderDisplayDriver::BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene)
: b_engine_(b_engine), display_shader_(BlenderDisplayShader::create(b_engine, b_scene))
{
/* Create context while on the main thread. */
gl_context_create();
}
BlenderDisplayDriver::~BlenderDisplayDriver()
{
gl_resources_destroy();
}
/* --------------------------------------------------------------------
* Update procedure.
*/
bool BlenderDisplayDriver::update_begin(const Params &params,
int texture_width,
int texture_height)
{
/* Note that it's the responsibility of BlenderDisplayDriver to ensure updating and drawing
* the texture does not happen at the same time. This is achieved indirectly.
*
* When enabling the OpenGL context, it uses an internal mutex lock DST.gl_context_lock.
* This same lock is also held when do_draw() is called, which together ensure mutual
* exclusion.
*
* This locking is not performed on the Cycles side, because that would cause lock inversion. */
if (!gl_context_enable()) {
return false;
}
if (gl_render_sync_) {
glWaitSync((GLsync)gl_render_sync_, 0, GL_TIMEOUT_IGNORED);
}
if (!gl_texture_resources_ensure()) {
gl_context_disable();
return false;
}
/* Update texture dimensions if needed. */
if (texture_.width != texture_width || texture_.height != texture_height) {
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
glTexImage2D(
GL_TEXTURE_2D, 0, GL_RGBA16F, texture_width, texture_height, 0, GL_RGBA, GL_HALF_FLOAT, 0);
texture_.width = texture_width;
texture_.height = texture_height;
glBindTexture(GL_TEXTURE_2D, 0);
/* Texture did change, and no pixel storage was provided. Tag for an explicit zeroing out to
* avoid undefined content. */
texture_.need_clear = true;
}
/* Update PBO dimensions if needed.
*
* NOTE: Allocate the PBO for the the size which will fit the final render resolution (as in,
* at a resolution divider 1. This was we don't need to recreate graphics interoperability
* objects which are costly and which are tied to the specific underlying buffer size.
* The downside of this approach is that when graphics interoperability is not used we are
* sending too much data to GPU when resolution divider is not 1. */
/* TODO(sergey): Investigate whether keeping the PBO exact size of the texture makes non-interop
* mode faster. */
const int buffer_width = params.full_size.x;
const int buffer_height = params.full_size.y;
if (texture_.buffer_width != buffer_width || texture_.buffer_height != buffer_height) {
const size_t size_in_bytes = sizeof(half4) * buffer_width * buffer_height;
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
glBufferData(GL_PIXEL_UNPACK_BUFFER, size_in_bytes, 0, GL_DYNAMIC_DRAW);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
texture_.buffer_width = buffer_width;
texture_.buffer_height = buffer_height;
}
/* New content will be provided to the texture in one way or another, so mark this in a
* centralized place. */
texture_.need_update = true;
return true;
}
void BlenderDisplayDriver::update_end()
{
gl_upload_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush();
gl_context_disable();
}
/* --------------------------------------------------------------------
* Texture buffer mapping.
*/
half4 *BlenderDisplayDriver::map_texture_buffer()
{
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
half4 *mapped_rgba_pixels = reinterpret_cast<half4 *>(
glMapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY));
if (!mapped_rgba_pixels) {
LOG(ERROR) << "Error mapping BlenderDisplayDriver pixel buffer object.";
}
if (texture_.need_clear) {
const int64_t texture_width = texture_.width;
const int64_t texture_height = texture_.height;
memset(reinterpret_cast<void *>(mapped_rgba_pixels),
0,
texture_width * texture_height * sizeof(half4));
texture_.need_clear = false;
}
return mapped_rgba_pixels;
}
void BlenderDisplayDriver::unmap_texture_buffer()
{
glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
/* --------------------------------------------------------------------
* Graphics interoperability.
*/
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
{
GraphicsInterop interop_dst;
interop_dst.buffer_width = texture_.buffer_width;
interop_dst.buffer_height = texture_.buffer_height;
interop_dst.opengl_pbo_id = texture_.gl_pbo_id;
interop_dst.need_clear = texture_.need_clear;
texture_.need_clear = false;
return interop_dst;
}
void BlenderDisplayDriver::graphics_interop_activate()
{
gl_context_enable();
}
void BlenderDisplayDriver::graphics_interop_deactivate()
{
gl_context_disable();
}
/* --------------------------------------------------------------------
* Drawing.
*/
void BlenderDisplayDriver::clear()
{
texture_.need_clear = true;
}
void BlenderDisplayDriver::set_zoom(float zoom_x, float zoom_y)
{
zoom_ = make_float2(zoom_x, zoom_y);
}
void BlenderDisplayDriver::draw(const Params &params)
{
/* See do_update_begin() for why no locking is required here. */
const bool transparent = true; // TODO(sergey): Derive this from Film.
if (!gl_draw_resources_ensure()) {
return;
}
if (use_gl_context_) {
gl_context_mutex_.lock();
}
if (texture_.need_clear) {
/* Texture is requested to be cleared and was not yet cleared.
*
* Do early return which should be equivalent of drawing all-zero texture.
* Watch out for the lock though so that the clear happening during update is properly
* synchronized here. */
gl_context_mutex_.unlock();
return;
}
if (gl_upload_sync_) {
glWaitSync((GLsync)gl_upload_sync_, 0, GL_TIMEOUT_IGNORED);
}
if (transparent) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
}
display_shader_->bind(params.full_size.x, params.full_size.y);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
/* Trick to keep sharp rendering without jagged edges on all GPUs.
*
* The idea here is to enforce driver to use linear interpolation when the image is not zoomed
* in.
* For the render result with a resolution divider in effect we always use nearest interpolation.
*
* Use explicit MIN assignment to make sure the driver does not have an undefined behavior at
* the zoom level 1. The MAG filter is always NEAREST. */
const float zoomed_width = params.size.x * zoom_.x;
const float zoomed_height = params.size.y * zoom_.y;
if (texture_.width != params.size.x || texture_.height != params.size.y) {
/* Resolution divider is different from 1, force nearest interpolation. */
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else if (zoomed_width - params.size.x > 0.5f || zoomed_height - params.size.y > 0.5f) {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
}
else {
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
}
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_);
texture_update_if_needed();
vertex_buffer_update(params);
/* TODO(sergey): Does it make sense/possible to cache/reuse the VAO? */
GLuint vertex_array_object;
glGenVertexArrays(1, &vertex_array_object);
glBindVertexArray(vertex_array_object);
const int texcoord_attribute = display_shader_->get_tex_coord_attrib_location();
const int position_attribute = display_shader_->get_position_attrib_location();
glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute);
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glDeleteVertexArrays(1, &vertex_array_object);
display_shader_->unbind();
if (transparent) {
glDisable(GL_BLEND);
}
gl_render_sync_ = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
glFlush();
if (use_gl_context_) {
gl_context_mutex_.unlock();
}
}
void BlenderDisplayDriver::gl_context_create()
{
/* When rendering in viewport there is no render context available via engine.
* Check whether own context is to be created here.
*
* NOTE: If the `b_engine_`'s context is not available, we are expected to be on a main thread
* here. */
use_gl_context_ = !RE_engine_has_render_context(
reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
if (use_gl_context_) {
const bool drw_state = DRW_opengl_context_release();
gl_context_ = WM_opengl_context_create();
if (gl_context_) {
/* On Windows an old context is restored after creation, and subsequent release of context
* generates a Win32 error. Harmless for users, but annoying to have possible misleading
* error prints in the console. */
#ifndef _WIN32
WM_opengl_context_release(gl_context_);
#endif
}
else {
LOG(ERROR) << "Error creating OpenGL context.";
}
DRW_opengl_context_activate(drw_state);
}
}
bool BlenderDisplayDriver::gl_context_enable()
{
if (use_gl_context_) {
if (!gl_context_) {
return false;
}
gl_context_mutex_.lock();
WM_opengl_context_activate(gl_context_);
return true;
}
RE_engine_render_context_enable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
return true;
}
void BlenderDisplayDriver::gl_context_disable()
{
if (use_gl_context_) {
if (gl_context_) {
WM_opengl_context_release(gl_context_);
gl_context_mutex_.unlock();
}
return;
}
RE_engine_render_context_disable(reinterpret_cast<RenderEngine *>(b_engine_.ptr.data));
}
void BlenderDisplayDriver::gl_context_dispose()
{
if (gl_context_) {
const bool drw_state = DRW_opengl_context_release();
WM_opengl_context_activate(gl_context_);
WM_opengl_context_dispose(gl_context_);
DRW_opengl_context_activate(drw_state);
}
}
bool BlenderDisplayDriver::gl_draw_resources_ensure()
{
if (!texture_.gl_id) {
/* If there is no texture allocated, there is nothing to draw. Inform the draw call that it can
* can not continue. Note that this is not an unrecoverable error, so once the texture is known
* we will come back here and create all the GPU resources needed for draw. */
return false;
}
if (gl_draw_resource_creation_attempted_) {
return gl_draw_resources_created_;
}
gl_draw_resource_creation_attempted_ = true;
if (!vertex_buffer_) {
glGenBuffers(1, &vertex_buffer_);
if (!vertex_buffer_) {
LOG(ERROR) << "Error creating vertex buffer.";
return false;
}
}
gl_draw_resources_created_ = true;
return true;
}
void BlenderDisplayDriver::gl_resources_destroy()
{
gl_context_enable();
if (vertex_buffer_ != 0) {
glDeleteBuffers(1, &vertex_buffer_);
}
if (texture_.gl_pbo_id) {
glDeleteBuffers(1, &texture_.gl_pbo_id);
texture_.gl_pbo_id = 0;
}
if (texture_.gl_id) {
glDeleteTextures(1, &texture_.gl_id);
texture_.gl_id = 0;
}
gl_context_disable();
gl_context_dispose();
}
bool BlenderDisplayDriver::gl_texture_resources_ensure()
{
if (texture_.creation_attempted) {
return texture_.is_created;
}
texture_.creation_attempted = true;
DCHECK(!texture_.gl_id);
DCHECK(!texture_.gl_pbo_id);
/* Create texture. */
glGenTextures(1, &texture_.gl_id);
if (!texture_.gl_id) {
LOG(ERROR) << "Error creating texture.";
return false;
}
/* Configure the texture. */
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture_.gl_id);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glBindTexture(GL_TEXTURE_2D, 0);
/* Create PBO for the texture. */
glGenBuffers(1, &texture_.gl_pbo_id);
if (!texture_.gl_pbo_id) {
LOG(ERROR) << "Error creating texture pixel buffer object.";
return false;
}
/* Creation finished with a success. */
texture_.is_created = true;
return true;
}
void BlenderDisplayDriver::texture_update_if_needed()
{
if (!texture_.need_update) {
return;
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, texture_.gl_pbo_id);
glTexSubImage2D(
GL_TEXTURE_2D, 0, 0, 0, texture_.width, texture_.height, GL_RGBA, GL_HALF_FLOAT, 0);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
texture_.need_update = false;
}
void BlenderDisplayDriver::vertex_buffer_update(const Params &params)
{
/* Invalidate old contents - avoids stalling if the buffer is still waiting in queue to be
* rendered. */
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = reinterpret_cast<float *>(glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY));
if (!vpointer) {
return;
}
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = params.full_offset.x;
vpointer[3] = params.full_offset.y;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = (float)params.size.x + params.full_offset.x;
vpointer[7] = params.full_offset.y;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = (float)params.size.x + params.full_offset.x;
vpointer[11] = (float)params.size.y + params.full_offset.y;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = params.full_offset.x;
vpointer[15] = (float)params.size.y + params.full_offset.y;
glUnmapBuffer(GL_ARRAY_BUFFER);
}
CCL_NAMESPACE_END

View File

@@ -1,210 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <atomic>
#include "MEM_guardedalloc.h"
#include "RNA_blender_cpp.h"
#include "render/display_driver.h"
#include "util/util_thread.h"
#include "util/util_unique_ptr.h"
CCL_NAMESPACE_BEGIN
/* Base class of shader used for display driver rendering. */
class BlenderDisplayShader {
public:
static constexpr const char *position_attribute_name = "pos";
static constexpr const char *tex_coord_attribute_name = "texCoord";
/* Create shader implementation suitable for the given render engine and scene configuration. */
static unique_ptr<BlenderDisplayShader> create(BL::RenderEngine &b_engine, BL::Scene &b_scene);
BlenderDisplayShader() = default;
virtual ~BlenderDisplayShader() = default;
virtual void bind(int width, int height) = 0;
virtual void unbind() = 0;
/* Get attribute location for position and texture coordinate respectively.
* NOTE: The shader needs to be bound to have access to those. */
virtual int get_position_attrib_location();
virtual int get_tex_coord_attrib_location();
protected:
/* Get program of this display shader.
* NOTE: The shader needs to be bound to have access to this. */
virtual uint get_shader_program() = 0;
/* Cached values of various OpenGL resources. */
int position_attribute_location_ = -1;
int tex_coord_attribute_location_ = -1;
};
/* Implementation of display rendering shader used in the case when render engine does not support
* display space shader. */
class BlenderFallbackDisplayShader : public BlenderDisplayShader {
public:
virtual void bind(int width, int height) override;
virtual void unbind() override;
protected:
virtual uint get_shader_program() override;
void create_shader_if_needed();
void destroy_shader();
uint shader_program_ = 0;
int image_texture_location_ = -1;
int fullscreen_location_ = -1;
/* Shader compilation attempted. Which means, that if the shader program is 0 then compilation or
* linking has failed. Do not attempt to re-compile the shader. */
bool shader_compile_attempted_ = false;
};
class BlenderDisplaySpaceShader : public BlenderDisplayShader {
public:
BlenderDisplaySpaceShader(BL::RenderEngine &b_engine, BL::Scene &b_scene);
virtual void bind(int width, int height) override;
virtual void unbind() override;
protected:
virtual uint get_shader_program() override;
BL::RenderEngine b_engine_;
BL::Scene &b_scene_;
/* Cached values of various OpenGL resources. */
uint shader_program_ = 0;
};
/* Display driver implementation which is specific for Blender viewport integration. */
class BlenderDisplayDriver : public DisplayDriver {
public:
BlenderDisplayDriver(BL::RenderEngine &b_engine, BL::Scene &b_scene);
~BlenderDisplayDriver();
virtual void graphics_interop_activate() override;
virtual void graphics_interop_deactivate() override;
virtual void clear() override;
void set_zoom(float zoom_x, float zoom_y);
protected:
virtual bool update_begin(const Params &params, int texture_width, int texture_height) override;
virtual void update_end() override;
virtual half4 *map_texture_buffer() override;
virtual void unmap_texture_buffer() override;
virtual GraphicsInterop graphics_interop_get() override;
virtual void draw(const Params &params) override;
/* Helper function which allocates new GPU context. */
void gl_context_create();
bool gl_context_enable();
void gl_context_disable();
void gl_context_dispose();
/* Make sure texture is allocated and its initial configuration is performed. */
bool gl_texture_resources_ensure();
/* Ensure all runtime GPU resources needed for drawing are allocated.
* Returns true if all resources needed for drawing are available. */
bool gl_draw_resources_ensure();
/* Destroy all GPU resources which are being used by this object. */
void gl_resources_destroy();
/* Update GPU texture dimensions and content if needed (new pixel data was provided).
*
* NOTE: The texture needs to be bound. */
void texture_update_if_needed();
/* Update vertex buffer with new coordinates of vertex positions and texture coordinates.
* This buffer is used to render texture in the viewport.
*
* NOTE: The buffer needs to be bound. */
void vertex_buffer_update(const Params &params);
BL::RenderEngine b_engine_;
/* OpenGL context which is used the render engine doesn't have its own. */
void *gl_context_ = nullptr;
/* The when Blender RenderEngine side context is not available and the DisplayDriver is to create
* its own context. */
bool use_gl_context_ = false;
/* Mutex used to guard the `gl_context_`. */
thread_mutex gl_context_mutex_;
/* Texture which contains pixels of the render result. */
struct {
/* Indicates whether texture creation was attempted and succeeded.
* Used to avoid multiple attempts of texture creation on GPU issues or GPU context
* misconfiguration. */
bool creation_attempted = false;
bool is_created = false;
/* OpenGL resource IDs of the texture itself and Pixel Buffer Object (PBO) used to write
* pixels to it.
*
* NOTE: Allocated on the engine's context. */
uint gl_id = 0;
uint gl_pbo_id = 0;
/* Is true when new data was written to the PBO, meaning, the texture might need to be resized
* and new data is to be uploaded to the GPU. */
bool need_update = false;
/* Content of the texture is to be filled with zeroes. */
std::atomic<bool> need_clear = true;
/* Dimensions of the texture in pixels. */
int width = 0;
int height = 0;
/* Dimensions of the underlying PBO. */
int buffer_width = 0;
int buffer_height = 0;
} texture_;
unique_ptr<BlenderDisplayShader> display_shader_;
/* Special track of whether GPU resources were attempted to be created, to avoid attempts of
* their re-creation on failure on every redraw. */
bool gl_draw_resource_creation_attempted_ = false;
bool gl_draw_resources_created_ = false;
/* Vertex buffer which hold vertices of a triangle fan which is textures with the texture
* holding the render result. */
uint vertex_buffer_ = 0;
void *gl_render_sync_ = nullptr;
void *gl_upload_sync_ = nullptr;
float2 zoom_ = make_float2(1.0f, 1.0f);
};
CCL_NAMESPACE_END

View File

@@ -80,11 +80,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
{
/* Test if we can instance or if the object is modified. */
Geometry::Type geom_type = determine_geom_type(b_ob_info, use_particle_hair);
BL::ID b_key_id = (b_ob_info.is_real_object_data() &&
BKE_object_is_modified(b_ob_info.real_object)) ?
b_ob_info.real_object :
b_ob_info.object_data;
GeometryKey key(b_key_id.ptr.data, geom_type);
GeometryKey key(b_ob_info.object_data, geom_type);
/* Find shader indices. */
array<Node *> used_shaders = find_used_shaders(b_ob_info.iter_object);
@@ -114,7 +110,7 @@ Geometry *BlenderSync::sync_geometry(BL::Depsgraph &b_depsgraph,
}
else {
/* Test if we need to update existing geometry. */
sync = geometry_map.update(geom, b_key_id);
sync = geometry_map.update(geom, b_ob_info.object_data);
}
if (!sync) {

View File

@@ -125,10 +125,17 @@ void BlenderSync::sync_light(BL::Object &b_parent,
light->set_shader(static_cast<Shader *>(used_shaders[0]));
/* shadow */
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
PointerRNA clight = RNA_pointer_get(&b_light.ptr, "cycles");
light->set_cast_shadow(get_boolean(clight, "cast_shadow"));
light->set_use_mis(get_boolean(clight, "use_multiple_importance_sampling"));
int samples = get_int(clight, "samples");
if (get_boolean(cscene, "use_square_samples"))
light->set_samples(samples * samples);
else
light->set_samples(samples);
light->set_max_bounces(get_int(clight, "max_bounces"));
if (b_ob_info.real_object != b_ob_info.iter_object) {
@@ -148,12 +155,10 @@ void BlenderSync::sync_light(BL::Object &b_parent,
/* visibility */
uint visibility = object_ray_visibility(b_ob_info.real_object);
light->set_use_camera((visibility & PATH_RAY_CAMERA) != 0);
light->set_use_diffuse((visibility & PATH_RAY_DIFFUSE) != 0);
light->set_use_glossy((visibility & PATH_RAY_GLOSSY) != 0);
light->set_use_transmission((visibility & PATH_RAY_TRANSMIT) != 0);
light->set_use_scatter((visibility & PATH_RAY_VOLUME_SCATTER) != 0);
light->set_is_shadow_catcher(b_ob_info.real_object.is_shadow_catcher());
/* tag */
light->tag_update(scene);
@@ -164,6 +169,7 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
BL::World b_world = b_scene.world();
if (b_world) {
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
PointerRNA cworld = RNA_pointer_get(&b_world.ptr, "cycles");
enum SamplingMethod { SAMPLING_NONE = 0, SAMPLING_AUTOMATIC, SAMPLING_MANUAL, SAMPLING_NUM };
@@ -191,6 +197,12 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
/* force enable light again when world is resynced */
light->set_is_enabled(true);
int samples = get_int(cworld, "samples");
if (get_boolean(cscene, "use_square_samples"))
light->set_samples(samples * samples);
else
light->set_samples(samples);
light->tag_update(scene);
light_map.set_recalc(b_world);
}
@@ -199,7 +211,7 @@ void BlenderSync::sync_background_light(BL::SpaceView3D &b_v3d, bool use_portal)
world_map = b_world.ptr.data;
world_recalc = false;
viewport_parameters = BlenderViewportParameters(b_v3d, use_developer_ui);
viewport_parameters = BlenderViewportParameters(b_v3d);
}
CCL_NAMESPACE_END

View File

@@ -568,7 +568,7 @@ void BlenderSync::sync_objects(BL::Depsgraph &b_depsgraph,
/* object loop */
bool cancel = false;
bool use_portal = false;
const bool show_lights = BlenderViewportParameters(b_v3d, use_developer_ui).use_scene_lights;
const bool show_lights = BlenderViewportParameters(b_v3d).use_scene_lights;
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
BL::Depsgraph::object_instances_iterator b_instance_iter;

View File

@@ -1,127 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "blender/blender_output_driver.h"
CCL_NAMESPACE_BEGIN
BlenderOutputDriver::BlenderOutputDriver(BL::RenderEngine &b_engine) : b_engine_(b_engine)
{
}
BlenderOutputDriver::~BlenderOutputDriver()
{
}
bool BlenderOutputDriver::read_render_tile(const Tile &tile)
{
/* Get render result. */
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
tile.offset.y,
tile.size.x,
tile.size.y,
tile.layer.c_str(),
tile.view.c_str());
/* Can happen if the intersected rectangle gives 0 width or height. */
if (b_rr.ptr.data == NULL) {
return false;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end()) {
return false;
}
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile.size.x * tile.size.y * 4);
/* Copy each pass.
* TODO:copy only the required ones for better performance? */
for (BL::RenderPass &b_pass : b_rlay.passes) {
tile.set_pass_pixels(b_pass.name(), b_pass.channels(), (float *)b_pass.rect());
}
b_engine_.end_result(b_rr, false, false, false);
return true;
}
bool BlenderOutputDriver::update_render_tile(const Tile &tile)
{
/* Use final write for preview renders, otherwise render result wouldn't be be updated
* quickly on Blender side. For all other cases we use the display driver. */
if (b_engine_.is_preview()) {
write_render_tile(tile);
return true;
}
else {
/* Don't highlight full-frame tile. */
if (!(tile.size == tile.full_size)) {
b_engine_.tile_highlight_clear_all();
b_engine_.tile_highlight_set(tile.offset.x, tile.offset.y, tile.size.x, tile.size.y, true);
}
return false;
}
}
void BlenderOutputDriver::write_render_tile(const Tile &tile)
{
b_engine_.tile_highlight_clear_all();
/* Get render result. */
BL::RenderResult b_rr = b_engine_.begin_result(tile.offset.x,
tile.offset.y,
tile.size.x,
tile.size.y,
tile.layer.c_str(),
tile.view.c_str());
/* Can happen if the intersected rectangle gives 0 width or height. */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* Layer will be missing if it was disabled in the UI. */
if (b_single_rlay == b_rr.layers.end()) {
return;
}
BL::RenderLayer b_rlay = *b_single_rlay;
vector<float> pixels(tile.size.x * tile.size.y * 4);
/* Copy each pass. */
for (BL::RenderPass &b_pass : b_rlay.passes) {
if (!tile.get_pass_pixels(b_pass.name(), b_pass.channels(), &pixels[0])) {
memset(&pixels[0], 0, pixels.size() * sizeof(float));
}
b_pass.rect(&pixels[0]);
}
b_engine_.end_result(b_rr, true, false, true);
}
CCL_NAMESPACE_END

View File

@@ -1,40 +0,0 @@
/*
* Copyright 2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "MEM_guardedalloc.h"
#include "RNA_blender_cpp.h"
#include "render/output_driver.h"
CCL_NAMESPACE_BEGIN
class BlenderOutputDriver : public OutputDriver {
public:
BlenderOutputDriver(BL::RenderEngine &b_engine);
~BlenderOutputDriver();
virtual void write_render_tile(const Tile &tile) override;
virtual bool update_render_tile(const Tile &tile) override;
virtual bool read_render_tile(const Tile &tile) override;
protected:
BL::RenderEngine b_engine_;
};
CCL_NAMESPACE_END

View File

@@ -45,6 +45,10 @@
# include <OSL/oslquery.h>
#endif
#ifdef WITH_OPENCL
# include "device/device_intern.h"
#endif
CCL_NAMESPACE_BEGIN
namespace {
@@ -68,10 +72,12 @@ PyObject *pyunicode_from_string(const char *str)
/* Synchronize debug flags from a given Blender scene.
* Return truth when device list needs invalidation.
*/
static void debug_flags_sync_from_scene(BL::Scene b_scene)
bool debug_flags_sync_from_scene(BL::Scene b_scene)
{
DebugFlagsRef flags = DebugFlags();
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
/* Backup some settings for comparison. */
DebugFlags::OpenCL::DeviceType opencl_device_type = flags.opencl.device_type;
/* Synchronize shared flags. */
flags.viewport_static_bvh = get_enum(cscene, "debug_bvh_type");
/* Synchronize CPU flags. */
@@ -81,19 +87,50 @@ static void debug_flags_sync_from_scene(BL::Scene b_scene)
flags.cpu.sse3 = get_boolean(cscene, "debug_use_cpu_sse3");
flags.cpu.sse2 = get_boolean(cscene, "debug_use_cpu_sse2");
flags.cpu.bvh_layout = (BVHLayout)get_enum(cscene, "debug_bvh_layout");
flags.cpu.split_kernel = get_boolean(cscene, "debug_use_cpu_split_kernel");
/* Synchronize CUDA flags. */
flags.cuda.adaptive_compile = get_boolean(cscene, "debug_use_cuda_adaptive_compile");
flags.cuda.split_kernel = get_boolean(cscene, "debug_use_cuda_split_kernel");
/* Synchronize OptiX flags. */
flags.optix.use_debug = get_boolean(cscene, "debug_use_optix_debug");
flags.optix.cuda_streams = get_int(cscene, "debug_optix_cuda_streams");
flags.optix.curves_api = get_boolean(cscene, "debug_optix_curves_api");
/* Synchronize OpenCL device type. */
switch (get_enum(cscene, "debug_opencl_device_type")) {
case 0:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
break;
case 1:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_ALL;
break;
case 2:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_DEFAULT;
break;
case 3:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_CPU;
break;
case 4:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_GPU;
break;
case 5:
flags.opencl.device_type = DebugFlags::OpenCL::DEVICE_ACCELERATOR;
break;
}
/* Synchronize other OpenCL flags. */
flags.opencl.debug = get_boolean(cscene, "debug_use_opencl_debug");
flags.opencl.mem_limit = ((size_t)get_int(cscene, "debug_opencl_mem_limit")) * 1024 * 1024;
return flags.opencl.device_type != opencl_device_type;
}
/* Reset debug flags to default values.
* Return truth when device list needs invalidation.
*/
static void debug_flags_reset()
bool debug_flags_reset()
{
DebugFlagsRef flags = DebugFlags();
/* Backup some settings for comparison. */
DebugFlags::OpenCL::DeviceType opencl_device_type = flags.opencl.device_type;
flags.reset();
return flags.opencl.device_type != opencl_device_type;
}
} /* namespace */
@@ -138,20 +175,18 @@ static const char *PyC_UnicodeAsByte(PyObject *py_str, PyObject **coerce)
static PyObject *init_func(PyObject * /*self*/, PyObject *args)
{
PyObject *path, *user_path, *temp_path;
PyObject *path, *user_path;
int headless;
if (!PyArg_ParseTuple(args, "OOOi", &path, &user_path, &temp_path, &headless)) {
return nullptr;
if (!PyArg_ParseTuple(args, "OOi", &path, &user_path, &headless)) {
return NULL;
}
PyObject *path_coerce = nullptr, *user_path_coerce = nullptr, *temp_path_coerce = nullptr;
PyObject *path_coerce = NULL, *user_path_coerce = NULL;
path_init(PyC_UnicodeAsByte(path, &path_coerce),
PyC_UnicodeAsByte(user_path, &user_path_coerce),
PyC_UnicodeAsByte(temp_path, &temp_path_coerce));
PyC_UnicodeAsByte(user_path, &user_path_coerce));
Py_XDECREF(path_coerce);
Py_XDECREF(user_path_coerce);
Py_XDECREF(temp_path_coerce);
BlenderSession::headless = headless;
@@ -264,50 +299,6 @@ static PyObject *render_func(PyObject * /*self*/, PyObject *args)
Py_RETURN_NONE;
}
static PyObject *render_frame_finish_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession;
if (!PyArg_ParseTuple(args, "O", &pysession)) {
return nullptr;
}
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(pysession);
/* Allow Blender to execute other Python scripts. */
python_thread_state_save(&session->python_thread_state);
session->render_frame_finish();
python_thread_state_restore(&session->python_thread_state);
Py_RETURN_NONE;
}
static PyObject *draw_func(PyObject * /*self*/, PyObject *args)
{
PyObject *py_session, *py_graph, *py_screen, *py_space_image;
if (!PyArg_ParseTuple(args, "OOOO", &py_session, &py_graph, &py_screen, &py_space_image)) {
return nullptr;
}
BlenderSession *session = (BlenderSession *)PyLong_AsVoidPtr(py_session);
ID *b_screen = (ID *)PyLong_AsVoidPtr(py_screen);
PointerRNA b_space_image_ptr;
RNA_pointer_create(b_screen,
&RNA_SpaceImageEditor,
pylong_as_voidptr_typesafe(py_space_image),
&b_space_image_ptr);
BL::SpaceImageEditor b_space_image(b_space_image_ptr);
session->draw(b_space_image);
Py_RETURN_NONE;
}
/* pixel_array and result passed as pointers */
static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
{
@@ -345,7 +336,7 @@ static PyObject *bake_func(PyObject * /*self*/, PyObject *args)
Py_RETURN_NONE;
}
static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
static PyObject *draw_func(PyObject * /*self*/, PyObject *args)
{
PyObject *pysession, *pygraph, *pyv3d, *pyrv3d;
@@ -359,7 +350,7 @@ static PyObject *view_draw_func(PyObject * /*self*/, PyObject *args)
int viewport[4];
glGetIntegerv(GL_VIEWPORT, viewport);
session->view_draw(viewport[2], viewport[3]);
session->draw(viewport[2], viewport[3]);
}
Py_RETURN_NONE;
@@ -706,6 +697,40 @@ static PyObject *system_info_func(PyObject * /*self*/, PyObject * /*value*/)
return pyunicode_from_string(system_info.c_str());
}
#ifdef WITH_OPENCL
static PyObject *opencl_disable_func(PyObject * /*self*/, PyObject * /*value*/)
{
VLOG(2) << "Disabling OpenCL platform.";
DebugFlags().opencl.device_type = DebugFlags::OpenCL::DEVICE_NONE;
Py_RETURN_NONE;
}
static PyObject *opencl_compile_func(PyObject * /*self*/, PyObject *args)
{
PyObject *sequence = PySequence_Fast(args, "Arguments must be a sequence");
if (sequence == NULL) {
Py_RETURN_FALSE;
}
vector<string> parameters;
for (Py_ssize_t i = 0; i < PySequence_Fast_GET_SIZE(sequence); i++) {
PyObject *item = PySequence_Fast_GET_ITEM(sequence, i);
PyObject *item_as_string = PyObject_Str(item);
const char *parameter_string = PyUnicode_AsUTF8(item_as_string);
parameters.push_back(parameter_string);
Py_DECREF(item_as_string);
}
Py_DECREF(sequence);
if (device_opencl_compile_kernel(parameters)) {
Py_RETURN_TRUE;
}
else {
Py_RETURN_FALSE;
}
}
#endif
static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepaths)
{
if (PyUnicode_Check(pyfilepaths)) {
@@ -737,10 +762,6 @@ static bool image_parse_filepaths(PyObject *pyfilepaths, vector<string> &filepat
static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *keywords)
{
#if 1
(void)args;
(void)keywords;
#else
static const char *keyword_list[] = {
"preferences", "scene", "view_layer", "input", "output", "tile_size", "samples", NULL};
PyObject *pypreferences, *pyscene, *pyviewlayer;
@@ -814,7 +835,7 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
}
/* Create denoiser. */
DenoiserPipeline denoiser(device);
Denoiser denoiser(device);
denoiser.params = params;
denoiser.input = input;
denoiser.output = output;
@@ -831,7 +852,6 @@ static PyObject *denoise_func(PyObject * /*self*/, PyObject *args, PyObject *key
PyErr_SetString(PyExc_ValueError, denoiser.error.c_str());
return NULL;
}
#endif
Py_RETURN_NONE;
}
@@ -883,7 +903,10 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
RNA_id_pointer_create((ID *)PyLong_AsVoidPtr(pyscene), &sceneptr);
BL::Scene b_scene(sceneptr);
debug_flags_sync_from_scene(b_scene);
if (debug_flags_sync_from_scene(b_scene)) {
VLOG(2) << "Tagging device list for update.";
Device::tag_update();
}
VLOG(2) << "Debug flags set to:\n" << DebugFlags();
@@ -894,7 +917,10 @@ static PyObject *debug_flags_update_func(PyObject * /*self*/, PyObject *args)
static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/)
{
debug_flags_reset();
if (debug_flags_reset()) {
VLOG(2) << "Tagging device list for update.";
Device::tag_update();
}
if (debug_flags_set) {
VLOG(2) << "Debug flags reset to:\n" << DebugFlags();
debug_flags_set = false;
@@ -902,6 +928,84 @@ static PyObject *debug_flags_reset_func(PyObject * /*self*/, PyObject * /*args*/
Py_RETURN_NONE;
}
static PyObject *set_resumable_chunk_func(PyObject * /*self*/, PyObject *args)
{
int num_resumable_chunks, current_resumable_chunk;
if (!PyArg_ParseTuple(args, "ii", &num_resumable_chunks, &current_resumable_chunk)) {
Py_RETURN_NONE;
}
if (num_resumable_chunks <= 0) {
fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
abort();
Py_RETURN_NONE;
}
if (current_resumable_chunk < 1 || current_resumable_chunk > num_resumable_chunks) {
fprintf(stderr, "Cycles: Bad value for current resumable chunk number.\n");
abort();
Py_RETURN_NONE;
}
VLOG(1) << "Initialized resumable render: "
<< "num_resumable_chunks=" << num_resumable_chunks << ", "
<< "current_resumable_chunk=" << current_resumable_chunk;
BlenderSession::num_resumable_chunks = num_resumable_chunks;
BlenderSession::current_resumable_chunk = current_resumable_chunk;
printf("Cycles: Will render chunk %d of %d\n", current_resumable_chunk, num_resumable_chunks);
Py_RETURN_NONE;
}
static PyObject *set_resumable_chunk_range_func(PyObject * /*self*/, PyObject *args)
{
int num_chunks, start_chunk, end_chunk;
if (!PyArg_ParseTuple(args, "iii", &num_chunks, &start_chunk, &end_chunk)) {
Py_RETURN_NONE;
}
if (num_chunks <= 0) {
fprintf(stderr, "Cycles: Bad value for number of resumable chunks.\n");
abort();
Py_RETURN_NONE;
}
if (start_chunk < 1 || start_chunk > num_chunks) {
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
abort();
Py_RETURN_NONE;
}
if (end_chunk < 1 || end_chunk > num_chunks) {
fprintf(stderr, "Cycles: Bad value for start chunk number.\n");
abort();
Py_RETURN_NONE;
}
if (start_chunk > end_chunk) {
fprintf(stderr, "Cycles: End chunk should be higher than start one.\n");
abort();
Py_RETURN_NONE;
}
VLOG(1) << "Initialized resumable render: "
<< "num_resumable_chunks=" << num_chunks << ", "
<< "start_resumable_chunk=" << start_chunk << "end_resumable_chunk=" << end_chunk;
BlenderSession::num_resumable_chunks = num_chunks;
BlenderSession::start_resumable_chunk = start_chunk;
BlenderSession::end_resumable_chunk = end_chunk;
printf("Cycles: Will render chunks %d to %d of %d\n", start_chunk, end_chunk, num_chunks);
Py_RETURN_NONE;
}
static PyObject *clear_resumable_chunk_func(PyObject * /*self*/, PyObject * /*value*/)
{
VLOG(1) << "Clear resumable render";
BlenderSession::num_resumable_chunks = 0;
BlenderSession::current_resumable_chunk = 0;
Py_RETURN_NONE;
}
static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*/)
{
BlenderSession::print_render_stats = true;
@@ -911,16 +1015,16 @@ static PyObject *enable_print_stats_func(PyObject * /*self*/, PyObject * /*args*
static PyObject *get_device_types_func(PyObject * /*self*/, PyObject * /*args*/)
{
vector<DeviceType> device_types = Device::available_types();
bool has_cuda = false, has_optix = false, has_hip = false;
bool has_cuda = false, has_optix = false, has_opencl = false;
foreach (DeviceType device_type, device_types) {
has_cuda |= (device_type == DEVICE_CUDA);
has_optix |= (device_type == DEVICE_OPTIX);
has_hip |= (device_type == DEVICE_HIP);
has_opencl |= (device_type == DEVICE_OPENCL);
}
PyObject *list = PyTuple_New(3);
PyTuple_SET_ITEM(list, 0, PyBool_FromLong(has_cuda));
PyTuple_SET_ITEM(list, 1, PyBool_FromLong(has_optix));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_hip));
PyTuple_SET_ITEM(list, 2, PyBool_FromLong(has_opencl));
return list;
}
@@ -940,15 +1044,15 @@ static PyObject *set_device_override_func(PyObject * /*self*/, PyObject *arg)
if (override == "CPU") {
BlenderSession::device_override = DEVICE_MASK_CPU;
}
else if (override == "OPENCL") {
BlenderSession::device_override = DEVICE_MASK_OPENCL;
}
else if (override == "CUDA") {
BlenderSession::device_override = DEVICE_MASK_CUDA;
}
else if (override == "OPTIX") {
BlenderSession::device_override = DEVICE_MASK_OPTIX;
}
else if (override == "HIP") {
BlenderSession::device_override = DEVICE_MASK_HIP;
}
else {
printf("\nError: %s is not a valid Cycles device.\n", override.c_str());
Py_RETURN_FALSE;
@@ -968,10 +1072,8 @@ static PyMethodDef methods[] = {
{"create", create_func, METH_VARARGS, ""},
{"free", free_func, METH_O, ""},
{"render", render_func, METH_VARARGS, ""},
{"render_frame_finish", render_frame_finish_func, METH_VARARGS, ""},
{"draw", draw_func, METH_VARARGS, ""},
{"bake", bake_func, METH_VARARGS, ""},
{"view_draw", view_draw_func, METH_VARARGS, ""},
{"draw", draw_func, METH_VARARGS, ""},
{"sync", sync_func, METH_VARARGS, ""},
{"reset", reset_func, METH_VARARGS, ""},
#ifdef WITH_OSL
@@ -980,6 +1082,10 @@ static PyMethodDef methods[] = {
#endif
{"available_devices", available_devices_func, METH_VARARGS, ""},
{"system_info", system_info_func, METH_NOARGS, ""},
#ifdef WITH_OPENCL
{"opencl_disable", opencl_disable_func, METH_NOARGS, ""},
{"opencl_compile", opencl_compile_func, METH_VARARGS, ""},
#endif
/* Standalone denoising */
{"denoise", (PyCFunction)denoise_func, METH_VARARGS | METH_KEYWORDS, ""},
@@ -992,6 +1098,11 @@ static PyMethodDef methods[] = {
/* Statistics. */
{"enable_print_stats", enable_print_stats_func, METH_NOARGS, ""},
/* Resumable render */
{"set_resumable_chunk", set_resumable_chunk_func, METH_VARARGS, ""},
{"set_resumable_chunk_range", set_resumable_chunk_range_func, METH_VARARGS, ""},
{"clear_resumable_chunk", clear_resumable_chunk_func, METH_NOARGS, ""},
/* Compute Device selection */
{"get_device_types", get_device_types_func, METH_VARARGS, ""},
{"set_device_override", set_device_override_func, METH_O, ""},
@@ -1042,6 +1153,14 @@ void *CCL_python_module_init()
PyModule_AddStringConstant(mod, "osl_version_string", "unknown");
#endif
#ifdef WITH_NETWORK
PyModule_AddObject(mod, "with_network", Py_True);
Py_INCREF(Py_True);
#else /* WITH_NETWORK */
PyModule_AddObject(mod, "with_network", Py_False);
Py_INCREF(Py_False);
#endif /* WITH_NETWORK */
#ifdef WITH_EMBREE
PyModule_AddObject(mod, "with_embree", Py_True);
Py_INCREF(Py_True);

View File

@@ -38,12 +38,9 @@
#include "util/util_hash.h"
#include "util/util_logging.h"
#include "util/util_murmurhash.h"
#include "util/util_path.h"
#include "util/util_progress.h"
#include "util/util_time.h"
#include "blender/blender_display_driver.h"
#include "blender/blender_output_driver.h"
#include "blender/blender_session.h"
#include "blender/blender_sync.h"
#include "blender/blender_util.h"
@@ -52,6 +49,10 @@ CCL_NAMESPACE_BEGIN
DeviceTypeMask BlenderSession::device_override = DEVICE_MASK_ALL;
bool BlenderSession::headless = false;
int BlenderSession::num_resumable_chunks = 0;
int BlenderSession::current_resumable_chunk = 0;
int BlenderSession::start_resumable_chunk = 0;
int BlenderSession::end_resumable_chunk = 0;
bool BlenderSession::print_render_stats = false;
BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
@@ -72,8 +73,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
width(0),
height(0),
preview_osl(preview_osl),
python_thread_state(NULL),
use_developer_ui(false)
python_thread_state(NULL)
{
/* offline render */
background = true;
@@ -103,9 +103,7 @@ BlenderSession::BlenderSession(BL::RenderEngine &b_engine,
width(width),
height(height),
preview_osl(false),
python_thread_state(NULL),
use_developer_ui(b_userpref.experimental().use_cycles_debug() &&
b_userpref.view().show_developer_ui())
python_thread_state(NULL)
{
/* 3d view render */
background = false;
@@ -121,10 +119,10 @@ BlenderSession::~BlenderSession()
void BlenderSession::create_session()
{
const SessionParams session_params = BlenderSync::get_session_params(
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
const bool session_pause = BlenderSync::get_session_pause(b_scene, background);
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
/* reset status/progress */
last_status = "";
@@ -133,18 +131,20 @@ void BlenderSession::create_session()
start_resize_time = 0.0;
/* create session */
session = new Session(session_params, scene_params);
session = new Session(session_params);
session->scene = scene;
session->progress.set_update_callback(function_bind(&BlenderSession::tag_redraw, this));
session->progress.set_cancel_callback(function_bind(&BlenderSession::test_cancel, this));
session->set_pause(session_pause);
/* create scene */
scene = session->scene;
scene = new Scene(scene_params, session->device);
scene->name = b_scene.name();
session->scene = scene;
/* create sync */
sync = new BlenderSync(
b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress);
sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
BL::Object b_camera_override(b_engine.camera_override());
if (b_v3d) {
sync->sync_view(b_v3d, b_rv3d, width, height);
@@ -154,27 +154,13 @@ void BlenderSession::create_session()
}
/* set buffer parameters */
const BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height);
session->reset(session_params, buffer_params);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
session->reset(buffer_params, session_params.samples);
/* Create GPU display.
* TODO(sergey): Investigate whether DisplayDriver can be used for the preview as well. */
if (!b_engine.is_preview() && !headless) {
unique_ptr<BlenderDisplayDriver> display_driver = make_unique<BlenderDisplayDriver>(b_engine,
b_scene);
display_driver_ = display_driver.get();
session->set_display_driver(move(display_driver));
}
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
/* Viewport and preview (as in, material preview) does not do tiled rendering, so can inform
* engine that no tracking of the tiles state is needed.
* The offline rendering will make a decision when tile is being written. The penalty of asking
* the engine to keep track of tiles state is minimal, so there is nothing to worry about here
* about possible single-tiled final render. */
if (!b_engine.is_preview() && !b_v3d) {
b_engine.use_highlight_tiles(true);
}
update_resumable_tile_manager(session_params.samples);
}
void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsgraph)
@@ -216,9 +202,9 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
return;
}
const SessionParams session_params = BlenderSync::get_session_params(
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
if (scene->params.modified(scene_params) || session->params.modified(session_params) ||
!this->b_render.use_persistent_data()) {
@@ -234,6 +220,8 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
session->progress.reset();
session->tile_manager.set_tile_order(session_params.tile_order);
/* peak memory usage should show current render peak, not peak for all renders
* made by this render session
*/
@@ -242,8 +230,7 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
if (is_new_session) {
/* Sync object should be re-created for new scene. */
delete sync;
sync = new BlenderSync(
b_engine, b_data, b_scene, scene, !background, use_developer_ui, session->progress);
sync = new BlenderSync(b_engine, b_data, b_scene, scene, !background, session->progress);
}
else {
/* Sync recalculations to do just the required updates. */
@@ -255,35 +242,171 @@ void BlenderSession::reset_session(BL::BlendData &b_data, BL::Depsgraph &b_depsg
BL::SpaceView3D b_null_space_view3d(PointerRNA_NULL);
BL::RegionView3D b_null_region_view3d(PointerRNA_NULL);
const BufferParams buffer_params = BlenderSync::get_buffer_params(
b_null_space_view3d, b_null_region_view3d, scene->camera, width, height);
session->reset(session_params, buffer_params);
BufferParams buffer_params = BlenderSync::get_buffer_params(b_null_space_view3d,
b_null_region_view3d,
scene->camera,
width,
height,
session_params.denoising.use);
session->reset(buffer_params, session_params.samples);
b_engine.use_highlight_tiles(session_params.progressive_refine == false);
/* reset time */
start_resize_time = 0.0;
{
thread_scoped_lock lock(draw_state_.mutex);
draw_state_.last_pass_index = -1;
}
}
void BlenderSession::free_session()
{
if (session) {
session->cancel(true);
}
session->cancel();
delete sync;
sync = nullptr;
delete session;
session = nullptr;
}
void BlenderSession::full_buffer_written(string_view filename)
static ShaderEvalType get_shader_type(const string &pass_type)
{
full_buffer_files_.emplace_back(filename);
const char *shader_type = pass_type.c_str();
/* data passes */
if (strcmp(shader_type, "NORMAL") == 0)
return SHADER_EVAL_NORMAL;
else if (strcmp(shader_type, "UV") == 0)
return SHADER_EVAL_UV;
else if (strcmp(shader_type, "ROUGHNESS") == 0)
return SHADER_EVAL_ROUGHNESS;
else if (strcmp(shader_type, "DIFFUSE_COLOR") == 0)
return SHADER_EVAL_DIFFUSE_COLOR;
else if (strcmp(shader_type, "GLOSSY_COLOR") == 0)
return SHADER_EVAL_GLOSSY_COLOR;
else if (strcmp(shader_type, "TRANSMISSION_COLOR") == 0)
return SHADER_EVAL_TRANSMISSION_COLOR;
else if (strcmp(shader_type, "EMIT") == 0)
return SHADER_EVAL_EMISSION;
/* light passes */
else if (strcmp(shader_type, "AO") == 0)
return SHADER_EVAL_AO;
else if (strcmp(shader_type, "COMBINED") == 0)
return SHADER_EVAL_COMBINED;
else if (strcmp(shader_type, "SHADOW") == 0)
return SHADER_EVAL_SHADOW;
else if (strcmp(shader_type, "DIFFUSE") == 0)
return SHADER_EVAL_DIFFUSE;
else if (strcmp(shader_type, "GLOSSY") == 0)
return SHADER_EVAL_GLOSSY;
else if (strcmp(shader_type, "TRANSMISSION") == 0)
return SHADER_EVAL_TRANSMISSION;
/* extra */
else if (strcmp(shader_type, "ENVIRONMENT") == 0)
return SHADER_EVAL_ENVIRONMENT;
else
return SHADER_EVAL_BAKE;
}
static BL::RenderResult begin_render_result(BL::RenderEngine &b_engine,
int x,
int y,
int w,
int h,
const char *layername,
const char *viewname)
{
return b_engine.begin_result(x, y, w, h, layername, viewname);
}
static void end_render_result(BL::RenderEngine &b_engine,
BL::RenderResult &b_rr,
bool cancel,
bool highlight,
bool do_merge_results)
{
b_engine.end_result(b_rr, (int)cancel, (int)highlight, (int)do_merge_results);
}
void BlenderSession::do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
bool do_read_only,
bool highlight)
{
int x = rtile.x - session->tile_manager.params.full_x;
int y = rtile.y - session->tile_manager.params.full_y;
int w = rtile.w;
int h = rtile.h;
/* get render result */
BL::RenderResult b_rr = begin_render_result(
b_engine, x, y, w, h, b_rlay_name.c_str(), b_rview_name.c_str());
/* can happen if the intersected rectangle gives 0 width or height */
if (b_rr.ptr.data == NULL) {
return;
}
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
/* layer will be missing if it was disabled in the UI */
if (b_single_rlay == b_rr.layers.end())
return;
BL::RenderLayer b_rlay = *b_single_rlay;
if (do_read_only) {
/* copy each pass */
for (BL::RenderPass &b_pass : b_rlay.passes) {
/* find matching pass type */
PassType pass_type = BlenderSync::get_pass_type(b_pass);
int components = b_pass.channels();
rtile.buffers->set_pass_rect(
pass_type, components, (float *)b_pass.rect(), rtile.num_samples);
}
end_render_result(b_engine, b_rr, false, false, false);
}
else if (do_update_only) {
/* Sample would be zero at initial tile update, which is only needed
* to tag tile form blender side as IN PROGRESS for proper highlight
* no buffers should be sent to blender yet. For denoise we also
* keep showing the noisy buffers until denoise is done. */
bool merge = (rtile.sample != 0) && (rtile.task != RenderTile::DENOISE);
if (merge) {
update_render_result(b_rlay, rtile);
}
end_render_result(b_engine, b_rr, true, highlight, merge);
}
else {
/* Write final render result. */
write_render_result(b_rlay, rtile);
end_render_result(b_engine, b_rr, false, false, true);
}
}
void BlenderSession::read_render_tile(RenderTile &rtile)
{
do_write_update_render_tile(rtile, false, true, false);
}
void BlenderSession::write_render_tile(RenderTile &rtile)
{
do_write_update_render_tile(rtile, false, false, false);
}
void BlenderSession::update_render_tile(RenderTile &rtile, bool highlight)
{
/* use final write for preview renders, otherwise render result wouldn't be
* be updated in blender side
* would need to be investigated a bit further, but for now shall be fine
*/
if (!b_engine.is_preview())
do_write_update_render_tile(rtile, true, false, highlight);
else
do_write_update_render_tile(rtile, false, false, false);
}
static void add_cryptomatte_layer(BL::RenderResult &b_rr, string name, string manifest)
@@ -307,15 +430,12 @@ void BlenderSession::stamp_view_layer_metadata(Scene *scene, const string &view_
to_string(session->params.samples).c_str());
/* Store ranged samples information. */
/* TODO(sergey): Need to bring this information back. */
#if 0
if (session->tile_manager.range_num_samples != -1) {
b_rr.stamp_data_add_field((prefix + "range_start_sample").c_str(),
to_string(session->tile_manager.range_start_sample).c_str());
b_rr.stamp_data_add_field((prefix + "range_num_samples").c_str(),
to_string(session->tile_manager.range_num_samples).c_str());
}
#endif
/* Write cryptomatte metadata. */
if (scene->film->get_cryptomatte_passes() & CRYPT_OBJECT) {
@@ -354,35 +474,39 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
return;
}
/* Create driver to write out render results. */
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
session->full_buffer_written_cb = [&](string_view filename) { full_buffer_written(filename); };
/* set callback to write out render results */
session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
session->update_render_tile_cb = function_bind(
&BlenderSession::update_render_tile, this, _1, _2);
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
/* get buffer parameters */
const SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background, b_view_layer);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height);
b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
/* temporary render result to find needed passes and views */
BL::RenderResult b_rr = b_engine.begin_result(0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
BL::RenderResult b_rr = begin_render_result(
b_engine, 0, 0, 1, 1, b_view_layer.name().c_str(), NULL);
BL::RenderResult::layers_iterator b_single_rlay;
b_rr.layers.begin(b_single_rlay);
BL::RenderLayer b_rlay = *b_single_rlay;
b_rlay_name = b_view_layer.name();
{
thread_scoped_lock lock(draw_state_.mutex);
b_rlay_name = b_view_layer.name();
/* Signal that the display pass is to be updated. */
draw_state_.last_pass_index = -1;
}
/* Update denoising parameters. */
session->set_denoising(session_params.denoising);
/* Compute render passes and film settings. */
sync->sync_render_passes(b_rlay, b_view_layer);
vector<Pass> passes = sync->sync_render_passes(
b_scene, b_rlay, b_view_layer, session_params.adaptive_sampling, session_params.denoising);
/* Set buffer params, using film settings from sync_render_passes. */
buffer_params.passes = passes;
buffer_params.denoising_data_pass = scene->film->get_denoising_data_pass();
buffer_params.denoising_clean_pass = scene->film->get_denoising_clean_pass();
buffer_params.denoising_prefiltered_pass = scene->film->get_denoising_prefiltered_pass();
BL::RenderResult::views_iterator b_view_iter;
@@ -396,9 +520,6 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
++b_view_iter, ++view_index) {
b_rview_name = b_view_iter->name();
buffer_params.layer = b_view_layer.name();
buffer_params.view = b_rview_name;
/* set the current view */
b_engine.active_view_set(b_rview_name.c_str());
@@ -428,16 +549,20 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
}
/* Update number of samples per layer. */
const int samples = sync->get_layer_samples();
const bool bound_samples = sync->get_layer_bound_samples();
int samples = sync->get_layer_samples();
bool bound_samples = sync->get_layer_bound_samples();
int effective_layer_samples;
SessionParams effective_session_params = session_params;
if (samples != 0 && (!bound_samples || (samples < session_params.samples))) {
effective_session_params.samples = samples;
}
if (samples != 0 && (!bound_samples || (samples < session_params.samples)))
effective_layer_samples = samples;
else
effective_layer_samples = session_params.samples;
/* Update tile manager if we're doing resumable render. */
update_resumable_tile_manager(effective_layer_samples);
/* Update session itself. */
session->reset(effective_session_params, buffer_params);
session->reset(buffer_params, effective_layer_samples);
/* render */
if (!b_engine.is_preview() && background && print_render_stats) {
@@ -461,159 +586,65 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
stamp_view_layer_metadata(scene, b_rlay_name);
/* free result without merging */
b_engine.end_result(b_rr, true, false, false);
/* When tiled rendering is used there will be no "write" done for the tile. Forcefully clear
* highlighted tiles now, so that the highlight will be removed while processing full frame from
* file. */
b_engine.tile_highlight_clear_all();
end_render_result(b_engine, b_rr, true, true, false);
double total_time, render_time;
session->progress.get_time(total_time, render_time);
VLOG(1) << "Total render time: " << total_time;
VLOG(1) << "Render time (without synchronization): " << render_time;
/* clear callback */
session->write_render_tile_cb = function_null;
session->update_render_tile_cb = function_null;
}
void BlenderSession::render_frame_finish()
static int bake_pass_filter_get(const int pass_filter)
{
/* Processing of all layers and views is done. Clear the strings so that we can communicate
* progress about reading files and denoising them. */
b_rlay_name = "";
b_rview_name = "";
int flag = BAKE_FILTER_NONE;
if (!b_render.use_persistent_data()) {
/* Free the sync object so that it can properly dereference nodes from the scene graph before
* the graph is freed. */
delete sync;
sync = nullptr;
if ((pass_filter & BL::BakeSettings::pass_filter_DIRECT) != 0)
flag |= BAKE_FILTER_DIRECT;
if ((pass_filter & BL::BakeSettings::pass_filter_INDIRECT) != 0)
flag |= BAKE_FILTER_INDIRECT;
if ((pass_filter & BL::BakeSettings::pass_filter_COLOR) != 0)
flag |= BAKE_FILTER_COLOR;
session->device_free();
}
if ((pass_filter & BL::BakeSettings::pass_filter_DIFFUSE) != 0)
flag |= BAKE_FILTER_DIFFUSE;
if ((pass_filter & BL::BakeSettings::pass_filter_GLOSSY) != 0)
flag |= BAKE_FILTER_GLOSSY;
if ((pass_filter & BL::BakeSettings::pass_filter_TRANSMISSION) != 0)
flag |= BAKE_FILTER_TRANSMISSION;
for (string_view filename : full_buffer_files_) {
session->process_full_buffer_from_disk(filename);
if (check_and_report_session_error()) {
break;
}
}
if ((pass_filter & BL::BakeSettings::pass_filter_EMIT) != 0)
flag |= BAKE_FILTER_EMISSION;
if ((pass_filter & BL::BakeSettings::pass_filter_AO) != 0)
flag |= BAKE_FILTER_AO;
for (string_view filename : full_buffer_files_) {
path_remove(filename);
}
/* Clear driver. */
session->set_output_driver(nullptr);
session->full_buffer_written_cb = function_null;
/* All the files are handled.
* Clear the list so that this session can be re-used by Persistent Data. */
full_buffer_files_.clear();
}
static PassType bake_type_to_pass(const string &bake_type_str, const int bake_filter)
{
const char *bake_type = bake_type_str.c_str();
/* data passes */
if (strcmp(bake_type, "POSITION") == 0) {
return PASS_POSITION;
}
else if (strcmp(bake_type, "NORMAL") == 0) {
return PASS_NORMAL;
}
else if (strcmp(bake_type, "UV") == 0) {
return PASS_UV;
}
else if (strcmp(bake_type, "ROUGHNESS") == 0) {
return PASS_ROUGHNESS;
}
else if (strcmp(bake_type, "EMIT") == 0) {
return PASS_EMISSION;
}
/* light passes */
else if (strcmp(bake_type, "AO") == 0) {
return PASS_AO;
}
else if (strcmp(bake_type, "COMBINED") == 0) {
return PASS_COMBINED;
}
else if (strcmp(bake_type, "SHADOW") == 0) {
return PASS_SHADOW;
}
else if (strcmp(bake_type, "DIFFUSE") == 0) {
if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
return PASS_DIFFUSE;
}
else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) {
return PASS_DIFFUSE_DIRECT;
}
else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
return PASS_DIFFUSE_INDIRECT;
}
else {
return PASS_DIFFUSE_COLOR;
}
}
else if (strcmp(bake_type, "GLOSSY") == 0) {
if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
return PASS_GLOSSY;
}
else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) {
return PASS_GLOSSY_DIRECT;
}
else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
return PASS_GLOSSY_INDIRECT;
}
else {
return PASS_GLOSSY_COLOR;
}
}
else if (strcmp(bake_type, "TRANSMISSION") == 0) {
if ((bake_filter & BL::BakeSettings::pass_filter_DIRECT) &&
bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
return PASS_TRANSMISSION;
}
else if (bake_filter & BL::BakeSettings::pass_filter_DIRECT) {
return PASS_TRANSMISSION_DIRECT;
}
else if (bake_filter & BL::BakeSettings::pass_filter_INDIRECT) {
return PASS_TRANSMISSION_INDIRECT;
}
else {
return PASS_TRANSMISSION_COLOR;
}
}
/* extra */
else if (strcmp(bake_type, "ENVIRONMENT") == 0) {
return PASS_BACKGROUND;
}
return PASS_COMBINED;
return flag;
}
void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
BL::Object &b_object,
const string &bake_type,
const int bake_filter,
const string &pass_type,
const int pass_filter,
const int bake_width,
const int bake_height)
{
b_depsgraph = b_depsgraph_;
ShaderEvalType shader_type = get_shader_type(pass_type);
int bake_pass_filter = bake_pass_filter_get(pass_filter);
/* Initialize bake manager, before we load the baking kernels. */
scene->bake_manager->set(scene, b_object.name());
scene->bake_manager->set(scene, b_object.name(), shader_type, bake_pass_filter);
/* Add render pass that we want to bake, and name it Combined so that it is
* used as that on the Blender side. */
Pass *pass = scene->create_node<Pass>();
pass->set_name(ustring("Combined"));
pass->set_type(bake_type_to_pass(bake_type, bake_filter));
pass->set_include_albedo((bake_filter & BL::BakeSettings::pass_filter_COLOR));
/* Passes are identified by name, so in order to return the combined pass we need to set the
* name. */
Pass::add(PASS_COMBINED, scene->passes, "Combined");
session->set_display_driver(nullptr);
session->set_output_driver(make_unique<BlenderOutputDriver>(b_engine));
session->read_bake_tile_cb = function_bind(&BlenderSession::read_render_tile, this, _1);
session->write_render_tile_cb = function_bind(&BlenderSession::write_render_tile, this, _1);
if (!session->progress.get_cancel()) {
/* Sync scene. */
@@ -636,15 +667,18 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
if (object_found && !session->progress.get_cancel()) {
/* Get session and buffer parameters. */
const SessionParams session_params = BlenderSync::get_session_params(
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
session_params.progressive_refine = false;
BufferParams buffer_params;
buffer_params.width = bake_width;
buffer_params.height = bake_height;
buffer_params.passes = scene->passes;
/* Update session. */
session->reset(session_params, buffer_params);
session->tile_manager.set_samples(session_params.samples);
session->reset(buffer_params, session_params.samples);
session->progress.set_update_callback(
function_bind(&BlenderSession::update_bake_progress, this));
@@ -656,7 +690,71 @@ void BlenderSession::bake(BL::Depsgraph &b_depsgraph_,
session->wait();
}
session->set_output_driver(nullptr);
session->read_bake_tile_cb = function_null;
session->write_render_tile_cb = function_null;
}
void BlenderSession::do_write_update_render_result(BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only)
{
RenderBuffers *buffers = rtile.buffers;
/* copy data from device */
if (!buffers->copy_from_device())
return;
float exposure = scene->film->get_exposure();
vector<float> pixels(rtile.w * rtile.h * 4);
/* Adjust absolute sample number to the range. */
int sample = rtile.sample;
const int range_start_sample = session->tile_manager.range_start_sample;
if (range_start_sample != -1) {
sample -= range_start_sample;
}
if (!do_update_only) {
/* copy each pass */
for (BL::RenderPass &b_pass : b_rlay.passes) {
int components = b_pass.channels();
/* Copy pixels from regular render passes. */
bool read = buffers->get_pass_rect(b_pass.name(), exposure, sample, components, &pixels[0]);
/* If denoising pass, */
if (!read) {
int denoising_offset = BlenderSync::get_denoising_pass(b_pass);
if (denoising_offset >= 0) {
read = buffers->get_denoising_pass_rect(
denoising_offset, exposure, sample, components, &pixels[0]);
}
}
if (!read) {
memset(&pixels[0], 0, pixels.size() * sizeof(float));
}
b_pass.rect(&pixels[0]);
}
}
else {
/* copy combined pass */
BL::RenderPass b_combined_pass(b_rlay.passes.find_by_name("Combined", b_rview_name.c_str()));
if (buffers->get_pass_rect("Combined", exposure, sample, 4, &pixels[0]))
b_combined_pass.rect(&pixels[0]);
}
}
void BlenderSession::write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
{
do_write_update_render_result(b_rlay, rtile, false);
}
void BlenderSession::update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile)
{
do_write_update_render_result(b_rlay, rtile, true);
}
void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
@@ -666,19 +764,19 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
return;
/* on session/scene parameter changes, we recreate session entirely */
const SessionParams session_params = BlenderSync::get_session_params(
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
const SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
const bool session_pause = BlenderSync::get_session_pause(b_scene, background);
SceneParams scene_params = BlenderSync::get_scene_params(b_scene, background);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session->params.modified(session_params) || scene->params.modified(scene_params)) {
free_session();
create_session();
}
/* increase samples and render time, but never decrease */
/* increase samples, but never decrease */
session->set_samples(session_params.samples);
session->set_time_limit(session_params.time_limit);
session->set_denoising_start_sample(session_params.denoising.start_sample);
session->set_pause(session_pause);
/* copy recalc flags, outside of mutex so we can decide to do the real
@@ -710,12 +808,21 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
sync->sync_camera(b_render, b_camera_override, width, height, "");
/* get buffer parameters */
const BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
if (!buffer_params.denoising_data_pass) {
session_params.denoising.use = false;
}
session->set_denoising(session_params.denoising);
/* Update film if denoising data was enabled or disabled. */
scene->film->set_denoising_data_pass(buffer_params.denoising_data_pass);
/* reset if needed */
if (scene->need_reset()) {
session->reset(session_params, buffer_params);
session->reset(buffer_params, session_params.samples);
/* After session reset, so device is not accessing image data anymore. */
builtin_images_load();
@@ -732,44 +839,7 @@ void BlenderSession::synchronize(BL::Depsgraph &b_depsgraph_)
session->start();
}
void BlenderSession::draw(BL::SpaceImageEditor &space_image)
{
if (!session || !session->scene) {
/* Offline render drawing does not force the render engine update, which means it's possible
* that the Session is not created yet. */
return;
}
thread_scoped_lock lock(draw_state_.mutex);
const int pass_index = space_image.image_user().multilayer_pass();
if (pass_index != draw_state_.last_pass_index) {
BL::RenderPass b_display_pass(b_engine.pass_by_index_get(b_rlay_name.c_str(), pass_index));
if (!b_display_pass) {
return;
}
Scene *scene = session->scene;
thread_scoped_lock lock(scene->mutex);
const Pass *pass = Pass::find(scene->passes, b_display_pass.name());
if (!pass) {
return;
}
scene->film->set_display_pass(pass->get_type());
draw_state_.last_pass_index = pass_index;
}
BL::Array<float, 2> zoom = space_image.zoom();
display_driver_->set_zoom(zoom[0], zoom[1]);
session->draw();
}
void BlenderSession::view_draw(int w, int h)
bool BlenderSession::draw(int w, int h)
{
/* pause in redraw in case update is not being called due to final render */
session->set_pause(BlenderSync::get_session_pause(b_scene, background));
@@ -815,14 +885,14 @@ void BlenderSession::view_draw(int w, int h)
/* reset if requested */
if (reset) {
const SessionParams session_params = BlenderSync::get_session_params(
SessionParams session_params = BlenderSync::get_session_params(
b_engine, b_userpref, b_scene, background);
const BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height);
const bool session_pause = BlenderSync::get_session_pause(b_scene, background);
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height, session_params.denoising.use);
bool session_pause = BlenderSync::get_session_pause(b_scene, background);
if (session_pause == false) {
session->reset(session_params, buffer_params);
session->reset(buffer_params, session_params.samples);
start_resize_time = 0.0;
}
}
@@ -835,7 +905,18 @@ void BlenderSession::view_draw(int w, int h)
update_status_progress();
/* draw */
session->draw();
BufferParams buffer_params = BlenderSync::get_buffer_params(
b_v3d, b_rv3d, scene->camera, width, height, session->params.denoising.use);
DeviceDrawParams draw_params;
if (session->params.display_buffer_linear) {
draw_params.bind_display_space_shader_cb = function_bind(
&BL::RenderEngine::bind_display_space_shader, &b_engine, b_scene);
draw_params.unbind_display_space_shader_cb = function_bind(
&BL::RenderEngine::unbind_display_space_shader, &b_engine);
}
return !session->draw(buffer_params, draw_params);
}
void BlenderSession::get_status(string &status, string &substatus)
@@ -843,6 +924,11 @@ void BlenderSession::get_status(string &status, string &substatus)
session->progress.get_status(status, substatus);
}
void BlenderSession::get_kernel_status(string &kernel_status)
{
session->progress.get_kernel_status(kernel_status);
}
void BlenderSession::get_progress(float &progress, double &total_time, double &render_time)
{
session->progress.get_time(total_time, render_time);
@@ -861,7 +947,7 @@ void BlenderSession::update_bake_progress()
void BlenderSession::update_status_progress()
{
string timestatus, status, substatus;
string timestatus, status, substatus, kernel_status;
string scene_status = "";
float progress;
double total_time, remaining_time = 0, render_time;
@@ -869,11 +955,11 @@ void BlenderSession::update_status_progress()
float mem_peak = (float)session->stats.mem_peak / 1024.0f / 1024.0f;
get_status(status, substatus);
get_kernel_status(kernel_status);
get_progress(progress, total_time, render_time);
if (progress > 0) {
remaining_time = session->get_estimated_remaining_time();
}
if (progress > 0)
remaining_time = (1.0 - (double)progress) * (render_time / (double)progress);
if (background) {
if (scene)
@@ -894,12 +980,14 @@ void BlenderSession::update_status_progress()
status = " | " + status;
if (substatus.size() > 0)
status += " | " + substatus;
if (kernel_status.size() > 0)
status += " | " + kernel_status;
}
double current_time = time_dt();
/* When rendering in a window, redraw the status at least once per second to keep the elapsed
* and remaining time up-to-date. For headless rendering, only report when something
* significant changes to keep the console output readable. */
/* When rendering in a window, redraw the status at least once per second to keep the elapsed and
* remaining time up-to-date. For headless rendering, only report when something significant
* changes to keep the console output readable. */
if (status != last_status || (!headless && (current_time - last_status_time) > 1.0)) {
b_engine.update_stats("", (timestatus + scene_status + status).c_str());
b_engine.update_memory_stats(mem_used, mem_peak);
@@ -911,27 +999,20 @@ void BlenderSession::update_status_progress()
last_progress = progress;
}
check_and_report_session_error();
}
bool BlenderSession::check_and_report_session_error()
{
if (!session->progress.get_error()) {
return false;
if (session->progress.get_error()) {
string error = session->progress.get_error_message();
if (error != last_error) {
/* TODO(sergey): Currently C++ RNA API doesn't let us to
* use mnemonic name for the variable. Would be nice to
* have this figured out.
*
* For until then, 1 << 5 means RPT_ERROR.
*/
b_engine.report(1 << 5, error.c_str());
b_engine.error_set(error.c_str());
last_error = error;
}
}
const string error = session->progress.get_error_message();
if (error != last_error) {
/* TODO(sergey): Currently C++ RNA API doesn't let us to use mnemonic name for the variable.
* Would be nice to have this figured out.
*
* For until then, 1 << 5 means RPT_ERROR. */
b_engine.report(1 << 5, error.c_str());
b_engine.error_set(error.c_str());
last_error = error;
}
return true;
}
void BlenderSession::tag_update()
@@ -967,6 +1048,56 @@ void BlenderSession::test_cancel()
session->progress.set_cancel("Cancelled");
}
void BlenderSession::update_resumable_tile_manager(int num_samples)
{
const int num_resumable_chunks = BlenderSession::num_resumable_chunks,
current_resumable_chunk = BlenderSession::current_resumable_chunk;
if (num_resumable_chunks == 0) {
return;
}
if (num_resumable_chunks > num_samples) {
fprintf(stderr,
"Cycles warning: more sample chunks (%d) than samples (%d), "
"this will cause some samples to be included in multiple chunks.\n",
num_resumable_chunks,
num_samples);
}
const float num_samples_per_chunk = (float)num_samples / num_resumable_chunks;
float range_start_sample, range_num_samples;
if (current_resumable_chunk != 0) {
/* Single chunk rendering. */
range_start_sample = num_samples_per_chunk * (current_resumable_chunk - 1);
range_num_samples = num_samples_per_chunk;
}
else {
/* Ranged-chunks. */
const int num_chunks = end_resumable_chunk - start_resumable_chunk + 1;
range_start_sample = num_samples_per_chunk * (start_resumable_chunk - 1);
range_num_samples = num_chunks * num_samples_per_chunk;
}
/* Round after doing the multiplications with num_chunks and num_samples_per_chunk
* to allow for many small chunks. */
int rounded_range_start_sample = (int)floorf(range_start_sample + 0.5f);
int rounded_range_num_samples = max((int)floorf(range_num_samples + 0.5f), 1);
/* Make sure we don't overshoot. */
if (rounded_range_start_sample + rounded_range_num_samples > num_samples) {
rounded_range_num_samples = num_samples - rounded_range_num_samples;
}
VLOG(1) << "Samples range start is " << range_start_sample << ", "
<< "number of samples to render is " << range_num_samples;
scene->integrator->set_start_sample(rounded_range_start_sample);
session->tile_manager.range_start_sample = rounded_range_start_sample;
session->tile_manager.range_num_samples = rounded_range_num_samples;
}
void BlenderSession::free_blender_memory_if_possible()
{
if (!background) {

View File

@@ -29,11 +29,12 @@
CCL_NAMESPACE_BEGIN
class BlenderDisplayDriver;
class BlenderSync;
class ImageMetaData;
class Scene;
class Session;
class RenderBuffers;
class RenderTile;
class BlenderSession {
public:
@@ -61,8 +62,6 @@ class BlenderSession {
/* offline render */
void render(BL::Depsgraph &b_depsgraph);
void render_frame_finish();
void bake(BL::Depsgraph &b_depsgrah,
BL::Object &b_object,
const string &pass_type,
@@ -70,16 +69,24 @@ class BlenderSession {
const int bake_width,
const int bake_height);
void full_buffer_written(string_view filename);
void write_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
void write_render_tile(RenderTile &rtile);
void read_render_tile(RenderTile &rtile);
/* update functions are used to update display buffer only after sample was rendered
* only needed for better visual feedback */
void update_render_result(BL::RenderLayer &b_rlay, RenderTile &rtile);
void update_render_tile(RenderTile &rtile, bool highlight);
/* interactive updates */
void synchronize(BL::Depsgraph &b_depsgraph);
/* drawing */
void draw(BL::SpaceImageEditor &space_image);
void view_draw(int w, int h);
bool draw(int w, int h);
void tag_redraw();
void tag_update();
void get_status(string &status, string &substatus);
void get_kernel_status(string &kernel_status);
void get_progress(float &progress, double &total_time, double &render_time);
void test_cancel();
void update_status_progress();
@@ -97,7 +104,8 @@ class BlenderSession {
BL::RenderSettings b_render;
BL::Depsgraph b_depsgraph;
/* NOTE: Blender's scene might become invalid after call
* #free_blender_memory_if_possible(). */
* free_blender_memory_if_possible().
*/
BL::Scene b_scene;
BL::SpaceView3D b_v3d;
BL::RegionView3D b_rv3d;
@@ -115,8 +123,6 @@ class BlenderSession {
void *python_thread_state;
bool use_developer_ui;
/* Global state which is common for all render sessions created from Blender.
* Usually denotes command line arguments.
*/
@@ -128,33 +134,41 @@ class BlenderSession {
*/
static bool headless;
/* ** Resumable render ** */
/* Overall number of chunks in which the sample range is to be divided. */
static int num_resumable_chunks;
/* Current resumable chunk index to render. */
static int current_resumable_chunk;
/* Alternative to single-chunk rendering to render a range of chunks. */
static int start_resumable_chunk;
static int end_resumable_chunk;
static bool print_render_stats;
protected:
void stamp_view_layer_metadata(Scene *scene, const string &view_layer_name);
/* Check whether session error happened.
* If so, it is reported to the render engine and true is returned.
* Otherwise false is returned. */
bool check_and_report_session_error();
void do_write_update_render_result(BL::RenderLayer &b_rlay,
RenderTile &rtile,
bool do_update_only);
void do_write_update_render_tile(RenderTile &rtile,
bool do_update_only,
bool do_read_only,
bool highlight);
void builtin_images_load();
/* Update tile manager to reflect resumable render settings. */
void update_resumable_tile_manager(int num_samples);
/* Is used after each render layer synchronization is done with the goal
* of freeing render engine data which is held from Blender side (for
* example, dependency graph).
*/
void free_blender_memory_if_possible();
struct {
thread_mutex mutex;
int last_pass_index = -1;
} draw_state_;
/* NOTE: The BlenderSession references the display driver. */
BlenderDisplayDriver *display_driver_ = nullptr;
vector<string> full_buffer_files_;
};
CCL_NAMESPACE_END

View File

@@ -17,7 +17,6 @@
#include "render/background.h"
#include "render/colorspace.h"
#include "render/graph.h"
#include "render/integrator.h"
#include "render/light.h"
#include "render/nodes.h"
#include "render/osl.h"
@@ -279,7 +278,7 @@ static ShaderNode *add_node(Scene *scene,
array<float3> curve_mapping_curves;
float min_x, max_x;
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, true);
curvemapping_minmax(mapping, 4, &min_x, &max_x);
curvemapping_minmax(mapping, true, &min_x, &max_x);
curves->set_min_x(min_x);
curves->set_max_x(max_x);
curves->set_curves(curve_mapping_curves);
@@ -292,25 +291,12 @@ static ShaderNode *add_node(Scene *scene,
array<float3> curve_mapping_curves;
float min_x, max_x;
curvemapping_color_to_array(mapping, curve_mapping_curves, RAMP_TABLE_SIZE, false);
curvemapping_minmax(mapping, 3, &min_x, &max_x);
curvemapping_minmax(mapping, false, &min_x, &max_x);
curves->set_min_x(min_x);
curves->set_max_x(max_x);
curves->set_curves(curve_mapping_curves);
node = curves;
}
else if (b_node.is_a(&RNA_ShaderNodeFloatCurve)) {
BL::ShaderNodeFloatCurve b_curve_node(b_node);
BL::CurveMapping mapping(b_curve_node.mapping());
FloatCurveNode *curve = graph->create_node<FloatCurveNode>();
array<float> curve_mapping_curve;
float min_x, max_x;
curvemapping_float_to_array(mapping, curve_mapping_curve, RAMP_TABLE_SIZE);
curvemapping_minmax(mapping, 1, &min_x, &max_x);
curve->set_min_x(min_x);
curve->set_max_x(max_x);
curve->set_curve(curve_mapping_curve);
node = curve;
}
else if (b_node.is_a(&RNA_ShaderNodeValToRGB)) {
RGBRampNode *ramp = graph->create_node<RGBRampNode>();
BL::ShaderNodeValToRGB b_ramp_node(b_node);
@@ -489,11 +475,17 @@ static ShaderNode *add_node(Scene *scene,
SubsurfaceScatteringNode *subsurface = graph->create_node<SubsurfaceScatteringNode>();
switch (b_subsurface_node.falloff()) {
case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK_FIXED_RADIUS:
subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID);
case BL::ShaderNodeSubsurfaceScattering::falloff_CUBIC:
subsurface->set_falloff(CLOSURE_BSSRDF_CUBIC_ID);
break;
case BL::ShaderNodeSubsurfaceScattering::falloff_GAUSSIAN:
subsurface->set_falloff(CLOSURE_BSSRDF_GAUSSIAN_ID);
break;
case BL::ShaderNodeSubsurfaceScattering::falloff_BURLEY:
subsurface->set_falloff(CLOSURE_BSSRDF_BURLEY_ID);
break;
case BL::ShaderNodeSubsurfaceScattering::falloff_RANDOM_WALK:
subsurface->set_method(CLOSURE_BSSRDF_RANDOM_WALK_ID);
subsurface->set_falloff(CLOSURE_BSSRDF_RANDOM_WALK_ID);
break;
}
@@ -605,11 +597,11 @@ static ShaderNode *add_node(Scene *scene,
break;
}
switch (b_principled_node.subsurface_method()) {
case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK_FIXED_RADIUS:
principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID);
case BL::ShaderNodeBsdfPrincipled::subsurface_method_BURLEY:
principled->set_subsurface_method(CLOSURE_BSSRDF_PRINCIPLED_ID);
break;
case BL::ShaderNodeBsdfPrincipled::subsurface_method_RANDOM_WALK:
principled->set_subsurface_method(CLOSURE_BSSRDF_RANDOM_WALK_ID);
principled->set_subsurface_method(CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID);
break;
}
node = principled;
@@ -1368,11 +1360,10 @@ void BlenderSync::sync_materials(BL::Depsgraph &b_depsgraph, bool update_all)
void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d, bool update_all)
{
Background *background = scene->background;
Integrator *integrator = scene->integrator;
BL::World b_world = b_scene.world();
BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
BlenderViewportParameters new_viewport_parameters(b_v3d);
if (world_recalc || update_all || b_world.ptr.data != world_map ||
viewport_parameters.shader_modified(new_viewport_parameters)) {
@@ -1464,8 +1455,9 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
/* AO */
BL::WorldLighting b_light = b_world.light_settings();
integrator->set_ao_factor(b_light.ao_factor());
integrator->set_ao_distance(b_light.distance());
background->set_use_ao(b_light.use_ambient_occlusion());
background->set_ao_factor(b_light.ao_factor());
background->set_ao_distance(b_light.distance());
/* visibility */
PointerRNA cvisibility = RNA_pointer_get(&b_world.ptr, "cycles_visibility");
@@ -1480,8 +1472,9 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
background->set_visibility(visibility);
}
else {
integrator->set_ao_factor(1.0f);
integrator->set_ao_distance(10.0f);
background->set_use_ao(false);
background->set_ao_factor(0.0f);
background->set_ao_distance(FLT_MAX);
}
shader->set_graph(graph);
@@ -1503,6 +1496,7 @@ void BlenderSync::sync_world(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d,
background->set_use_shader(view_layer.use_background_shader ||
viewport_parameters.use_custom_shader());
background->set_use_ao(background->get_use_ao() && view_layer.use_background_ao);
background->tag_update(scene);
}

View File

@@ -53,7 +53,6 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
BL::Scene &b_scene,
Scene *scene,
bool preview,
bool use_developer_ui,
Progress &progress)
: b_engine(b_engine),
b_data(b_data),
@@ -69,7 +68,6 @@ BlenderSync::BlenderSync(BL::RenderEngine &b_engine,
scene(scene),
preview(preview),
experimental(false),
use_developer_ui(use_developer_ui),
dicing_rate(1.0f),
max_subdivisions(12),
progress(progress),
@@ -226,7 +224,7 @@ void BlenderSync::sync_recalc(BL::Depsgraph &b_depsgraph, BL::SpaceView3D &b_v3d
}
if (b_v3d) {
BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
BlenderViewportParameters new_viewport_parameters(b_v3d);
if (viewport_parameters.shader_modified(new_viewport_parameters)) {
world_recalc = true;
@@ -253,13 +251,9 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,
BL::ViewLayer b_view_layer = b_depsgraph.view_layer_eval();
/* TODO(sergey): This feels weak to pass view layer to the integrator, and even weaker to have an
* implicit check on whether it is a background render or not. What is the nicer thing here? */
const bool background = !b_v3d;
sync_view_layer(b_view_layer);
sync_integrator(b_view_layer, background);
sync_film(b_view_layer, b_v3d);
sync_integrator();
sync_film(b_v3d);
sync_shaders(b_depsgraph, b_v3d);
sync_images();
@@ -286,7 +280,7 @@ void BlenderSync::sync_data(BL::RenderSettings &b_render,
/* Integrator */
void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
void BlenderSync::sync_integrator()
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -334,24 +328,59 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
integrator->set_motion_blur(view_layer.use_motion_blur);
}
integrator->set_method((Integrator::Method)get_enum(
cscene, "progressive", Integrator::NUM_METHODS, Integrator::PATH));
integrator->set_sample_all_lights_direct(get_boolean(cscene, "sample_all_lights_direct"));
integrator->set_sample_all_lights_indirect(get_boolean(cscene, "sample_all_lights_indirect"));
integrator->set_light_sampling_threshold(get_float(cscene, "light_sampling_threshold"));
SamplingPattern sampling_pattern = (SamplingPattern)get_enum(
cscene, "sampling_pattern", SAMPLING_NUM_PATTERNS, SAMPLING_PATTERN_SOBOL);
integrator->set_sampling_pattern(sampling_pattern);
if (preview) {
integrator->set_use_adaptive_sampling(
RNA_boolean_get(&cscene, "use_preview_adaptive_sampling"));
integrator->set_adaptive_threshold(get_float(cscene, "preview_adaptive_threshold"));
integrator->set_adaptive_min_samples(get_int(cscene, "preview_adaptive_min_samples"));
int adaptive_min_samples = INT_MAX;
if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
sampling_pattern = SAMPLING_PATTERN_PMJ;
adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
integrator->set_adaptive_threshold(get_float(cscene, "adaptive_threshold"));
}
else {
integrator->set_use_adaptive_sampling(RNA_boolean_get(&cscene, "use_adaptive_sampling"));
integrator->set_adaptive_threshold(get_float(cscene, "adaptive_threshold"));
integrator->set_adaptive_min_samples(get_int(cscene, "adaptive_min_samples"));
integrator->set_adaptive_threshold(0.0f);
}
integrator->set_sampling_pattern(sampling_pattern);
int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
int ao_samples = get_int(cscene, "ao_samples");
int mesh_light_samples = get_int(cscene, "mesh_light_samples");
int subsurface_samples = get_int(cscene, "subsurface_samples");
int volume_samples = get_int(cscene, "volume_samples");
if (get_boolean(cscene, "use_square_samples")) {
integrator->set_diffuse_samples(diffuse_samples * diffuse_samples);
integrator->set_glossy_samples(glossy_samples * glossy_samples);
integrator->set_transmission_samples(transmission_samples * transmission_samples);
integrator->set_ao_samples(ao_samples * ao_samples);
integrator->set_mesh_light_samples(mesh_light_samples * mesh_light_samples);
integrator->set_subsurface_samples(subsurface_samples * subsurface_samples);
integrator->set_volume_samples(volume_samples * volume_samples);
adaptive_min_samples = min(adaptive_min_samples * adaptive_min_samples, INT_MAX);
}
else {
integrator->set_diffuse_samples(diffuse_samples);
integrator->set_glossy_samples(glossy_samples);
integrator->set_transmission_samples(transmission_samples);
integrator->set_ao_samples(ao_samples);
integrator->set_mesh_light_samples(mesh_light_samples);
integrator->set_subsurface_samples(subsurface_samples);
integrator->set_volume_samples(volume_samples);
}
integrator->set_adaptive_min_samples(adaptive_min_samples);
if (get_boolean(cscene, "use_fast_gi")) {
if (preview) {
integrator->set_ao_bounces(get_int(cscene, "ao_bounces"));
@@ -364,38 +393,20 @@ void BlenderSync::sync_integrator(BL::ViewLayer &b_view_layer, bool background)
integrator->set_ao_bounces(0);
}
const DenoiseParams denoise_params = get_denoise_params(b_scene, b_view_layer, background);
integrator->set_use_denoise(denoise_params.use);
/* Only update denoiser parameters if the denoiser is actually used. This allows to tweak
* denoiser parameters before enabling it without render resetting on every change. The downside
* is that the interface and the integrator are technically out of sync. */
if (denoise_params.use) {
integrator->set_denoiser_type(denoise_params.type);
integrator->set_denoise_start_sample(denoise_params.start_sample);
integrator->set_use_denoise_pass_albedo(denoise_params.use_pass_albedo);
integrator->set_use_denoise_pass_normal(denoise_params.use_pass_normal);
integrator->set_denoiser_prefilter(denoise_params.prefilter);
}
/* UPDATE_NONE as we don't want to tag the integrator as modified (this was done by the
* set calls above), but we need to make sure that the dependent things are tagged. */
/* UPDATE_NONE as we don't want to tag the integrator as modified, just tag dependent things */
integrator->tag_update(scene, Integrator::UPDATE_NONE);
}
/* Film */
void BlenderSync::sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d)
void BlenderSync::sync_film(BL::SpaceView3D &b_v3d)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
Film *film = scene->film;
if (b_v3d) {
const BlenderViewportParameters new_viewport_parameters(b_v3d, use_developer_ui);
film->set_display_pass(new_viewport_parameters.display_pass);
film->set_show_active_pixels(new_viewport_parameters.show_active_pixels);
film->set_display_pass(update_viewport_display_passes(b_v3d, scene->passes));
}
film->set_exposure(get_float(cscene, "film_exposure"));
@@ -423,15 +434,6 @@ void BlenderSync::sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d)
break;
}
}
/* Blender viewport does not support proper shadow catcher compositing, so force an approximate
* mode to improve visual feedback. */
if (b_v3d) {
film->set_use_approximate_shadow_catcher(true);
}
else {
film->set_use_approximate_shadow_catcher(!get_boolean(crl, "use_pass_shadow_catcher"));
}
}
/* Render Layer */
@@ -442,6 +444,7 @@ void BlenderSync::sync_view_layer(BL::ViewLayer &b_view_layer)
/* Filter. */
view_layer.use_background_shader = b_view_layer.use_sky();
view_layer.use_background_ao = b_view_layer.use_ao();
/* Always enable surfaces for baking, otherwise there is nothing to bake to. */
view_layer.use_surfaces = b_view_layer.use_solid() || scene->bake_manager->get_baking();
view_layer.use_hair = b_view_layer.use_strand();
@@ -461,7 +464,10 @@ void BlenderSync::sync_view_layer(BL::ViewLayer &b_view_layer)
if (use_layer_samples != 2) {
int samples = b_view_layer.samples();
view_layer.samples = samples;
if (get_boolean(cscene, "use_square_samples"))
view_layer.samples = samples * samples;
else
view_layer.samples = samples;
}
}
@@ -493,8 +499,7 @@ void BlenderSync::sync_images()
}
/* Passes */
static PassType get_blender_pass_type(BL::RenderPass &b_pass)
PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
{
string name = b_pass.name();
#define MAP_PASS(passname, passtype) \
@@ -502,15 +507,10 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
return passtype; \
} \
((void)0)
/* NOTE: Keep in sync with defined names from DNA_scene_types.h */
MAP_PASS("Combined", PASS_COMBINED);
MAP_PASS("Noisy Image", PASS_COMBINED);
MAP_PASS("Depth", PASS_DEPTH);
MAP_PASS("Mist", PASS_MIST);
MAP_PASS("Position", PASS_POSITION);
MAP_PASS("Normal", PASS_NORMAL);
MAP_PASS("IndexOB", PASS_OBJECT_ID);
MAP_PASS("UV", PASS_UV);
@@ -539,86 +539,118 @@ static PassType get_blender_pass_type(BL::RenderPass &b_pass)
MAP_PASS("BakePrimitive", PASS_BAKE_PRIMITIVE);
MAP_PASS("BakeDifferential", PASS_BAKE_DIFFERENTIAL);
MAP_PASS("Denoising Normal", PASS_DENOISING_NORMAL);
MAP_PASS("Denoising Albedo", PASS_DENOISING_ALBEDO);
MAP_PASS("Shadow Catcher", PASS_SHADOW_CATCHER);
MAP_PASS("Noisy Shadow Catcher", PASS_SHADOW_CATCHER);
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
#undef MAP_PASS
return PASS_NONE;
}
static Pass *pass_add(Scene *scene,
PassType type,
const char *name,
PassMode mode = PassMode::DENOISED)
int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
{
Pass *pass = scene->create_node<Pass>();
string name = b_pass.name();
pass->set_type(type);
pass->set_name(ustring(name));
pass->set_mode(mode);
if (name == "Noisy Image")
return DENOISING_PASS_PREFILTERED_COLOR;
return pass;
if (name.substr(0, 10) != "Denoising ") {
return -1;
}
name = name.substr(10);
#define MAP_PASS(passname, offset) \
if (name == passname) { \
return offset; \
} \
((void)0)
MAP_PASS("Normal", DENOISING_PASS_PREFILTERED_NORMAL);
MAP_PASS("Albedo", DENOISING_PASS_PREFILTERED_ALBEDO);
MAP_PASS("Depth", DENOISING_PASS_PREFILTERED_DEPTH);
MAP_PASS("Shadowing", DENOISING_PASS_PREFILTERED_SHADOWING);
MAP_PASS("Variance", DENOISING_PASS_PREFILTERED_VARIANCE);
MAP_PASS("Intensity", DENOISING_PASS_PREFILTERED_INTENSITY);
MAP_PASS("Clean", DENOISING_PASS_CLEAN);
#undef MAP_PASS
return -1;
}
void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
vector<Pass> BlenderSync::sync_render_passes(BL::Scene &b_scene,
BL::RenderLayer &b_rlay,
BL::ViewLayer &b_view_layer,
bool adaptive_sampling,
const DenoiseParams &denoising)
{
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
vector<Pass> passes;
/* Delete all existing passes. */
set<Pass *> clear_passes(scene->passes.begin(), scene->passes.end());
scene->delete_nodes(clear_passes);
/* Always add combined pass. */
pass_add(scene, PASS_COMBINED, "Combined");
/* Blender built-in data and light passes. */
/* loop over passes */
for (BL::RenderPass &b_pass : b_rlay.passes) {
const PassType pass_type = get_blender_pass_type(b_pass);
if (pass_type == PASS_NONE) {
LOG(ERROR) << "Unknown pass " << b_pass.name();
continue;
}
PassType pass_type = get_pass_type(b_pass);
if (pass_type == PASS_MOTION &&
(b_view_layer.use_motion_blur() && b_scene.render().use_motion_blur())) {
continue;
}
pass_add(scene, pass_type, b_pass.name().c_str());
if (pass_type != PASS_NONE)
Pass::add(pass_type, passes, b_pass.name().c_str());
}
PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
/* Debug passes. */
if (get_boolean(crl, "pass_debug_sample_count")) {
b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
pass_add(scene, PASS_SAMPLE_COUNT, "Debug Sample Count");
int denoising_flags = 0;
if (denoising.use || denoising.store_passes) {
if (denoising.type == DENOISER_NLM) {
#define MAP_OPTION(name, flag) \
if (!get_boolean(crl, name)) { \
denoising_flags |= flag; \
} \
((void)0)
MAP_OPTION("denoising_diffuse_direct", DENOISING_CLEAN_DIFFUSE_DIR);
MAP_OPTION("denoising_diffuse_indirect", DENOISING_CLEAN_DIFFUSE_IND);
MAP_OPTION("denoising_glossy_direct", DENOISING_CLEAN_GLOSSY_DIR);
MAP_OPTION("denoising_glossy_indirect", DENOISING_CLEAN_GLOSSY_IND);
MAP_OPTION("denoising_transmission_direct", DENOISING_CLEAN_TRANSMISSION_DIR);
MAP_OPTION("denoising_transmission_indirect", DENOISING_CLEAN_TRANSMISSION_IND);
#undef MAP_OPTION
}
b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
}
scene->film->set_denoising_flags(denoising_flags);
if (denoising.store_passes) {
b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Depth", 1, "Z", b_view_layer.name().c_str());
if (denoising.type == DENOISER_NLM) {
b_engine.add_pass("Denoising Shadowing", 1, "X", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Variance", 3, "RGB", b_view_layer.name().c_str());
b_engine.add_pass("Denoising Intensity", 1, "X", b_view_layer.name().c_str());
}
if (scene->film->get_denoising_flags() & DENOISING_CLEAN_ALL_PASSES) {
b_engine.add_pass("Denoising Clean", 3, "RGB", b_view_layer.name().c_str());
}
}
/* Cycles specific passes. */
if (get_boolean(crl, "pass_debug_render_time")) {
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
}
if (get_boolean(crl, "pass_debug_sample_count")) {
b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
}
if (get_boolean(crl, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
pass_add(scene, PASS_VOLUME_DIRECT, "VolumeDir");
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
}
if (get_boolean(crl, "use_pass_volume_indirect")) {
b_engine.add_pass("VolumeInd", 3, "RGB", b_view_layer.name().c_str());
pass_add(scene, PASS_VOLUME_INDIRECT, "VolumeInd");
}
if (get_boolean(crl, "use_pass_shadow_catcher")) {
b_engine.add_pass("Shadow Catcher", 3, "RGB", b_view_layer.name().c_str());
pass_add(scene, PASS_SHADOW_CATCHER, "Shadow Catcher");
Pass::add(PASS_VOLUME_INDIRECT, passes, "VolumeInd");
}
/* Cryptomatte stores two ID/weight pairs per RGBA layer.
@@ -630,7 +662,7 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Object%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
pass_add(scene, PASS_CRYPTOMATTE, passname.c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_OBJECT);
}
@@ -638,7 +670,7 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Material%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
pass_add(scene, PASS_CRYPTOMATTE, passname.c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_MATERIAL);
}
@@ -646,33 +678,22 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
for (int i = 0; i < crypto_depth; i++) {
string passname = cryptomatte_prefix + string_printf("Asset%02d", i);
b_engine.add_pass(passname.c_str(), 4, "RGBA", b_view_layer.name().c_str());
pass_add(scene, PASS_CRYPTOMATTE, passname.c_str());
Pass::add(PASS_CRYPTOMATTE, passes, passname.c_str());
}
cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_ASSET);
}
if (b_view_layer.use_pass_cryptomatte_accurate() && cryptomatte_passes != CRYPT_NONE) {
cryptomatte_passes = (CryptomatteType)(cryptomatte_passes | CRYPT_ACCURATE);
}
scene->film->set_cryptomatte_passes(cryptomatte_passes);
/* Denoising passes. */
const bool use_denoising = get_boolean(cscene, "use_denoising") &&
get_boolean(crl, "use_denoising");
const bool store_denoising_passes = get_boolean(crl, "denoising_store_passes");
if (use_denoising) {
b_engine.add_pass("Noisy Image", 4, "RGBA", b_view_layer.name().c_str());
pass_add(scene, PASS_COMBINED, "Noisy Image", PassMode::NOISY);
if (get_boolean(crl, "use_pass_shadow_catcher")) {
b_engine.add_pass("Noisy Shadow Catcher", 3, "RGB", b_view_layer.name().c_str());
pass_add(scene, PASS_SHADOW_CATCHER, "Noisy Shadow Catcher", PassMode::NOISY);
if (adaptive_sampling) {
Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
if (!get_boolean(crl, "pass_debug_sample_count")) {
Pass::add(PASS_SAMPLE_COUNT, passes);
}
}
if (store_denoising_passes) {
b_engine.add_pass("Denoising Normal", 3, "XYZ", b_view_layer.name().c_str());
pass_add(scene, PASS_DENOISING_NORMAL, "Denoising Normal", PassMode::NOISY);
b_engine.add_pass("Denoising Albedo", 3, "RGB", b_view_layer.name().c_str());
pass_add(scene, PASS_DENOISING_ALBEDO, "Denoising Albedo", PassMode::NOISY);
}
/* Custom AOV passes. */
BL::ViewLayer::aovs_iterator b_aov_iter;
for (b_view_layer.aovs.begin(b_aov_iter); b_aov_iter != b_view_layer.aovs.end(); ++b_aov_iter) {
BL::AOV b_aov(*b_aov_iter);
@@ -685,15 +706,28 @@ void BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_v
if (is_color) {
b_engine.add_pass(name.c_str(), 4, "RGBA", b_view_layer.name().c_str());
pass_add(scene, PASS_AOV_COLOR, name.c_str());
Pass::add(PASS_AOV_COLOR, passes, name.c_str());
}
else {
b_engine.add_pass(name.c_str(), 1, "X", b_view_layer.name().c_str());
pass_add(scene, PASS_AOV_VALUE, name.c_str());
Pass::add(PASS_AOV_VALUE, passes, name.c_str());
}
}
scene->film->set_denoising_data_pass(denoising.use || denoising.store_passes);
scene->film->set_denoising_clean_pass(scene->film->get_denoising_flags() &
DENOISING_CLEAN_ALL_PASSES);
scene->film->set_denoising_prefiltered_pass(denoising.store_passes &&
denoising.type == DENOISER_NLM);
scene->film->set_pass_alpha_threshold(b_view_layer.pass_alpha_threshold());
if (!Pass::equals(passes, scene->passes)) {
scene->film->tag_passes_update(scene, passes);
scene->film->tag_modified();
scene->integrator->tag_update(scene, Integrator::UPDATE_ALL);
}
return passes;
}
void BlenderSync::free_data_after_sync(BL::Depsgraph &b_depsgraph)
@@ -739,9 +773,9 @@ SceneParams BlenderSync::get_scene_params(BL::Scene &b_scene, bool background)
params.shadingsystem = SHADINGSYSTEM_OSL;
if (background || DebugFlags().viewport_static_bvh)
params.bvh_type = BVH_TYPE_STATIC;
params.bvh_type = SceneParams::BVH_STATIC;
else
params.bvh_type = BVH_TYPE_DYNAMIC;
params.bvh_type = SceneParams::BVH_DYNAMIC;
params.use_bvh_spatial_split = RNA_boolean_get(&cscene, "debug_use_spatial_splits");
params.use_bvh_unaligned_nodes = RNA_boolean_get(&cscene, "debug_use_hair_bvh");
@@ -784,7 +818,8 @@ bool BlenderSync::get_session_pause(BL::Scene &b_scene, bool background)
SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_preferences,
BL::Scene &b_scene,
bool background)
bool background,
BL::ViewLayer b_view_layer)
{
SessionParams params;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
@@ -792,8 +827,7 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* feature set */
params.experimental = (get_enum(cscene, "feature_set") != 0);
/* Headless and background rendering. */
params.headless = BlenderSession::headless;
/* Background */
params.background = background;
/* Device */
@@ -802,26 +836,111 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
/* samples */
int samples = get_int(cscene, "samples");
int aa_samples = get_int(cscene, "aa_samples");
int preview_samples = get_int(cscene, "preview_samples");
int preview_aa_samples = get_int(cscene, "preview_aa_samples");
if (background) {
params.samples = samples;
if (get_boolean(cscene, "use_square_samples")) {
aa_samples = aa_samples * aa_samples;
preview_aa_samples = preview_aa_samples * preview_aa_samples;
samples = samples * samples;
preview_samples = preview_samples * preview_samples;
}
if (get_enum(cscene, "progressive") == 0 && params.device.has_branched_path) {
if (background) {
params.samples = aa_samples;
}
else {
params.samples = preview_aa_samples;
if (params.samples == 0)
params.samples = INT_MAX;
}
}
else {
params.samples = preview_samples;
if (params.samples == 0)
params.samples = INT_MAX;
if (background) {
params.samples = samples;
}
else {
params.samples = preview_samples;
if (params.samples == 0)
params.samples = INT_MAX;
}
}
/* Clamp samples. */
params.samples = min(params.samples, Integrator::MAX_SAMPLES);
/* Adaptive sampling. */
params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");
/* tiles */
const bool is_cpu = (params.device.type == DEVICE_CPU);
if (!is_cpu && !background) {
/* currently GPU could be much slower than CPU when using tiles,
* still need to be investigated, but meanwhile make it possible
* to work in viewport smoothly
*/
int debug_tile_size = get_int(cscene, "debug_tile_size");
params.tile_size = make_int2(debug_tile_size, debug_tile_size);
}
else {
int tile_x = b_engine.tile_x();
int tile_y = b_engine.tile_y();
params.tile_size = make_int2(tile_x, tile_y);
}
if ((BlenderSession::headless == false) && background) {
params.tile_order = (TileOrder)get_enum(cscene, "tile_order");
}
else {
params.tile_order = TILE_BOTTOM_TO_TOP;
}
/* Denoising */
params.denoising = get_denoise_params(b_scene, b_view_layer, background);
if (params.denoising.use) {
/* Add additional denoising devices if we are rendering and denoising
* with different devices. */
params.device.add_denoising_devices(params.denoising.type);
/* Check if denoiser is supported by device. */
if (!(params.device.denoisers & params.denoising.type)) {
params.denoising.use = false;
}
}
/* Viewport Performance */
params.start_resolution = get_int(cscene, "preview_start_resolution");
params.pixel_size = b_engine.get_preview_pixel_size(b_scene);
/* other parameters */
params.cancel_timeout = (double)get_float(cscene, "debug_cancel_timeout");
params.reset_timeout = (double)get_float(cscene, "debug_reset_timeout");
params.text_timeout = (double)get_float(cscene, "debug_text_timeout");
/* progressive refine */
BL::RenderSettings b_r = b_scene.render();
params.progressive_refine = b_engine.is_preview() ||
get_boolean(cscene, "use_progressive_refine");
if (b_r.use_save_buffers() || params.adaptive_sampling)
params.progressive_refine = false;
if (background) {
if (params.progressive_refine)
params.progressive = true;
else
params.progressive = false;
params.start_resolution = INT_MAX;
params.pixel_size = 1;
}
else
params.progressive = true;
/* shading system - scene level needs full refresh */
const bool shadingsystem = RNA_boolean_get(&cscene, "shading_system");
@@ -831,30 +950,19 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
else if (shadingsystem == 1)
params.shadingsystem = SHADINGSYSTEM_OSL;
/* Time limit. */
if (background) {
params.time_limit = get_float(cscene, "time_limit");
}
else {
/* For the viewport it kind of makes more sense to think in terms of the noise floor, which is
* usually higher than acceptable level for the final frame. */
/* TODO: It might be useful to support time limit in the viewport as well, but needs some
* extra thoughts and input. */
params.time_limit = 0.0;
/* Color management. */
params.display_buffer_linear = b_engine.support_display_space_shader(b_scene);
if (b_engine.is_preview()) {
/* For preview rendering we're using same timeout as
* blender's job update.
*/
params.progressive_update_timeout = 0.1;
}
/* Profiling. */
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;
if (background) {
params.use_auto_tile = RNA_boolean_get(&cscene, "use_auto_tile");
params.tile_size = max(get_int(cscene, "tile_size"), 8);
}
else {
params.use_auto_tile = false;
}
return params;
}
@@ -862,34 +970,33 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
BL::ViewLayer &b_view_layer,
bool background)
{
enum DenoiserInput {
DENOISER_INPUT_RGB = 1,
DENOISER_INPUT_RGB_ALBEDO = 2,
DENOISER_INPUT_RGB_ALBEDO_NORMAL = 3,
DENOISER_INPUT_NUM,
};
DenoiseParams denoising;
PointerRNA cscene = RNA_pointer_get(&b_scene.ptr, "cycles");
int input_passes = -1;
if (background) {
/* Final Render Denoising */
denoising.use = get_boolean(cscene, "use_denoising");
denoising.type = (DenoiserType)get_enum(cscene, "denoiser", DENOISER_NUM, DENOISER_NONE);
denoising.prefilter = (DenoiserPrefilter)get_enum(
cscene, "denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_NONE);
input_passes = (DenoiserInput)get_enum(
cscene, "denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO_NORMAL);
if (b_view_layer) {
PointerRNA clayer = RNA_pointer_get(&b_view_layer.ptr, "cycles");
if (!get_boolean(clayer, "use_denoising")) {
denoising.use = false;
}
denoising.radius = get_int(clayer, "denoising_radius");
denoising.strength = get_float(clayer, "denoising_strength");
denoising.feature_strength = get_float(clayer, "denoising_feature_strength");
denoising.relative_pca = get_boolean(clayer, "denoising_relative_pca");
denoising.input_passes = (DenoiserInput)get_enum(
clayer,
(denoising.type == DENOISER_OPTIX) ? "denoising_optix_input_passes" :
"denoising_openimagedenoise_input_passes",
DENOISER_INPUT_NUM,
DENOISER_INPUT_RGB_ALBEDO_NORMAL);
denoising.store_passes = get_boolean(clayer, "denoising_store_passes");
}
}
else {
@@ -897,12 +1004,10 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
denoising.use = get_boolean(cscene, "use_preview_denoising");
denoising.type = (DenoiserType)get_enum(
cscene, "preview_denoiser", DENOISER_NUM, DENOISER_NONE);
denoising.prefilter = (DenoiserPrefilter)get_enum(
cscene, "preview_denoising_prefilter", DENOISER_PREFILTER_NUM, DENOISER_PREFILTER_FAST);
denoising.start_sample = get_int(cscene, "preview_denoising_start_sample");
input_passes = (DenoiserInput)get_enum(
cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, DENOISER_INPUT_RGB_ALBEDO);
denoising.input_passes = (DenoiserInput)get_enum(
cscene, "preview_denoising_input_passes", DENOISER_INPUT_NUM, (int)denoising.input_passes);
/* Auto select fastest denoiser. */
if (denoising.type == DENOISER_NONE) {
@@ -918,27 +1023,6 @@ DenoiseParams BlenderSync::get_denoise_params(BL::Scene &b_scene,
}
}
switch (input_passes) {
case DENOISER_INPUT_RGB:
denoising.use_pass_albedo = false;
denoising.use_pass_normal = false;
break;
case DENOISER_INPUT_RGB_ALBEDO:
denoising.use_pass_albedo = true;
denoising.use_pass_normal = false;
break;
case DENOISER_INPUT_RGB_ALBEDO_NORMAL:
denoising.use_pass_albedo = true;
denoising.use_pass_normal = true;
break;
default:
LOG(ERROR) << "Unhandled input passes enum " << input_passes;
break;
}
return denoising;
}

View File

@@ -60,7 +60,6 @@ class BlenderSync {
BL::Scene &b_scene,
Scene *scene,
bool preview,
bool use_developer_ui,
Progress &progress);
~BlenderSync();
@@ -76,8 +75,12 @@ class BlenderSync {
int height,
void **python_thread_state);
void sync_view_layer(BL::ViewLayer &b_view_layer);
void sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
void sync_integrator(BL::ViewLayer &b_view_layer, bool background);
vector<Pass> sync_render_passes(BL::Scene &b_scene,
BL::RenderLayer &b_render_layer,
BL::ViewLayer &b_view_layer,
bool adaptive_sampling,
const DenoiseParams &denoising);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
int width,
@@ -95,13 +98,22 @@ class BlenderSync {
/* get parameters */
static SceneParams get_scene_params(BL::Scene &b_scene, bool background);
static SessionParams get_session_params(BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::Scene &b_scene,
bool background);
static SessionParams get_session_params(
BL::RenderEngine &b_engine,
BL::Preferences &b_userpref,
BL::Scene &b_scene,
bool background,
BL::ViewLayer b_view_layer = BL::ViewLayer(PointerRNA_NULL));
static bool get_session_pause(BL::Scene &b_scene, bool background);
static BufferParams get_buffer_params(
BL::SpaceView3D &b_v3d, BL::RegionView3D &b_rv3d, Camera *cam, int width, int height);
static BufferParams get_buffer_params(BL::SpaceView3D &b_v3d,
BL::RegionView3D &b_rv3d,
Camera *cam,
int width,
int height,
const bool use_denoiser);
static PassType get_pass_type(BL::RenderPass &b_pass);
static int get_denoising_pass(BL::RenderPass &b_pass);
private:
static DenoiseParams get_denoise_params(BL::Scene &b_scene,
@@ -119,7 +131,7 @@ class BlenderSync {
int width,
int height,
void **python_thread_state);
void sync_film(BL::ViewLayer &b_view_layer, BL::SpaceView3D &b_v3d);
void sync_film(BL::SpaceView3D &b_v3d);
void sync_view();
/* Shader */
@@ -233,7 +245,6 @@ class BlenderSync {
Scene *scene;
bool preview;
bool experimental;
bool use_developer_ui;
float dicing_rate;
int max_subdivisions;
@@ -242,6 +253,7 @@ class BlenderSync {
RenderLayerInfo()
: material_override(PointerRNA_NULL),
use_background_shader(true),
use_background_ao(true),
use_surfaces(true),
use_hair(true),
use_volumes(true),
@@ -254,6 +266,7 @@ class BlenderSync {
string name;
BL::Material material_override;
bool use_background_shader;
bool use_background_ao;
bool use_surfaces;
bool use_hair;
bool use_volumes;

View File

@@ -90,27 +90,26 @@ static inline BL::Mesh object_to_mesh(BL::BlendData & /*data*/,
}
#endif
BL::Mesh mesh = (b_ob_info.object_data.is_a(&RNA_Mesh)) ? BL::Mesh(b_ob_info.object_data) :
BL::Mesh(PointerRNA_NULL);
BL::Mesh mesh(PointerRNA_NULL);
if (b_ob_info.object_data.is_a(&RNA_Mesh)) {
/* TODO: calc_undeformed is not used. */
mesh = BL::Mesh(b_ob_info.object_data);
if (b_ob_info.is_real_object_data()) {
if (mesh) {
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
* Also in edit mode do we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
BL::Depsgraph depsgraph(PointerRNA_NULL);
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
else {
/* Make a copy to split faces if we use autosmooth, otherwise not needed.
* Also in edit mode do we need to make a copy, to ensure data layers like
* UV are not empty. */
if (mesh.is_editmode() ||
(mesh.use_auto_smooth() && subdivision_type == Mesh::SUBDIVISION_NONE)) {
BL::Depsgraph depsgraph(PointerRNA_NULL);
assert(b_ob_info.is_real_object_data());
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
else {
/* TODO: what to do about non-mesh geometry instances? */
BL::Depsgraph depsgraph(PointerRNA_NULL);
if (b_ob_info.is_real_object_data()) {
mesh = b_ob_info.real_object.to_mesh(false, depsgraph);
}
}
#if 0
@@ -171,11 +170,12 @@ static inline void curvemap_minmax_curve(/*const*/ BL::CurveMap &curve, float *m
}
static inline void curvemapping_minmax(/*const*/ BL::CurveMapping &cumap,
int num_curves,
bool rgb_curve,
float *min_x,
float *max_x)
{
// const int num_curves = cumap.curves.length(); /* Gives linking error so far. */
const int num_curves = rgb_curve ? 4 : 3;
*min_x = FLT_MAX;
*max_x = -FLT_MAX;
for (int i = 0; i < num_curves; ++i) {
@@ -195,28 +195,6 @@ static inline void curvemapping_to_array(BL::CurveMapping &cumap, array<float> &
}
}
static inline void curvemapping_float_to_array(BL::CurveMapping &cumap,
array<float> &data,
int size)
{
float min = 0.0f, max = 1.0f;
curvemapping_minmax(cumap, 1, &min, &max);
const float range = max - min;
cumap.update();
BL::CurveMap map = cumap.curves[0];
data.resize(size);
for (int i = 0; i < size; i++) {
float t = min + (float)i / (float)(size - 1) * range;
data[i] = cumap.evaluate(map, t);
}
}
static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
array<float3> &data,
int size,
@@ -235,8 +213,7 @@ static inline void curvemapping_color_to_array(BL::CurveMapping &cumap,
*
* There might be some better estimations here tho.
*/
const int num_curves = rgb_curve ? 4 : 3;
curvemapping_minmax(cumap, num_curves, &min_x, &max_x);
curvemapping_minmax(cumap, rgb_curve, &min_x, &max_x);
const float range_x = max_x - min_x;

View File

@@ -17,8 +17,6 @@
#include "blender_viewport.h"
#include "blender_util.h"
#include "render/pass.h"
#include "util/util_logging.h"
CCL_NAMESPACE_BEGIN
@@ -28,12 +26,11 @@ BlenderViewportParameters::BlenderViewportParameters()
studiolight_rotate_z(0.0f),
studiolight_intensity(1.0f),
studiolight_background_alpha(1.0f),
display_pass(PASS_COMBINED),
show_active_pixels(false)
display_pass(PASS_COMBINED)
{
}
BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui)
BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d)
: BlenderViewportParameters()
{
if (!b_v3d) {
@@ -58,25 +55,7 @@ BlenderViewportParameters::BlenderViewportParameters(BL::SpaceView3D &b_v3d, boo
}
/* Film. */
/* Lookup display pass based on the enum identifier.
* This is because integer values of python enum are not aligned with the passes definition in
* the kernel. */
display_pass = PASS_COMBINED;
const string display_pass_identifier = get_enum_identifier(cshading, "render_pass");
if (!display_pass_identifier.empty()) {
const ustring pass_type_identifier(string_to_lower(display_pass_identifier));
const NodeEnum *pass_type_enum = Pass::get_type_enum();
if (pass_type_enum->exists(pass_type_identifier)) {
display_pass = static_cast<PassType>((*pass_type_enum)[pass_type_identifier]);
}
}
if (use_developer_ui) {
show_active_pixels = get_boolean(cshading, "show_active_pixels");
}
display_pass = (PassType)get_enum(cshading, "render_pass", -1, -1);
}
bool BlenderViewportParameters::shader_modified(const BlenderViewportParameters &other) const
@@ -90,7 +69,7 @@ bool BlenderViewportParameters::shader_modified(const BlenderViewportParameters
bool BlenderViewportParameters::film_modified(const BlenderViewportParameters &other) const
{
return display_pass != other.display_pass || show_active_pixels != other.show_active_pixels;
return display_pass != other.display_pass;
}
bool BlenderViewportParameters::modified(const BlenderViewportParameters &other) const
@@ -103,4 +82,18 @@ bool BlenderViewportParameters::use_custom_shader() const
return !(use_scene_world && use_scene_lights);
}
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes)
{
if (b_v3d) {
const BlenderViewportParameters viewport_parameters(b_v3d);
const PassType display_pass = viewport_parameters.display_pass;
passes.clear();
Pass::add(display_pass, passes);
return display_pass;
}
return PASS_NONE;
}
CCL_NAMESPACE_END

View File

@@ -39,10 +39,9 @@ class BlenderViewportParameters {
/* Film. */
PassType display_pass;
bool show_active_pixels;
BlenderViewportParameters();
BlenderViewportParameters(BL::SpaceView3D &b_v3d, bool use_developer_ui);
explicit BlenderViewportParameters(BL::SpaceView3D &b_v3d);
/* Check whether any of shading related settings are different from the given parameters. */
bool shader_modified(const BlenderViewportParameters &other) const;
@@ -58,6 +57,8 @@ class BlenderViewportParameters {
bool use_custom_shader() const;
};
PassType update_viewport_display_passes(BL::SpaceView3D &b_v3d, vector<Pass> &passes);
CCL_NAMESPACE_END
#endif

View File

@@ -832,18 +832,18 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
typedef StackAllocator<256, float2> LeafTimeStackAllocator;
typedef StackAllocator<256, BVHReference> LeafReferenceStackAllocator;
vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM];
vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM];
vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM];
vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM];
vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM];
vector<int, LeafStackAllocator> p_type[PRIMITIVE_NUM_TOTAL];
vector<int, LeafStackAllocator> p_index[PRIMITIVE_NUM_TOTAL];
vector<int, LeafStackAllocator> p_object[PRIMITIVE_NUM_TOTAL];
vector<float2, LeafTimeStackAllocator> p_time[PRIMITIVE_NUM_TOTAL];
vector<BVHReference, LeafReferenceStackAllocator> p_ref[PRIMITIVE_NUM_TOTAL];
/* TODO(sergey): In theory we should be able to store references. */
vector<BVHReference, LeafReferenceStackAllocator> object_references;
uint visibility[PRIMITIVE_NUM] = {0};
uint visibility[PRIMITIVE_NUM_TOTAL] = {0};
/* NOTE: Keep initialization in sync with actual number of primitives. */
BoundBox bounds[PRIMITIVE_NUM] = {
BoundBox bounds[PRIMITIVE_NUM_TOTAL] = {
BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty};
int ob_num = 0;
int num_new_prims = 0;
@@ -877,7 +877,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
* TODO(sergey): With some pointer trickery we can write directly to the
* destination buffers for the non-spatial split BVH.
*/
BVHNode *leaves[PRIMITIVE_NUM + 1] = {NULL};
BVHNode *leaves[PRIMITIVE_NUM_TOTAL + 1] = {NULL};
int num_leaves = 0;
size_t start_index = 0;
vector<int, LeafStackAllocator> local_prim_type, local_prim_index, local_prim_object;
@@ -888,7 +888,7 @@ BVHNode *BVHBuild::create_leaf_node(const BVHRange &range, const vector<BVHRefer
if (need_prim_time) {
local_prim_time.resize(num_new_prims);
}
for (int i = 0; i < PRIMITIVE_NUM; ++i) {
for (int i = 0; i < PRIMITIVE_NUM_TOTAL; ++i) {
int num = (int)p_type[i].size();
if (num != 0) {
assert(p_type[i].size() == p_index[i].size());

View File

@@ -37,10 +37,10 @@
/* Kernel includes are necessary so that the filter function for Embree can access the packed BVH.
*/
# include "kernel/bvh/bvh_embree.h"
# include "kernel/bvh/bvh_util.h"
# include "kernel/device/cpu/compat.h"
# include "kernel/device/cpu/globals.h"
# include "kernel/kernel_compat_cpu.h"
# include "kernel/kernel_globals.h"
# include "kernel/kernel_random.h"
# include "kernel/split/kernel_split_data_types.h"
# include "render/hair.h"
# include "render/mesh.h"
@@ -73,69 +73,46 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
CCLIntersectContext *ctx = ((IntersectContext *)args->context)->userRayExt;
const KernelGlobals *kg = ctx->kg;
KernelGlobals *kg = ctx->kg;
switch (ctx->type) {
case CCLIntersectContext::RAY_SHADOW_ALL: {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
/* If no transparent shadows, all light is blocked. */
const int flags = intersection_get_shader_flags(kg, &current_isect);
if (!(flags & (SD_HAS_TRANSPARENT_SHADOW)) || ctx->max_hits == 0) {
ctx->opaque_hit = true;
return;
}
/* Test if we need to record this transparent intersection. */
if (ctx->num_hits < ctx->max_hits || ray->tfar < ctx->max_t) {
/* Skip already recorded intersections. */
int num_recorded_hits = min(ctx->num_hits, ctx->max_hits);
for (int i = 0; i < num_recorded_hits; ++i) {
/* Append the intersection to the end of the array. */
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */
*args->valid = 0;
return;
break;
}
}
/* If maximum number of hits was reached, replace the intersection with the
* highest distance. We want to find the N closest intersections. */
int isect_index = num_recorded_hits;
if (num_recorded_hits + 1 >= ctx->max_hits) {
float max_t = ctx->isect_s[0].t;
int max_recorded_hit = 0;
for (int i = 1; i < num_recorded_hits; ++i) {
if (ctx->isect_s[i].t > max_t) {
max_recorded_hit = i;
max_t = ctx->isect_s[i].t;
}
}
if (num_recorded_hits >= ctx->max_hits) {
isect_index = max_recorded_hit;
}
/* Limit the ray distance and stop counting hits beyond this.
* TODO: is there some way we can tell Embree to stop intersecting beyond
* this distance when max number of hits is reached?. Or maybe it will
* become irrelevant if we make max_hits a very high number on the CPU. */
ctx->max_t = max(current_isect.t, max_t);
Intersection *isect = &ctx->isect_s[ctx->num_hits];
++ctx->num_hits;
*isect = current_isect;
int prim = kernel_tex_fetch(__prim_index, isect->prim);
int shader = 0;
if (kernel_tex_fetch(__prim_type, isect->prim) & PRIMITIVE_ALL_TRIANGLE) {
shader = kernel_tex_fetch(__tri_shader, prim);
}
else {
float4 str = kernel_tex_fetch(__curves, prim);
shader = __float_as_int(str.z);
}
int flag = kernel_tex_fetch(__shaders, shader & SHADER_MASK).flags;
/* If no transparent shadows, all light is blocked. */
if (flag & (SD_HAS_TRANSPARENT_SHADOW)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
ctx->isect_s[isect_index] = current_isect;
}
/* Always increase the number of hits, even beyond ray.max_hits so that
* the caller can detect this as and consider it opaque, or trace another
* ray. */
++ctx->num_hits;
/* This tells Embree to continue tracing. */
*args->valid = 0;
else {
/* Increase the number of hits beyond ray.max_hits
* so that the caller can detect this as opaque. */
++ctx->num_hits;
}
break;
}
case CCLIntersectContext::RAY_LOCAL:
@@ -213,7 +190,7 @@ static void rtc_filter_occluded_func(const RTCFilterFunctionNArguments *args)
if (ctx->num_hits < ctx->max_hits) {
Intersection current_isect;
kernel_embree_convert_hit(kg, ray, hit, &current_isect);
for (size_t i = 0; i < ctx->num_hits; ++i) {
for (size_t i = 0; i < ctx->max_hits; ++i) {
if (current_isect.object == ctx->isect_s[i].object &&
current_isect.prim == ctx->isect_s[i].prim && current_isect.t == ctx->isect_s[i].t) {
/* This intersection was already recorded, skip it. */
@@ -352,7 +329,7 @@ void BVHEmbree::build(Progress &progress, Stats *stats, RTCDevice rtc_device_)
scene = NULL;
}
const bool dynamic = params.bvh_type == BVH_TYPE_DYNAMIC;
const bool dynamic = params.bvh_type == SceneParams::BVH_DYNAMIC;
scene = rtcNewScene(rtc_device);
const RTCSceneFlags scene_flags = (dynamic ? RTC_SCENE_FLAG_DYNAMIC : RTC_SCENE_FLAG_NONE) |

View File

@@ -31,27 +31,6 @@ CCL_NAMESPACE_BEGIN
*/
typedef KernelBVHLayout BVHLayout;
/* Type of BVH, in terms whether it is supported dynamic updates of meshes
* or whether modifying geometry requires full BVH rebuild.
*/
enum BVHType {
/* BVH supports dynamic updates of geometry.
*
* Faster for updating BVH tree when doing modifications in viewport,
* but slower for rendering.
*/
BVH_TYPE_DYNAMIC = 0,
/* BVH tree is calculated for specific scene, updates in geometry
* requires full tree rebuild.
*
* Slower to update BVH tree when modifying objects in viewport, also
* slower to build final BVH tree but gives best possible render speed.
*/
BVH_TYPE_STATIC = 1,
BVH_NUM_TYPES,
};
/* Names bitflag type to denote which BVH layouts are supported by
* particular area.
*

View File

@@ -287,6 +287,9 @@ if(CYCLES_STANDALONE_REPOSITORY)
endif()
set(__boost_packages filesystem regex system thread date_time)
if(WITH_CYCLES_NETWORK)
list(APPEND __boost_packages serialization)
endif()
if(WITH_CYCLES_OSL)
list(APPEND __boost_packages wave)
endif()
@@ -532,13 +535,4 @@ if(WITH_CYCLES_CUDA_BINARIES OR NOT WITH_CUDA_DYNLOAD)
endif()
endif()
###########################################################################
# HIP
###########################################################################
if(NOT WITH_HIP_DYNLOAD)
set(WITH_HIP_DYNLOAD ON)
endif()
unset(_cycles_lib_dir)

View File

@@ -156,16 +156,10 @@ macro(cycles_target_link_libraries target)
${PLATFORM_LINKLIBS}
)
if(WITH_CYCLES_DEVICE_CUDA OR WITH_CYCLES_DEVICE_OPTIX)
if(WITH_CUDA_DYNLOAD)
target_link_libraries(${target} extern_cuew)
else()
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
target_link_libraries(${target} extern_hipew)
if(WITH_CUDA_DYNLOAD)
target_link_libraries(${target} extern_cuew)
else()
target_link_libraries(${target} ${CUDA_CUDA_LIBRARY})
endif()
if(CYCLES_STANDALONE_REPOSITORY)

View File

@@ -22,139 +22,91 @@ set(INC_SYS
../../../extern/clew/include
)
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/cuew/include
)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND INC
../../../extern/hipew/include
../../../extern/cuew/include
)
add_definitions(-DWITH_HIP_DYNLOAD)
add_definitions(-DWITH_CUDA_DYNLOAD)
else()
list(APPEND INC_SYS
${CUDA_TOOLKIT_INCLUDE}
)
add_definitions(-DCYCLES_CUDA_NVCC_EXECUTABLE="${CUDA_NVCC_EXECUTABLE}")
endif()
set(SRC
device.cpp
device_denoise.cpp
device_graphics_interop.cpp
device_kernel.cpp
device_cpu.cpp
device_cuda.cpp
device_denoising.cpp
device_dummy.cpp
device_memory.cpp
device_queue.cpp
)
set(SRC_CPU
cpu/device.cpp
cpu/device.h
cpu/device_impl.cpp
cpu/device_impl.h
cpu/kernel.cpp
cpu/kernel.h
cpu/kernel_function.h
cpu/kernel_thread_globals.cpp
cpu/kernel_thread_globals.h
device_multi.cpp
device_opencl.cpp
device_optix.cpp
device_split_kernel.cpp
device_task.cpp
)
set(SRC_CUDA
cuda/device.cpp
cuda/device.h
cuda/device_impl.cpp
cuda/device_impl.h
cuda/graphics_interop.cpp
cuda/graphics_interop.h
cuda/kernel.cpp
cuda/kernel.h
cuda/queue.cpp
cuda/queue.h
cuda/util.cpp
cuda/util.h
cuda/device_cuda.h
cuda/device_cuda_impl.cpp
)
set(SRC_HIP
hip/device.cpp
hip/device.h
hip/device_impl.cpp
hip/device_impl.h
hip/graphics_interop.cpp
hip/graphics_interop.h
hip/kernel.cpp
hip/kernel.h
hip/queue.cpp
hip/queue.h
hip/util.cpp
hip/util.h
set(SRC_OPENCL
opencl/device_opencl.h
opencl/device_opencl_impl.cpp
opencl/memory_manager.h
opencl/memory_manager.cpp
opencl/opencl_util.cpp
)
set(SRC_DUMMY
dummy/device.cpp
dummy/device.h
)
set(SRC_MULTI
multi/device.cpp
multi/device.h
)
set(SRC_OPTIX
optix/device.cpp
optix/device.h
optix/device_impl.cpp
optix/device_impl.h
optix/queue.cpp
optix/queue.h
optix/util.h
)
if(WITH_CYCLES_NETWORK)
list(APPEND SRC
device_network.cpp
)
endif()
set(SRC_HEADERS
device.h
device_denoise.h
device_graphics_interop.h
device_denoising.h
device_memory.h
device_kernel.h
device_queue.h
device_intern.h
device_network.h
device_split_kernel.h
device_task.h
)
set(LIB
cycles_render
cycles_kernel
cycles_util
${CYCLES_GL_LIBRARIES}
)
if(WITH_CYCLES_DEVICE_OPTIX OR WITH_CYCLES_DEVICE_CUDA)
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_cuew
)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
endif()
endif()
if(WITH_CYCLES_DEVICE_HIP AND WITH_HIP_DYNLOAD)
if(WITH_CUDA_DYNLOAD)
list(APPEND LIB
extern_hipew
extern_cuew
)
else()
list(APPEND LIB
${CUDA_CUDA_LIBRARY}
)
endif()
add_definitions(${GL_DEFINITIONS})
if(WITH_CYCLES_NETWORK)
add_definitions(-DWITH_NETWORK)
endif()
if(WITH_CYCLES_DEVICE_OPENCL)
list(APPEND LIB
extern_clew
)
add_definitions(-DWITH_OPENCL)
endif()
if(WITH_CYCLES_DEVICE_CUDA)
add_definitions(-DWITH_CUDA)
endif()
if(WITH_CYCLES_DEVICE_HIP)
add_definitions(-DWITH_HIP)
endif()
if(WITH_CYCLES_DEVICE_OPTIX)
add_definitions(-DWITH_OPTIX)
endif()
@@ -163,28 +115,18 @@ if(WITH_CYCLES_DEVICE_MULTI)
endif()
if(WITH_OPENIMAGEDENOISE)
add_definitions(-DWITH_OPENIMAGEDENOISE)
add_definitions(-DOIDN_STATIC_LIB)
list(APPEND INC_SYS
${OPENIMAGEDENOISE_INCLUDE_DIRS}
)
list(APPEND LIB
${OPENIMAGEDENOISE_LIBRARIES}
${TBB_LIBRARIES}
)
endif()
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
cycles_add_library(cycles_device "${LIB}"
${SRC}
${SRC_CPU}
${SRC_CUDA}
${SRC_HIP}
${SRC_DUMMY}
${SRC_MULTI}
${SRC_OPTIX}
${SRC_HEADERS}
)
source_group("cpu" FILES ${SRC_CPU})
source_group("cuda" FILES ${SRC_CUDA})
source_group("dummy" FILES ${SRC_DUMMY})
source_group("multi" FILES ${SRC_MULTI})
source_group("optix" FILES ${SRC_OPTIX})
source_group("common" FILES ${SRC} ${SRC_HEADERS})
cycles_add_library(cycles_device "${LIB}" ${SRC} ${SRC_CUDA} ${SRC_OPENCL} ${SRC_HEADERS})

View File

@@ -1,64 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/cpu/device.h"
#include "device/cpu/device_impl.h"
/* Used for `info.denoisers`. */
/* TODO(sergey): The denoisers are probably to be moved completely out of the device into their
* own class. But until then keep API consistent with how it used to work before. */
#include "util/util_openimagedenoise.h"
CCL_NAMESPACE_BEGIN
Device *device_cpu_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
{
return new CPUDevice(info, stats, profiler);
}
void device_cpu_info(vector<DeviceInfo> &devices)
{
DeviceInfo info;
info.type = DEVICE_CPU;
info.description = system_cpu_brand_string();
info.id = "CPU";
info.num = 0;
info.has_osl = true;
info.has_half_images = true;
info.has_nanovdb = true;
info.has_profiling = true;
if (openimagedenoise_supported()) {
info.denoisers |= DENOISER_OPENIMAGEDENOISE;
}
devices.insert(devices.begin(), info);
}
string device_cpu_capabilities()
{
string capabilities = "";
capabilities += system_cpu_support_sse2() ? "SSE2 " : "";
capabilities += system_cpu_support_sse3() ? "SSE3 " : "";
capabilities += system_cpu_support_sse41() ? "SSE41 " : "";
capabilities += system_cpu_support_avx() ? "AVX " : "";
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
if (capabilities[capabilities.size() - 1] == ' ')
capabilities.resize(capabilities.size() - 1);
return capabilities;
}
CCL_NAMESPACE_END

View File

@@ -1,332 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/cpu/device_impl.h"
#include <stdlib.h>
#include <string.h>
/* So ImathMath is included before our kernel_cpu_compat. */
#ifdef WITH_OSL
/* So no context pollution happens from indirectly included windows.h */
# include "util/util_windows.h"
# include <OSL/oslexec.h>
#endif
#ifdef WITH_EMBREE
# include <embree3/rtcore.h>
#endif
#include "device/cpu/kernel.h"
#include "device/cpu/kernel_thread_globals.h"
#include "device/device.h"
// clang-format off
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
#include "kernel/device/cpu/kernel.h"
#include "kernel/kernel_types.h"
#include "kernel/osl/osl_shader.h"
#include "kernel/osl/osl_globals.h"
// clang-format on
#include "bvh/bvh_embree.h"
#include "render/buffers.h"
#include "util/util_debug.h"
#include "util/util_foreach.h"
#include "util/util_function.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_openimagedenoise.h"
#include "util/util_optimization.h"
#include "util/util_progress.h"
#include "util/util_system.h"
#include "util/util_task.h"
#include "util/util_thread.h"
CCL_NAMESPACE_BEGIN
CPUDevice::CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_), texture_info(this, "__texture_info", MEM_GLOBAL)
{
/* Pick any kernel, all of them are supposed to have same level of microarchitecture
* optimization. */
VLOG(1) << "Will be using " << kernels.integrator_init_from_camera.get_uarch_name()
<< " kernels.";
if (info.cpu_threads == 0) {
info.cpu_threads = TaskScheduler::num_threads();
}
#ifdef WITH_OSL
kernel_globals.osl = &osl_globals;
#endif
#ifdef WITH_EMBREE
embree_device = rtcNewDevice("verbose=0");
#endif
need_texture_info = false;
}
CPUDevice::~CPUDevice()
{
#ifdef WITH_EMBREE
rtcReleaseDevice(embree_device);
#endif
texture_info.free();
}
bool CPUDevice::show_samples() const
{
return (info.cpu_threads == 1);
}
BVHLayoutMask CPUDevice::get_bvh_layout_mask() const
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_BVH2;
#ifdef WITH_EMBREE
bvh_layout_mask |= BVH_LAYOUT_EMBREE;
#endif /* WITH_EMBREE */
return bvh_layout_mask;
}
bool CPUDevice::load_texture_info()
{
if (!need_texture_info) {
return false;
}
texture_info.copy_to_device();
need_texture_info = false;
return true;
}
void CPUDevice::mem_alloc(device_memory &mem)
{
if (mem.type == MEM_TEXTURE) {
assert(!"mem_alloc not supported for textures.");
}
else if (mem.type == MEM_GLOBAL) {
assert(!"mem_alloc not supported for global memory.");
}
else {
if (mem.name) {
VLOG(1) << "Buffer allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
}
if (mem.type == MEM_DEVICE_ONLY) {
assert(!mem.host_pointer);
size_t alignment = MIN_ALIGNMENT_CPU_DATA_TYPES;
void *data = util_aligned_malloc(mem.memory_size(), alignment);
mem.device_pointer = (device_ptr)data;
}
else {
mem.device_pointer = (device_ptr)mem.host_pointer;
}
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
}
}
void CPUDevice::mem_copy_to(device_memory &mem)
{
if (mem.type == MEM_GLOBAL) {
global_free(mem);
global_alloc(mem);
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
tex_alloc((device_texture &)mem);
}
else {
if (!mem.device_pointer) {
mem_alloc(mem);
}
/* copy is no-op */
}
}
void CPUDevice::mem_copy_from(
device_memory & /*mem*/, size_t /*y*/, size_t /*w*/, size_t /*h*/, size_t /*elem*/)
{
/* no-op */
}
void CPUDevice::mem_zero(device_memory &mem)
{
if (!mem.device_pointer) {
mem_alloc(mem);
}
if (mem.device_pointer) {
memset((void *)mem.device_pointer, 0, mem.memory_size());
}
}
void CPUDevice::mem_free(device_memory &mem)
{
if (mem.type == MEM_GLOBAL) {
global_free(mem);
}
else if (mem.type == MEM_TEXTURE) {
tex_free((device_texture &)mem);
}
else if (mem.device_pointer) {
if (mem.type == MEM_DEVICE_ONLY) {
util_aligned_free((void *)mem.device_pointer);
}
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
}
}
device_ptr CPUDevice::mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/)
{
return (device_ptr)(((char *)mem.device_pointer) + mem.memory_elements_size(offset));
}
void CPUDevice::const_copy_to(const char *name, void *host, size_t size)
{
#if WITH_EMBREE
if (strcmp(name, "__data") == 0) {
assert(size <= sizeof(KernelData));
// Update scene handle (since it is different for each device on multi devices)
KernelData *const data = (KernelData *)host;
data->bvh.scene = embree_scene;
}
#endif
kernel_const_copy(&kernel_globals, name, host, size);
}
void CPUDevice::global_alloc(device_memory &mem)
{
VLOG(1) << "Global memory allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
kernel_global_memory_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
mem.device_pointer = (device_ptr)mem.host_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
}
void CPUDevice::global_free(device_memory &mem)
{
if (mem.device_pointer) {
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
}
}
void CPUDevice::tex_alloc(device_texture &mem)
{
VLOG(1) << "Texture allocate: " << mem.name << ", "
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
<< string_human_readable_size(mem.memory_size()) << ")";
mem.device_pointer = (device_ptr)mem.host_pointer;
mem.device_size = mem.memory_size();
stats.mem_alloc(mem.device_size);
const uint slot = mem.slot;
if (slot >= texture_info.size()) {
/* Allocate some slots in advance, to reduce amount of re-allocations. */
texture_info.resize(slot + 128);
}
texture_info[slot] = mem.info;
texture_info[slot].data = (uint64_t)mem.host_pointer;
need_texture_info = true;
}
void CPUDevice::tex_free(device_texture &mem)
{
if (mem.device_pointer) {
mem.device_pointer = 0;
stats.mem_free(mem.device_size);
mem.device_size = 0;
need_texture_info = true;
}
}
void CPUDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
#ifdef WITH_EMBREE
if (bvh->params.bvh_layout == BVH_LAYOUT_EMBREE ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE) {
BVHEmbree *const bvh_embree = static_cast<BVHEmbree *>(bvh);
if (refit) {
bvh_embree->refit(progress);
}
else {
bvh_embree->build(progress, &stats, embree_device);
}
if (bvh->params.top_level) {
embree_scene = bvh_embree->scene;
}
}
else
#endif
Device::build_bvh(bvh, progress, refit);
}
const CPUKernels *CPUDevice::get_cpu_kernels() const
{
return &kernels;
}
void CPUDevice::get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> &kernel_thread_globals)
{
/* Ensure latest texture info is loaded into kernel globals before returning. */
load_texture_info();
kernel_thread_globals.clear();
void *osl_memory = get_cpu_osl_memory();
for (int i = 0; i < info.cpu_threads; i++) {
kernel_thread_globals.emplace_back(kernel_globals, osl_memory, profiler);
}
}
void *CPUDevice::get_cpu_osl_memory()
{
#ifdef WITH_OSL
return &osl_globals;
#else
return NULL;
#endif
}
bool CPUDevice::load_kernels(const uint /*kernel_features*/)
{
return true;
}
CCL_NAMESPACE_END

View File

@@ -1,102 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
/* So ImathMath is included before our kernel_cpu_compat. */
#ifdef WITH_OSL
/* So no context pollution happens from indirectly included windows.h */
# include "util/util_windows.h"
# include <OSL/oslexec.h>
#endif
#ifdef WITH_EMBREE
# include <embree3/rtcore.h>
#endif
#include "device/cpu/kernel.h"
#include "device/device.h"
#include "device/device_memory.h"
// clang-format off
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/kernel.h"
#include "kernel/device/cpu/globals.h"
#include "kernel/osl/osl_shader.h"
#include "kernel/osl/osl_globals.h"
// clang-format on
CCL_NAMESPACE_BEGIN
class CPUDevice : public Device {
public:
KernelGlobals kernel_globals;
device_vector<TextureInfo> texture_info;
bool need_texture_info;
#ifdef WITH_OSL
OSLGlobals osl_globals;
#endif
#ifdef WITH_EMBREE
RTCScene embree_scene = NULL;
RTCDevice embree_device;
#endif
CPUKernels kernels;
CPUDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_);
~CPUDevice();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
/* Returns true if the texture info was copied to the device (meaning, some more
* re-initialization might be needed). */
bool load_texture_info();
virtual void mem_alloc(device_memory &mem) override;
virtual void mem_copy_to(device_memory &mem) override;
virtual void mem_copy_from(
device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
virtual void mem_zero(device_memory &mem) override;
virtual void mem_free(device_memory &mem) override;
virtual device_ptr mem_alloc_sub_ptr(device_memory &mem,
size_t offset,
size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_free(device_texture &mem);
void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
virtual const CPUKernels *get_cpu_kernels() const override;
virtual void get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> &kernel_thread_globals) override;
virtual void *get_cpu_osl_memory() override;
protected:
virtual bool load_kernels(uint /*kernel_features*/) override;
};
CCL_NAMESPACE_END

View File

@@ -1,61 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/cpu/kernel.h"
#include "kernel/device/cpu/kernel.h"
CCL_NAMESPACE_BEGIN
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse2, name), \
KERNEL_NAME_EVAL(cpu_sse3, name), KERNEL_NAME_EVAL(cpu_sse41, name), \
KERNEL_NAME_EVAL(cpu_avx, name), KERNEL_NAME_EVAL(cpu_avx2, name)
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
CPUKernels::CPUKernels()
: /* Integrator. */
REGISTER_KERNEL(integrator_init_from_camera),
REGISTER_KERNEL(integrator_init_from_bake),
REGISTER_KERNEL(integrator_intersect_closest),
REGISTER_KERNEL(integrator_intersect_shadow),
REGISTER_KERNEL(integrator_intersect_subsurface),
REGISTER_KERNEL(integrator_intersect_volume_stack),
REGISTER_KERNEL(integrator_shade_background),
REGISTER_KERNEL(integrator_shade_light),
REGISTER_KERNEL(integrator_shade_shadow),
REGISTER_KERNEL(integrator_shade_surface),
REGISTER_KERNEL(integrator_shade_volume),
REGISTER_KERNEL(integrator_megakernel),
/* Shader evaluation. */
REGISTER_KERNEL(shader_eval_displace),
REGISTER_KERNEL(shader_eval_background),
/* Adaptive sampling. */
REGISTER_KERNEL(adaptive_sampling_convergence_check),
REGISTER_KERNEL(adaptive_sampling_filter_x),
REGISTER_KERNEL(adaptive_sampling_filter_y),
/* Cryptomatte. */
REGISTER_KERNEL(cryptomatte_postprocess),
/* Bake. */
REGISTER_KERNEL(bake)
{
}
#undef REGISTER_KERNEL
#undef KERNEL_FUNCTIONS
CCL_NAMESPACE_END

View File

@@ -1,111 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "device/cpu/kernel_function.h"
#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
struct KernelGlobals;
struct IntegratorStateCPU;
struct TileInfo;
class CPUKernels {
public:
/* Integrator. */
using IntegratorFunction =
CPUKernelFunction<void (*)(const KernelGlobals *kg, IntegratorStateCPU *state)>;
using IntegratorShadeFunction = CPUKernelFunction<void (*)(
const KernelGlobals *kg, IntegratorStateCPU *state, ccl_global float *render_buffer)>;
using IntegratorInitFunction = CPUKernelFunction<bool (*)(const KernelGlobals *kg,
IntegratorStateCPU *state,
KernelWorkTile *tile,
ccl_global float *render_buffer)>;
IntegratorInitFunction integrator_init_from_camera;
IntegratorInitFunction integrator_init_from_bake;
IntegratorFunction integrator_intersect_closest;
IntegratorFunction integrator_intersect_shadow;
IntegratorFunction integrator_intersect_subsurface;
IntegratorFunction integrator_intersect_volume_stack;
IntegratorShadeFunction integrator_shade_background;
IntegratorShadeFunction integrator_shade_light;
IntegratorShadeFunction integrator_shade_shadow;
IntegratorShadeFunction integrator_shade_surface;
IntegratorShadeFunction integrator_shade_volume;
IntegratorShadeFunction integrator_megakernel;
/* Shader evaluation. */
using ShaderEvalFunction = CPUKernelFunction<void (*)(
const KernelGlobals *kg, const KernelShaderEvalInput *, float4 *, const int)>;
ShaderEvalFunction shader_eval_displace;
ShaderEvalFunction shader_eval_background;
/* Adaptive stopping. */
using AdaptiveSamplingConvergenceCheckFunction =
CPUKernelFunction<bool (*)(const KernelGlobals *kg,
ccl_global float *render_buffer,
int x,
int y,
float threshold,
bool reset,
int offset,
int stride)>;
using AdaptiveSamplingFilterXFunction =
CPUKernelFunction<void (*)(const KernelGlobals *kg,
ccl_global float *render_buffer,
int y,
int start_x,
int width,
int offset,
int stride)>;
using AdaptiveSamplingFilterYFunction =
CPUKernelFunction<void (*)(const KernelGlobals *kg,
ccl_global float *render_buffer,
int x,
int start_y,
int height,
int offset,
int stride)>;
AdaptiveSamplingConvergenceCheckFunction adaptive_sampling_convergence_check;
AdaptiveSamplingFilterXFunction adaptive_sampling_filter_x;
AdaptiveSamplingFilterYFunction adaptive_sampling_filter_y;
/* Cryptomatte. */
using CryptomattePostprocessFunction = CPUKernelFunction<void (*)(
const KernelGlobals *kg, ccl_global float *render_buffer, int pixel_index)>;
CryptomattePostprocessFunction cryptomatte_postprocess;
/* Bake. */
CPUKernelFunction<void (*)(const KernelGlobals *, float *, int, int, int, int, int)> bake;
CPUKernels();
};
CCL_NAMESPACE_END

View File

@@ -1,124 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "util/util_debug.h"
#include "util/util_system.h"
CCL_NAMESPACE_BEGIN
/* A wrapper around per-microarchitecture variant of a kernel function.
*
* Provides a function-call-like API which gets routed to the most suitable implementation.
*
* For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */
template<typename FunctionType> class CPUKernelFunction {
public:
CPUKernelFunction(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
kernel_info_ = get_best_kernel_info(
kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
}
template<typename... Args> inline auto operator()(Args... args) const
{
assert(kernel_info_.kernel);
return kernel_info_.kernel(args...);
}
const char *get_uarch_name() const
{
return kernel_info_.uarch_name;
}
protected:
/* Helper class which allows to pass human-readable microarchitecture name together with function
* pointer. */
class KernelInfo {
public:
KernelInfo() : KernelInfo("", nullptr)
{
}
/* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
* memory allocation. */
KernelInfo(const char *uarch_name, FunctionType kernel)
: uarch_name(uarch_name), kernel(kernel)
{
}
const char *uarch_name;
FunctionType kernel;
};
KernelInfo get_best_kernel_info(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
/* Silence warnings about unused variables when compiling without some architectures. */
(void)kernel_sse2;
(void)kernel_sse3;
(void)kernel_sse41;
(void)kernel_avx;
(void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
return KernelInfo("AVX2", kernel_avx2);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
return KernelInfo("AVX", kernel_avx);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
return KernelInfo("SSE4.1", kernel_sse41);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
return KernelInfo("SSE3", kernel_sse3);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
return KernelInfo("SSE2", kernel_sse2);
}
#endif
return KernelInfo("default", kernel_default);
}
KernelInfo kernel_info_;
};
CCL_NAMESPACE_END

View File

@@ -1,85 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/cpu/kernel_thread_globals.h"
// clang-format off
#include "kernel/osl/osl_shader.h"
#include "kernel/osl/osl_globals.h"
// clang-format on
#include "util/util_profiling.h"
CCL_NAMESPACE_BEGIN
CPUKernelThreadGlobals::CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
void *osl_globals_memory,
Profiler &cpu_profiler)
: KernelGlobals(kernel_globals), cpu_profiler_(cpu_profiler)
{
reset_runtime_memory();
#ifdef WITH_OSL
OSLShader::thread_init(this, reinterpret_cast<OSLGlobals *>(osl_globals_memory));
#else
(void)osl_globals_memory;
#endif
}
CPUKernelThreadGlobals::CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept
: KernelGlobals(std::move(other)), cpu_profiler_(other.cpu_profiler_)
{
other.reset_runtime_memory();
}
CPUKernelThreadGlobals::~CPUKernelThreadGlobals()
{
#ifdef WITH_OSL
OSLShader::thread_free(this);
#endif
}
CPUKernelThreadGlobals &CPUKernelThreadGlobals::operator=(CPUKernelThreadGlobals &&other)
{
if (this == &other) {
return *this;
}
*static_cast<KernelGlobals *>(this) = *static_cast<KernelGlobals *>(&other);
other.reset_runtime_memory();
return *this;
}
void CPUKernelThreadGlobals::reset_runtime_memory()
{
#ifdef WITH_OSL
osl = nullptr;
#endif
}
void CPUKernelThreadGlobals::start_profiling()
{
cpu_profiler_.add_state(&profiler);
}
void CPUKernelThreadGlobals::stop_profiling()
{
cpu_profiler_.remove_state(&profiler);
}
CCL_NAMESPACE_END

View File

@@ -1,57 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "kernel/device/cpu/compat.h"
#include "kernel/device/cpu/globals.h"
CCL_NAMESPACE_BEGIN
class Profiler;
/* A special class which extends memory ownership of the `KernelGlobals` decoupling any resource
* which is not thread-safe for access. Every worker thread which needs to operate on
* `KernelGlobals` needs to initialize its own copy of this object.
*
* NOTE: Only minimal subset of objects are copied: `KernelData` is never copied. This means that
* there is no unnecessary data duplication happening when using this object. */
class CPUKernelThreadGlobals : public KernelGlobals {
public:
/* TODO(sergey): Would be nice to have properly typed OSLGlobals even in the case when building
* without OSL support. Will avoid need to those unnamed pointers and casts. */
CPUKernelThreadGlobals(const KernelGlobals &kernel_globals,
void *osl_globals_memory,
Profiler &cpu_profiler);
~CPUKernelThreadGlobals();
CPUKernelThreadGlobals(const CPUKernelThreadGlobals &other) = delete;
CPUKernelThreadGlobals(CPUKernelThreadGlobals &&other) noexcept;
CPUKernelThreadGlobals &operator=(const CPUKernelThreadGlobals &other) = delete;
CPUKernelThreadGlobals &operator=(CPUKernelThreadGlobals &&other);
void start_profiling();
void stop_profiling();
protected:
void reset_runtime_memory();
Profiler &cpu_profiler_;
};
CCL_NAMESPACE_END

View File

@@ -0,0 +1,270 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/device.h"
# include "device/device_denoising.h"
# include "device/device_split_kernel.h"
# include "util/util_map.h"
# include "util/util_task.h"
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include "util/util_opengl.h"
# include <cuda.h>
# include <cudaGL.h>
# endif
CCL_NAMESPACE_BEGIN
class CUDASplitKernel;
class CUDADevice : public Device {
friend class CUDASplitKernelFunction;
friend class CUDASplitKernel;
friend class CUDAContextScope;
public:
DedicatedTaskPool task_pool;
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule, cuFilterModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int cuDevId;
int cuDevArchitecture;
bool first_error;
CUDASplitKernel *split_kernel;
struct CUDAMem {
CUDAMem() : texobject(0), array(0), use_mapped_host(false)
{
}
CUtexObject texobject;
CUarray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;
thread_mutex cuda_mem_map_mutex;
struct PixelMem {
GLuint cuPBO;
CUgraphicsResource cuPBOresource;
GLuint cuTexId;
int w, h;
};
map<device_ptr, PixelMem> pixel_mem_map;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
/* Kernels */
struct {
bool loaded;
CUfunction adaptive_stopping;
CUfunction adaptive_filter_x;
CUfunction adaptive_filter_y;
CUfunction adaptive_scale_samples;
int adaptive_num_threads_per_block;
} functions;
static bool have_precompiled_kernels();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
CUDADevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_);
virtual ~CUDADevice();
bool support_device(const DeviceRequestedFeatures & /*requested_features*/);
bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
bool use_split_kernel();
virtual string compile_kernel_get_common_cflags(
const DeviceRequestedFeatures &requested_features, bool filter = false, bool split = false);
string compile_kernel(const DeviceRequestedFeatures &requested_features,
const char *name,
const char *base = "cuda",
bool force_ptx = false);
virtual bool load_kernels(const DeviceRequestedFeatures &requested_features) override;
void load_functions();
void reserve_local_memory(const DeviceRequestedFeatures &requested_features);
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, int offset, int /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_free(device_texture &mem);
bool denoising_non_local_means(device_ptr image_ptr,
device_ptr guide_ptr,
device_ptr variance_ptr,
device_ptr out_ptr,
DenoisingTask *task);
bool denoising_construct_transform(DenoisingTask *task);
bool denoising_accumulate(device_ptr color_ptr,
device_ptr color_variance_ptr,
device_ptr scale_ptr,
int frame,
DenoisingTask *task);
bool denoising_solve(device_ptr output_ptr, DenoisingTask *task);
bool denoising_combine_halves(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect,
DenoisingTask *task);
bool denoising_divide_shadow(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr,
DenoisingTask *task);
bool denoising_get_feature(int mean_offset,
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
float scale,
DenoisingTask *task);
bool denoising_write_feature(int out_offset,
device_ptr from_ptr,
device_ptr buffer_ptr,
DenoisingTask *task);
bool denoising_detect_outliers(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr,
DenoisingTask *task);
void denoise(RenderTile &rtile, DenoisingTask &denoising);
void adaptive_sampling_filter(uint filter_sample,
WorkTile *wtile,
CUdeviceptr d_wtile,
CUstream stream = 0);
void adaptive_sampling_post(RenderTile &rtile,
WorkTile *wtile,
CUdeviceptr d_wtile,
CUstream stream = 0);
void render(DeviceTask &task, RenderTile &rtile, device_vector<WorkTile> &work_tiles);
void film_convert(DeviceTask &task,
device_ptr buffer,
device_ptr rgba_byte,
device_ptr rgba_half);
void shader(DeviceTask &task);
CUdeviceptr map_pixels(device_ptr mem);
void unmap_pixels(device_ptr mem);
void pixels_alloc(device_memory &mem);
void pixels_copy_from(device_memory &mem, int y, int w, int h);
void pixels_free(device_memory &mem);
void draw_pixels(device_memory &mem,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params) override;
void thread_run(DeviceTask &task);
virtual void task_add(DeviceTask &task) override;
virtual void task_wait() override;
virtual void task_cancel() override;
};
CCL_NAMESPACE_END
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,154 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/cuda/kernel.h"
# include "device/cuda/queue.h"
# include "device/cuda/util.h"
# include "device/device.h"
# include "util/util_map.h"
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include <cuda.h>
# include <cudaGL.h>
# endif
CCL_NAMESPACE_BEGIN
class DeviceQueue;
class CUDADevice : public Device {
friend class CUDAContextScope;
public:
CUdevice cuDevice;
CUcontext cuContext;
CUmodule cuModule;
size_t device_texture_headroom;
size_t device_working_headroom;
bool move_texture_to_host;
size_t map_host_used;
size_t map_host_limit;
int can_map_host;
int pitch_alignment;
int cuDevId;
int cuDevArchitecture;
bool first_error;
struct CUDAMem {
CUDAMem() : texobject(0), array(0), use_mapped_host(false)
{
}
CUtexObject texobject;
CUarray array;
/* If true, a mapped host memory in shared_pointer is being used. */
bool use_mapped_host;
};
typedef map<device_memory *, CUDAMem> CUDAMemMap;
CUDAMemMap cuda_mem_map;
thread_mutex cuda_mem_map_mutex;
/* Bindless Textures */
device_vector<TextureInfo> texture_info;
bool need_texture_info;
CUDADeviceKernels kernels;
static bool have_precompiled_kernels();
virtual bool show_samples() const override;
virtual BVHLayoutMask get_bvh_layout_mask() const override;
void set_error(const string &error) override;
CUDADevice(const DeviceInfo &info, Stats &stats, Profiler &profiler);
virtual ~CUDADevice();
bool support_device(const uint /*kernel_features*/);
bool check_peer_access(Device *peer_device) override;
bool use_adaptive_compilation();
virtual string compile_kernel_get_common_cflags(const uint kernel_features);
string compile_kernel(const uint kernel_features,
const char *name,
const char *base = "cuda",
bool force_ptx = false);
virtual bool load_kernels(const uint kernel_features) override;
void reserve_local_memory(const uint kernel_features);
void init_host_memory();
void load_texture_info();
void move_textures_to_host(size_t size, bool for_texture);
CUDAMem *generic_alloc(device_memory &mem, size_t pitch_padding = 0);
void generic_copy_to(device_memory &mem);
void generic_free(device_memory &mem);
void mem_alloc(device_memory &mem) override;
void mem_copy_to(device_memory &mem) override;
void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) override;
void mem_zero(device_memory &mem) override;
void mem_free(device_memory &mem) override;
device_ptr mem_alloc_sub_ptr(device_memory &mem, size_t offset, size_t /*size*/) override;
virtual void const_copy_to(const char *name, void *host, size_t size) override;
void global_alloc(device_memory &mem);
void global_free(device_memory &mem);
void tex_alloc(device_texture &mem);
void tex_free(device_texture &mem);
virtual bool should_use_graphics_interop() override;
virtual unique_ptr<DeviceQueue> gpu_queue_create() override;
int get_num_multiprocessors();
int get_max_num_threads_per_multiprocessor();
protected:
bool get_device_attribute(CUdevice_attribute attribute, int *value);
int get_device_default_attribute(CUdevice_attribute attribute, int default_value);
};
CCL_NAMESPACE_END
#endif

View File

@@ -1,103 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/cuda/graphics_interop.h"
# include "device/cuda/device_impl.h"
# include "device/cuda/util.h"
CCL_NAMESPACE_BEGIN
CUDADeviceGraphicsInterop::CUDADeviceGraphicsInterop(CUDADeviceQueue *queue)
: queue_(queue), device_(static_cast<CUDADevice *>(queue->device))
{
}
CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
{
CUDAContextScope scope(device_);
if (cu_graphics_resource_) {
cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
}
}
void CUDADeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
need_clear_ = display_interop.need_clear;
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
}
CUDAContextScope scope(device_);
if (cu_graphics_resource_) {
cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
}
const CUresult result = cuGraphicsGLRegisterBuffer(
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (result != CUDA_SUCCESS) {
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
buffer_area_ = new_buffer_area;
}
device_ptr CUDADeviceGraphicsInterop::map()
{
if (!cu_graphics_resource_) {
return 0;
}
CUDAContextScope scope(device_);
CUdeviceptr cu_buffer;
size_t bytes;
cuda_device_assert(device_, cuGraphicsMapResources(1, &cu_graphics_resource_, queue_->stream()));
cuda_device_assert(
device_, cuGraphicsResourceGetMappedPointer(&cu_buffer, &bytes, cu_graphics_resource_));
if (need_clear_) {
cuda_device_assert(
device_, cuMemsetD8Async(static_cast<CUdeviceptr>(cu_buffer), 0, bytes, queue_->stream()));
need_clear_ = false;
}
return static_cast<device_ptr>(cu_buffer);
}
void CUDADeviceGraphicsInterop::unmap()
{
CUDAContextScope scope(device_);
cuda_device_assert(device_,
cuGraphicsUnmapResources(1, &cu_graphics_resource_, queue_->stream()));
}
CCL_NAMESPACE_END
#endif

View File

@@ -1,66 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/device_graphics_interop.h"
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include <cuda.h>
# endif
CCL_NAMESPACE_BEGIN
class CUDADevice;
class CUDADeviceQueue;
class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
public:
explicit CUDADeviceGraphicsInterop(CUDADeviceQueue *queue);
CUDADeviceGraphicsInterop(const CUDADeviceGraphicsInterop &other) = delete;
CUDADeviceGraphicsInterop(CUDADeviceGraphicsInterop &&other) noexcept = delete;
~CUDADeviceGraphicsInterop();
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
virtual device_ptr map() override;
virtual void unmap() override;
protected:
CUDADeviceQueue *queue_ = nullptr;
CUDADevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
uint opengl_pbo_id_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
/* The destination was requested to be cleared. */
bool need_clear_ = false;
CUgraphicsResource cu_graphics_resource_ = nullptr;
};
CCL_NAMESPACE_END
#endif

View File

@@ -1,69 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/cuda/kernel.h"
# include "device/cuda/device_impl.h"
CCL_NAMESPACE_BEGIN
void CUDADeviceKernels::load(CUDADevice *device)
{
CUmodule cuModule = device->cuModule;
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
CUDADeviceKernel &kernel = kernels_[i];
/* No mega-kernel used for GPU. */
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
continue;
}
const std::string function_name = std::string("kernel_gpu_") +
device_kernel_as_string((DeviceKernel)i);
cuda_device_assert(device,
cuModuleGetFunction(&kernel.function, cuModule, function_name.c_str()));
if (kernel.function) {
cuda_device_assert(device, cuFuncSetCacheConfig(kernel.function, CU_FUNC_CACHE_PREFER_L1));
cuda_device_assert(
device,
cuOccupancyMaxPotentialBlockSize(
&kernel.min_blocks, &kernel.num_threads_per_block, kernel.function, NULL, 0, 0));
}
else {
LOG(ERROR) << "Unable to load kernel " << function_name;
}
}
loaded = true;
}
const CUDADeviceKernel &CUDADeviceKernels::get(DeviceKernel kernel) const
{
return kernels_[(int)kernel];
}
bool CUDADeviceKernels::available(DeviceKernel kernel) const
{
return kernels_[(int)kernel].function != nullptr;
}
CCL_NAMESPACE_END
#endif /* WITH_CUDA*/

View File

@@ -1,56 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_CUDA
# include "device/device_kernel.h"
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include <cuda.h>
# endif
CCL_NAMESPACE_BEGIN
class CUDADevice;
/* CUDA kernel and associate occupancy information. */
class CUDADeviceKernel {
public:
CUfunction function = nullptr;
int num_threads_per_block = 0;
int min_blocks = 0;
};
/* Cache of CUDA kernels for each DeviceKernel. */
class CUDADeviceKernels {
public:
void load(CUDADevice *device);
const CUDADeviceKernel &get(DeviceKernel kernel) const;
bool available(DeviceKernel kernel) const;
protected:
CUDADeviceKernel kernels_[DEVICE_KERNEL_NUM];
bool loaded = false;
};
CCL_NAMESPACE_END
#endif /* WITH_CUDA */

View File

@@ -1,230 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/cuda/queue.h"
# include "device/cuda/device_impl.h"
# include "device/cuda/graphics_interop.h"
# include "device/cuda/kernel.h"
CCL_NAMESPACE_BEGIN
/* CUDADeviceQueue */
CUDADeviceQueue::CUDADeviceQueue(CUDADevice *device)
: DeviceQueue(device), cuda_device_(device), cuda_stream_(nullptr)
{
const CUDAContextScope scope(cuda_device_);
cuda_device_assert(cuda_device_, cuStreamCreate(&cuda_stream_, CU_STREAM_NON_BLOCKING));
}
CUDADeviceQueue::~CUDADeviceQueue()
{
const CUDAContextScope scope(cuda_device_);
cuStreamDestroy(cuda_stream_);
}
int CUDADeviceQueue::num_concurrent_states(const size_t state_size) const
{
int num_states = max(cuda_device_->get_num_multiprocessors() *
cuda_device_->get_max_num_threads_per_multiprocessor() * 16,
1048576);
const char *factor_str = getenv("CYCLES_CONCURRENT_STATES_FACTOR");
if (factor_str) {
num_states = max((int)(num_states * atof(factor_str)), 1024);
}
VLOG(3) << "GPU queue concurrent states: " << num_states << ", using up to "
<< string_human_readable_size(num_states * state_size);
return num_states;
}
int CUDADeviceQueue::num_concurrent_busy_states() const
{
const int max_num_threads = cuda_device_->get_num_multiprocessors() *
cuda_device_->get_max_num_threads_per_multiprocessor();
if (max_num_threads == 0) {
return 65536;
}
return 4 * max_num_threads;
}
void CUDADeviceQueue::init_execution()
{
/* Synchronize all textures and memory copies before executing task. */
CUDAContextScope scope(cuda_device_);
cuda_device_->load_texture_info();
cuda_device_assert(cuda_device_, cuCtxSynchronize());
debug_init_execution();
}
bool CUDADeviceQueue::kernel_available(DeviceKernel kernel) const
{
return cuda_device_->kernels.available(kernel);
}
bool CUDADeviceQueue::enqueue(DeviceKernel kernel, const int work_size, void *args[])
{
if (cuda_device_->have_error()) {
return false;
}
debug_enqueue(kernel, work_size);
const CUDAContextScope scope(cuda_device_);
const CUDADeviceKernel &cuda_kernel = cuda_device_->kernels.get(kernel);
/* Compute kernel launch parameters. */
const int num_threads_per_block = cuda_kernel.num_threads_per_block;
const int num_blocks = divide_up(work_size, num_threads_per_block);
int shared_mem_bytes = 0;
switch (kernel) {
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
/* See parall_active_index.h for why this amount of shared memory is needed. */
shared_mem_bytes = (num_threads_per_block + 1) * sizeof(int);
break;
default:
break;
}
/* Launch kernel. */
assert_success(cuLaunchKernel(cuda_kernel.function,
num_blocks,
1,
1,
num_threads_per_block,
1,
1,
shared_mem_bytes,
cuda_stream_,
args,
0),
"enqueue");
return !(cuda_device_->have_error());
}
bool CUDADeviceQueue::synchronize()
{
if (cuda_device_->have_error()) {
return false;
}
const CUDAContextScope scope(cuda_device_);
assert_success(cuStreamSynchronize(cuda_stream_), "synchronize");
debug_synchronize();
return !(cuda_device_->have_error());
}
void CUDADeviceQueue::zero_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
cuda_device_->mem_alloc(mem);
}
/* Zero memory on device. */
assert(mem.device_pointer != 0);
const CUDAContextScope scope(cuda_device_);
assert_success(
cuMemsetD8Async((CUdeviceptr)mem.device_pointer, 0, mem.memory_size(), cuda_stream_),
"zero_to_device");
}
void CUDADeviceQueue::copy_to_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
/* Allocate on demand. */
if (mem.device_pointer == 0) {
cuda_device_->mem_alloc(mem);
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
/* Copy memory to device. */
const CUDAContextScope scope(cuda_device_);
assert_success(
cuMemcpyHtoDAsync(
(CUdeviceptr)mem.device_pointer, mem.host_pointer, mem.memory_size(), cuda_stream_),
"copy_to_device");
}
void CUDADeviceQueue::copy_from_device(device_memory &mem)
{
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
if (mem.memory_size() == 0) {
return;
}
assert(mem.device_pointer != 0);
assert(mem.host_pointer != nullptr);
/* Copy memory from device. */
const CUDAContextScope scope(cuda_device_);
assert_success(
cuMemcpyDtoHAsync(
mem.host_pointer, (CUdeviceptr)mem.device_pointer, mem.memory_size(), cuda_stream_),
"copy_from_device");
}
void CUDADeviceQueue::assert_success(CUresult result, const char *operation)
{
if (result != CUDA_SUCCESS) {
const char *name = cuewErrorString(result);
cuda_device_->set_error(string_printf(
"%s in CUDA queue %s (%s)", name, operation, debug_active_kernels().c_str()));
}
}
unique_ptr<DeviceGraphicsInterop> CUDADeviceQueue::graphics_interop_create()
{
return make_unique<CUDADeviceGraphicsInterop>(this);
}
CCL_NAMESPACE_END
#endif /* WITH_CUDA */

View File

@@ -1,69 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_CUDA
# include "device/device_kernel.h"
# include "device/device_memory.h"
# include "device/device_queue.h"
# include "device/cuda/util.h"
CCL_NAMESPACE_BEGIN
class CUDADevice;
class device_memory;
/* Base class for CUDA queues. */
class CUDADeviceQueue : public DeviceQueue {
public:
CUDADeviceQueue(CUDADevice *device);
~CUDADeviceQueue();
virtual int num_concurrent_states(const size_t state_size) const override;
virtual int num_concurrent_busy_states() const override;
virtual void init_execution() override;
virtual bool kernel_available(DeviceKernel kernel) const override;
virtual bool enqueue(DeviceKernel kernel, const int work_size, void *args[]) override;
virtual bool synchronize() override;
virtual void zero_to_device(device_memory &mem) override;
virtual void copy_to_device(device_memory &mem) override;
virtual void copy_from_device(device_memory &mem) override;
virtual CUstream stream()
{
return cuda_stream_;
}
virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create() override;
protected:
CUDADevice *cuda_device_;
CUstream cuda_stream_;
void assert_success(CUresult result, const char *operation);
};
CCL_NAMESPACE_END
#endif /* WITH_CUDA */

View File

@@ -1,61 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef WITH_CUDA
# include "device/cuda/util.h"
# include "device/cuda/device_impl.h"
CCL_NAMESPACE_BEGIN
CUDAContextScope::CUDAContextScope(CUDADevice *device) : device(device)
{
cuda_device_assert(device, cuCtxPushCurrent(device->cuContext));
}
CUDAContextScope::~CUDAContextScope()
{
cuda_device_assert(device, cuCtxPopCurrent(NULL));
}
# ifndef WITH_CUDA_DYNLOAD
const char *cuewErrorString(CUresult result)
{
/* We can only give error code here without major code duplication, that
* should be enough since dynamic loading is only being disabled by folks
* who knows what they're doing anyway.
*
* NOTE: Avoid call from several threads.
*/
static string error;
error = string_printf("%d", result);
return error.c_str();
}
const char *cuewCompilerPath()
{
return CYCLES_CUDA_NVCC_EXECUTABLE;
}
int cuewCompilerVersion()
{
return (CUDA_VERSION / 100) + (CUDA_VERSION % 100 / 10);
}
# endif
CCL_NAMESPACE_END
#endif /* WITH_CUDA */

View File

@@ -1,65 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#ifdef WITH_CUDA
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
# else
# include <cuda.h>
# endif
CCL_NAMESPACE_BEGIN
class CUDADevice;
/* Utility to push/pop CUDA context. */
class CUDAContextScope {
public:
CUDAContextScope(CUDADevice *device);
~CUDAContextScope();
private:
CUDADevice *device;
};
/* Utility for checking return values of CUDA function calls. */
# define cuda_device_assert(cuda_device, stmt) \
{ \
CUresult result = stmt; \
if (result != CUDA_SUCCESS) { \
const char *name = cuewErrorString(result); \
cuda_device->set_error( \
string_printf("%s in %s (%s:%d)", name, #stmt, __FILE__, __LINE__)); \
} \
} \
(void)0
# define cuda_assert(stmt) cuda_device_assert(this, stmt)
# ifndef WITH_CUDA_DYNLOAD
/* Transparently implement some functions, so majority of the file does not need
* to worry about difference between dynamically loaded and linked CUDA at all. */
const char *cuewErrorString(CUresult result);
const char *cuewCompilerPath();
int cuewCompilerVersion();
# endif /* WITH_CUDA_DYNLOAD */
CCL_NAMESPACE_END
#endif /* WITH_CUDA */

View File

@@ -20,19 +20,13 @@
#include "bvh/bvh2.h"
#include "device/device.h"
#include "device/device_queue.h"
#include "device/cpu/device.h"
#include "device/cuda/device.h"
#include "device/dummy/device.h"
#include "device/hip/device.h"
#include "device/multi/device.h"
#include "device/optix/device.h"
#include "device/device_intern.h"
#include "util/util_foreach.h"
#include "util/util_half.h"
#include "util/util_logging.h"
#include "util/util_math.h"
#include "util/util_opengl.h"
#include "util/util_string.h"
#include "util/util_system.h"
#include "util/util_time.h"
@@ -44,16 +38,332 @@ CCL_NAMESPACE_BEGIN
bool Device::need_types_update = true;
bool Device::need_devices_update = true;
thread_mutex Device::device_mutex;
vector<DeviceInfo> Device::opencl_devices;
vector<DeviceInfo> Device::cuda_devices;
vector<DeviceInfo> Device::optix_devices;
vector<DeviceInfo> Device::cpu_devices;
vector<DeviceInfo> Device::hip_devices;
vector<DeviceInfo> Device::network_devices;
uint Device::devices_initialized_mask = 0;
/* Device Requested Features */
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features)
{
os << "Experimental features: " << (requested_features.experimental ? "On" : "Off") << std::endl;
os << "Max nodes group: " << requested_features.max_nodes_group << std::endl;
/* TODO(sergey): Decode bitflag into list of names. */
os << "Nodes features: " << requested_features.nodes_features << std::endl;
os << "Use Hair: " << string_from_bool(requested_features.use_hair) << std::endl;
os << "Use Object Motion: " << string_from_bool(requested_features.use_object_motion)
<< std::endl;
os << "Use Camera Motion: " << string_from_bool(requested_features.use_camera_motion)
<< std::endl;
os << "Use Baking: " << string_from_bool(requested_features.use_baking) << std::endl;
os << "Use Subsurface: " << string_from_bool(requested_features.use_subsurface) << std::endl;
os << "Use Volume: " << string_from_bool(requested_features.use_volume) << std::endl;
os << "Use Branched Integrator: " << string_from_bool(requested_features.use_integrator_branched)
<< std::endl;
os << "Use Patch Evaluation: " << string_from_bool(requested_features.use_patch_evaluation)
<< std::endl;
os << "Use Transparent Shadows: " << string_from_bool(requested_features.use_transparent)
<< std::endl;
os << "Use Principled BSDF: " << string_from_bool(requested_features.use_principled)
<< std::endl;
os << "Use Denoising: " << string_from_bool(requested_features.use_denoising) << std::endl;
os << "Use Displacement: " << string_from_bool(requested_features.use_true_displacement)
<< std::endl;
os << "Use Background Light: " << string_from_bool(requested_features.use_background_light)
<< std::endl;
return os;
}
/* Device */
Device::~Device() noexcept(false)
{
if (!background) {
if (vertex_buffer != 0) {
glDeleteBuffers(1, &vertex_buffer);
}
if (fallback_shader_program != 0) {
glDeleteProgram(fallback_shader_program);
}
}
}
/* TODO move shaders to standalone .glsl file. */
const char *FALLBACK_VERTEX_SHADER =
"#version 330\n"
"uniform vec2 fullscreen;\n"
"in vec2 texCoord;\n"
"in vec2 pos;\n"
"out vec2 texCoord_interp;\n"
"\n"
"vec2 normalize_coordinates()\n"
"{\n"
" return (vec2(2.0) * (pos / fullscreen)) - vec2(1.0);\n"
"}\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = vec4(normalize_coordinates(), 0.0, 1.0);\n"
" texCoord_interp = texCoord;\n"
"}\n\0";
const char *FALLBACK_FRAGMENT_SHADER =
"#version 330\n"
"uniform sampler2D image_texture;\n"
"in vec2 texCoord_interp;\n"
"out vec4 fragColor;\n"
"\n"
"void main()\n"
"{\n"
" fragColor = texture(image_texture, texCoord_interp);\n"
"}\n\0";
static void shader_print_errors(const char *task, const char *log, const char *code)
{
LOG(ERROR) << "Shader: " << task << " error:";
LOG(ERROR) << "===== shader string ====";
stringstream stream(code);
string partial;
int line = 1;
while (getline(stream, partial, '\n')) {
if (line < 10) {
LOG(ERROR) << " " << line << " " << partial;
}
else {
LOG(ERROR) << line << " " << partial;
}
line++;
}
LOG(ERROR) << log;
}
static int bind_fallback_shader(void)
{
GLint status;
GLchar log[5000];
GLsizei length = 0;
GLuint program = 0;
struct Shader {
const char *source;
GLenum type;
} shaders[2] = {{FALLBACK_VERTEX_SHADER, GL_VERTEX_SHADER},
{FALLBACK_FRAGMENT_SHADER, GL_FRAGMENT_SHADER}};
program = glCreateProgram();
for (int i = 0; i < 2; i++) {
GLuint shader = glCreateShader(shaders[i].type);
string source_str = shaders[i].source;
const char *c_str = source_str.c_str();
glShaderSource(shader, 1, &c_str, NULL);
glCompileShader(shader);
glGetShaderiv(shader, GL_COMPILE_STATUS, &status);
if (!status) {
glGetShaderInfoLog(shader, sizeof(log), &length, log);
shader_print_errors("compile", log, c_str);
return 0;
}
glAttachShader(program, shader);
}
/* Link output. */
glBindFragDataLocation(program, 0, "fragColor");
/* Link and error check. */
glLinkProgram(program);
glGetProgramiv(program, GL_LINK_STATUS, &status);
if (!status) {
glGetShaderInfoLog(program, sizeof(log), &length, log);
shader_print_errors("linking", log, FALLBACK_VERTEX_SHADER);
shader_print_errors("linking", log, FALLBACK_FRAGMENT_SHADER);
return 0;
}
return program;
}
bool Device::bind_fallback_display_space_shader(const float width, const float height)
{
if (fallback_status == FALLBACK_SHADER_STATUS_ERROR) {
return false;
}
if (fallback_status == FALLBACK_SHADER_STATUS_NONE) {
fallback_shader_program = bind_fallback_shader();
fallback_status = FALLBACK_SHADER_STATUS_ERROR;
if (fallback_shader_program == 0) {
return false;
}
glUseProgram(fallback_shader_program);
image_texture_location = glGetUniformLocation(fallback_shader_program, "image_texture");
if (image_texture_location < 0) {
LOG(ERROR) << "Shader doesn't contain the 'image_texture' uniform.";
return false;
}
fullscreen_location = glGetUniformLocation(fallback_shader_program, "fullscreen");
if (fullscreen_location < 0) {
LOG(ERROR) << "Shader doesn't contain the 'fullscreen' uniform.";
return false;
}
fallback_status = FALLBACK_SHADER_STATUS_SUCCESS;
}
/* Run this every time. */
glUseProgram(fallback_shader_program);
glUniform1i(image_texture_location, 0);
glUniform2f(fullscreen_location, width, height);
return true;
}
void Device::draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params)
{
const bool use_fallback_shader = (draw_params.bind_display_space_shader_cb == NULL);
assert(rgba.type == MEM_PIXELS);
mem_copy_from(rgba, y, w, h, rgba.memory_elements_size(1));
GLuint texid;
glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &texid);
glBindTexture(GL_TEXTURE_2D, texid);
if (rgba.data_type == TYPE_HALF) {
GLhalf *data_pointer = (GLhalf *)rgba.host_pointer;
data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, w, h, 0, GL_RGBA, GL_HALF_FLOAT, data_pointer);
}
else {
uint8_t *data_pointer = (uint8_t *)rgba.host_pointer;
data_pointer += 4 * y * w;
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, w, h, 0, GL_RGBA, GL_UNSIGNED_BYTE, data_pointer);
}
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
if (transparent) {
glEnable(GL_BLEND);
glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);
}
GLint shader_program;
if (use_fallback_shader) {
if (!bind_fallback_display_space_shader(dw, dh)) {
return;
}
shader_program = fallback_shader_program;
}
else {
draw_params.bind_display_space_shader_cb();
glGetIntegerv(GL_CURRENT_PROGRAM, &shader_program);
}
if (!vertex_buffer) {
glGenBuffers(1, &vertex_buffer);
}
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer);
/* invalidate old contents - avoids stalling if buffer is still waiting in queue to be rendered
*/
glBufferData(GL_ARRAY_BUFFER, 16 * sizeof(float), NULL, GL_STREAM_DRAW);
float *vpointer = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
if (vpointer) {
/* texture coordinate - vertex pair */
vpointer[0] = 0.0f;
vpointer[1] = 0.0f;
vpointer[2] = dx;
vpointer[3] = dy;
vpointer[4] = 1.0f;
vpointer[5] = 0.0f;
vpointer[6] = (float)width + dx;
vpointer[7] = dy;
vpointer[8] = 1.0f;
vpointer[9] = 1.0f;
vpointer[10] = (float)width + dx;
vpointer[11] = (float)height + dy;
vpointer[12] = 0.0f;
vpointer[13] = 1.0f;
vpointer[14] = dx;
vpointer[15] = (float)height + dy;
if (vertex_buffer) {
glUnmapBuffer(GL_ARRAY_BUFFER);
}
}
GLuint vertex_array_object;
GLuint position_attribute, texcoord_attribute;
glGenVertexArrays(1, &vertex_array_object);
glBindVertexArray(vertex_array_object);
texcoord_attribute = glGetAttribLocation(shader_program, "texCoord");
position_attribute = glGetAttribLocation(shader_program, "pos");
glEnableVertexAttribArray(texcoord_attribute);
glEnableVertexAttribArray(position_attribute);
glVertexAttribPointer(
texcoord_attribute, 2, GL_FLOAT, GL_FALSE, 4 * sizeof(float), (const GLvoid *)0);
glVertexAttribPointer(position_attribute,
2,
GL_FLOAT,
GL_FALSE,
4 * sizeof(float),
(const GLvoid *)(sizeof(float) * 2));
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
if (vertex_buffer) {
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
if (use_fallback_shader) {
glUseProgram(0);
}
else {
draw_params.unbind_display_space_shader_cb();
}
glDeleteVertexArrays(1, &vertex_array_object);
glBindTexture(GL_TEXTURE_2D, 0);
glDeleteTextures(1, &texid);
if (transparent) {
glDisable(GL_BLEND);
}
}
void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
@@ -69,14 +379,14 @@ void Device::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *Device::create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
#ifdef WITH_MULTI
if (!info.multi_devices.empty()) {
/* Always create a multi device when info contains multiple devices.
* This is done so that the type can still be e.g. DEVICE_CPU to indicate
* that it is a homogeneous collection of devices, which simplifies checks. */
return device_multi_create(info, stats, profiler);
return device_multi_create(info, stats, profiler, background);
}
#endif
@@ -84,34 +394,37 @@ Device *Device::create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
switch (info.type) {
case DEVICE_CPU:
device = device_cpu_create(info, stats, profiler);
device = device_cpu_create(info, stats, profiler, background);
break;
#ifdef WITH_CUDA
case DEVICE_CUDA:
if (device_cuda_init())
device = device_cuda_create(info, stats, profiler);
device = device_cuda_create(info, stats, profiler, background);
break;
#endif
#ifdef WITH_OPTIX
case DEVICE_OPTIX:
if (device_optix_init())
device = device_optix_create(info, stats, profiler);
device = device_optix_create(info, stats, profiler, background);
break;
#endif
#ifdef WITH_HIP
case DEVICE_HIP:
if (device_hip_init())
device = device_hip_create(info, stats, profiler);
#ifdef WITH_NETWORK
case DEVICE_NETWORK:
device = device_network_create(info, stats, profiler, "127.0.0.1");
break;
#endif
#ifdef WITH_OPENCL
case DEVICE_OPENCL:
if (device_opencl_init())
device = device_opencl_create(info, stats, profiler, background);
break;
#endif
default:
break;
}
if (device == NULL) {
device = device_dummy_create(info, stats, profiler);
device = device_dummy_create(info, stats, profiler, background);
}
return device;
@@ -125,10 +438,12 @@ DeviceType Device::type_from_string(const char *name)
return DEVICE_CUDA;
else if (strcmp(name, "OPTIX") == 0)
return DEVICE_OPTIX;
else if (strcmp(name, "OPENCL") == 0)
return DEVICE_OPENCL;
else if (strcmp(name, "NETWORK") == 0)
return DEVICE_NETWORK;
else if (strcmp(name, "MULTI") == 0)
return DEVICE_MULTI;
else if (strcmp(name, "HIP") == 0)
return DEVICE_HIP;
return DEVICE_NONE;
}
@@ -141,10 +456,12 @@ string Device::string_from_type(DeviceType type)
return "CUDA";
else if (type == DEVICE_OPTIX)
return "OPTIX";
else if (type == DEVICE_OPENCL)
return "OPENCL";
else if (type == DEVICE_NETWORK)
return "NETWORK";
else if (type == DEVICE_MULTI)
return "MULTI";
else if (type == DEVICE_HIP)
return "HIP";
return "";
}
@@ -159,10 +476,12 @@ vector<DeviceType> Device::available_types()
#ifdef WITH_OPTIX
types.push_back(DEVICE_OPTIX);
#endif
#ifdef WITH_HIP
types.push_back(DEVICE_HIP);
#ifdef WITH_OPENCL
types.push_back(DEVICE_OPENCL);
#endif
#ifdef WITH_NETWORK
types.push_back(DEVICE_NETWORK);
#endif
return types;
}
@@ -174,6 +493,20 @@ vector<DeviceInfo> Device::available_devices(uint mask)
thread_scoped_lock lock(device_mutex);
vector<DeviceInfo> devices;
#ifdef WITH_OPENCL
if (mask & DEVICE_MASK_OPENCL) {
if (!(devices_initialized_mask & DEVICE_MASK_OPENCL)) {
if (device_opencl_init()) {
device_opencl_info(opencl_devices);
}
devices_initialized_mask |= DEVICE_MASK_OPENCL;
}
foreach (DeviceInfo &info, opencl_devices) {
devices.push_back(info);
}
}
#endif
#if defined(WITH_CUDA) || defined(WITH_OPTIX)
if (mask & (DEVICE_MASK_CUDA | DEVICE_MASK_OPTIX)) {
if (!(devices_initialized_mask & DEVICE_MASK_CUDA)) {
@@ -204,20 +537,6 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
#endif
#ifdef WITH_HIP
if (mask & DEVICE_MASK_HIP) {
if (!(devices_initialized_mask & DEVICE_MASK_HIP)) {
if (device_hip_init()) {
device_hip_info(hip_devices);
}
devices_initialized_mask |= DEVICE_MASK_HIP;
}
foreach (DeviceInfo &info, hip_devices) {
devices.push_back(info);
}
}
#endif
if (mask & DEVICE_MASK_CPU) {
if (!(devices_initialized_mask & DEVICE_MASK_CPU)) {
device_cpu_info(cpu_devices);
@@ -228,6 +547,18 @@ vector<DeviceInfo> Device::available_devices(uint mask)
}
}
#ifdef WITH_NETWORK
if (mask & DEVICE_MASK_NETWORK) {
if (!(devices_initialized_mask & DEVICE_MASK_NETWORK)) {
device_network_info(network_devices);
devices_initialized_mask |= DEVICE_MASK_NETWORK;
}
foreach (DeviceInfo &info, network_devices) {
devices.push_back(info);
}
}
#endif
return devices;
}
@@ -249,6 +580,15 @@ string Device::device_capabilities(uint mask)
capabilities += device_cpu_capabilities() + "\n";
}
#ifdef WITH_OPENCL
if (mask & DEVICE_MASK_OPENCL) {
if (device_opencl_init()) {
capabilities += "\nOpenCL device capabilities:\n";
capabilities += device_opencl_capabilities();
}
}
#endif
#ifdef WITH_CUDA
if (mask & DEVICE_MASK_CUDA) {
if (device_cuda_init()) {
@@ -258,15 +598,6 @@ string Device::device_capabilities(uint mask)
}
#endif
#ifdef WITH_HIP
if (mask & DEVICE_MASK_HIP) {
if (device_hip_init()) {
capabilities += "\nHIP device capabilities:\n";
capabilities += device_hip_capabilities();
}
}
#endif
return capabilities;
}
@@ -282,13 +613,16 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
}
DeviceInfo info;
info.type = DEVICE_NONE;
info.type = subdevices.front().type;
info.id = "MULTI";
info.description = "Multi Device";
info.num = 0;
info.has_half_images = true;
info.has_nanovdb = true;
info.has_volume_decoupled = true;
info.has_branched_path = true;
info.has_adaptive_stop_per_sample = true;
info.has_osl = true;
info.has_profiling = true;
info.has_peer_memory = false;
@@ -326,16 +660,16 @@ DeviceInfo Device::get_multi_device(const vector<DeviceInfo> &subdevices,
info.id += device.id;
/* Set device type to MULTI if subdevices are not of a common type. */
if (info.type == DEVICE_NONE) {
info.type = device.type;
}
else if (device.type != info.type) {
if (device.type != info.type) {
info.type = DEVICE_MULTI;
}
/* Accumulate device info. */
info.has_half_images &= device.has_half_images;
info.has_nanovdb &= device.has_nanovdb;
info.has_volume_decoupled &= device.has_volume_decoupled;
info.has_branched_path &= device.has_branched_path;
info.has_adaptive_stop_per_sample &= device.has_adaptive_stop_per_sample;
info.has_osl &= device.has_osl;
info.has_profiling &= device.has_profiling;
info.has_peer_memory |= device.has_peer_memory;
@@ -355,33 +689,60 @@ void Device::free_memory()
devices_initialized_mask = 0;
cuda_devices.free_memory();
optix_devices.free_memory();
hip_devices.free_memory();
opencl_devices.free_memory();
cpu_devices.free_memory();
}
unique_ptr<DeviceQueue> Device::gpu_queue_create()
{
LOG(FATAL) << "Device does not support queues.";
return nullptr;
}
const CPUKernels *Device::get_cpu_kernels() const
{
LOG(FATAL) << "Device does not support CPU kernels.";
return nullptr;
}
void Device::get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/)
{
LOG(FATAL) << "Device does not support CPU kernels.";
}
void *Device::get_cpu_osl_memory()
{
return nullptr;
network_devices.free_memory();
}
/* DeviceInfo */
void DeviceInfo::add_denoising_devices(DenoiserType denoiser_type)
{
assert(denoising_devices.empty());
if (denoiser_type == DENOISER_OPTIX && type != DEVICE_OPTIX) {
vector<DeviceInfo> optix_devices = Device::available_devices(DEVICE_MASK_OPTIX);
if (!optix_devices.empty()) {
/* Convert to a special multi device with separate denoising devices. */
if (multi_devices.empty()) {
multi_devices.push_back(*this);
}
/* Try to use the same physical devices for denoising. */
for (const DeviceInfo &cuda_device : multi_devices) {
if (cuda_device.type == DEVICE_CUDA) {
for (const DeviceInfo &optix_device : optix_devices) {
if (cuda_device.num == optix_device.num) {
id += optix_device.id;
denoising_devices.push_back(optix_device);
break;
}
}
}
}
if (denoising_devices.empty()) {
/* Simply use the first available OptiX device. */
const DeviceInfo optix_device = optix_devices.front();
id += optix_device.id; /* Uniquely identify this special multi device. */
denoising_devices.push_back(optix_device);
}
denoisers = denoiser_type;
}
}
else if (denoiser_type == DENOISER_OPENIMAGEDENOISE && type != DEVICE_CPU) {
/* Convert to a special multi device with separate denoising devices. */
if (multi_devices.empty()) {
multi_devices.push_back(*this);
}
/* Add CPU denoising devices. */
DeviceInfo cpu_device = Device::available_devices(DEVICE_MASK_CPU).front();
denoising_devices.push_back(cpu_device);
denoisers = denoiser_type;
}
}
CCL_NAMESPACE_END

View File

@@ -21,48 +21,52 @@
#include "bvh/bvh_params.h"
#include "device/device_denoise.h"
#include "device/device_memory.h"
#include "device/device_task.h"
#include "util/util_function.h"
#include "util/util_list.h"
#include "util/util_logging.h"
#include "util/util_stats.h"
#include "util/util_string.h"
#include "util/util_texture.h"
#include "util/util_thread.h"
#include "util/util_types.h"
#include "util/util_unique_ptr.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class BVH;
class DeviceQueue;
class Progress;
class CPUKernels;
class CPUKernelThreadGlobals;
class RenderTile;
/* Device Types */
enum DeviceType {
DEVICE_NONE = 0,
DEVICE_CPU,
DEVICE_OPENCL,
DEVICE_CUDA,
DEVICE_NETWORK,
DEVICE_MULTI,
DEVICE_OPTIX,
DEVICE_HIP,
DEVICE_DUMMY,
};
enum DeviceTypeMask {
DEVICE_MASK_CPU = (1 << DEVICE_CPU),
DEVICE_MASK_OPENCL = (1 << DEVICE_OPENCL),
DEVICE_MASK_CUDA = (1 << DEVICE_CUDA),
DEVICE_MASK_OPTIX = (1 << DEVICE_OPTIX),
DEVICE_MASK_HIP = (1 << DEVICE_HIP),
DEVICE_MASK_NETWORK = (1 << DEVICE_NETWORK),
DEVICE_MASK_ALL = ~0
};
enum DeviceKernelStatus {
DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE,
DEVICE_KERNEL_USING_FEATURE_KERNEL,
DEVICE_KERNEL_FEATURE_KERNEL_INVALID,
DEVICE_KERNEL_UNKNOWN,
};
#define DEVICE_MASK(type) (DeviceTypeMask)(1 << type)
class DeviceInfo {
@@ -71,16 +75,20 @@ class DeviceInfo {
string description;
string id; /* used for user preferences, should stay fixed with changing hardware config */
int num;
bool display_device; /* GPU is used as a display device. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_half_images; /* Support half-float textures. */
bool has_osl; /* Support Open Shading Language. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
bool has_gpu_queue; /* Device supports GPU queue. */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
bool display_device; /* GPU is used as a display device. */
bool has_half_images; /* Support half-float textures. */
bool has_nanovdb; /* Support NanoVDB volumes. */
bool has_volume_decoupled; /* Decoupled volume shading. */
bool has_branched_path; /* Supports branched path tracing. */
bool has_adaptive_stop_per_sample; /* Per-sample adaptive sampling stopping. */
bool has_osl; /* Support Open Shading Language. */
bool use_split_kernel; /* Use split or mega kernel. */
bool has_profiling; /* Supports runtime collection of profiling info. */
bool has_peer_memory; /* GPU has P2P access to memory of another GPU. */
DenoiserTypeMask denoisers; /* Supported denoiser types. */
int cpu_threads;
vector<DeviceInfo> multi_devices;
vector<DeviceInfo> denoising_devices;
string error_msg;
DeviceInfo()
@@ -92,36 +100,228 @@ class DeviceInfo {
display_device = false;
has_half_images = false;
has_nanovdb = false;
has_volume_decoupled = false;
has_branched_path = true;
has_adaptive_stop_per_sample = false;
has_osl = false;
use_split_kernel = false;
has_profiling = false;
has_peer_memory = false;
has_gpu_queue = false;
denoisers = DENOISER_NONE;
}
bool operator==(const DeviceInfo &info) const
bool operator==(const DeviceInfo &info)
{
/* Multiple Devices with the same ID would be very bad. */
assert(id != info.id ||
(type == info.type && num == info.num && description == info.description));
return id == info.id;
}
/* Add additional devices needed for the specified denoiser. */
void add_denoising_devices(DenoiserType denoiser_type);
};
class DeviceRequestedFeatures {
public:
/* Use experimental feature set. */
bool experimental;
/* Selective nodes compilation. */
/* Identifier of a node group up to which all the nodes needs to be
* compiled in. Nodes from higher group indices will be ignores.
*/
int max_nodes_group;
/* Features bitfield indicating which features from the requested group
* will be compiled in. Nodes which corresponds to features which are not
* in this bitfield will be ignored even if they're in the requested group.
*/
int nodes_features;
/* BVH/sampling kernel features. */
bool use_hair;
bool use_hair_thick;
bool use_object_motion;
bool use_camera_motion;
/* Denotes whether baking functionality is needed. */
bool use_baking;
/* Use subsurface scattering materials. */
bool use_subsurface;
/* Use volume materials. */
bool use_volume;
/* Use branched integrator. */
bool use_integrator_branched;
/* Use OpenSubdiv patch evaluation */
bool use_patch_evaluation;
/* Use Transparent shadows */
bool use_transparent;
/* Use various shadow tricks, such as shadow catcher. */
bool use_shadow_tricks;
/* Per-uber shader usage flags. */
bool use_principled;
/* Denoising features. */
bool use_denoising;
/* Use raytracing in shaders. */
bool use_shader_raytrace;
/* Use true displacement */
bool use_true_displacement;
/* Use background lights */
bool use_background_light;
DeviceRequestedFeatures()
{
/* TODO(sergey): Find more meaningful defaults. */
max_nodes_group = 0;
nodes_features = 0;
use_hair = false;
use_hair_thick = false;
use_object_motion = false;
use_camera_motion = false;
use_baking = false;
use_subsurface = false;
use_volume = false;
use_integrator_branched = false;
use_patch_evaluation = false;
use_transparent = false;
use_shadow_tricks = false;
use_principled = false;
use_denoising = false;
use_shader_raytrace = false;
use_true_displacement = false;
use_background_light = false;
}
bool modified(const DeviceRequestedFeatures &requested_features)
{
return !(max_nodes_group == requested_features.max_nodes_group &&
nodes_features == requested_features.nodes_features &&
use_hair == requested_features.use_hair &&
use_hair_thick == requested_features.use_hair_thick &&
use_object_motion == requested_features.use_object_motion &&
use_camera_motion == requested_features.use_camera_motion &&
use_baking == requested_features.use_baking &&
use_subsurface == requested_features.use_subsurface &&
use_volume == requested_features.use_volume &&
use_integrator_branched == requested_features.use_integrator_branched &&
use_patch_evaluation == requested_features.use_patch_evaluation &&
use_transparent == requested_features.use_transparent &&
use_shadow_tricks == requested_features.use_shadow_tricks &&
use_principled == requested_features.use_principled &&
use_denoising == requested_features.use_denoising &&
use_shader_raytrace == requested_features.use_shader_raytrace &&
use_true_displacement == requested_features.use_true_displacement &&
use_background_light == requested_features.use_background_light);
}
/* Convert the requested features structure to a build options,
* which could then be passed to compilers.
*/
string get_build_options() const
{
string build_options = "";
if (experimental) {
build_options += "-D__KERNEL_EXPERIMENTAL__ ";
}
build_options += "-D__NODES_MAX_GROUP__=" + string_printf("%d", max_nodes_group);
build_options += " -D__NODES_FEATURES__=" + string_printf("%d", nodes_features);
if (!use_hair) {
build_options += " -D__NO_HAIR__";
}
if (!use_object_motion) {
build_options += " -D__NO_OBJECT_MOTION__";
}
if (!use_camera_motion) {
build_options += " -D__NO_CAMERA_MOTION__";
}
if (!use_baking) {
build_options += " -D__NO_BAKING__";
}
if (!use_volume) {
build_options += " -D__NO_VOLUME__";
}
if (!use_subsurface) {
build_options += " -D__NO_SUBSURFACE__";
}
if (!use_integrator_branched) {
build_options += " -D__NO_BRANCHED_PATH__";
}
if (!use_patch_evaluation) {
build_options += " -D__NO_PATCH_EVAL__";
}
if (!use_transparent && !use_volume) {
build_options += " -D__NO_TRANSPARENT__";
}
if (!use_shadow_tricks) {
build_options += " -D__NO_SHADOW_TRICKS__";
}
if (!use_principled) {
build_options += " -D__NO_PRINCIPLED__";
}
if (!use_denoising) {
build_options += " -D__NO_DENOISING__";
}
if (!use_shader_raytrace) {
build_options += " -D__NO_SHADER_RAYTRACE__";
}
return build_options;
}
};
std::ostream &operator<<(std::ostream &os, const DeviceRequestedFeatures &requested_features);
/* Device */
struct DeviceDrawParams {
function<void()> bind_display_space_shader_cb;
function<void()> unbind_display_space_shader_cb;
};
class Device {
friend class device_sub_ptr;
protected:
Device(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: info(info_), stats(stats_), profiler(profiler_)
enum {
FALLBACK_SHADER_STATUS_NONE = 0,
FALLBACK_SHADER_STATUS_ERROR,
FALLBACK_SHADER_STATUS_SUCCESS,
};
Device(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background)
: background(background),
vertex_buffer(0),
fallback_status(FALLBACK_SHADER_STATUS_NONE),
fallback_shader_program(0),
info(info_),
stats(stats_),
profiler(profiler_)
{
}
bool background;
string error_msg;
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, size_t /*offset*/, size_t /*size*/)
/* used for real time display */
unsigned int vertex_buffer;
int fallback_status, fallback_shader_program;
int image_texture_location, fullscreen_location;
bool bind_fallback_display_space_shader(const float width, const float height);
virtual device_ptr mem_alloc_sub_ptr(device_memory & /*mem*/, int /*offset*/, int /*size*/)
{
/* Only required for devices that implement denoising. */
assert(false);
@@ -161,31 +361,67 @@ class Device {
Stats &stats;
Profiler &profiler;
/* memory alignment */
virtual int mem_sub_ptr_alignment()
{
return MIN_ALIGNMENT_CPU_DATA_TYPES;
}
/* constant memory */
virtual void const_copy_to(const char *name, void *host, size_t size) = 0;
/* open shading language, only for CPU device */
virtual void *osl_memory()
{
return NULL;
}
/* load/compile kernels, must be called before adding tasks */
virtual bool load_kernels(uint /*kernel_features*/)
virtual bool load_kernels(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* GPU device only functions.
* These may not be used on CPU or multi-devices. */
/* Wait for device to become available to upload data and receive tasks
* This method is used by the OpenCL device to load the
* optimized kernels or when not (yet) available load the
* generic kernels (only during foreground rendering) */
virtual bool wait_for_availability(const DeviceRequestedFeatures & /*requested_features*/)
{
return true;
}
/* Check if there are 'better' kernels available to be used
* We can switch over to these kernels
* This method is used to determine if we can switch the preview kernels
* to regular kernels */
virtual DeviceKernelStatus get_active_kernel_switch_state()
{
return DEVICE_KERNEL_USING_FEATURE_KERNEL;
}
/* Create new queue for executing kernels in. */
virtual unique_ptr<DeviceQueue> gpu_queue_create();
/* tasks */
virtual int get_split_task_count(DeviceTask &)
{
return 1;
}
/* CPU device only functions.
* These may not be used on GPU or multi-devices. */
virtual void task_add(DeviceTask &task) = 0;
virtual void task_wait() = 0;
virtual void task_cancel() = 0;
/* Get CPU kernel functions for native instruction set. */
virtual const CPUKernels *get_cpu_kernels() const;
/* Get kernel globals to pass to kernels. */
virtual void get_cpu_kernel_thread_globals(
vector<CPUKernelThreadGlobals> & /*kernel_thread_globals*/);
/* Get OpenShadingLanguage memory buffer. */
virtual void *get_cpu_osl_memory();
/* opengl drawing */
virtual void draw_pixels(device_memory &mem,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params);
/* acceleration structure building */
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
@@ -193,11 +429,25 @@ class Device {
/* OptiX specific destructor. */
virtual void release_optix_bvh(BVH * /*bvh*/){};
#ifdef WITH_NETWORK
/* networking */
void server_run();
#endif
/* multi device */
virtual void map_tile(Device * /*sub_device*/, RenderTile & /*tile*/)
{
}
virtual int device_number(Device * /*sub_device*/)
{
return 0;
}
virtual void map_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}
virtual void unmap_neighbor_tiles(Device * /*sub_device*/, RenderTileNeighbors & /*neighbors*/)
{
}
virtual bool is_resident(device_ptr /*key*/, Device *sub_device)
{
@@ -210,47 +460,11 @@ class Device {
return false;
}
/* Graphics resources interoperability.
*
* The interoperability comes here by the meaning that the device is capable of computing result
* directly into an OpenGL (or other graphics library) buffer. */
/* Check display is to be updated using graphics interoperability.
* The interoperability can not be used is it is not supported by the device. But the device
* might also force disable the interoperability if it detects that it will be slower than
* copying pixels from the render buffer. */
virtual bool should_use_graphics_interop()
{
return false;
}
/* Buffer denoising. */
/* Returns true if task is fully handled. */
virtual bool denoise_buffer(const DeviceDenoiseTask & /*task*/)
{
LOG(ERROR) << "Request buffer denoising from a device which does not support it.";
return false;
}
virtual DeviceQueue *get_denoise_queue()
{
LOG(ERROR) << "Request denoising queue from a device which does not support it.";
return nullptr;
}
/* Sub-devices */
/* Run given callback for every individual device which will be handling rendering.
* For the single device the callback is called for the device itself. For the multi-device the
* callback is only called for the sub-devices. */
virtual void foreach_device(const function<void(Device *)> &callback)
{
callback(this);
}
/* static */
static Device *create(const DeviceInfo &info, Stats &stats, Profiler &profiler);
static Device *create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
bool background = true);
static DeviceType type_from_string(const char *name);
static string string_from_type(DeviceType type);
@@ -275,7 +489,7 @@ class Device {
virtual void mem_alloc(device_memory &mem) = 0;
virtual void mem_copy_to(device_memory &mem) = 0;
virtual void mem_copy_from(device_memory &mem, size_t y, size_t w, size_t h, size_t elem) = 0;
virtual void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) = 0;
virtual void mem_zero(device_memory &mem) = 0;
virtual void mem_free(device_memory &mem) = 0;
@@ -285,8 +499,9 @@ class Device {
static thread_mutex device_mutex;
static vector<DeviceInfo> cuda_devices;
static vector<DeviceInfo> optix_devices;
static vector<DeviceInfo> opencl_devices;
static vector<DeviceInfo> cpu_devices;
static vector<DeviceInfo> hip_devices;
static vector<DeviceInfo> network_devices;
static uint devices_initialized_mask;
};

File diff suppressed because it is too large Load Diff

View File

@@ -14,25 +14,21 @@
* limitations under the License.
*/
#include "device/cuda/device.h"
#include "util/util_logging.h"
#ifdef WITH_CUDA
# include "device/cuda/device_impl.h"
# include "device/device.h"
# include "device/cuda/device_cuda.h"
# include "device/device.h"
# include "device/device_intern.h"
# include "util/util_logging.h"
# include "util/util_string.h"
# include "util/util_windows.h"
#endif /* WITH_CUDA */
CCL_NAMESPACE_BEGIN
bool device_cuda_init()
{
#if !defined(WITH_CUDA)
return false;
#elif defined(WITH_CUDA_DYNLOAD)
# ifdef WITH_CUDA_DYNLOAD
static bool initialized = false;
static bool result = false;
@@ -63,27 +59,16 @@ bool device_cuda_init()
}
return result;
#else /* WITH_CUDA_DYNLOAD */
# else /* WITH_CUDA_DYNLOAD */
return true;
#endif /* WITH_CUDA_DYNLOAD */
# endif /* WITH_CUDA_DYNLOAD */
}
Device *device_cuda_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
#ifdef WITH_CUDA
return new CUDADevice(info, stats, profiler);
#else
(void)info;
(void)stats;
(void)profiler;
LOG(FATAL) << "Request to create CUDA device without compiled-in support. Should never happen.";
return nullptr;
#endif
return new CUDADevice(info, stats, profiler, background);
}
#ifdef WITH_CUDA
static CUresult device_cuda_safe_init()
{
# ifdef _WIN32
@@ -101,11 +86,9 @@ static CUresult device_cuda_safe_init()
return cuInit(0);
# endif
}
#endif /* WITH_CUDA */
void device_cuda_info(vector<DeviceInfo> &devices)
{
#ifdef WITH_CUDA
CUresult result = device_cuda_safe_init();
if (result != CUDA_SUCCESS) {
if (result != CUDA_ERROR_NO_DEVICE)
@@ -146,9 +129,9 @@ void device_cuda_info(vector<DeviceInfo> &devices)
info.has_half_images = (major >= 3);
info.has_nanovdb = true;
info.denoisers = 0;
info.has_gpu_queue = true;
info.has_volume_decoupled = false;
info.has_adaptive_stop_per_sample = false;
info.denoisers = DENOISER_NLM;
/* Check if the device has P2P access to any other device in the system. */
for (int peer_num = 0; peer_num < count && !info.has_peer_memory; peer_num++) {
@@ -199,14 +182,10 @@ void device_cuda_info(vector<DeviceInfo> &devices)
if (!display_devices.empty())
devices.insert(devices.end(), display_devices.begin(), display_devices.end());
#else /* WITH_CUDA */
(void)devices;
#endif /* WITH_CUDA */
}
string device_cuda_capabilities()
{
#ifdef WITH_CUDA
CUresult result = device_cuda_safe_init();
if (result != CUDA_SUCCESS) {
if (result != CUDA_ERROR_NO_DEVICE) {
@@ -331,10 +310,8 @@ string device_cuda_capabilities()
}
return capabilities;
#else /* WITH_CUDA */
return "";
#endif /* WITH_CUDA */
}
CCL_NAMESPACE_END
#endif

View File

@@ -1,88 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/device_denoise.h"
CCL_NAMESPACE_BEGIN
const char *denoiserTypeToHumanReadable(DenoiserType type)
{
switch (type) {
case DENOISER_OPTIX:
return "OptiX";
case DENOISER_OPENIMAGEDENOISE:
return "OpenImageDenoise";
case DENOISER_NUM:
case DENOISER_NONE:
case DENOISER_ALL:
return "UNKNOWN";
}
return "UNKNOWN";
}
const NodeEnum *DenoiseParams::get_type_enum()
{
static NodeEnum type_enum;
if (type_enum.empty()) {
type_enum.insert("optix", DENOISER_OPTIX);
type_enum.insert("openimageio", DENOISER_OPENIMAGEDENOISE);
}
return &type_enum;
}
const NodeEnum *DenoiseParams::get_prefilter_enum()
{
static NodeEnum prefilter_enum;
if (prefilter_enum.empty()) {
prefilter_enum.insert("none", DENOISER_PREFILTER_NONE);
prefilter_enum.insert("fast", DENOISER_PREFILTER_FAST);
prefilter_enum.insert("accurate", DENOISER_PREFILTER_ACCURATE);
}
return &prefilter_enum;
}
NODE_DEFINE(DenoiseParams)
{
NodeType *type = NodeType::add("denoise_params", create);
const NodeEnum *type_enum = get_type_enum();
const NodeEnum *prefilter_enum = get_prefilter_enum();
SOCKET_BOOLEAN(use, "Use", false);
SOCKET_ENUM(type, "Type", *type_enum, DENOISER_OPENIMAGEDENOISE);
SOCKET_INT(start_sample, "Start Sample", 0);
SOCKET_BOOLEAN(use_pass_albedo, "Use Pass Albedo", true);
SOCKET_BOOLEAN(use_pass_normal, "Use Pass Normal", false);
SOCKET_ENUM(prefilter, "Prefilter", *prefilter_enum, DENOISER_PREFILTER_FAST);
return type;
}
DenoiseParams::DenoiseParams() : Node(get_node_type())
{
}
CCL_NAMESPACE_END

View File

@@ -1,110 +0,0 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "device/device_memory.h"
#include "graph/node.h"
#include "render/buffers.h"
CCL_NAMESPACE_BEGIN
enum DenoiserType {
DENOISER_OPTIX = 2,
DENOISER_OPENIMAGEDENOISE = 4,
DENOISER_NUM,
DENOISER_NONE = 0,
DENOISER_ALL = ~0,
};
/* COnstruct human-readable string which denotes the denoiser type. */
const char *denoiserTypeToHumanReadable(DenoiserType type);
typedef int DenoiserTypeMask;
enum DenoiserPrefilter {
/* Best quality of the result without extra processing time, but requires guiding passes to be
* noise-free. */
DENOISER_PREFILTER_NONE = 1,
/* Denoise color and guiding passes together.
* Improves quality when guiding passes are noisy using least amount of extra processing time. */
DENOISER_PREFILTER_FAST = 2,
/* Prefilter noisy guiding passes before denoising color.
* Improves quality when guiding passes are noisy using extra processing time. */
DENOISER_PREFILTER_ACCURATE = 3,
DENOISER_PREFILTER_NUM,
};
/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization.
* The default values here do not really matter as they are always initialized from the
* Integrator node. */
class DenoiseParams : public Node {
public:
NODE_DECLARE
/* Apply denoiser to image. */
bool use = false;
/* Denoiser type. */
DenoiserType type = DENOISER_OPENIMAGEDENOISE;
/* Viewport start sample. */
int start_sample = 0;
/* Auxiliary passes. */
bool use_pass_albedo = true;
bool use_pass_normal = true;
DenoiserPrefilter prefilter = DENOISER_PREFILTER_FAST;
static const NodeEnum *get_type_enum();
static const NodeEnum *get_prefilter_enum();
DenoiseParams();
bool modified(const DenoiseParams &other) const
{
return !(use == other.use && type == other.type && start_sample == other.start_sample &&
use_pass_albedo == other.use_pass_albedo &&
use_pass_normal == other.use_pass_normal && prefilter == other.prefilter);
}
};
/* All the parameters needed to perform buffer denoising on a device.
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
* single place where they are all listed, so that it's not required to modify all device methods
* when these parameters do change. */
class DeviceDenoiseTask {
public:
DenoiseParams params;
int num_samples;
RenderBuffers *render_buffers;
BufferParams buffer_params;
/* Allow to do in-place modification of the input passes (scaling them down i.e.). This will
* lower the memory footprint of the denoiser but will make input passes "invalid" (from path
* tracer) point of view. */
bool allow_inplace_modification;
};
CCL_NAMESPACE_END

View File

@@ -0,0 +1,353 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/device_denoising.h"
#include "kernel/filter/filter_defines.h"
CCL_NAMESPACE_BEGIN
DenoisingTask::DenoisingTask(Device *device, const DeviceTask &task)
: tile_info_mem(device, "denoising tile info mem", MEM_READ_WRITE),
profiler(NULL),
storage(device),
buffer(device),
device(device)
{
radius = task.denoising.radius;
nlm_k_2 = powf(2.0f, lerp(-5.0f, 3.0f, task.denoising.strength));
if (task.denoising.relative_pca) {
pca_threshold = -powf(10.0f, lerp(-8.0f, 0.0f, task.denoising.feature_strength));
}
else {
pca_threshold = powf(10.0f, lerp(-5.0f, 3.0f, task.denoising.feature_strength));
}
render_buffer.frame_stride = task.frame_stride;
render_buffer.pass_stride = task.pass_stride;
render_buffer.offset = task.pass_denoising_data;
target_buffer.pass_stride = task.target_pass_stride;
target_buffer.denoising_clean_offset = task.pass_denoising_clean;
target_buffer.offset = 0;
functions.map_neighbor_tiles = function_bind(task.map_neighbor_tiles, _1, device);
functions.unmap_neighbor_tiles = function_bind(task.unmap_neighbor_tiles, _1, device);
tile_info = (TileInfo *)tile_info_mem.alloc(sizeof(TileInfo) / sizeof(int));
tile_info->from_render = task.denoising_from_render ? 1 : 0;
tile_info->frames[0] = 0;
tile_info->num_frames = min(task.denoising_frames.size() + 1, DENOISE_MAX_FRAMES);
for (int i = 1; i < tile_info->num_frames; i++) {
tile_info->frames[i] = task.denoising_frames[i - 1];
}
do_prefilter = task.denoising.store_passes && task.denoising.type == DENOISER_NLM;
do_filter = task.denoising.use && task.denoising.type == DENOISER_NLM;
}
DenoisingTask::~DenoisingTask()
{
storage.XtWX.free();
storage.XtWY.free();
storage.transform.free();
storage.rank.free();
buffer.mem.free();
buffer.temporary_mem.free();
tile_info_mem.free();
}
void DenoisingTask::set_render_buffer(RenderTileNeighbors &neighbors)
{
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
RenderTile &rtile = neighbors.tiles[i];
tile_info->offsets[i] = rtile.offset;
tile_info->strides[i] = rtile.stride;
tile_info->buffers[i] = rtile.buffer;
}
tile_info->x[0] = neighbors.tiles[3].x;
tile_info->x[1] = neighbors.tiles[4].x;
tile_info->x[2] = neighbors.tiles[5].x;
tile_info->x[3] = neighbors.tiles[5].x + neighbors.tiles[5].w;
tile_info->y[0] = neighbors.tiles[1].y;
tile_info->y[1] = neighbors.tiles[4].y;
tile_info->y[2] = neighbors.tiles[7].y;
tile_info->y[3] = neighbors.tiles[7].y + neighbors.tiles[7].h;
target_buffer.offset = neighbors.target.offset;
target_buffer.stride = neighbors.target.stride;
target_buffer.ptr = neighbors.target.buffer;
if (do_prefilter && neighbors.target.buffers) {
target_buffer.denoising_output_offset =
neighbors.target.buffers->params.get_denoising_prefiltered_offset();
}
else {
target_buffer.denoising_output_offset = 0;
}
tile_info_mem.copy_to_device();
}
void DenoisingTask::setup_denoising_buffer()
{
/* Expand filter_area by radius pixels and clamp the result to the extent of the neighboring
* tiles */
rect = rect_from_shape(filter_area.x, filter_area.y, filter_area.z, filter_area.w);
rect = rect_expand(rect, radius);
rect = rect_clip(rect,
make_int4(tile_info->x[0], tile_info->y[0], tile_info->x[3], tile_info->y[3]));
buffer.use_intensity = do_prefilter || (tile_info->num_frames > 1);
buffer.passes = buffer.use_intensity ? 15 : 14;
buffer.width = rect.z - rect.x;
buffer.stride = align_up(buffer.width, 4);
buffer.h = rect.w - rect.y;
int alignment_floats = divide_up(device->mem_sub_ptr_alignment(), sizeof(float));
buffer.pass_stride = align_up(buffer.stride * buffer.h, alignment_floats);
buffer.frame_stride = buffer.pass_stride * buffer.passes;
/* Pad the total size by four floats since the SIMD kernels might go a bit over the end. */
int mem_size = align_up(tile_info->num_frames * buffer.frame_stride + 4, alignment_floats);
buffer.mem.alloc_to_device(mem_size, false);
buffer.use_time = (tile_info->num_frames > 1);
/* CPUs process shifts sequentially while GPUs process them in parallel. */
int num_layers;
if (buffer.gpu_temporary_mem) {
/* Shadowing prefiltering uses a radius of 6, so allocate at least that much. */
int max_radius = max(radius, 6);
int num_shifts = (2 * max_radius + 1) * (2 * max_radius + 1);
num_layers = 2 * num_shifts + 1;
}
else {
num_layers = 3;
}
/* Allocate two layers per shift as well as one for the weight accumulation. */
buffer.temporary_mem.alloc_to_device(num_layers * buffer.pass_stride);
}
void DenoisingTask::prefilter_shadowing()
{
device_ptr null_ptr = (device_ptr)0;
device_sub_ptr unfiltered_a(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr unfiltered_b(buffer.mem, 1 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var(buffer.mem, 2 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr sample_var_var(buffer.mem, 3 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr buffer_var(buffer.mem, 5 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr filtered_var(buffer.mem, 6 * buffer.pass_stride, buffer.pass_stride);
/* Get the A/B unfiltered passes, the combined sample variance, the estimated variance of the
* sample variance and the buffer variance. */
functions.divide_shadow(*unfiltered_a, *unfiltered_b, *sample_var, *sample_var_var, *buffer_var);
/* Smooth the (generally pretty noisy) buffer variance using the spatial information from the
* sample variance. */
nlm_state.set_parameters(6, 3, 4.0f, 1.0f, false);
functions.non_local_means(*buffer_var, *sample_var, *sample_var_var, *filtered_var);
/* Reuse memory, the previous data isn't needed anymore. */
device_ptr filtered_a = *buffer_var, filtered_b = *sample_var;
/* Use the smoothed variance to filter the two shadow half images using each other for weight
* calculation. */
nlm_state.set_parameters(5, 3, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered_a, *unfiltered_b, *filtered_var, filtered_a);
functions.non_local_means(*unfiltered_b, *unfiltered_a, *filtered_var, filtered_b);
device_ptr residual_var = *sample_var_var;
/* Estimate the residual variance between the two filtered halves. */
functions.combine_halves(filtered_a, filtered_b, null_ptr, residual_var, 2, rect);
device_ptr final_a = *unfiltered_a, final_b = *unfiltered_b;
/* Use the residual variance for a second filter pass. */
nlm_state.set_parameters(4, 2, 1.0f, 0.5f, false);
functions.non_local_means(filtered_a, filtered_b, residual_var, final_a);
functions.non_local_means(filtered_b, filtered_a, residual_var, final_b);
/* Combine the two double-filtered halves to a final shadow feature. */
device_sub_ptr shadow_pass(buffer.mem, 4 * buffer.pass_stride, buffer.pass_stride);
functions.combine_halves(final_a, final_b, *shadow_pass, null_ptr, 0, rect);
}
void DenoisingTask::prefilter_features()
{
device_sub_ptr unfiltered(buffer.mem, 8 * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr variance(buffer.mem, 9 * buffer.pass_stride, buffer.pass_stride);
int mean_from[] = {0, 1, 2, 12, 6, 7, 8};
int variance_from[] = {3, 4, 5, 13, 9, 10, 11};
int pass_to[] = {1, 2, 3, 0, 5, 6, 7};
for (int pass = 0; pass < 7; pass++) {
device_sub_ptr feature_pass(
buffer.mem, pass_to[pass] * buffer.pass_stride, buffer.pass_stride);
/* Get the unfiltered pass and its variance from the RenderBuffers. */
functions.get_feature(mean_from[pass],
variance_from[pass],
*unfiltered,
*variance,
1.0f / render_buffer.samples);
/* Smooth the pass and store the result in the denoising buffers. */
nlm_state.set_parameters(2, 2, 1.0f, 0.25f, false);
functions.non_local_means(*unfiltered, *unfiltered, *variance, *feature_pass);
}
}
void DenoisingTask::prefilter_color()
{
int mean_from[] = {20, 21, 22};
int variance_from[] = {23, 24, 25};
int mean_to[] = {8, 9, 10};
int variance_to[] = {11, 12, 13};
int num_color_passes = 3;
device_only_memory<float> temporary_color(device, "denoising temporary color");
temporary_color.alloc_to_device(6 * buffer.pass_stride, false);
for (int pass = 0; pass < num_color_passes; pass++) {
device_sub_ptr color_pass(temporary_color, pass * buffer.pass_stride, buffer.pass_stride);
device_sub_ptr color_var_pass(
temporary_color, (pass + 3) * buffer.pass_stride, buffer.pass_stride);
functions.get_feature(mean_from[pass],
variance_from[pass],
*color_pass,
*color_var_pass,
1.0f / render_buffer.samples);
}
device_sub_ptr depth_pass(buffer.mem, 0, buffer.pass_stride);
device_sub_ptr color_var_pass(
buffer.mem, variance_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr output_pass(buffer.mem, mean_to[0] * buffer.pass_stride, 3 * buffer.pass_stride);
functions.detect_outliers(
temporary_color.device_pointer, *color_var_pass, *depth_pass, *output_pass);
if (buffer.use_intensity) {
device_sub_ptr intensity_pass(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
nlm_state.set_parameters(radius, 4, 2.0f, nlm_k_2 * 4.0f, true);
functions.non_local_means(*output_pass, *output_pass, *color_var_pass, *intensity_pass);
}
}
void DenoisingTask::load_buffer()
{
device_ptr null_ptr = (device_ptr)0;
int original_offset = render_buffer.offset;
int num_passes = buffer.use_intensity ? 15 : 14;
for (int i = 0; i < tile_info->num_frames; i++) {
for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr to_pass(
buffer.mem, i * buffer.frame_stride + pass * buffer.pass_stride, buffer.pass_stride);
bool is_variance = (pass >= 11) && (pass <= 13);
functions.get_feature(
pass, -1, *to_pass, null_ptr, is_variance ? (1.0f / render_buffer.samples) : 1.0f);
}
render_buffer.offset += render_buffer.frame_stride;
}
render_buffer.offset = original_offset;
}
void DenoisingTask::write_buffer()
{
reconstruction_state.buffer_params = make_int4(target_buffer.offset,
target_buffer.stride,
target_buffer.pass_stride,
target_buffer.denoising_clean_offset);
int num_passes = buffer.use_intensity ? 15 : 14;
for (int pass = 0; pass < num_passes; pass++) {
device_sub_ptr from_pass(buffer.mem, pass * buffer.pass_stride, buffer.pass_stride);
int out_offset = pass + target_buffer.denoising_output_offset;
functions.write_feature(out_offset, *from_pass, target_buffer.ptr);
}
}
void DenoisingTask::construct_transform()
{
storage.w = filter_area.z;
storage.h = filter_area.w;
storage.transform.alloc_to_device(storage.w * storage.h * TRANSFORM_SIZE, false);
storage.rank.alloc_to_device(storage.w * storage.h, false);
functions.construct_transform();
}
void DenoisingTask::reconstruct()
{
storage.XtWX.alloc_to_device(storage.w * storage.h * XTWX_SIZE, false);
storage.XtWY.alloc_to_device(storage.w * storage.h * XTWY_SIZE, false);
storage.XtWX.zero_to_device();
storage.XtWY.zero_to_device();
reconstruction_state.filter_window = rect_from_shape(
filter_area.x - rect.x, filter_area.y - rect.y, storage.w, storage.h);
int tile_coordinate_offset = filter_area.y * target_buffer.stride + filter_area.x;
reconstruction_state.buffer_params = make_int4(target_buffer.offset + tile_coordinate_offset,
target_buffer.stride,
target_buffer.pass_stride,
target_buffer.denoising_clean_offset);
reconstruction_state.source_w = rect.z - rect.x;
reconstruction_state.source_h = rect.w - rect.y;
device_sub_ptr color_ptr(buffer.mem, 8 * buffer.pass_stride, 3 * buffer.pass_stride);
device_sub_ptr color_var_ptr(buffer.mem, 11 * buffer.pass_stride, 3 * buffer.pass_stride);
for (int f = 0; f < tile_info->num_frames; f++) {
device_ptr scale_ptr = 0;
device_sub_ptr *scale_sub_ptr = NULL;
if (tile_info->frames[f] != 0 && (tile_info->num_frames > 1)) {
scale_sub_ptr = new device_sub_ptr(buffer.mem, 14 * buffer.pass_stride, buffer.pass_stride);
scale_ptr = **scale_sub_ptr;
}
functions.accumulate(*color_ptr, *color_var_ptr, scale_ptr, f);
delete scale_sub_ptr;
}
functions.solve(target_buffer.ptr);
}
void DenoisingTask::run_denoising(RenderTile &tile)
{
RenderTileNeighbors neighbors(tile);
functions.map_neighbor_tiles(neighbors);
set_render_buffer(neighbors);
setup_denoising_buffer();
if (tile_info->from_render) {
prefilter_shadowing();
prefilter_features();
prefilter_color();
}
else {
load_buffer();
}
if (do_filter) {
construct_transform();
reconstruct();
}
if (do_prefilter) {
write_buffer();
}
functions.unmap_neighbor_tiles(neighbors);
}
CCL_NAMESPACE_END

View File

@@ -0,0 +1,197 @@
/*
* Copyright 2011-2017 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __DEVICE_DENOISING_H__
#define __DEVICE_DENOISING_H__
#include "device/device.h"
#include "render/buffers.h"
#include "kernel/filter/filter_defines.h"
#include "util/util_profiling.h"
CCL_NAMESPACE_BEGIN
class DenoisingTask {
public:
/* Parameters of the denoising algorithm. */
int radius;
float nlm_k_2;
float pca_threshold;
/* Parameters of the RenderBuffers. */
struct RenderBuffers {
int offset;
int pass_stride;
int frame_stride;
int samples;
} render_buffer;
/* Pointer and parameters of the target buffer. */
struct TargetBuffer {
int offset;
int stride;
int pass_stride;
int denoising_clean_offset;
int denoising_output_offset;
device_ptr ptr;
} target_buffer;
TileInfo *tile_info;
device_vector<int> tile_info_mem;
ProfilingState *profiler;
int4 rect;
int4 filter_area;
bool do_prefilter;
bool do_filter;
struct DeviceFunctions {
function<bool(
device_ptr image_ptr, /* Contains the values that are smoothed. */
device_ptr guide_ptr, /* Contains the values that are used to calculate weights. */
device_ptr variance_ptr, /* Contains the variance of the guide image. */
device_ptr out_ptr /* The filtered output is written into this image. */
)>
non_local_means;
function<bool(
device_ptr color_ptr, device_ptr color_variance_ptr, device_ptr scale_ptr, int frame)>
accumulate;
function<bool(device_ptr output_ptr)> solve;
function<bool()> construct_transform;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr mean_ptr,
device_ptr variance_ptr,
int r,
int4 rect)>
combine_halves;
function<bool(device_ptr a_ptr,
device_ptr b_ptr,
device_ptr sample_variance_ptr,
device_ptr sv_variance_ptr,
device_ptr buffer_variance_ptr)>
divide_shadow;
function<bool(int mean_offset,
int variance_offset,
device_ptr mean_ptr,
device_ptr variance_ptr,
float scale)>
get_feature;
function<bool(device_ptr image_ptr,
device_ptr variance_ptr,
device_ptr depth_ptr,
device_ptr output_ptr)>
detect_outliers;
function<bool(int out_offset, device_ptr frop_ptr, device_ptr buffer_ptr)> write_feature;
function<void(RenderTileNeighbors &neighbors)> map_neighbor_tiles;
function<void(RenderTileNeighbors &neighbors)> unmap_neighbor_tiles;
} functions;
/* Stores state of the current Reconstruction operation,
* which is accessed by the device in order to perform the operation. */
struct ReconstructionState {
int4 filter_window;
int4 buffer_params;
int source_w;
int source_h;
} reconstruction_state;
/* Stores state of the current NLM operation,
* which is accessed by the device in order to perform the operation. */
struct NLMState {
int r; /* Search radius of the filter. */
int f; /* Patch size of the filter. */
float a; /* Variance compensation factor in the MSE estimation. */
float k_2; /* Squared value of the k parameter of the filter. */
bool is_color;
void set_parameters(int r_, int f_, float a_, float k_2_, bool is_color_)
{
r = r_;
f = f_;
a = a_, k_2 = k_2_;
is_color = is_color_;
}
} nlm_state;
struct Storage {
device_only_memory<float> transform;
device_only_memory<int> rank;
device_only_memory<float> XtWX;
device_only_memory<float3> XtWY;
int w;
int h;
Storage(Device *device)
: transform(device, "denoising transform"),
rank(device, "denoising rank"),
XtWX(device, "denoising XtWX"),
XtWY(device, "denoising XtWY")
{
}
} storage;
DenoisingTask(Device *device, const DeviceTask &task);
~DenoisingTask();
void run_denoising(RenderTile &tile);
struct DenoiseBuffers {
int pass_stride;
int passes;
int stride;
int h;
int width;
int frame_stride;
device_only_memory<float> mem;
device_only_memory<float> temporary_mem;
bool use_time;
bool use_intensity;
bool gpu_temporary_mem;
DenoiseBuffers(Device *device)
: mem(device, "denoising pixel buffer"),
temporary_mem(device, "denoising temporary mem", true)
{
}
} buffer;
protected:
Device *device;
void set_render_buffer(RenderTileNeighbors &neighbors);
void setup_denoising_buffer();
void prefilter_shadowing();
void prefilter_features();
void prefilter_color();
void construct_transform();
void reconstruct();
void load_buffer();
void write_buffer();
};
CCL_NAMESPACE_END
#endif /* __DEVICE_DENOISING_H__ */

View File

@@ -14,10 +14,8 @@
* limitations under the License.
*/
#include "device/dummy/device.h"
#include "device/device.h"
#include "device/device_queue.h"
#include "device/device_intern.h"
CCL_NAMESPACE_BEGIN
@@ -25,8 +23,8 @@ CCL_NAMESPACE_BEGIN
class DummyDevice : public Device {
public:
DummyDevice(const DeviceInfo &info_, Stats &stats_, Profiler &profiler_)
: Device(info_, stats_, profiler_)
DummyDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_)
: Device(info_, stats_, profiler_, background_)
{
error_msg = info.error_msg;
}
@@ -48,7 +46,7 @@ class DummyDevice : public Device {
{
}
virtual void mem_copy_from(device_memory &, size_t, size_t, size_t, size_t) override
virtual void mem_copy_from(device_memory &, int, int, int, int) override
{
}
@@ -63,11 +61,23 @@ class DummyDevice : public Device {
virtual void const_copy_to(const char *, void *, size_t) override
{
}
virtual void task_add(DeviceTask &) override
{
}
virtual void task_wait() override
{
}
virtual void task_cancel() override
{
}
};
Device *device_dummy_create(const DeviceInfo &info, Stats &stats, Profiler &profiler)
Device *device_dummy_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
return new DummyDevice(info, stats, profiler);
return new DummyDevice(info, stats, profiler, background);
}
CCL_NAMESPACE_END

View File

@@ -1,42 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "render/display_driver.h"
#include "util/util_types.h"
CCL_NAMESPACE_BEGIN
/* Device-side graphics interoperability support.
*
* Takes care of holding all the handlers needed by the device to implement interoperability with
* the graphics library. */
class DeviceGraphicsInterop {
public:
DeviceGraphicsInterop() = default;
virtual ~DeviceGraphicsInterop() = default;
/* Update this device-side graphics interoperability object with the given destination resource
* information. */
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
virtual device_ptr map() = 0;
virtual void unmap() = 0;
};
CCL_NAMESPACE_END

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __DEVICE_INTERN_H__
#define __DEVICE_INTERN_H__
#include "util/util_string.h"
#include "util/util_vector.h"
CCL_NAMESPACE_BEGIN
class Device;
class DeviceInfo;
class Profiler;
class Stats;
Device *device_cpu_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_init();
Device *device_opencl_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_opencl_compile_kernel(const vector<string> &parameters);
bool device_cuda_init();
Device *device_cuda_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
bool device_optix_init();
Device *device_optix_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
Device *device_dummy_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
Device *device_network_create(DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const char *address);
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background);
void device_cpu_info(vector<DeviceInfo> &devices);
void device_opencl_info(vector<DeviceInfo> &devices);
void device_cuda_info(vector<DeviceInfo> &devices);
void device_optix_info(const vector<DeviceInfo> &cuda_devices, vector<DeviceInfo> &devices);
void device_network_info(vector<DeviceInfo> &devices);
string device_cpu_capabilities();
string device_opencl_capabilities();
string device_cuda_capabilities();
CCL_NAMESPACE_END
#endif /* __DEVICE_INTERN_H__ */

View File

@@ -1,157 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "device/device_kernel.h"
#include "util/util_logging.h"
CCL_NAMESPACE_BEGIN
const char *device_kernel_as_string(DeviceKernel kernel)
{
switch (kernel) {
/* Integrator. */
case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_CAMERA:
return "integrator_init_from_camera";
case DEVICE_KERNEL_INTEGRATOR_INIT_FROM_BAKE:
return "integrator_init_from_bake";
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST:
return "integrator_intersect_closest";
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW:
return "integrator_intersect_shadow";
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE:
return "integrator_intersect_subsurface";
case DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK:
return "integrator_intersect_volume_stack";
case DEVICE_KERNEL_INTEGRATOR_SHADE_BACKGROUND:
return "integrator_shade_background";
case DEVICE_KERNEL_INTEGRATOR_SHADE_LIGHT:
return "integrator_shade_light";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SHADOW:
return "integrator_shade_shadow";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE:
return "integrator_shade_surface";
case DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE:
return "integrator_shade_surface_raytrace";
case DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME:
return "integrator_shade_volume";
case DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL:
return "integrator_megakernel";
case DEVICE_KERNEL_INTEGRATOR_QUEUED_PATHS_ARRAY:
return "integrator_queued_paths_array";
case DEVICE_KERNEL_INTEGRATOR_QUEUED_SHADOW_PATHS_ARRAY:
return "integrator_queued_shadow_paths_array";
case DEVICE_KERNEL_INTEGRATOR_ACTIVE_PATHS_ARRAY:
return "integrator_active_paths_array";
case DEVICE_KERNEL_INTEGRATOR_TERMINATED_PATHS_ARRAY:
return "integrator_terminated_paths_array";
case DEVICE_KERNEL_INTEGRATOR_SORTED_PATHS_ARRAY:
return "integrator_sorted_paths_array";
case DEVICE_KERNEL_INTEGRATOR_COMPACT_PATHS_ARRAY:
return "integrator_compact_paths_array";
case DEVICE_KERNEL_INTEGRATOR_COMPACT_STATES:
return "integrator_compact_states";
case DEVICE_KERNEL_INTEGRATOR_RESET:
return "integrator_reset";
case DEVICE_KERNEL_INTEGRATOR_SHADOW_CATCHER_COUNT_POSSIBLE_SPLITS:
return "integrator_shadow_catcher_count_possible_splits";
/* Shader evaluation. */
case DEVICE_KERNEL_SHADER_EVAL_DISPLACE:
return "shader_eval_displace";
case DEVICE_KERNEL_SHADER_EVAL_BACKGROUND:
return "shader_eval_background";
/* Film. */
#define FILM_CONVERT_KERNEL_AS_STRING(variant, variant_lowercase) \
case DEVICE_KERNEL_FILM_CONVERT_##variant: \
return "film_convert_" #variant_lowercase; \
case DEVICE_KERNEL_FILM_CONVERT_##variant##_HALF_RGBA: \
return "film_convert_" #variant_lowercase "_half_rgba";
FILM_CONVERT_KERNEL_AS_STRING(DEPTH, depth)
FILM_CONVERT_KERNEL_AS_STRING(MIST, mist)
FILM_CONVERT_KERNEL_AS_STRING(SAMPLE_COUNT, sample_count)
FILM_CONVERT_KERNEL_AS_STRING(FLOAT, float)
FILM_CONVERT_KERNEL_AS_STRING(LIGHT_PATH, light_path)
FILM_CONVERT_KERNEL_AS_STRING(FLOAT3, float3)
FILM_CONVERT_KERNEL_AS_STRING(MOTION, motion)
FILM_CONVERT_KERNEL_AS_STRING(CRYPTOMATTE, cryptomatte)
FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER, shadow_catcher)
FILM_CONVERT_KERNEL_AS_STRING(SHADOW_CATCHER_MATTE_WITH_SHADOW,
shadow_catcher_matte_with_shadow)
FILM_CONVERT_KERNEL_AS_STRING(COMBINED, combined)
FILM_CONVERT_KERNEL_AS_STRING(FLOAT4, float4)
#undef FILM_CONVERT_KERNEL_AS_STRING
/* Adaptive sampling. */
case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_CHECK:
return "adaptive_sampling_convergence_check";
case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_X:
return "adaptive_sampling_filter_x";
case DEVICE_KERNEL_ADAPTIVE_SAMPLING_CONVERGENCE_FILTER_Y:
return "adaptive_sampling_filter_y";
/* Denoising. */
case DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS:
return "filter_guiding_preprocess";
case DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO:
return "filter_guiding_set_fake_albedo";
case DEVICE_KERNEL_FILTER_COLOR_PREPROCESS:
return "filter_color_preprocess";
case DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS:
return "filter_color_postprocess";
/* Cryptomatte. */
case DEVICE_KERNEL_CRYPTOMATTE_POSTPROCESS:
return "cryptomatte_postprocess";
/* Generic */
case DEVICE_KERNEL_PREFIX_SUM:
return "prefix_sum";
case DEVICE_KERNEL_NUM:
break;
};
LOG(FATAL) << "Unhandled kernel " << static_cast<int>(kernel) << ", should never happen.";
return "UNKNOWN";
}
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel)
{
os << device_kernel_as_string(kernel);
return os;
}
string device_kernel_mask_as_string(DeviceKernelMask mask)
{
string str;
for (uint64_t i = 0; i < sizeof(DeviceKernelMask) * 8; i++) {
if (mask & (uint64_t(1) << i)) {
if (!str.empty()) {
str += " ";
}
str += device_kernel_as_string((DeviceKernel)i);
}
}
return str;
}
CCL_NAMESPACE_END

View File

@@ -1,33 +0,0 @@
/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "kernel/kernel_types.h"
#include "util/util_string.h"
#include <ostream> // NOLINT
CCL_NAMESPACE_BEGIN
const char *device_kernel_as_string(DeviceKernel kernel);
std::ostream &operator<<(std::ostream &os, DeviceKernel kernel);
typedef uint64_t DeviceKernelMask;
string device_kernel_mask_as_string(DeviceKernelMask mask);
CCL_NAMESPACE_END

View File

@@ -23,7 +23,7 @@ CCL_NAMESPACE_BEGIN
device_memory::device_memory(Device *device, const char *name, MemoryType type)
: data_type(device_type_traits<uchar>::data_type),
data_elements(device_type_traits<uchar>::num_elements_cpu),
data_elements(device_type_traits<uchar>::num_elements),
data_size(0),
device_size(0),
data_width(0),
@@ -136,7 +136,7 @@ void device_memory::device_copy_to()
}
}
void device_memory::device_copy_from(size_t y, size_t w, size_t h, size_t elem)
void device_memory::device_copy_from(int y, int w, int h, int elem)
{
assert(type != MEM_TEXTURE && type != MEM_READ_ONLY && type != MEM_GLOBAL);
device->mem_copy_from(*this, y, w, h, elem);
@@ -149,11 +149,6 @@ void device_memory::device_zero()
}
}
bool device_memory::device_is_cpu()
{
return (device->info.type == DEVICE_CPU);
}
void device_memory::swap_device(Device *new_device,
size_t new_device_size,
device_ptr new_device_ptr)
@@ -181,7 +176,7 @@ bool device_memory::is_resident(Device *sub_device) const
/* Device Sub Ptr */
device_sub_ptr::device_sub_ptr(device_memory &mem, size_t offset, size_t size) : device(mem.device)
device_sub_ptr::device_sub_ptr(device_memory &mem, int offset, int size) : device(mem.device)
{
ptr = device->mem_alloc_sub_ptr(mem, offset, size);
}

View File

@@ -38,6 +38,7 @@ enum MemoryType {
MEM_DEVICE_ONLY,
MEM_GLOBAL,
MEM_TEXTURE,
MEM_PIXELS
};
/* Supported Data Types */
@@ -53,7 +54,7 @@ enum DataType {
TYPE_UINT64,
};
static constexpr size_t datatype_size(DataType datatype)
static inline size_t datatype_size(DataType datatype)
{
switch (datatype) {
case TYPE_UNKNOWN:
@@ -81,155 +82,112 @@ static constexpr size_t datatype_size(DataType datatype)
template<typename T> struct device_type_traits {
static const DataType data_type = TYPE_UNKNOWN;
static const size_t num_elements_cpu = sizeof(T);
static const size_t num_elements_gpu = sizeof(T);
static const int num_elements = sizeof(T);
};
template<> struct device_type_traits<uchar> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uchar) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
template<> struct device_type_traits<uchar2> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(uchar2) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 2;
};
template<> struct device_type_traits<uchar3> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(uchar3) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 3;
};
template<> struct device_type_traits<uchar4> {
static const DataType data_type = TYPE_UCHAR;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(uchar4) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<uint> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
template<> struct device_type_traits<uint2> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(uint2) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 2;
};
template<> struct device_type_traits<uint3> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 3;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(uint3) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 3;
};
template<> struct device_type_traits<uint4> {
static const DataType data_type = TYPE_UINT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(uint4) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<int> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(int) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
template<> struct device_type_traits<int2> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(int2) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 2;
};
template<> struct device_type_traits<int3> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(int3) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 3;
};
template<> struct device_type_traits<int4> {
static const DataType data_type = TYPE_INT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(int4) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<float> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(float) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
template<> struct device_type_traits<float2> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 2;
static const size_t num_elements_gpu = 2;
static_assert(sizeof(float2) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 2;
};
template<> struct device_type_traits<float3> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 3;
static_assert(sizeof(float3) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<float4> {
static const DataType data_type = TYPE_FLOAT;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(float4) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<half> {
static const DataType data_type = TYPE_HALF;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(half) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
template<> struct device_type_traits<ushort4> {
static const DataType data_type = TYPE_UINT16;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(ushort4) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<uint16_t> {
static const DataType data_type = TYPE_UINT16;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint16_t) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
template<> struct device_type_traits<half4> {
static const DataType data_type = TYPE_HALF;
static const size_t num_elements_cpu = 4;
static const size_t num_elements_gpu = 4;
static_assert(sizeof(half4) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 4;
};
template<> struct device_type_traits<uint64_t> {
static const DataType data_type = TYPE_UINT64;
static const size_t num_elements_cpu = 1;
static const size_t num_elements_gpu = 1;
static_assert(sizeof(uint64_t) == num_elements_cpu * datatype_size(data_type));
static const int num_elements = 1;
};
/* Device Memory
@@ -277,7 +235,6 @@ class device_memory {
protected:
friend class CUDADevice;
friend class OptiXDevice;
friend class HIPDevice;
/* Only create through subclasses. */
device_memory(Device *device, const char *name, MemoryType type);
@@ -297,11 +254,9 @@ class device_memory {
void device_alloc();
void device_free();
void device_copy_to();
void device_copy_from(size_t y, size_t w, size_t h, size_t elem);
void device_copy_from(int y, int w, int h, int elem);
void device_zero();
bool device_is_cpu();
device_ptr original_device_ptr;
size_t original_device_size;
Device *original_device;
@@ -320,9 +275,7 @@ template<typename T> class device_only_memory : public device_memory {
: device_memory(device, name, allow_host_memory_fallback ? MEM_READ_WRITE : MEM_DEVICE_ONLY)
{
data_type = device_type_traits<T>::data_type;
data_elements = max(device_is_cpu() ? device_type_traits<T>::num_elements_cpu :
device_type_traits<T>::num_elements_gpu,
1);
data_elements = max(device_type_traits<T>::num_elements, 1);
}
device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other))
@@ -378,15 +331,11 @@ template<typename T> class device_only_memory : public device_memory {
template<typename T> class device_vector : public device_memory {
public:
/* Can only use this for types that have the same size on CPU and GPU. */
static_assert(device_type_traits<T>::num_elements_cpu ==
device_type_traits<T>::num_elements_gpu);
device_vector(Device *device, const char *name, MemoryType type)
: device_memory(device, name, type)
{
data_type = device_type_traits<T>::data_type;
data_elements = device_type_traits<T>::num_elements_cpu;
data_elements = device_type_traits<T>::num_elements;
modified = true;
need_realloc_ = true;
@@ -528,11 +477,6 @@ template<typename T> class device_vector : public device_memory {
return (T *)host_pointer;
}
const T *data() const
{
return (T *)host_pointer;
}
T &operator[](size_t i)
{
assert(i < data_size);
@@ -563,10 +507,10 @@ template<typename T> class device_vector : public device_memory {
void copy_from_device()
{
device_copy_from(0, data_width, (data_height == 0) ? 1 : data_height, sizeof(T));
device_copy_from(0, data_width, data_height, sizeof(T));
}
void copy_from_device(size_t y, size_t w, size_t h)
void copy_from_device(int y, int w, int h)
{
device_copy_from(y, w, h, sizeof(T));
}
@@ -591,6 +535,33 @@ template<typename T> class device_vector : public device_memory {
}
};
/* Pixel Memory
*
* Device memory to efficiently draw as pixels to the screen in interactive
* rendering. Only copying pixels from the device is supported, not copying to. */
template<typename T> class device_pixels : public device_vector<T> {
public:
device_pixels(Device *device, const char *name) : device_vector<T>(device, name, MEM_PIXELS)
{
}
void alloc_to_device(size_t width, size_t height, size_t depth = 0)
{
device_vector<T>::alloc(width, height, depth);
if (!device_memory::device_pointer) {
device_memory::device_alloc();
}
}
T *copy_from_device(int y, int w, int h)
{
device_memory::device_copy_from(y, w, h, sizeof(T));
return device_vector<T>::data();
}
};
/* Device Sub Memory
*
* Pointer into existing memory. It is not allocated separately, but created
@@ -602,7 +573,7 @@ template<typename T> class device_vector : public device_memory {
class device_sub_ptr {
public:
device_sub_ptr(device_memory &mem, size_t offset, size_t size);
device_sub_ptr(device_memory &mem, int offset, int size);
~device_sub_ptr();
device_ptr operator*() const

View File

@@ -0,0 +1,826 @@
/*
* Copyright 2011-2013 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sstream>
#include <stdlib.h>
#include "bvh/bvh_multi.h"
#include "device/device.h"
#include "device/device_intern.h"
#include "device/device_network.h"
#include "render/buffers.h"
#include "render/geometry.h"
#include "util/util_foreach.h"
#include "util/util_list.h"
#include "util/util_logging.h"
#include "util/util_map.h"
#include "util/util_time.h"
CCL_NAMESPACE_BEGIN
class MultiDevice : public Device {
public:
struct SubDevice {
Stats stats;
Device *device;
map<device_ptr, device_ptr> ptr_map;
int peer_island_index = -1;
};
list<SubDevice> devices, denoising_devices;
device_ptr unique_key;
vector<vector<SubDevice *>> peer_islands;
bool use_denoising;
bool matching_rendering_and_denoising_devices;
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_),
unique_key(1),
use_denoising(!info.denoising_devices.empty())
{
foreach (DeviceInfo &subinfo, info.multi_devices) {
/* Always add CPU devices at the back since GPU devices can change
* host memory pointers, which CPU uses as device pointer. */
SubDevice *sub;
if (subinfo.type == DEVICE_CPU) {
devices.emplace_back();
sub = &devices.back();
}
else {
devices.emplace_front();
sub = &devices.front();
}
/* The pointer to 'sub->stats' will stay valid even after new devices
* are added, since 'devices' is a linked list. */
sub->device = Device::create(subinfo, sub->stats, profiler, background);
}
foreach (DeviceInfo &subinfo, info.denoising_devices) {
denoising_devices.emplace_front();
SubDevice *sub = &denoising_devices.front();
sub->device = Device::create(subinfo, sub->stats, profiler, background);
}
/* Build a list of peer islands for the available render devices */
foreach (SubDevice &sub, devices) {
/* First ensure that every device is in at least once peer island */
if (sub.peer_island_index < 0) {
peer_islands.emplace_back();
sub.peer_island_index = (int)peer_islands.size() - 1;
peer_islands[sub.peer_island_index].push_back(&sub);
}
if (!info.has_peer_memory) {
continue;
}
/* Second check peer access between devices and fill up the islands accordingly */
foreach (SubDevice &peer_sub, devices) {
if (peer_sub.peer_island_index < 0 &&
peer_sub.device->info.type == sub.device->info.type &&
peer_sub.device->check_peer_access(sub.device)) {
peer_sub.peer_island_index = sub.peer_island_index;
peer_islands[sub.peer_island_index].push_back(&peer_sub);
}
}
}
/* Try to re-use memory when denoising and render devices use the same physical devices
* (e.g. OptiX denoising and CUDA rendering device pointing to the same GPU).
* Ordering has to match as well, so that 'DeviceTask::split' behaves consistent. */
matching_rendering_and_denoising_devices = denoising_devices.empty() ||
(devices.size() == denoising_devices.size());
if (matching_rendering_and_denoising_devices) {
for (list<SubDevice>::iterator device_it = devices.begin(),
denoising_device_it = denoising_devices.begin();
device_it != devices.end() && denoising_device_it != denoising_devices.end();
++device_it, ++denoising_device_it) {
const DeviceInfo &info = device_it->device->info;
const DeviceInfo &denoising_info = denoising_device_it->device->info;
if ((info.type != DEVICE_CUDA && info.type != DEVICE_OPTIX) ||
(denoising_info.type != DEVICE_CUDA && denoising_info.type != DEVICE_OPTIX) ||
info.num != denoising_info.num) {
matching_rendering_and_denoising_devices = false;
break;
}
}
}
#ifdef WITH_NETWORK
/* try to add network devices */
ServerDiscovery discovery(true);
time_sleep(1.0);
vector<string> servers = discovery.get_server_list();
foreach (string &server, servers) {
Device *device = device_network_create(info, stats, profiler, server.c_str());
if (device)
devices.push_back(SubDevice(device));
}
#endif
}
~MultiDevice()
{
foreach (SubDevice &sub, devices)
delete sub.device;
foreach (SubDevice &sub, denoising_devices)
delete sub.device;
}
const string &error_message() override
{
error_msg.clear();
foreach (SubDevice &sub, devices)
error_msg += sub.device->error_message();
foreach (SubDevice &sub, denoising_devices)
error_msg += sub.device->error_message();
return error_msg;
}
virtual bool show_samples() const override
{
if (devices.size() > 1) {
return false;
}
return devices.front().device->show_samples();
}
virtual BVHLayoutMask get_bvh_layout_mask() const override
{
BVHLayoutMask bvh_layout_mask = BVH_LAYOUT_ALL;
BVHLayoutMask bvh_layout_mask_all = BVH_LAYOUT_NONE;
foreach (const SubDevice &sub_device, devices) {
BVHLayoutMask device_bvh_layout_mask = sub_device.device->get_bvh_layout_mask();
bvh_layout_mask &= device_bvh_layout_mask;
bvh_layout_mask_all |= device_bvh_layout_mask;
}
/* With multiple OptiX devices, every device needs its own acceleration structure */
if (bvh_layout_mask == BVH_LAYOUT_OPTIX) {
return BVH_LAYOUT_MULTI_OPTIX;
}
/* When devices do not share a common BVH layout, fall back to creating one for each */
const BVHLayoutMask BVH_LAYOUT_OPTIX_EMBREE = (BVH_LAYOUT_OPTIX | BVH_LAYOUT_EMBREE);
if ((bvh_layout_mask_all & BVH_LAYOUT_OPTIX_EMBREE) == BVH_LAYOUT_OPTIX_EMBREE) {
return BVH_LAYOUT_MULTI_OPTIX_EMBREE;
}
return bvh_layout_mask;
}
bool load_kernels(const DeviceRequestedFeatures &requested_features) override
{
foreach (SubDevice &sub, devices)
if (!sub.device->load_kernels(requested_features))
return false;
use_denoising = requested_features.use_denoising;
if (requested_features.use_denoising) {
/* Only need denoising feature, everything else is unused. */
DeviceRequestedFeatures denoising_features;
denoising_features.use_denoising = true;
foreach (SubDevice &sub, denoising_devices)
if (!sub.device->load_kernels(denoising_features))
return false;
}
return true;
}
bool wait_for_availability(const DeviceRequestedFeatures &requested_features) override
{
foreach (SubDevice &sub, devices)
if (!sub.device->wait_for_availability(requested_features))
return false;
if (requested_features.use_denoising) {
foreach (SubDevice &sub, denoising_devices)
if (!sub.device->wait_for_availability(requested_features))
return false;
}
return true;
}
DeviceKernelStatus get_active_kernel_switch_state() override
{
DeviceKernelStatus result = DEVICE_KERNEL_USING_FEATURE_KERNEL;
foreach (SubDevice &sub, devices) {
DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state();
switch (subresult) {
case DEVICE_KERNEL_FEATURE_KERNEL_INVALID:
case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE:
return subresult;
case DEVICE_KERNEL_USING_FEATURE_KERNEL:
case DEVICE_KERNEL_UNKNOWN:
break;
}
}
return result;
}
void build_bvh(BVH *bvh, Progress &progress, bool refit) override
{
/* Try to build and share a single acceleration structure, if possible */
if (bvh->params.bvh_layout == BVH_LAYOUT_BVH2 || bvh->params.bvh_layout == BVH_LAYOUT_EMBREE) {
devices.back().device->build_bvh(bvh, progress, refit);
return;
}
assert(bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX ||
bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE);
BVHMulti *const bvh_multi = static_cast<BVHMulti *>(bvh);
bvh_multi->sub_bvhs.resize(devices.size());
vector<BVHMulti *> geom_bvhs;
geom_bvhs.reserve(bvh->geometry.size());
foreach (Geometry *geom, bvh->geometry) {
geom_bvhs.push_back(static_cast<BVHMulti *>(geom->bvh));
}
/* Broadcast acceleration structure build to all render devices */
size_t i = 0;
foreach (SubDevice &sub, devices) {
/* Change geometry BVH pointers to the sub BVH */
for (size_t k = 0; k < bvh->geometry.size(); ++k) {
bvh->geometry[k]->bvh = geom_bvhs[k]->sub_bvhs[i];
}
if (!bvh_multi->sub_bvhs[i]) {
BVHParams params = bvh->params;
if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX)
params.bvh_layout = BVH_LAYOUT_OPTIX;
else if (bvh->params.bvh_layout == BVH_LAYOUT_MULTI_OPTIX_EMBREE)
params.bvh_layout = sub.device->info.type == DEVICE_OPTIX ? BVH_LAYOUT_OPTIX :
BVH_LAYOUT_EMBREE;
/* Skip building a bottom level acceleration structure for non-instanced geometry on Embree
* (since they are put into the top level directly, see bvh_embree.cpp) */
if (!params.top_level && params.bvh_layout == BVH_LAYOUT_EMBREE &&
!bvh->geometry[0]->is_instanced()) {
i++;
continue;
}
bvh_multi->sub_bvhs[i] = BVH::create(params, bvh->geometry, bvh->objects, sub.device);
}
sub.device->build_bvh(bvh_multi->sub_bvhs[i], progress, refit);
i++;
}
/* Change geometry BVH pointers back to the multi BVH. */
for (size_t k = 0; k < bvh->geometry.size(); ++k) {
bvh->geometry[k]->bvh = geom_bvhs[k];
}
}
virtual void *osl_memory() override
{
if (devices.size() > 1) {
return NULL;
}
return devices.front().device->osl_memory();
}
bool is_resident(device_ptr key, Device *sub_device) override
{
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
return find_matching_mem_device(key, sub)->device == sub_device;
}
}
return false;
}
SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
{
assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));
/* Get the memory owner of this key (first try current device, then peer devices) */
SubDevice *owner_sub = &sub;
if (owner_sub->ptr_map.find(key) == owner_sub->ptr_map.end()) {
foreach (SubDevice *island_sub, peer_islands[sub.peer_island_index]) {
if (island_sub != owner_sub &&
island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) {
owner_sub = island_sub;
}
}
}
return owner_sub;
}
SubDevice *find_suitable_mem_device(device_ptr key, const vector<SubDevice *> &island)
{
assert(!island.empty());
/* Get the memory owner of this key or the device with the lowest memory usage when new */
SubDevice *owner_sub = island.front();
foreach (SubDevice *island_sub, island) {
if (key ? (island_sub->ptr_map.find(key) != island_sub->ptr_map.end()) :
(island_sub->device->stats.mem_used < owner_sub->device->stats.mem_used)) {
owner_sub = island_sub;
}
}
return owner_sub;
}
inline device_ptr find_matching_mem(device_ptr key, SubDevice &sub)
{
return find_matching_mem_device(key, sub)->ptr_map[key];
}
void mem_alloc(device_memory &mem) override
{
device_ptr key = unique_key++;
if (mem.type == MEM_PIXELS) {
/* Always allocate pixels memory on all devices
* This is necessary to ensure PBOs are registered everywhere, which FILM_CONVERT uses */
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = 0;
mem.device_size = 0;
sub.device->mem_alloc(mem);
sub.ptr_map[key] = mem.device_pointer;
}
}
else {
assert(mem.type == MEM_READ_ONLY || mem.type == MEM_READ_WRITE ||
mem.type == MEM_DEVICE_ONLY);
/* The remaining memory types can be distributed across devices */
foreach (const vector<SubDevice *> &island, peer_islands) {
SubDevice *owner_sub = find_suitable_mem_device(key, island);
mem.device = owner_sub->device;
mem.device_pointer = 0;
mem.device_size = 0;
owner_sub->device->mem_alloc(mem);
owner_sub->ptr_map[key] = mem.device_pointer;
}
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size);
}
void mem_copy_to(device_memory &mem) override
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
/* The tile buffers are allocated on each device (see below), so copy to all of them */
if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_copy_to(mem);
sub.ptr_map[key] = mem.device_pointer;
}
}
else {
foreach (const vector<SubDevice *> &island, peer_islands) {
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
mem.device = owner_sub->device;
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
mem.device_size = existing_size;
owner_sub->device->mem_copy_to(mem);
owner_sub->ptr_map[key] = mem.device_pointer;
if (mem.type == MEM_GLOBAL || mem.type == MEM_TEXTURE) {
/* Need to create texture objects and update pointer in kernel globals on all devices */
foreach (SubDevice *island_sub, island) {
if (island_sub != owner_sub) {
island_sub->device->mem_copy_to(mem);
}
}
}
}
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_copy_from(device_memory &mem, int y, int w, int h, int elem) override
{
device_ptr key = mem.device_pointer;
int i = 0, sub_h = h / devices.size();
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
SubDevice *owner_sub = find_matching_mem_device(key, sub);
mem.device = owner_sub->device;
mem.device_pointer = owner_sub->ptr_map[key];
owner_sub->device->mem_copy_from(mem, sy, w, sh, elem);
i++;
}
mem.device = this;
mem.device_pointer = key;
}
void mem_zero(device_memory &mem) override
{
device_ptr existing_key = mem.device_pointer;
device_ptr key = (existing_key) ? existing_key : unique_key++;
size_t existing_size = mem.device_size;
/* This is a hack to only allocate the tile buffers on denoising devices
* Similarly the tile buffers also need to be allocated separately on all devices so any
* overlap rendered for denoising does not interfere with each other */
if (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising) {
vector<device_ptr> device_pointers;
device_pointers.reserve(devices.size());
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer;
device_pointers.push_back(mem.device_pointer);
}
foreach (SubDevice &sub, denoising_devices) {
if (matching_rendering_and_denoising_devices) {
sub.ptr_map[key] = device_pointers.front();
device_pointers.erase(device_pointers.begin());
}
else {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
mem.device_size = existing_size;
sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer;
}
}
}
else {
foreach (const vector<SubDevice *> &island, peer_islands) {
SubDevice *owner_sub = find_suitable_mem_device(existing_key, island);
mem.device = owner_sub->device;
mem.device_pointer = (existing_key) ? owner_sub->ptr_map[existing_key] : 0;
mem.device_size = existing_size;
owner_sub->device->mem_zero(mem);
owner_sub->ptr_map[key] = mem.device_pointer;
}
}
mem.device = this;
mem.device_pointer = key;
stats.mem_alloc(mem.device_size - existing_size);
}
void mem_free(device_memory &mem) override
{
device_ptr key = mem.device_pointer;
size_t existing_size = mem.device_size;
/* Free memory that was allocated for all devices (see above) on each device */
if (mem.type == MEM_PIXELS || (strcmp(mem.name, "RenderBuffers") == 0 && use_denoising)) {
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size;
sub.device->mem_free(mem);
sub.ptr_map.erase(sub.ptr_map.find(key));
}
foreach (SubDevice &sub, denoising_devices) {
if (matching_rendering_and_denoising_devices) {
sub.ptr_map.erase(key);
}
else {
mem.device = sub.device;
mem.device_pointer = sub.ptr_map[key];
mem.device_size = existing_size;
sub.device->mem_free(mem);
sub.ptr_map.erase(sub.ptr_map.find(key));
}
}
}
else {
foreach (const vector<SubDevice *> &island, peer_islands) {
SubDevice *owner_sub = find_matching_mem_device(key, *island.front());
mem.device = owner_sub->device;
mem.device_pointer = owner_sub->ptr_map[key];
mem.device_size = existing_size;
owner_sub->device->mem_free(mem);
owner_sub->ptr_map.erase(owner_sub->ptr_map.find(key));
if (mem.type == MEM_TEXTURE) {
/* Free texture objects on all devices */
foreach (SubDevice *island_sub, island) {
if (island_sub != owner_sub) {
island_sub->device->mem_free(mem);
}
}
}
}
}
mem.device = this;
mem.device_pointer = 0;
mem.device_size = 0;
stats.mem_free(existing_size);
}
void const_copy_to(const char *name, void *host, size_t size) override
{
foreach (SubDevice &sub, devices)
sub.device->const_copy_to(name, host, size);
}
void draw_pixels(device_memory &rgba,
int y,
int w,
int h,
int width,
int height,
int dx,
int dy,
int dw,
int dh,
bool transparent,
const DeviceDrawParams &draw_params) override
{
assert(rgba.type == MEM_PIXELS);
device_ptr key = rgba.device_pointer;
int i = 0, sub_h = h / devices.size();
int sub_height = height / devices.size();
foreach (SubDevice &sub, devices) {
int sy = y + i * sub_h;
int sh = (i == (int)devices.size() - 1) ? h - sub_h * i : sub_h;
int sheight = (i == (int)devices.size() - 1) ? height - sub_height * i : sub_height;
int sdy = dy + i * sub_height;
/* adjust math for w/width */
rgba.device_pointer = sub.ptr_map[key];
sub.device->draw_pixels(
rgba, sy, w, sh, width, sheight, dx, sdy, dw, dh, transparent, draw_params);
i++;
}
rgba.device_pointer = key;
}
void map_tile(Device *sub_device, RenderTile &tile) override
{
if (!tile.buffer) {
return;
}
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device) {
tile.buffer = find_matching_mem(tile.buffer, sub);
return;
}
}
foreach (SubDevice &sub, denoising_devices) {
if (sub.device == sub_device) {
tile.buffer = sub.ptr_map[tile.buffer];
return;
}
}
}
int device_number(Device *sub_device) override
{
int i = 0;
foreach (SubDevice &sub, devices) {
if (sub.device == sub_device)
return i;
i++;
}
foreach (SubDevice &sub, denoising_devices) {
if (sub.device == sub_device)
return i;
i++;
}
return -1;
}
void map_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
{
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
RenderTile &tile = neighbors.tiles[i];
if (!tile.buffers) {
continue;
}
device_vector<float> &mem = tile.buffers->buffer;
tile.buffer = mem.device_pointer;
if (mem.device == this && matching_rendering_and_denoising_devices) {
/* Skip unnecessary copies in viewport mode (buffer covers the
* whole image), but still need to fix up the tile device pointer. */
map_tile(sub_device, tile);
continue;
}
/* If the tile was rendered on another device, copy its memory to
* to the current device now, for the duration of the denoising task.
* Note that this temporarily modifies the RenderBuffers and calls
* the device, so this function is not thread safe. */
if (mem.device != sub_device) {
/* Only copy from device to host once. This is faster, but
* also required for the case where a CPU thread is denoising
* a tile rendered on the GPU. In that case we have to avoid
* overwriting the buffer being de-noised by the CPU thread. */
if (!tile.buffers->map_neighbor_copied) {
tile.buffers->map_neighbor_copied = true;
mem.copy_from_device();
}
if (mem.device == this) {
/* Can re-use memory if tile is already allocated on the sub device. */
map_tile(sub_device, tile);
mem.swap_device(sub_device, mem.device_size, tile.buffer);
}
else {
mem.swap_device(sub_device, 0, 0);
}
mem.copy_to_device();
tile.buffer = mem.device_pointer;
tile.device_size = mem.device_size;
mem.restore_device();
}
}
}
void unmap_neighbor_tiles(Device *sub_device, RenderTileNeighbors &neighbors) override
{
RenderTile &target_tile = neighbors.target;
device_vector<float> &mem = target_tile.buffers->buffer;
if (mem.device == this && matching_rendering_and_denoising_devices) {
return;
}
/* Copy denoised result back to the host. */
mem.swap_device(sub_device, target_tile.device_size, target_tile.buffer);
mem.copy_from_device();
mem.restore_device();
/* Copy denoised result to the original device. */
mem.copy_to_device();
for (int i = 0; i < RenderTileNeighbors::SIZE; i++) {
RenderTile &tile = neighbors.tiles[i];
if (!tile.buffers) {
continue;
}
device_vector<float> &mem = tile.buffers->buffer;
if (mem.device != sub_device && mem.device != this) {
/* Free up memory again if it was allocated for the copy above. */
mem.swap_device(sub_device, tile.device_size, tile.buffer);
sub_device->mem_free(mem);
mem.restore_device();
}
}
}
int get_split_task_count(DeviceTask &task) override
{
int total_tasks = 0;
list<DeviceTask> tasks;
task.split(tasks, devices.size());
foreach (SubDevice &sub, devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
total_tasks += sub.device->get_split_task_count(subtask);
}
}
return total_tasks;
}
void task_add(DeviceTask &task) override
{
list<SubDevice> task_devices = devices;
if (!denoising_devices.empty()) {
if (task.type == DeviceTask::DENOISE_BUFFER) {
/* Denoising tasks should be redirected to the denoising devices entirely. */
task_devices = denoising_devices;
}
else if (task.type == DeviceTask::RENDER && (task.tile_types & RenderTile::DENOISE)) {
const uint tile_types = task.tile_types;
/* For normal rendering tasks only redirect the denoising part to the denoising devices.
* Do not need to split the task here, since they all run through 'acquire_tile'. */
task.tile_types = RenderTile::DENOISE;
foreach (SubDevice &sub, denoising_devices) {
sub.device->task_add(task);
}
/* Rendering itself should still be executed on the rendering devices. */
task.tile_types = tile_types ^ RenderTile::DENOISE;
}
}
list<DeviceTask> tasks;
task.split(tasks, task_devices.size());
foreach (SubDevice &sub, task_devices) {
if (!tasks.empty()) {
DeviceTask subtask = tasks.front();
tasks.pop_front();
if (task.buffer)
subtask.buffer = find_matching_mem(task.buffer, sub);
if (task.rgba_byte)
subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
if (task.rgba_half)
subtask.rgba_half = sub.ptr_map[task.rgba_half];
if (task.shader_input)
subtask.shader_input = find_matching_mem(task.shader_input, sub);
if (task.shader_output)
subtask.shader_output = find_matching_mem(task.shader_output, sub);
sub.device->task_add(subtask);
if (task.buffers && task.buffers->buffer.device == this) {
/* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */
sub.device->task_wait();
}
}
}
}
void task_wait() override
{
foreach (SubDevice &sub, devices)
sub.device->task_wait();
foreach (SubDevice &sub, denoising_devices)
sub.device->task_wait();
}
void task_cancel() override
{
foreach (SubDevice &sub, devices)
sub.device->task_cancel();
foreach (SubDevice &sub, denoising_devices)
sub.device->task_cancel();
}
};
Device *device_multi_create(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background)
{
return new MultiDevice(info, stats, profiler, background);
}
CCL_NAMESPACE_END

Some files were not shown because too many files have changed in this diff Show More